X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/b752e4c0bf35563ab1cae8af2940258327f1d415..a01e72b3aab192e7f8a4e2f26e9cde6ecc9a94ec:/src/xbt/xbt_str.c diff --git a/src/xbt/xbt_str.c b/src/xbt/xbt_str.c index 0897546e6c..221f7f4b16 100644 --- a/src/xbt/xbt_str.c +++ b/src/xbt/xbt_str.c @@ -7,12 +7,6 @@ /* This program is free software; you can redistribute it and/or modify it * under the terms of the license (GNU LGPL) which comes with this package. */ - -/* Returns the string without leading whitespaces (xbt_str_ltrim), - * trailing whitespaces (xbt_str_rtrim), - * or both leading and trailing whitespaces (xbt_str_trim). - * (in-place modification of the string) - */ #include "xbt/misc.h" #include "xbt/sysdep.h" @@ -20,10 +14,6 @@ #include "portable.h" #include "xbt/matrix.h" /* for the diff */ -static void free_string(void *d){ - free(*(void**)d); -} - /** @brief Strip whitespace (or other characters) from the end of a string. * * Strips the whitespaces from the end of s. @@ -108,7 +98,7 @@ xbt_str_ltrim( char* s, const char* char_list) while(*cur && white_char[(unsigned char)*cur]) ++cur; - memmove(s,cur, strlen(cur)); + memmove(s,cur, strlen(cur)+1); } /** @brief Strip whitespace (or other characters) from the end and the begining of a string. @@ -196,10 +186,9 @@ xbt_str_strip_spaces(char *s) { * - "\x0B" (ASCII 11 (0x0B)) vertical tab. */ -xbt_dynar_t xbt_str_split(char *s, const char *sep) { - char *str=xbt_strdup(s); - xbt_dynar_t res = xbt_dynar_new(sizeof(char*), free_string); - char *p, *q; +xbt_dynar_t xbt_str_split(const char *s, const char *sep) { + xbt_dynar_t res = xbt_dynar_new(sizeof(char*), xbt_free_ref); + const char *p, *q; int done; const char* sep_dflt = " \t\n\r\x0B"; char is_sep[256] = {1,0}; @@ -216,58 +205,285 @@ xbt_dynar_t xbt_str_split(char *s, const char *sep) { is_sep[0] = 1; /* End of string is also separator */ /* Do the job */ - p=q=str; + p=q=s; done=0; + + if (s[0] == '\0') + return res; + while (!done) { char *topush; while (!is_sep[(unsigned char)*q]) { q++; } - if (*q == '\0') { -#ifdef UNDEF - if (p==q) { - /* do not push last empty line */ - free(str); - return res; - } -#endif + if (*q == '\0') done = 1; - } else { - *q='\0'; - } - topush=xbt_strdup(p); + + topush=xbt_malloc(q-p+1); + memcpy(topush,p,q-p); + topush[q - p] = '\0'; xbt_dynar_push(res,&topush); p = ++q; } - free (str); + + return res; +} + +/** + * \brief This functions splits a string after using another string as separator + * For example A!!B!!C splitted after !! will return the dynar {A,B,C} + * \return An array of dynars containing the string tokens +*/ +xbt_dynar_t xbt_str_split_str(const char *s, const char *sep) { + xbt_dynar_t res = xbt_dynar_new(sizeof(char*), xbt_free_ref); + int done; + const char *p, *q; + + p = q = s; + done = 0; + + if (s[0] == '\0') + return res; + if (sep[0] == '\0') { + s = xbt_strdup(s); + xbt_dynar_push(res, &s); + return res; + } + + while (!done) { + char *to_push; + int v = 0; + //get the start of the first occurence of the substring + q = strstr(p, sep); + //if substring was not found add the entire string + if (NULL == q) { + v = strlen(p); + to_push = malloc(v + 1); + memcpy(to_push, p, v); + to_push[v] = '\0'; + xbt_dynar_push(res, &to_push); + done = 1; + } + else { + //get the appearance + to_push = malloc(q - p + 1); + memcpy(to_push, p, q - p); + //add string terminator + to_push[q - p] = '\0'; + xbt_dynar_push(res, &to_push); + p = q +strlen(sep); + } + } + return res; +} + +/** @brief Splits a string into a dynar of strings, taking quotes into account + * + * It basically does the same argument separation than the shell, where white + * spaces can be escaped and where arguments are never splitted within a + * quote group. + * Several subsequent spaces are ignored (unless within quotes, of course). + * + */ + +xbt_dynar_t xbt_str_split_quoted(const char *s) { + xbt_dynar_t res = xbt_dynar_new(sizeof(char*), xbt_free_ref); + char *str; /* we have to copy the string before, to handle backslashes */ + char *beg, *end; /* pointers around the parsed chunk */ + int in_simple_quote=0, in_double_quote=0; + int done = 0; + int ctn = 0; /* Got something in this block */ + + if (s[0] == '\0') + return res; + beg = str = xbt_strdup(s); + + /* trim leading spaces */ + xbt_str_ltrim(beg," "); + end=beg; + + while (!done) { + + + switch (*end) { + case '\\': + ctn = 1; + /* Protected char; move it closer */ + memmove(end,end+1,strlen(end)); + if (*end=='\0') + THROW0(arg_error,0,"String ends with \\"); + end++; /* Pass the protected char */ + break; + + case '\'': + ctn = 1; + if (!in_double_quote) { + in_simple_quote = !in_simple_quote; + memmove(end,end+1,strlen(end)); + } else { + /* simple quote protected by double ones */ + end++; + } + break; + case '"': + ctn = 1; + if (!in_simple_quote) { + in_double_quote = !in_double_quote; + memmove(end,end+1,strlen(end)); + } else { + /* double quote protected by simple ones */ + end++; + } + break; + + case ' ': + case '\t': + case '\n': + case '\0': + if (*end == '\0' && (in_simple_quote || in_double_quote)) { + THROW2(arg_error,0, + "End of string found while searching for %c in %s", + (in_simple_quote?'\'':'"'), + s); + } + if (in_simple_quote || in_double_quote) { + end++; + } else { + if (ctn) { + /* Found a separator. Push the string if contains something */ + char *topush=xbt_malloc(end-beg+1); + memcpy(topush,beg,end-beg); + topush[end - beg] = '\0'; + xbt_dynar_push(res,&topush); + } + ctn= 0; + + if (*end == '\0') { + done = 1; + break; + } + + beg=++end; + xbt_str_ltrim(beg," "); + end=beg; + } + break; + + default: + ctn = 1; + end++; + } + } + free(str); return res; } +#ifdef SIMGRID_TEST +#include "xbt/str.h" + +#define mytest(name, input, expected) \ + xbt_test_add0(name); \ + d=xbt_str_split_quoted(input); \ + s=xbt_str_join(d,"XXX"); \ + xbt_test_assert3(!strcmp(s,expected),\ + "Input (%s) leads to (%s) instead of (%s)", \ + input,s,expected);\ + free(s); \ + xbt_dynar_free(&d); + +XBT_TEST_SUITE("xbt_str","String Handling"); +XBT_TEST_UNIT("xbt_str_split_quoted",test_split_quoted, "test the function xbt_str_split_quoted") { + xbt_dynar_t d; + char *s; + + mytest("Empty", "", ""); + mytest("Basic test", "toto tutu", "totoXXXtutu"); + mytest("Useless backslashes", "\\t\\o\\t\\o \\t\\u\\t\\u", "totoXXXtutu"); + mytest("Protected space", "toto\\ tutu", "toto tutu"); + mytest("Several spaces", "toto tutu", "totoXXXtutu"); + mytest("LTriming", " toto tatu", "totoXXXtatu"); + mytest("Triming", " toto tutu ", "totoXXXtutu"); + mytest("Single quotes", "'toto tutu' tata", "toto tutuXXXtata"); + mytest("Double quotes", "\"toto tutu\" tata", "toto tutuXXXtata"); + mytest("Mixed quotes", "\"toto' 'tutu\" tata", "toto' 'tutuXXXtata"); + mytest("Backslashed quotes", "\\'toto tutu\\' tata", "'totoXXXtutu'XXXtata"); + mytest("Backslashed quotes + quotes", "'toto \\'tutu' tata", "toto 'tutuXXXtata"); + +} + +#define mytest_str(name, input, separator, expected) \ + xbt_test_add0(name); \ + d=xbt_str_split_str(input, separator); \ + s=xbt_str_join(d,"XXX"); \ + xbt_test_assert3(!strcmp(s,expected),\ + "Input (%s) leads to (%s) instead of (%s)", \ + input,s,expected);\ + free(s); \ + xbt_dynar_free(&d); + +XBT_TEST_UNIT("xbt_str_split_str",test_split_str, "test the function xbt_str_split_str") { + xbt_dynar_t d; + char *s; + + mytest_str("Empty string and separator", "", "", ""); + mytest_str("Empty string", "", "##", ""); + mytest_str("Empty separator", "toto", "", "toto"); + mytest_str("String with no separator in it", "toto", "##", "toto"); + mytest_str("Basic test", "toto##tutu", "##", "totoXXXtutu"); +} +#endif /* SIMGRID_TEST */ + /** @brief Join a set of strings as a single string */ char *xbt_str_join(xbt_dynar_t dyn, const char*sep) { - int len=2; - int cpt; + int len=1,dyn_len=xbt_dynar_length(dyn); + unsigned int cpt; char *cursor; char *res,*p; + + if (!dyn_len) + return xbt_strdup(""); + /* compute the length */ xbt_dynar_foreach(dyn,cpt,cursor) { len+=strlen(cursor); } - len+=strlen(sep)*(xbt_dynar_length(dyn)-1); + len+=strlen(sep)*dyn_len; /* Do the job */ res = xbt_malloc(len); p=res; xbt_dynar_foreach(dyn,cpt,cursor) { - p+=sprintf(p,"%s%s",cursor,sep); + if ((int)cpt0 && - (i<=0 || xbt_matrix_get_as(C,i,j-1,int) >= xbt_matrix_get_as(C,i-1,j,int))) { + } else if (j>=0 && + (i<=0 ||j==0|| xbt_matrix_get_as(C,i,j-1,int) >= xbt_matrix_get_as(C,i-1,j,int))) { diff_build_diff(res,C,da,db,i,j-1); topush = bprintf("+ %s",xbt_dynar_get_as(db,j,char*)); xbt_dynar_push(res,&topush); - } else if (i>0 && + } else if (i>=0 && (j<=0 || xbt_matrix_get_as(C,i,j-1,int) < xbt_matrix_get_as(C,i-1,j,int))) { diff_build_diff(res,C,da,db,i-1,j); topush = bprintf("- %s",xbt_dynar_get_as(da,i,char*)); @@ -380,6 +599,7 @@ static void diff_build_diff(xbt_dynar_t res, } else { THROW2(arg_error,0,"Invalid values: i=%d, j=%d",i,j); } + } /** @brief Compute the unified diff of two strings */ @@ -388,10 +608,21 @@ char *xbt_str_diff(char *a, char *b) { xbt_dynar_t db = xbt_str_split(b, "\n"); xbt_matrix_t C = diff_build_LCS(da,db); - xbt_dynar_t diff = xbt_dynar_new(sizeof(char*),free_string); + xbt_dynar_t diff = xbt_dynar_new(sizeof(char*),xbt_free_ref); char *res=NULL; diff_build_diff(diff, C, da,db, xbt_dynar_length(da)-1, xbt_dynar_length(db)-1); + /* Clean empty lines at the end */ + while (xbt_dynar_length(diff) > 0) { + char *str; + xbt_dynar_pop(diff,&str); + if (str[0]=='\0' || !strcmp(str," ")) { + free(str); + } else { + xbt_dynar_push(diff,&str); + break; + } + } res = xbt_str_join(diff, "\n"); xbt_dynar_free(&da);