X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/63282590b70f29e96c5f0f7d0db30ca48c9e0fa5..a3a9277b2d833bad63f6ca22dcf9cc563fbe0f68:/src/xbt/xbt_str.c?ds=sidebyside diff --git a/src/xbt/xbt_str.c b/src/xbt/xbt_str.c index 7ff082793a..6cd5a2da64 100644 --- a/src/xbt/xbt_str.c +++ b/src/xbt/xbt_str.c @@ -1,34 +1,32 @@ +/* $Id$ */ /* xbt_str.c - various helping functions to deal with strings */ -/* Copyright (C) 2005-2007 Malek Cherier, Martin Quinson. */ +/* Copyright (C) 2005-2008 The SimGrid Team. */ /* All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it - * under the terms of the license (GNU LGPL) which comes with this package. + * under the terms of the license (GNU LGPL) which comes with this package. */ - + #include "xbt/misc.h" #include "xbt/sysdep.h" #include "xbt/str.h" /* headers of these functions */ +#include "xbt/strbuff.h" #include "portable.h" #include "xbt/matrix.h" /* for the diff */ -static void free_string(void *d){ - free(*(void**)d); -} - /** @brief Strip whitespace (or other characters) from the end of a string. * - * Strips the whitespaces from the end of s. + * Strips the whitespaces from the end of s. * By default (when char_list=NULL), these characters get stripped: - * - * - " " (ASCII 32 (0x20)) space. - * - "\t" (ASCII 9 (0x09)) tab. - * - "\n" (ASCII 10 (0x0A)) line feed. - * - "\r" (ASCII 13 (0x0D)) carriage return. - * - "\0" (ASCII 0 (0x00)) NULL. - * - "\x0B" (ASCII 11 (0x0B)) vertical tab. + * + * - " " (ASCII 32 (0x20)) space. + * - "\t" (ASCII 9 (0x09)) tab. + * - "\n" (ASCII 10 (0x0A)) line feed. + * - "\r" (ASCII 13 (0x0D)) carriage return. + * - "\0" (ASCII 0 (0x00)) NULL. + * - "\x0B" (ASCII 11 (0x0B)) vertical tab. * * @param s The string to strip. Modified in place. * @param char_list A string which contains the characters you want to strip. 
/* Fill table[] so that table[c] is 1 for every character c that must be
 * stripped, and 0 for every other character.
 *
 * char_list is the caller-supplied set of characters to strip; when it is
 * NULL, the default set (space, tab, line feed, carriage return and vertical
 * tab) is used. The terminating NUL is never marked, so a scan driven by this
 * table always stops at the end of the string.
 */
static void str_strip_table_init(char table[256], const char *char_list)
{
  const char *c = char_list ? char_list : " \t\n\r\x0B";

  memset(table, 0, 256);
  for (; *c != '\0'; c++)
    table[(unsigned char) *c] = 1;
}

/** @brief Strip whitespace (or other characters) from the end of a string.
 *
 * Strips the trailing characters of s that belong to char_list.
 * By default (when char_list=NULL), these characters get stripped:
 *
 *  - " "    (ASCII 32 (0x20)) space.
 *  - "\t"   (ASCII 9  (0x09)) tab.
 *  - "\n"   (ASCII 10 (0x0A)) line feed.
 *  - "\r"   (ASCII 13 (0x0D)) carriage return.
 *  - "\x0B" (ASCII 11 (0x0B)) vertical tab.
 *
 * @param s The string to strip. Modified in place. Nothing is done if it is NULL.
 * @param char_list A string which contains the characters you want to strip,
 *        or NULL to use the default set above.
 */
void
xbt_str_rtrim(char *s, const char *char_list)
{
  char strip[256];
  size_t len;

  if (!s)
    return;

  str_strip_table_init(strip, char_list);

  /* Work on a length rather than on a pointer walking backwards: the pointer
   * form has to step to s-1 when the whole string is stripped, which is
   * undefined behaviour. */
  len = strlen(s);
  while (len > 0 && strip[(unsigned char) s[len - 1]])
    len--;

  s[len] = '\0';
}

/** @brief Strip whitespace (or other characters) from the beginning of a string.
 *
 * Strips the leading characters of s that belong to char_list.
 * By default (when char_list=NULL), these characters get stripped:
 *
 *  - " "    (ASCII 32 (0x20)) space.
 *  - "\t"   (ASCII 9  (0x09)) tab.
 *  - "\n"   (ASCII 10 (0x0A)) line feed.
 *  - "\r"   (ASCII 13 (0x0D)) carriage return.
 *  - "\x0B" (ASCII 11 (0x0B)) vertical tab.
 *
 * @param s The string to strip. Modified in place. Nothing is done if it is NULL.
 * @param char_list A string which contains the characters you want to strip,
 *        or NULL to use the default set above.
 */
void
xbt_str_ltrim(char *s, const char *char_list)
{
  char strip[256];
  char *cur;

  if (!s)
    return;

  str_strip_table_init(strip, char_list);

  /* The NUL terminator is never marked in strip[], so this stops at the end */
  cur = s;
  while (strip[(unsigned char) *cur])
    cur++;

  /* Shift the remainder (terminator included) only if something was skipped */
  if (cur != s)
    memmove(s, cur, strlen(cur) + 1);
}

/** @brief Strip whitespace (or other characters) from the end and the beginning of a string.
 *
 * Strips the whitespaces from both the beginning and the end of s.
 * By default (when char_list=NULL), these characters get stripped:
 *
 *  - " "    (ASCII 32 (0x20)) space.
 *  - "\t"   (ASCII 9  (0x09)) tab.
 *  - "\n"   (ASCII 10 (0x0A)) line feed.
 *  - "\r"   (ASCII 13 (0x0D)) carriage return.
 *  - "\0"   (ASCII 0  (0x00)) NULL.
 *  - "\x0B" (ASCII 11 (0x0B)) vertical tab.
 *
 * @param s The string to strip.
 * @param char_list A string which contains the characters you want to strip.
 */
@@ -123,18 +121,18 @@ xbt_str_ltrim( char* s, const char* char_list) */ void xbt_str_trim(char* s, const char* char_list ){ - - if(!s) - return; - - xbt_str_rtrim(s,char_list); - xbt_str_ltrim(s,char_list); + + if(!s) + return; + + xbt_str_rtrim(s,char_list); + xbt_str_ltrim(s,char_list); } /** @brief Replace double whitespaces (but no other characters) from the string. * * The function modifies the string so that each time that several spaces appear, - * they are replaced by a single space. It will only do so for spaces (ASCII 32, 0x20). + * they are replaced by a single space. It will only do so for spaces (ASCII 32, 0x20). * * @param s The string to strip. Modified in place. * @@ -150,48 +148,86 @@ xbt_str_strip_spaces(char *s) { while (1) { if (!*p) goto end; - + if (*p != ' ') break; - + p++; } - + e = 1; - + do { if (e) *s++ = *p; - + if (!*++p) goto end; - + if (e ^ (*p!=' ')) if ((e = !e)) - *s++ = ' '; + *s++ = ' '; } while (1); - end: + end: *s = '\0'; } -/** @brief Splits a string into a dynar of strings - * +/** @brief Substitutes a char for another in a string + * + * @param str the string to modify + * @param from char to search + * @param to char to put instead + * @param occurence maximum number of substitutions to perform (a value <= 0 means all) + */ +void xbt_str_subst(char *str, char from, char to, int occurence) { + char *p = str; + while (*p != '\0') { + if (*p == from) { + *p = to; + if (occurence == 1) + return; + occurence--; + } + p++; + } +} +/** @brief Replaces a set of variables by their values + * + * @param str where to apply the change + * @param patterns what to change + * @return The string modified + * + * Check xbt_strbuff_varsubst() for more details, and remember that the string may be reallocated (moved) in the process. 
+ */ + +char *xbt_str_varsubst(char *str, xbt_dict_t patterns) { + xbt_strbuff_t buff = xbt_strbuff_new_from(str); + char * res; + xbt_strbuff_varsubst(buff,patterns); + res = buff->data; + xbt_strbuff_free_container(buff); + return res; +} + + +/** @brief Splits a string into a dynar of strings + * * @param s: the string to split * @param sep: a string of all chars to consider as separator. * - * By default (with sep=NULL), these characters are used as separator: - * - * - " " (ASCII 32 (0x20)) space. - * - "\t" (ASCII 9 (0x09)) tab. - * - "\n" (ASCII 10 (0x0A)) line feed. - * - "\r" (ASCII 13 (0x0D)) carriage return. - * - "\0" (ASCII 0 (0x00)) NULL. - * - "\x0B" (ASCII 11 (0x0B)) vertical tab. + * By default (with sep=NULL), these characters are used as separator: + * + * - " " (ASCII 32 (0x20)) space. + * - "\t" (ASCII 9 (0x09)) tab. + * - "\n" (ASCII 10 (0x0A)) line feed. + * - "\r" (ASCII 13 (0x0D)) carriage return. + * - "\0" (ASCII 0 (0x00)) NULL. + * - "\x0B" (ASCII 11 (0x0B)) vertical tab. */ xbt_dynar_t xbt_str_split(const char *s, const char *sep) { - xbt_dynar_t res = xbt_dynar_new(sizeof(char*), free_string); + xbt_dynar_t res = xbt_dynar_new(sizeof(char*), xbt_free_ref); const char *p, *q; int done; const char* sep_dflt = " \t\n\r\x0B"; @@ -207,9 +243,9 @@ xbt_dynar_t xbt_str_split(const char *s, const char *sep) { is_sep[(unsigned char) *sep++] = 1; } is_sep[0] = 1; /* End of string is also separator */ - + /* Do the job */ - p=q=s; + p=q=s; done=0; if (s[0] == '\0') @@ -233,18 +269,66 @@ xbt_dynar_t xbt_str_split(const char *s, const char *sep) { return res; } +/** + * \brief This functions splits a string after using another string as separator + * For example A!!B!!C splitted after !! 
will return the dynar {A,B,C} + * \return A dynar containing the string tokens (each one a newly allocated char*) + */ +xbt_dynar_t xbt_str_split_str(const char *s, const char *sep) { + xbt_dynar_t res = xbt_dynar_new(sizeof(char*), xbt_free_ref); + int done; + const char *p, *q; + + p = q = s; + done = 0; + + if (s[0] == '\0') + return res; + if (sep[0] == '\0') { + s = xbt_strdup(s); + xbt_dynar_push(res, &s); + return res; + } + + while (!done) { + char *to_push; + int v = 0; + //get the start of the next occurrence of the separator + q = strstr(p, sep); + //if the separator was not found, add the rest of the string + if (NULL == q) { + v = strlen(p); + to_push = malloc(v + 1); + memcpy(to_push, p, v); + to_push[v] = '\0'; + xbt_dynar_push(res, &to_push); + done = 1; + } + else { + //copy the token that precedes the separator + to_push = malloc(q - p + 1); + memcpy(to_push, p, q - p); + //add string terminator + to_push[q - p] = '\0'; + xbt_dynar_push(res, &to_push); + p = q +strlen(sep); + } + } + return res; +} + /** @brief Splits a string into a dynar of strings, taking quotes into account - * - * It basically does the same argument separation than the shell, where white - * spaces can be escaped and where arguments are never splitted within a + * + * It basically does the same argument separation as the shell, where white + * spaces can be escaped and where arguments are never split within a * quote group. * Several subsequent spaces are ignored (unless within quotes, of course). 
* */ xbt_dynar_t xbt_str_split_quoted(const char *s) { - xbt_dynar_t res = xbt_dynar_new(sizeof(char*), free_string); - char *str; /* we have to copy the string before, to handle backslashes */ + xbt_dynar_t res = xbt_dynar_new(sizeof(char*), xbt_free_ref); + char *str_to_free; /* we have to copy the string before, to handle backslashes */ char *beg, *end; /* pointers around the parsed chunk */ int in_simple_quote=0, in_double_quote=0; int done = 0; @@ -252,98 +336,100 @@ xbt_dynar_t xbt_str_split_quoted(const char *s) { if (s[0] == '\0') return res; - beg = str = xbt_strdup(s); - + beg = str_to_free = xbt_strdup(s); + /* trim leading spaces */ xbt_str_ltrim(beg," "); end=beg; - + while (!done) { - - - switch (*end) { - case '\\': - ctn = 1; - /* Protected char; move it closer */ - memmove(end,end+1,strlen(end)); - if (*end=='\0') - THROW0(arg_error,0,"String ends with \\"); - end++; /* Pass the protected char */ - break; - case '\'': - ctn = 1; - if (!in_double_quote) { - in_simple_quote = !in_simple_quote; - memmove(end,end+1,strlen(end)); - } else { - /* simple quote protected by double ones */ - end++; - } - break; - case '"': - ctn = 1; - if (!in_simple_quote) { - in_double_quote = !in_double_quote; - memmove(end,end+1,strlen(end)); - } else { - /* double quote protected by simple ones */ - end++; - } - break; - case ' ': - case '\t': - case '\n': - case '\0': - if (*end == '\0' && (in_simple_quote || in_double_quote)) { - THROW2(arg_error,0, - "End of string found while searching for %c in %s", - (in_simple_quote?'\'':'"'), - s); - } - if (in_simple_quote || in_double_quote) { - end++; - } else { - if (ctn) { - /* Found a separator. 
Push the string if contains something */ - char *topush=xbt_malloc(end-beg+1); - memcpy(topush,beg,end-beg); - topush[end - beg] = '\0'; - xbt_dynar_push(res,&topush); - } - ctn= 0; - - if (*end == '\0') { - done = 1; - break; - } - - beg=++end; - xbt_str_ltrim(beg," "); - end=beg; - } - break; - - default: - ctn = 1; - end++; - } + switch (*end) { + case '\\': + ctn = 1; + /* Protected char; move it closer */ + memmove(end,end+1,strlen(end)); + if (*end=='\0') + THROW0(arg_error,0,"String ends with \\"); + end++; /* Pass the protected char */ + break; + + case '\'': + ctn = 1; + if (!in_double_quote) { + in_simple_quote = !in_simple_quote; + memmove(end,end+1,strlen(end)); + } else { + /* simple quote protected by double ones */ + end++; + } + break; + case '"': + ctn = 1; + if (!in_simple_quote) { + in_double_quote = !in_double_quote; + memmove(end,end+1,strlen(end)); + } else { + /* double quote protected by simple ones */ + end++; + } + break; + + case ' ': + case '\t': + case '\n': + case '\0': + if (*end == '\0' && (in_simple_quote || in_double_quote)) { + THROW2(arg_error,0, + "End of string found while searching for %c in %s", + (in_simple_quote?'\'':'"'), + s); + } + if (in_simple_quote || in_double_quote) { + end++; + } else { + if (ctn) { + /* Found a separator. 
Push the string if contains something */ + char *topush=xbt_malloc(end-beg+1); + memcpy(topush,beg,end-beg); + topush[end - beg] = '\0'; + xbt_dynar_push(res,&topush); + } + ctn= 0; + + if (*end == '\0') { + done = 1; + break; + } + + beg=++end; + xbt_str_ltrim(beg," "); + end=beg; + } + break; + + default: + ctn = 1; + end++; + } } - free(str); + free(str_to_free); return res; } #ifdef SIMGRID_TEST +#include "xbt/str.h" + #define mytest(name, input, expected) \ xbt_test_add0(name); \ d=xbt_str_split_quoted(input); \ s=xbt_str_join(d,"XXX"); \ xbt_test_assert3(!strcmp(s,expected),\ - "Input (%s) leads to (%s) instead of (%s)", \ - input,s,expected);\ - free(s); \ - xbt_dynar_free(&d); + "Input (%s) leads to (%s) instead of (%s)", \ + input,s,expected);\ + free(s); \ + xbt_dynar_free(&d); XBT_TEST_SUITE("xbt_str","String Handling"); XBT_TEST_UNIT("xbt_str_split_quoted",test_split_quoted, "test the function xbt_str_split_quoted") { @@ -364,16 +450,37 @@ XBT_TEST_UNIT("xbt_str_split_quoted",test_split_quoted, "test the function xbt_s mytest("Backslashed quotes + quotes", "'toto \\'tutu' tata", "toto 'tutuXXXtata"); } + +#define mytest_str(name, input, separator, expected) \ + xbt_test_add0(name); \ + d=xbt_str_split_str(input, separator); \ + s=xbt_str_join(d,"XXX"); \ + xbt_test_assert3(!strcmp(s,expected),\ + "Input (%s) leads to (%s) instead of (%s)", \ + input,s,expected);\ + free(s); \ + xbt_dynar_free(&d); + +XBT_TEST_UNIT("xbt_str_split_str",test_split_str, "test the function xbt_str_split_str") { + xbt_dynar_t d; + char *s; + + mytest_str("Empty string and separator", "", "", ""); + mytest_str("Empty string", "", "##", ""); + mytest_str("Empty separator", "toto", "", "toto"); + mytest_str("String with no separator in it", "toto", "##", "toto"); + mytest_str("Basic test", "toto##tutu", "##", "totoXXXtutu"); +} #endif /* SIMGRID_TEST */ - + /** @brief Join a set of strings as a single string */ char *xbt_str_join(xbt_dynar_t dyn, const char*sep) { int 
len=1,dyn_len=xbt_dynar_length(dyn); - int cpt; + unsigned int cpt; char *cursor; char *res,*p; - + if (!dyn_len) return xbt_strdup(""); @@ -386,73 +493,77 @@ char *xbt_str_join(xbt_dynar_t dyn, const char*sep) { res = xbt_malloc(len); p=res; xbt_dynar_foreach(dyn,cpt,cursor) { - if (cpt= (*n) + 1) - *buf = xbt_realloc(*buf, *n += 512); - - (*buf)[i] = ch; - - if ((*buf)[i] == '\n') { - i++; - (*buf)[i] = '\0'; - break; - } - } - - if (i == *n) - *buf = xbt_realloc(*buf, *n += 1); - - (*buf)[i] = '\0'; - - return (ssize_t)i; + + size_t i; + int ch; + + if (!*buf) { + *buf = xbt_malloc(512); + *n = 512; + } + + if (feof(stream)) + return (ssize_t)-1; + + for (i=0; (ch = fgetc(stream)) != EOF; i++) { + + if (i >= (*n) + 1) + *buf = xbt_realloc(*buf, *n += 512); + + (*buf)[i] = ch; + + if ((*buf)[i] == '\n') { + i++; + (*buf)[i] = '\0'; + break; + } + } + + if (i == *n) + *buf = xbt_realloc(*buf, *n += 1); + + (*buf)[i] = '\0'; + + return (ssize_t)i; } #endif /* HAVE_GETLINE */ /* - * Diff related functions + * Diff related functions */ static xbt_matrix_t diff_build_LCS(xbt_dynar_t da, xbt_dynar_t db) { xbt_matrix_t C = xbt_matrix_new(xbt_dynar_length(da),xbt_dynar_length(db), - sizeof(int),NULL); - int i,j; + sizeof(int),NULL); + unsigned long i,j; /* Compute the LCS */ /* @@ -468,32 +579,33 @@ static xbt_matrix_t diff_build_LCS(xbt_dynar_t da, xbt_dynar_t db) { else: C[i,j] := max(C[i,j-1], C[i-1,j]) return C[m,n] - */ - for (i=0; i 0 and j > 0 and X[i] = Y[j] printDiff(C, X, Y, i-1, j-1) @@ -505,29 +617,29 @@ static void diff_build_diff(xbt_dynar_t res, else if i > 0 and (j = 0 or C[i,j-1] < C[i-1,j]) printDiff(C, X, Y, i-1, j) print "- " + X[i] - */ + */ if (i>=0 && j >= 0 && !strcmp(xbt_dynar_get_as(da,i,char*), - xbt_dynar_get_as(db,j,char*))) { + xbt_dynar_get_as(db,j,char*))) { diff_build_diff(res,C,da,db,i-1,j-1); topush = bprintf(" %s",xbt_dynar_get_as(da,i,char*)); xbt_dynar_push(res, &topush); - } else if (j>=0 && - (i<=0 || xbt_matrix_get_as(C,i,j-1,int) 
>= xbt_matrix_get_as(C,i-1,j,int))) { + } else if (j>=0 && + (i<=0 ||j==0|| xbt_matrix_get_as(C,i,j-1,int) >= xbt_matrix_get_as(C,i-1,j,int))) { diff_build_diff(res,C,da,db,i,j-1); topush = bprintf("+ %s",xbt_dynar_get_as(db,j,char*)); xbt_dynar_push(res,&topush); - } else if (i>=0 && - (j<=0 || xbt_matrix_get_as(C,i,j-1,int) < xbt_matrix_get_as(C,i-1,j,int))) { + } else if (i>=0 && + (j<=0 || xbt_matrix_get_as(C,i,j-1,int) < xbt_matrix_get_as(C,i-1,j,int))) { diff_build_diff(res,C,da,db,i-1,j); topush = bprintf("- %s",xbt_dynar_get_as(da,i,char*)); - xbt_dynar_push(res,&topush); + xbt_dynar_push(res,&topush); } else if (i<=0 && j<=0) { return; } else { THROW2(arg_error,0,"Invalid values: i=%d, j=%d",i,j); } - + } /** @brief Compute the unified diff of two strings */ @@ -536,27 +648,28 @@ char *xbt_str_diff(char *a, char *b) { xbt_dynar_t db = xbt_str_split(b, "\n"); xbt_matrix_t C = diff_build_LCS(da,db); - xbt_dynar_t diff = xbt_dynar_new(sizeof(char*),free_string); + xbt_dynar_t diff = xbt_dynar_new(sizeof(char*),xbt_free_ref); char *res=NULL; - + diff_build_diff(diff, C, da,db, xbt_dynar_length(da)-1, xbt_dynar_length(db)-1); /* Clean empty lines at the end */ while (xbt_dynar_length(diff) > 0) { - char *str; - xbt_dynar_pop(diff,&str); - if (str[0]=='\0' || !strcmp(str," ")) { - free(str); - } else { - xbt_dynar_push(diff,&str); - break; - } - } + char *str; + xbt_dynar_pop(diff,&str); + if (str[0]=='\0' || !strcmp(str," ")) { + free(str); + } else { + xbt_dynar_push(diff,&str); + break; + } + } res = xbt_str_join(diff, "\n"); xbt_dynar_free(&da); xbt_dynar_free(&db); xbt_dynar_free(&diff); xbt_matrix_free(C); - + return res; } +