X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/blobdiff_plain/8c9adb42b4ae8fac83648b169ac445078c5b7567..3d845df082d79ab89649c1a8829eea201ae01085:/src/xbt/xbt_str.c diff --git a/src/xbt/xbt_str.c b/src/xbt/xbt_str.c index a0b6bfa015..d1f1cd91ea 100644 --- a/src/xbt/xbt_str.c +++ b/src/xbt/xbt_str.c @@ -1,6 +1,6 @@ /* xbt_str.c - various helping functions to deal with strings */ -/* Copyright (c) 2007, 2008, 2009, 2010. The SimGrid Team. +/* Copyright (c) 2007-2014. The SimGrid Team. * All rights reserved. */ /* This program is free software; you can redistribute it and/or modify it @@ -18,12 +18,12 @@ * Strips the whitespaces from the end of s. * By default (when char_list=NULL), these characters get stripped: * - * - " " (ASCII 32 (0x20)) space. - * - "\t" (ASCII 9 (0x09)) tab. - * - "\n" (ASCII 10 (0x0A)) line feed. - * - "\r" (ASCII 13 (0x0D)) carriage return. - * - "\0" (ASCII 0 (0x00)) NULL. - * - "\x0B" (ASCII 11 (0x0B)) vertical tab. + * - " " (ASCII 32 (0x20)) space. + * - "\t" (ASCII 9 (0x09)) tab. + * - "\n" (ASCII 10 (0x0A)) line feed. + * - "\r" (ASCII 13 (0x0D)) carriage return. + * - "\0" (ASCII 0 (0x00)) NULL. + * - "\x0B" (ASCII 11 (0x0B)) vertical tab. * * @param s The string to strip. Modified in place. * @param char_list A string which contains the characters you want to strip. @@ -62,12 +62,12 @@ void xbt_str_rtrim(char *s, const char *char_list) * Strips the whitespaces from the begining of s. * By default (when char_list=NULL), these characters get stripped: * - * - " " (ASCII 32 (0x20)) space. - * - "\t" (ASCII 9 (0x09)) tab. - * - "\n" (ASCII 10 (0x0A)) line feed. - * - "\r" (ASCII 13 (0x0D)) carriage return. - * - "\0" (ASCII 0 (0x00)) NULL. - * - "\x0B" (ASCII 11 (0x0B)) vertical tab. + * - " " (ASCII 32 (0x20)) space. + * - "\t" (ASCII 9 (0x09)) tab. + * - "\n" (ASCII 10 (0x0A)) line feed. + * - "\r" (ASCII 13 (0x0D)) carriage return. + * - "\0" (ASCII 0 (0x00)) NULL. + * - "\x0B" (ASCII 11 (0x0B)) vertical tab. * * @param s The string to strip. Modified in place. * @param char_list A string which contains the characters you want to strip. @@ -103,12 +103,12 @@ void xbt_str_ltrim(char *s, const char *char_list) * Strips the whitespaces from both the beginning and the end of s. * By default (when char_list=NULL), these characters get stripped: * - * - " " (ASCII 32 (0x20)) space. - * - "\t" (ASCII 9 (0x09)) tab. - * - "\n" (ASCII 10 (0x0A)) line feed. - * - "\r" (ASCII 13 (0x0D)) carriage return. - * - "\0" (ASCII 0 (0x00)) NULL. - * - "\x0B" (ASCII 11 (0x0B)) vertical tab. + * - " " (ASCII 32 (0x20)) space. + * - "\t" (ASCII 9 (0x09)) tab. + * - "\n" (ASCII 10 (0x0A)) line feed. + * - "\r" (ASCII 13 (0x0D)) carriage return. + * - "\0" (ASCII 0 (0x00)) NULL. + * - "\x0B" (ASCII 11 (0x0B)) vertical tab. * * @param s The string to strip. * @param char_list A string which contains the characters you want to strip. @@ -191,14 +191,21 @@ void xbt_str_subst(char *str, char from, char to, int occurence) /** @brief Replaces a set of variables by their values * - * @param str where to apply the change - * @param patterns what to change + * @param str The input of the replacement process + * @param patterns The changes to apply * @return The string modified * - * Check xbt_strbuff_varsubst() for more details, and remember that the string may be reallocated (moved) in the process. + * Both '$toto' and '${toto}' are valid (and the two writing are equivalent). + * + * If the variable name contains spaces, use the brace version (ie, ${toto tutu}) + * + * You can provide a default value to use if the variable is not set in the dict by using + * '${var:=default}' or '${var:-default}'. These two forms are equivalent, even if they + * shouldn't to respect the shell standard (:= form should set the value in the dict, + * but does not) (BUG). */ -char *xbt_str_varsubst(char *str, xbt_dict_t patterns) +char *xbt_str_varsubst(const char *str, xbt_dict_t patterns) { xbt_strbuff_t buff = xbt_strbuff_new_from(str); char *res; @@ -216,12 +223,12 @@ char *xbt_str_varsubst(char *str, xbt_dict_t patterns) * * By default (with sep=NULL), these characters are used as separator: * - * - " " (ASCII 32 (0x20)) space. - * - "\t" (ASCII 9 (0x09)) tab. - * - "\n" (ASCII 10 (0x0A)) line feed. - * - "\r" (ASCII 13 (0x0D)) carriage return. - * - "\0" (ASCII 0 (0x00)) NULL. - * - "\x0B" (ASCII 11 (0x0B)) vertical tab. + * - " " (ASCII 32 (0x20)) space. + * - "\t" (ASCII 9 (0x09)) tab. + * - "\n" (ASCII 10 (0x0A)) line feed. + * - "\r" (ASCII 13 (0x0D)) carriage return. + * - "\0" (ASCII 0 (0x00)) NULL. + * - "\x0B" (ASCII 11 (0x0B)) vertical tab. */ xbt_dynar_t xbt_str_split(const char *s, const char *sep) @@ -298,14 +305,14 @@ xbt_dynar_t xbt_str_split_str(const char *s, const char *sep) //if substring was not found add the entire string if (NULL == q) { v = strlen(p); - to_push = malloc(v + 1); + to_push = xbt_malloc(v + 1); memcpy(to_push, p, v); to_push[v] = '\0'; xbt_dynar_push(res, &to_push); done = 1; } else { //get the appearance - to_push = malloc(q - p + 1); + to_push = xbt_malloc(q - p + 1); memcpy(to_push, p, q - p); //add string terminator to_push[q - p] = '\0'; @@ -316,15 +323,18 @@ xbt_dynar_t xbt_str_split_str(const char *s, const char *sep) return res; } -/** @brief Just like @xbt_str_split_quoted (Splits a string into a dynar of strings), but without memory allocation +/** @brief Just like @ref xbt_str_split_quoted (Splits a string into a dynar of strings), but without memory allocation * * The string passed as argument must be writable (not const) * The elements of the dynar are just parts of the string passed as argument. + * So if you don't store that argument elsewhere, you should free it in addition + * to freeing the dynar. This can be done by simply freeing the first argument + * of the dynar: + * free(xbt_dynar_get_ptr(dynar,0)); * - * To free the structure constructed by this function, free the first element and free the dynar: - * - * free(xbt_dynar_get_ptr(dynar,0)); - * xbt_dynar_free(&dynar); + * Actually this function puts a bunch of \0 in the memory area you passed as + * argument to separate the elements, and pushes the address of each chunk + * in the resulting dynar. Yes, that's uneven. Yes, that's gory. But that's efficient. */ xbt_dynar_t xbt_str_split_quoted_in_place(char *s) { xbt_dynar_t res = xbt_dynar_new(sizeof(char *), NULL); @@ -338,7 +348,7 @@ xbt_dynar_t xbt_str_split_quoted_in_place(char *s) { beg = s; - /* do not trim leading spaces: caller responsability to clean his cruft */ + /* do not trim leading spaces: caller responsibility to clean his cruft */ end = beg; while (!done) { @@ -350,7 +360,7 @@ xbt_dynar_t xbt_str_split_quoted_in_place(char *s) { /* Protected char; move it closer */ memmove(end, end + 1, strlen(end)); if (*end == '\0') - THROW0(arg_error, 0, "String ends with \\"); + THROWF(arg_error, 0, "String ends with \\"); end++; /* Pass the protected char */ break; @@ -380,7 +390,7 @@ xbt_dynar_t xbt_str_split_quoted_in_place(char *s) { case '\n': case '\0': if (*end == '\0' && (in_simple_quote || in_double_quote)) { - THROW2(arg_error, 0, + THROWF(arg_error, 0, "End of string found while searching for %c in %s", (in_simple_quote ? '\'' : '"'), s); } @@ -449,66 +459,6 @@ xbt_dynar_t xbt_str_split_quoted(const char *s) return res; } -#ifdef SIMGRID_TEST -#include "xbt/str.h" - -#define mytest(name, input, expected) \ - xbt_test_add0(name); \ - d=xbt_str_split_quoted(input); \ - s=xbt_str_join(d,"XXX"); \ - xbt_test_assert3(!strcmp(s,expected),\ - "Input (%s) leads to (%s) instead of (%s)", \ - input,s,expected);\ - free(s); \ - xbt_dynar_free(&d); - -XBT_TEST_SUITE("xbt_str", "String Handling"); -XBT_TEST_UNIT("xbt_str_split_quoted", test_split_quoted, "test the function xbt_str_split_quoted") -{ - xbt_dynar_t d; - char *s; - - mytest("Empty", "", ""); - mytest("Basic test", "toto tutu", "totoXXXtutu"); - mytest("Useless backslashes", "\\t\\o\\t\\o \\t\\u\\t\\u", - "totoXXXtutu"); - mytest("Protected space", "toto\\ tutu", "toto tutu"); - mytest("Several spaces", "toto tutu", "totoXXXtutu"); - mytest("LTriming", " toto tatu", "totoXXXtatu"); - mytest("Triming", " toto tutu ", "totoXXXtutu"); - mytest("Single quotes", "'toto tutu' tata", "toto tutuXXXtata"); - mytest("Double quotes", "\"toto tutu\" tata", "toto tutuXXXtata"); - mytest("Mixed quotes", "\"toto' 'tutu\" tata", "toto' 'tutuXXXtata"); - mytest("Backslashed quotes", "\\'toto tutu\\' tata", - "'totoXXXtutu'XXXtata"); - mytest("Backslashed quotes + quotes", "'toto \\'tutu' tata", - "toto 'tutuXXXtata"); - -} - -#define mytest_str(name, input, separator, expected) \ - xbt_test_add0(name); \ - d=xbt_str_split_str(input, separator); \ - s=xbt_str_join(d,"XXX"); \ - xbt_test_assert3(!strcmp(s,expected),\ - "Input (%s) leads to (%s) instead of (%s)", \ - input,s,expected);\ - free(s); \ - xbt_dynar_free(&d); - -XBT_TEST_UNIT("xbt_str_split_str", test_split_str, "test the function xbt_str_split_str") -{ - xbt_dynar_t d; - char *s; - - mytest_str("Empty string and separator", "", "", ""); - mytest_str("Empty string", "", "##", ""); - mytest_str("Empty separator", "toto", "", "toto"); - mytest_str("String with no separator in it", "toto", "##", "toto"); - mytest_str("Basic test", "toto##tutu", "##", "totoXXXtutu"); -} -#endif /* SIMGRID_TEST */ - /** @brief Join a set of strings as a single string */ char *xbt_str_join(xbt_dynar_t dyn, const char *sep) { @@ -541,7 +491,7 @@ char *xbt_str_join(xbt_dynar_t dyn, const char *sep) * The parameter must be a NULL-terminated array of chars, * just like xbt_dynar_to_array() produces */ -char *xbt_str_join_array(char*const* strs, const char *sep) +char *xbt_str_join_array(const char *const *strs, const char *sep) { char *res,*q; int amount_strings=0; @@ -570,190 +520,312 @@ char *xbt_str_join_array(char*const* strs, const char *sep) return res; } -#if defined(SIMGRID_NEED_GETLINE) || defined(DOXYGEN) -/** @brief Get a single line from the stream (reimplementation of the GNU getline) - * - * This is a redefinition of the GNU getline function, used on platforms where it does not exists. - * - * getline() reads an entire line from stream, storing the address of the buffer - * containing the text into *buf. The buffer is null-terminated and includes - * the newline character, if one was found. +/* + * Diff related functions * - * If *buf is NULL, then getline() will allocate a buffer for storing the line, - * which should be freed by the user program. Alternatively, before calling getline(), - * *buf can contain a pointer to a malloc()-allocated buffer *n bytes in size. If the buffer - * is not large enough to hold the line, getline() resizes it with realloc(), updating *buf and *n - * as necessary. In either case, on a successful call, *buf and *n will be updated to - * reflect the buffer address and allocated size respectively. + * Implementation of the algorithm described in "An O(NP) Sequence Comparison + * Algorithm", by Sun Wu, Udi Manber, Gene Myers, and Webb Miller (Information + * Processing Letters 35(6):317-323, 1990), with the linear-space + * divide-and-conquer strategy described in "An O(ND) Difference Algorithm and + * Its Variations", by Eugene W. Myers (Algorithmica 1:251-266, 1986). */ -long getline(char **buf, size_t * n, FILE * stream) -{ - size_t i; - int ch; +struct subsequence { + int x, y; /* starting coordinates */ + int len; /* length */ +}; - if (!*buf) { - *buf = xbt_malloc(512); - *n = 512; +static XBT_INLINE +void diff_snake(const char *vec_a[], int a0, int len_a, + const char *vec_b[], int b0, int len_b, + struct subsequence *seqs, int *fp, int k, int limit) +{ + int record_seq; + int x, y; + int fp_left = fp[k - 1] + 1; + int fp_right = fp[k + 1]; + if (fp_left > fp_right) { + x = fp_left; + record_seq = k - 1; + } else { + x = fp_right; + record_seq = k + 1; } + y = x - k; + if (x + y <= limit) { + seqs[k].x = x; + seqs[k].y = y; + record_seq = k; + } else { + seqs[k] = seqs[record_seq]; + } + while (x < len_a && y < len_b && !strcmp(vec_a[a0 + x], vec_b[b0 + y])) + ++x, ++y; + fp[k] = x; + if (record_seq == k) + seqs[k].len = x - seqs[k].x; +} - if (feof(stream)) - return (ssize_t) - 1; - - for (i = 0; (ch = fgetc(stream)) != EOF; i++) { - - if (i >= (*n) + 1) - *buf = xbt_realloc(*buf, *n += 512); - - (*buf)[i] = ch; +/* Returns the length of a shortest edit script, and a common + * subsequence from the middle. + */ +static int diff_middle_subsequence(const char *vec_a[], int a0, int len_a, + const char *vec_b[], int b0, int len_b, + struct subsequence *subseq, + struct subsequence *seqs, int *fp) +{ + const int delta = len_a - len_b; + const int limit = (len_a + len_b) / 2; + int kmin; + int kmax; + int k; + int p = -1; + + if (delta >= 0) { + kmin = 0; + kmax = delta; + } else { + kmin = delta; + kmax = 0; + } + for (k = kmin; k <= kmax; k++) + fp[k] = -1; + do { + p++; + fp[kmin - p - 1] = fp[kmax + p + 1] = -1; + for (k = kmax + p; k > delta; k--) + diff_snake(vec_a, a0, len_a, vec_b, b0, len_b, seqs, fp, k, limit); + for (k = kmin - p; k <= delta; k++) + diff_snake(vec_a, a0, len_a, vec_b, b0, len_b, seqs, fp, k, limit); + } while (fp[delta] != len_a); + + subseq->x = a0 + seqs[delta].x; + subseq->y = b0 + seqs[delta].y; + subseq->len = seqs[delta].len; + return abs(delta) + 2 * p;; +} - if ((*buf)[i] == '\n') { - i++; - (*buf)[i] = '\0'; - break; +/* Finds a longest common subsequence. + * Returns its length. + */ +static int diff_compute_lcs(const char *vec_a[], int a0, int len_a, + const char *vec_b[], int b0, int len_b, + xbt_dynar_t common_sequence, + struct subsequence *seqs, int *fp) +{ + if (len_a > 0 && len_b > 0) { + struct subsequence subseq; + int ses_len = diff_middle_subsequence(vec_a, a0, len_a, vec_b, b0, len_b, + &subseq, seqs, fp); + int lcs_len = (len_a + len_b - ses_len) / 2; + if (lcs_len == 0) { + return 0; + } else if (ses_len > 1) { + int lcs_len1 = subseq.len; + if (lcs_len1 < lcs_len) + lcs_len1 += diff_compute_lcs(vec_a, a0, subseq.x - a0, + vec_b, b0, subseq.y - b0, + common_sequence, seqs, fp); + if (subseq.len > 0) + xbt_dynar_push(common_sequence, &subseq); + if (lcs_len1 < lcs_len) { + int u = subseq.x + subseq.len; + int v = subseq.y + subseq.len; + diff_compute_lcs(vec_a, u, a0 + len_a - u, vec_b, v, b0 + len_b - v, + common_sequence, seqs, fp); + } + } else { + int len = MIN(len_a, len_b) - subseq.len; + if (subseq.x == a0 && subseq.y == b0) { + if (subseq.len > 0) + xbt_dynar_push(common_sequence, &subseq); + if (len > 0) { + struct subsequence subseq0 = {a0 + len_a - len, + b0 + len_b - len, len}; + xbt_dynar_push(common_sequence, &subseq0); + } + } else { + if (len > 0) { + struct subsequence subseq0 = {a0, b0, len}; + xbt_dynar_push(common_sequence, &subseq0); + } + if (subseq.len > 0) + xbt_dynar_push(common_sequence, &subseq); + } } + return lcs_len; + } else { + return 0; } - - if (i == *n) - *buf = xbt_realloc(*buf, *n += 1); - - (*buf)[i] = '\0'; - - return (ssize_t) i; } -#endif /* HAVE_GETLINE */ +static int diff_member(const char *s, const char *vec[], int from, int to) +{ + for ( ; from < to ; from++) + if (!strcmp(s, vec[from])) + return 1; + return 0; +} -/* - * Diff related functions +/* Extract common prefix. */ -static xbt_matrix_t diff_build_LCS(xbt_dynar_t da, xbt_dynar_t db) +static void diff_easy_prefix(const char *vec_a[], int *a0_p, int *len_a_p, + const char *vec_b[], int *b0_p, int *len_b_p, + xbt_dynar_t common_sequence) { - xbt_matrix_t C = - xbt_matrix_new(xbt_dynar_length(da), xbt_dynar_length(db), - sizeof(int), NULL); - unsigned long i, j; - - /* Compute the LCS */ - /* - C = array(0..m, 0..n) - for i := 0..m - C[i,0] = 0 - for j := 1..n - C[0,j] = 0 - for i := 1..m - for j := 1..n - if X[i] = Y[j] - C[i,j] := C[i-1,j-1] + 1 - else: - C[i,j] := max(C[i,j-1], C[i-1,j]) - return C[m,n] - */ - if (xbt_dynar_length(db) != 0) - for (i = 0; i < xbt_dynar_length(da); i++) - *((int *) xbt_matrix_get_ptr(C, i, 0)) = 0; - - if (xbt_dynar_length(da) != 0) - for (j = 0; j < xbt_dynar_length(db); j++) - *((int *) xbt_matrix_get_ptr(C, 0, j)) = 0; - - for (i = 1; i < xbt_dynar_length(da); i++) - for (j = 1; j < xbt_dynar_length(db); j++) { - - if (!strcmp - (xbt_dynar_get_as(da, i, char *), - xbt_dynar_get_as(db, j, char *))) - *((int *) xbt_matrix_get_ptr(C, i, j)) = - xbt_matrix_get_as(C, i - 1, j - 1, int) + 1; - else - *((int *) xbt_matrix_get_ptr(C, i, j)) = - max(xbt_matrix_get_as(C, i, j - 1, int), - xbt_matrix_get_as(C, i - 1, j, int)); + int a0 = *a0_p; + int b0 = *b0_p; + int len_a = *len_a_p; + int len_b = *len_b_p; + + while (len_a > 0 && len_b > 0) { + struct subsequence subseq = {a0, b0, 0}; + while (len_a > 0 && len_b > 0 && !strcmp(vec_a[a0], vec_b[b0])) { + a0++, len_a--; + b0++, len_b--; + subseq.len++; + } + if (subseq.len > 0) + xbt_dynar_push(common_sequence, &subseq); + if (len_a > 0 && len_b > 0 && + !diff_member(vec_a[a0], vec_b, b0 + 1, b0 + len_b)) { + a0++, len_a--; + } else { + break; } - return C; + } + + *a0_p = a0; + *b0_p = b0; + *len_a_p = len_a; + *len_b_p = len_b; } -static void diff_build_diff(xbt_dynar_t res, - xbt_matrix_t C, - xbt_dynar_t da, xbt_dynar_t db, int i, int j) +/* Extract common suffix. + */ +static void diff_easy_suffix(const char *vec_a[], int *a0_p, int *len_a_p, + const char *vec_b[], int *b0_p, int *len_b_p, + xbt_dynar_t common_suffix) { - char *topush; - /* Construct the diff - function printDiff(C[0..m,0..n], X[1..m], Y[1..n], i, j) - if i > 0 and j > 0 and X[i] = Y[j] - printDiff(C, X, Y, i-1, j-1) - print " " + X[i] - else - if j > 0 and (i = 0 or C[i,j-1] >= C[i-1,j]) - printDiff(C, X, Y, i, j-1) - print "+ " + Y[j] - else if i > 0 and (j = 0 or C[i,j-1] < C[i-1,j]) - printDiff(C, X, Y, i-1, j) - print "- " + X[i] - */ - - if (i >= 0 && j >= 0 && !strcmp(xbt_dynar_get_as(da, i, char *), - xbt_dynar_get_as(db, j, char *))) { - diff_build_diff(res, C, da, db, i - 1, j - 1); - topush = bprintf(" %s", xbt_dynar_get_as(da, i, char *)); - xbt_dynar_push(res, &topush); - } else if (j >= 0 && - (i <= 0 || j == 0 - || xbt_matrix_get_as(C, i, j - 1, - int) >= xbt_matrix_get_as(C, i - 1, j, - int))) { - diff_build_diff(res, C, da, db, i, j - 1); - topush = bprintf("+ %s", xbt_dynar_get_as(db, j, char *)); - xbt_dynar_push(res, &topush); - } else if (i >= 0 && - (j <= 0 - || xbt_matrix_get_as(C, i, j - 1, int) < xbt_matrix_get_as(C, - i - - - 1, - j, - int))) - { - diff_build_diff(res, C, da, db, i - 1, j); - topush = bprintf("- %s", xbt_dynar_get_as(da, i, char *)); - xbt_dynar_push(res, &topush); - } else if (i <= 0 && j <= 0) { - return; - } else { - THROW2(arg_error, 0, "Invalid values: i=%d, j=%d", i, j); + int a0 = *a0_p; + int b0 = *b0_p; + int len_a = *len_a_p; + int len_b = *len_b_p; + + while (len_a > 0 && len_b > 0){ + struct subsequence subseq; + subseq.len = 0; + while (len_a > 0 && len_b > 0 && + !strcmp(vec_a[a0 + len_a - 1], vec_b[b0 + len_b - 1])) { + len_a--; + len_b--; + subseq.len++; + } + if (subseq.len > 0) { + subseq.x = a0 + len_a; + subseq.y = b0 + len_b; + xbt_dynar_push(common_suffix, &subseq); + } + if (len_a > 0 && len_b > 0 && + !diff_member(vec_b[b0 + len_b - 1], vec_a, a0, a0 + len_a - 1)) { + len_b--; + } else { + break; + } } + *a0_p = a0; + *b0_p = b0; + *len_a_p = len_a; + *len_b_p = len_b; } /** @brief Compute the unified diff of two strings */ -char *xbt_str_diff(char *a, char *b) +char *xbt_str_diff(const char *a, const char *b) { xbt_dynar_t da = xbt_str_split(a, "\n"); xbt_dynar_t db = xbt_str_split(b, "\n"); + xbt_dynar_t common_sequence, common_suffix; + size_t len; + const char **vec_a, **vec_b; + int a0, b0; + int len_a, len_b; + int max; + int *fp_base, *fp; + struct subsequence *seqs_base, *seqs; + struct subsequence subseq; + xbt_dynar_t diff; + char *res; + int x, y; + unsigned s; + + /* Clean empty lines at the end of da and db */ + len = strlen(a); + if (len > 0 && a[len - 1] == '\n') + xbt_dynar_pop(da, NULL); + len = strlen(b); + if (len > 0 && b[len - 1] == '\n') + xbt_dynar_pop(db, NULL); + + /* Various initializations */ + /* Assume that dynar's content is contiguous */ + a0 = 0; + len_a = xbt_dynar_length(da); + vec_a = len_a ? xbt_dynar_get_ptr(da, 0) : NULL; + b0 = 0; + len_b = xbt_dynar_length(db); + vec_b = len_b ? xbt_dynar_get_ptr(db, 0) : NULL; + max = MAX(len_a, len_b) + 1; + fp_base = xbt_new(int, 2 * max + 1); + fp = fp_base + max; /* indexes in [-max..max] */ + seqs_base = xbt_new(struct subsequence, 2 * max + 1); + seqs = seqs_base + max; /* indexes in [-max..max] */ + common_sequence = xbt_dynar_new(sizeof(struct subsequence), NULL); + common_suffix = xbt_dynar_new(sizeof(struct subsequence), NULL); + + /* Add a sentinel a the end of the sequence */ + subseq.x = len_a; + subseq.y = len_b; + subseq.len = 0; + xbt_dynar_push(common_suffix, &subseq); + + /* Compute the Longest Common Subsequence */ + diff_easy_prefix(vec_a, &a0, &len_a, vec_b, &b0, &len_b, common_sequence); + diff_easy_suffix(vec_a, &a0, &len_a, vec_b, &b0, &len_b, common_suffix); + diff_compute_lcs(vec_a, a0, len_a, vec_b, b0, len_b, common_sequence, seqs, fp); + while (!xbt_dynar_is_empty(common_suffix)) { + xbt_dynar_pop(common_suffix, &subseq); + xbt_dynar_push(common_sequence, &subseq); + } - xbt_matrix_t C = diff_build_LCS(da, db); - xbt_dynar_t diff = xbt_dynar_new(sizeof(char *), &xbt_free_ref); - char *res = NULL; - - diff_build_diff(diff, C, da, db, xbt_dynar_length(da) - 1, - xbt_dynar_length(db) - 1); - /* Clean empty lines at the end */ - while (xbt_dynar_length(diff) > 0) { - char *str; - xbt_dynar_pop(diff, &str); - if (str[0] == '\0' || !strcmp(str, " ")) { - free(str); - } else { - xbt_dynar_push(diff, &str); - break; + /* Build a Shortest Edit Script, and the final result */ + diff = xbt_dynar_new(sizeof(char *), &xbt_free_ref); + x = 0; + y = 0; + xbt_dynar_foreach(common_sequence, s, subseq) { + while (x < subseq.x) { + char *topush = bprintf("- %s", vec_a[x++]); + xbt_dynar_push_as(diff, char*, topush); + } + while (y < subseq.y) { + char *topush = bprintf("+ %s", vec_b[y++]); + xbt_dynar_push_as(diff, char*, topush); + } + while (x < subseq.x + subseq.len) { + char *topush = bprintf(" %s", vec_a[x++]); + xbt_dynar_push_as(diff, char*, topush); + y++; } } res = xbt_str_join(diff, "\n"); - xbt_dynar_free(&da); + xbt_free(fp_base); + xbt_free(seqs_base); xbt_dynar_free(&db); + xbt_dynar_free(&da); + xbt_dynar_free(&common_sequence); + xbt_dynar_free(&common_suffix); xbt_dynar_free(&diff); - xbt_matrix_free(C); return res; } @@ -779,3 +851,215 @@ char *xbt_str_from_file(FILE * file) xbt_strbuff_free_container(buff); return res; } + +/* @brief Retrun 1 if string 'str' starts with string 'start' + * + * \param str a string + * \param start the string to search in str + * + * \return 1 if 'str' starts with 'start' + */ +int xbt_str_start_with(const char* str, const char* start) +{ + int i; + size_t l_str = strlen(str); + size_t l_start = strlen(start); + + if(l_start > l_str) return 0; + + for(i = 0; i< l_start; i++){ + if(str[i] != start[i]) return 0; + } + + return 1; +} + +#ifdef SIMGRID_TEST +#include "xbt/str.h" + +#define mytest(name, input, expected) \ + xbt_test_add(name); \ + d=xbt_str_split_quoted(input); \ + s=xbt_str_join(d,"XXX"); \ + xbt_test_assert(!strcmp(s,expected),\ + "Input (%s) leads to (%s) instead of (%s)", \ + input,s,expected);\ + free(s); \ + xbt_dynar_free(&d); + +XBT_TEST_SUITE("xbt_str", "String Handling"); +XBT_TEST_UNIT("xbt_str_split_quoted", test_split_quoted, "test the function xbt_str_split_quoted") +{ + xbt_dynar_t d; + char *s; + + mytest("Empty", "", ""); + mytest("Basic test", "toto tutu", "totoXXXtutu"); + mytest("Useless backslashes", "\\t\\o\\t\\o \\t\\u\\t\\u", + "totoXXXtutu"); + mytest("Protected space", "toto\\ tutu", "toto tutu"); + mytest("Several spaces", "toto tutu", "totoXXXtutu"); + mytest("LTriming", " toto tatu", "totoXXXtatu"); + mytest("Triming", " toto tutu ", "totoXXXtutu"); + mytest("Single quotes", "'toto tutu' tata", "toto tutuXXXtata"); + mytest("Double quotes", "\"toto tutu\" tata", "toto tutuXXXtata"); + mytest("Mixed quotes", "\"toto' 'tutu\" tata", "toto' 'tutuXXXtata"); + mytest("Backslashed quotes", "\\'toto tutu\\' tata", + "'totoXXXtutu'XXXtata"); + mytest("Backslashed quotes + quotes", "'toto \\'tutu' tata", + "toto 'tutuXXXtata"); + +} + +#define mytest_str(name, input, separator, expected) \ + xbt_test_add(name); \ + d=xbt_str_split_str(input, separator); \ + s=xbt_str_join(d,"XXX"); \ + xbt_test_assert(!strcmp(s,expected),\ + "Input (%s) leads to (%s) instead of (%s)", \ + input,s,expected);\ + free(s); \ + xbt_dynar_free(&d); + +XBT_TEST_UNIT("xbt_str_split_str", test_split_str, "test the function xbt_str_split_str") +{ + xbt_dynar_t d; + char *s; + + mytest_str("Empty string and separator", "", "", ""); + mytest_str("Empty string", "", "##", ""); + mytest_str("Empty separator", "toto", "", "toto"); + mytest_str("String with no separator in it", "toto", "##", "toto"); + mytest_str("Basic test", "toto##tutu", "##", "totoXXXtutu"); +} + +/* Last args are format string and parameters for xbt_test_add */ +#define mytest_diff(s1, s2, diff, ...) \ + do { \ + char *mytest_diff_res; \ + xbt_test_add(__VA_ARGS__); \ + mytest_diff_res = xbt_str_diff(s1, s2); \ + xbt_test_assert(!strcmp(mytest_diff_res, diff), \ + "Wrong output:\n--- got:\n%s\n--- expected:\n%s\n---", \ + mytest_diff_res, diff); \ + free(mytest_diff_res); \ + } while (0) + +XBT_TEST_UNIT("xbt_str_diff", test_diff, "test the function xbt_str_diff") +{ + unsigned i; + + /* Trivial cases */ + mytest_diff("a", "a", " a", "1 word, no difference"); + mytest_diff("a", "A", "- a\n+ A", "1 word, different"); + mytest_diff("a\n", "a\n", " a", "1 line, no difference"); + mytest_diff("a\n", "A\n", "- a\n+ A", "1 line, different"); + + /* Empty strings */ + mytest_diff("", "", "", "empty strings"); + mytest_diff("", "a", "+ a", "1 word, added"); + mytest_diff("a", "", "- a", "1 word, removed"); + mytest_diff("", "a\n", "+ a", "1 line, added"); + mytest_diff("a\n", "", "- a", "1 line, removed"); + mytest_diff("", "a\nb\nc\n", "+ a\n+ b\n+ c", "4 lines, all added"); + mytest_diff("a\nb\nc\n", "", "- a\n- b\n- c", "4 lines, all removed"); + + /* Empty lines */ + mytest_diff("\n", "\n", " ", "empty lines"); + mytest_diff("", "\n", "+ ", "empty line, added"); + mytest_diff("\n", "", "- ", "empty line, removed"); + + mytest_diff("a", "\na", "+ \n a", "empty line added before word"); + mytest_diff("a", "a\n\n", " a\n+ ", "empty line added after word"); + mytest_diff("\na", "a", "- \n a", "empty line removed before word"); + mytest_diff("a\n\n", "a", " a\n- ", "empty line removed after word"); + + mytest_diff("a\n", "\na\n", "+ \n a", "empty line added before line"); + mytest_diff("a\n", "a\n\n", " a\n+ ", "empty line added after line"); + mytest_diff("\na\n", "a\n", "- \n a", "empty line removed before line"); + mytest_diff("a\n\n", "a\n", " a\n- ", "empty line removed after line"); + + mytest_diff("a\nb\nc\nd\n", "\na\nb\nc\nd\n", "+ \n a\n b\n c\n d", + "empty line added before 4 lines"); + mytest_diff("a\nb\nc\nd\n", "a\nb\nc\nd\n\n", " a\n b\n c\n d\n+ ", + "empty line added after 4 lines"); + mytest_diff("\na\nb\nc\nd\n", "a\nb\nc\nd\n", "- \n a\n b\n c\n d", + "empty line removed before 4 lines"); + mytest_diff("a\nb\nc\nd\n\n", "a\nb\nc\nd\n", " a\n b\n c\n d\n- ", + "empty line removed after 4 lines"); + + /* Missing newline at the end of one of the strings */ + mytest_diff("a\n", "a", " a", "1 line, 1 word, no difference"); + mytest_diff("a", "a\n", " a", "1 word, 1 line, no difference"); + mytest_diff("a\n", "A", "- a\n+ A", "1 line, 1 word, different"); + mytest_diff("a", "A\n", "- a\n+ A", "1 word, 1 line, different"); + + mytest_diff("a\nb\nc\nd", "a\nb\nc\nd\n", " a\n b\n c\n d", + "4 lines, no newline on first"); + mytest_diff("a\nb\nc\nd\n", "a\nb\nc\nd", " a\n b\n c\n d", + "4 lines, no newline on second"); + + /* Four lines, all combinations of differences */ + for (i = 0 ; i < (1U << 4) ; i++) { + char d2[4 + 1]; + char s2[4 * 2 + 1]; + char res[4 * 8 + 1]; + char *pd = d2; + char *ps = s2; + char *pr = res; + unsigned j = 0; + while (j < 4) { + unsigned k; + for (/* j */ ; j < 4 && !(i & (1U << j)) ; j++) { + *pd++ = "abcd"[j]; + ps += sprintf(ps, "%c\n", "abcd"[j]); + pr += sprintf(pr, " %c\n", "abcd"[j]); + } + for (k = j ; k < 4 && (i & (1U << k)) ; k++) { + *pd++ = "ABCD"[k]; + ps += sprintf(ps, "%c\n", "ABCD"[k]); + pr += sprintf(pr, "- %c\n", "abcd"[k]); + } + for (/* j */ ; j < k ; j++) { + pr += sprintf(pr, "+ %c\n", "ABCD"[j]); + } + } + *pd = '\0'; + *--pr = '\0'; /* strip last '\n' from expected result */ + mytest_diff("a\nb\nc\nd\n", s2, res, + "compare (abcd) with changed (%s)", d2); + } + + /* Subsets of four lines, do not test for empty subset */ + for (i = 1 ; i < (1U << 4) ; i++) { + char d2[4 + 1]; + char s2[4 * 2 + 1]; + char res[4 * 8 + 1]; + char *pd = d2; + char *ps = s2; + char *pr = res; + unsigned j = 0; + while (j < 4) { + for (/* j */ ; j < 4 && (i & (1U << j)) ; j++) { + *pd++ = "abcd"[j]; + ps += sprintf(ps, "%c\n", "abcd"[j]); + pr += sprintf(pr, " %c\n", "abcd"[j]); + } + for (/* j */; j < 4 && !(i & (1U << j)) ; j++) { + pr += sprintf(pr, "- %c\n", "abcd"[j]); + } + } + *pd = '\0'; + *--pr = '\0'; /* strip last '\n' from expected result */ + mytest_diff("a\nb\nc\nd\n", s2, res, + "compare (abcd) with subset (%s)", d2); + + for (pr = res ; *pr != '\0' ; pr++) + if (*pr == '-') + *pr = '+'; + mytest_diff(s2, "a\nb\nc\nd\n", res, + "compare subset (%s) with (abcd)", d2); + } +} + +#endif /* SIMGRID_TEST */