From: Martin Quinson Date: Fri, 26 Feb 2016 00:16:41 +0000 (+0100) Subject: tesh is not in C anymore, there is no need to compute string diffs X-Git-Tag: v3_13~673 X-Git-Url: http://info.iut-bm.univ-fcomte.fr/pub/gitweb/simgrid.git/commitdiff_plain/c4c2d9748e2ebb46cf99738bbade29ca62368e78 tesh is not in C anymore, there is no need to compute string diffs --- diff --git a/include/xbt/str.h b/include/xbt/str.h index 8ba1010a0e..813e3ebc55 100644 --- a/include/xbt/str.h +++ b/include/xbt/str.h @@ -50,8 +50,6 @@ XBT_PUBLIC(void) xbt_str_subst(char *str, char from, char to, int amount); XBT_PUBLIC(char *) xbt_str_varsubst(const char *str, xbt_dict_t patterns); /* */ -XBT_PUBLIC(char *) xbt_str_diff(const char *a, const char *b); - XBT_PUBLIC(char *) xbt_str_from_file(FILE * file); XBT_PUBLIC(long int) xbt_str_parse_int(const char* str, const char* error_msg); diff --git a/src/xbt/xbt_str.c b/src/xbt/xbt_str.c index fb1a0af27c..14f928fe27 100644 --- a/src/xbt/xbt_str.c +++ b/src/xbt/xbt_str.c @@ -476,317 +476,6 @@ char *xbt_str_join_array(const char *const *strs, const char *sep) return res; } -/* - * Diff related functions - * - * Implementation of the algorithm described in "An O(NP) Sequence Comparison - * Algorithm", by Sun Wu, Udi Manber, Gene Myers, and Webb Miller (Information - * Processing Letters 35(6):317-323, 1990), with the linear-space - * divide-and-conquer strategy described in "An O(ND) Difference Algorithm and - * Its Variations", by Eugene W. Myers (Algorithmica 1:251-266, 1986). - */ - -struct subsequence { - int x, y; /* starting coordinates */ - int len; /* length */ -}; - -static XBT_INLINE -void diff_snake(const char *vec_a[], int a0, int len_a, - const char *vec_b[], int b0, int len_b, - struct subsequence *seqs, int *fp, int k, int limit) -{ - int record_seq; - int x, y; - int fp_left = fp[k - 1] + 1; - int fp_right = fp[k + 1]; - if (fp_left > fp_right) { - x = fp_left; - record_seq = k - 1; - } else { - x = fp_right; - record_seq = k + 1; - } - y = x - k; - if (x + y <= limit) { - seqs[k].x = x; - seqs[k].y = y; - record_seq = k; - } else { - seqs[k] = seqs[record_seq]; - } - while (x < len_a && y < len_b && !strcmp(vec_a[a0 + x], vec_b[b0 + y])) - ++x, ++y; - fp[k] = x; - if (record_seq == k) - seqs[k].len = x - seqs[k].x; -} - -/* Returns the length of a shortest edit script, and a common - * subsequence from the middle. - */ -static int diff_middle_subsequence(const char *vec_a[], int a0, int len_a, - const char *vec_b[], int b0, int len_b, - struct subsequence *subseq, - struct subsequence *seqs, int *fp) -{ - const int delta = len_a - len_b; - const int limit = (len_a + len_b) / 2; - int kmin; - int kmax; - int k; - int p = -1; - - if (delta >= 0) { - kmin = 0; - kmax = delta; - } else { - kmin = delta; - kmax = 0; - } - for (k = kmin; k <= kmax; k++) - fp[k] = -1; - do { - p++; - fp[kmin - p - 1] = fp[kmax + p + 1] = -1; - for (k = kmax + p; k > delta; k--) - diff_snake(vec_a, a0, len_a, vec_b, b0, len_b, seqs, fp, k, limit); - for (k = kmin - p; k <= delta; k++) - diff_snake(vec_a, a0, len_a, vec_b, b0, len_b, seqs, fp, k, limit); - } while (fp[delta] != len_a); - - subseq->x = a0 + seqs[delta].x; - subseq->y = b0 + seqs[delta].y; - subseq->len = seqs[delta].len; - return abs(delta) + 2 * p;; -} - -/* Finds a longest common subsequence. - * Returns its length. - */ -static int diff_compute_lcs(const char *vec_a[], int a0, int len_a, - const char *vec_b[], int b0, int len_b, - xbt_dynar_t common_sequence, - struct subsequence *seqs, int *fp) -{ - if (len_a > 0 && len_b > 0) { - struct subsequence subseq; - int ses_len = diff_middle_subsequence(vec_a, a0, len_a, vec_b, b0, len_b, - &subseq, seqs, fp); - int lcs_len = (len_a + len_b - ses_len) / 2; - if (lcs_len == 0) { - return 0; - } else if (ses_len > 1) { - int lcs_len1 = subseq.len; - if (lcs_len1 < lcs_len) - lcs_len1 += diff_compute_lcs(vec_a, a0, subseq.x - a0, - vec_b, b0, subseq.y - b0, - common_sequence, seqs, fp); - if (subseq.len > 0) - xbt_dynar_push(common_sequence, &subseq); - if (lcs_len1 < lcs_len) { - int u = subseq.x + subseq.len; - int v = subseq.y + subseq.len; - diff_compute_lcs(vec_a, u, a0 + len_a - u, vec_b, v, b0 + len_b - v, - common_sequence, seqs, fp); - } - } else { - int len = MIN(len_a, len_b) - subseq.len; - if (subseq.x == a0 && subseq.y == b0) { - if (subseq.len > 0) - xbt_dynar_push(common_sequence, &subseq); - if (len > 0) { - struct subsequence subseq0 = {a0 + len_a - len, - b0 + len_b - len, len}; - xbt_dynar_push(common_sequence, &subseq0); - } - } else { - if (len > 0) { - struct subsequence subseq0 = {a0, b0, len}; - xbt_dynar_push(common_sequence, &subseq0); - } - if (subseq.len > 0) - xbt_dynar_push(common_sequence, &subseq); - } - } - return lcs_len; - } else { - return 0; - } -} - -static int diff_member(const char *s, const char *vec[], int from, int to) -{ - for ( ; from < to ; from++) - if (!strcmp(s, vec[from])) - return 1; - return 0; -} - -/* Extract common prefix. - */ -static void diff_easy_prefix(const char *vec_a[], int *a0_p, int *len_a_p, - const char *vec_b[], int *b0_p, int *len_b_p, - xbt_dynar_t common_sequence) -{ - int a0 = *a0_p; - int b0 = *b0_p; - int len_a = *len_a_p; - int len_b = *len_b_p; - - while (len_a > 0 && len_b > 0) { - struct subsequence subseq = {a0, b0, 0}; - while (len_a > 0 && len_b > 0 && !strcmp(vec_a[a0], vec_b[b0])) { - a0++, len_a--; - b0++, len_b--; - subseq.len++; - } - if (subseq.len > 0) - xbt_dynar_push(common_sequence, &subseq); - if (len_a > 0 && len_b > 0 && - !diff_member(vec_a[a0], vec_b, b0 + 1, b0 + len_b)) { - a0++, len_a--; - } else { - break; - } - } - - *a0_p = a0; - *b0_p = b0; - *len_a_p = len_a; - *len_b_p = len_b; -} - -/* Extract common suffix. - */ -static void diff_easy_suffix(const char *vec_a[], int *a0_p, int *len_a_p, - const char *vec_b[], int *b0_p, int *len_b_p, - xbt_dynar_t common_suffix) -{ - int a0 = *a0_p; - int b0 = *b0_p; - int len_a = *len_a_p; - int len_b = *len_b_p; - - while (len_a > 0 && len_b > 0){ - struct subsequence subseq; - subseq.len = 0; - while (len_a > 0 && len_b > 0 && - !strcmp(vec_a[a0 + len_a - 1], vec_b[b0 + len_b - 1])) { - len_a--; - len_b--; - subseq.len++; - } - if (subseq.len > 0) { - subseq.x = a0 + len_a; - subseq.y = b0 + len_b; - xbt_dynar_push(common_suffix, &subseq); - } - if (len_a > 0 && len_b > 0 && - !diff_member(vec_b[b0 + len_b - 1], vec_a, a0, a0 + len_a - 1)) { - len_b--; - } else { - break; - } - } - - *a0_p = a0; - *b0_p = b0; - *len_a_p = len_a; - *len_b_p = len_b; -} - -/** @brief Compute the unified diff of two strings */ -char *xbt_str_diff(const char *a, const char *b) -{ - xbt_dynar_t da = xbt_str_split(a, "\n"); - xbt_dynar_t db = xbt_str_split(b, "\n"); - xbt_dynar_t common_sequence, common_suffix; - size_t len; - const char **vec_a, **vec_b; - int a0, b0; - int len_a, len_b; - int max; - int *fp_base, *fp; - struct subsequence *seqs_base, *seqs; - struct subsequence subseq; - xbt_dynar_t diff; - char *res; - int x, y; - unsigned s; - - /* Clean empty lines at the end of da and db */ - len = strlen(a); - if (len > 0 && a[len - 1] == '\n') - xbt_dynar_pop(da, NULL); - len = strlen(b); - if (len > 0 && b[len - 1] == '\n') - xbt_dynar_pop(db, NULL); - - /* Various initializations */ - /* Assume that dynar's content is contiguous */ - a0 = 0; - len_a = xbt_dynar_length(da); - vec_a = len_a ? xbt_dynar_get_ptr(da, 0) : NULL; - b0 = 0; - len_b = xbt_dynar_length(db); - vec_b = len_b ? xbt_dynar_get_ptr(db, 0) : NULL; - max = MAX(len_a, len_b) + 1; - fp_base = xbt_new(int, 2 * max + 1); - fp = fp_base + max; /* indexes in [-max..max] */ - seqs_base = xbt_new(struct subsequence, 2 * max + 1); - seqs = seqs_base + max; /* indexes in [-max..max] */ - common_sequence = xbt_dynar_new(sizeof(struct subsequence), NULL); - common_suffix = xbt_dynar_new(sizeof(struct subsequence), NULL); - - /* Add a sentinel a the end of the sequence */ - subseq.x = len_a; - subseq.y = len_b; - subseq.len = 0; - xbt_dynar_push(common_suffix, &subseq); - - /* Compute the Longest Common Subsequence */ - diff_easy_prefix(vec_a, &a0, &len_a, vec_b, &b0, &len_b, common_sequence); - diff_easy_suffix(vec_a, &a0, &len_a, vec_b, &b0, &len_b, common_suffix); - diff_compute_lcs(vec_a, a0, len_a, vec_b, b0, len_b, common_sequence, seqs, fp); - while (!xbt_dynar_is_empty(common_suffix)) { - xbt_dynar_pop(common_suffix, &subseq); - xbt_dynar_push(common_sequence, &subseq); - } - - /* Build a Shortest Edit Script, and the final result */ - diff = xbt_dynar_new(sizeof(char *), &xbt_free_ref); - x = 0; - y = 0; - xbt_dynar_foreach(common_sequence, s, subseq) { - while (x < subseq.x) { - char *topush = bprintf("- %s", vec_a[x++]); - xbt_dynar_push_as(diff, char*, topush); - } - while (y < subseq.y) { - char *topush = bprintf("+ %s", vec_b[y++]); - xbt_dynar_push_as(diff, char*, topush); - } - while (x < subseq.x + subseq.len) { - char *topush = bprintf(" %s", vec_a[x++]); - xbt_dynar_push_as(diff, char*, topush); - y++; - } - } - res = xbt_str_join(diff, "\n"); - - xbt_free(fp_base); - xbt_free(seqs_base); - xbt_dynar_free(&db); - xbt_dynar_free(&da); - xbt_dynar_free(&common_sequence); - xbt_dynar_free(&common_suffix); - xbt_dynar_free(&diff); - - return res; -} - - /** @brief creates a new string containing what can be read on a fd * */ @@ -902,135 +591,6 @@ XBT_TEST_UNIT("xbt_str_split_str", test_split_str, "test the function xbt_str_sp mytest_str("Basic test", "toto##tutu", "##", "totoXXXtutu"); } -/* Last args are format string and parameters for xbt_test_add */ -#define mytest_diff(s1, s2, diff, ...) \ - do { \ - char *mytest_diff_res; \ - xbt_test_add(__VA_ARGS__); \ - mytest_diff_res = xbt_str_diff(s1, s2); \ - xbt_test_assert(!strcmp(mytest_diff_res, diff), \ - "Wrong output:\n--- got:\n%s\n--- expected:\n%s\n---", \ - mytest_diff_res, diff); \ - free(mytest_diff_res); \ - } while (0) - -XBT_TEST_UNIT("xbt_str_diff", test_diff, "test the function xbt_str_diff") -{ - unsigned i; - - /* Trivial cases */ - mytest_diff("a", "a", " a", "1 word, no difference"); - mytest_diff("a", "A", "- a\n+ A", "1 word, different"); - mytest_diff("a\n", "a\n", " a", "1 line, no difference"); - mytest_diff("a\n", "A\n", "- a\n+ A", "1 line, different"); - - /* Empty strings */ - mytest_diff("", "", "", "empty strings"); - mytest_diff("", "a", "+ a", "1 word, added"); - mytest_diff("a", "", "- a", "1 word, removed"); - mytest_diff("", "a\n", "+ a", "1 line, added"); - mytest_diff("a\n", "", "- a", "1 line, removed"); - mytest_diff("", "a\nb\nc\n", "+ a\n+ b\n+ c", "4 lines, all added"); - mytest_diff("a\nb\nc\n", "", "- a\n- b\n- c", "4 lines, all removed"); - - /* Empty lines */ - mytest_diff("\n", "\n", " ", "empty lines"); - mytest_diff("", "\n", "+ ", "empty line, added"); - mytest_diff("\n", "", "- ", "empty line, removed"); - - mytest_diff("a", "\na", "+ \n a", "empty line added before word"); - mytest_diff("a", "a\n\n", " a\n+ ", "empty line added after word"); - mytest_diff("\na", "a", "- \n a", "empty line removed before word"); - mytest_diff("a\n\n", "a", " a\n- ", "empty line removed after word"); - - mytest_diff("a\n", "\na\n", "+ \n a", "empty line added before line"); - mytest_diff("a\n", "a\n\n", " a\n+ ", "empty line added after line"); - mytest_diff("\na\n", "a\n", "- \n a", "empty line removed before line"); - mytest_diff("a\n\n", "a\n", " a\n- ", "empty line removed after line"); - - mytest_diff("a\nb\nc\nd\n", "\na\nb\nc\nd\n", "+ \n a\n b\n c\n d", - "empty line added before 4 lines"); - mytest_diff("a\nb\nc\nd\n", "a\nb\nc\nd\n\n", " a\n b\n c\n d\n+ ", - "empty line added after 4 lines"); - mytest_diff("\na\nb\nc\nd\n", "a\nb\nc\nd\n", "- \n a\n b\n c\n d", - "empty line removed before 4 lines"); - mytest_diff("a\nb\nc\nd\n\n", "a\nb\nc\nd\n", " a\n b\n c\n d\n- ", - "empty line removed after 4 lines"); - - /* Missing newline at the end of one of the strings */ - mytest_diff("a\n", "a", " a", "1 line, 1 word, no difference"); - mytest_diff("a", "a\n", " a", "1 word, 1 line, no difference"); - mytest_diff("a\n", "A", "- a\n+ A", "1 line, 1 word, different"); - mytest_diff("a", "A\n", "- a\n+ A", "1 word, 1 line, different"); - - mytest_diff("a\nb\nc\nd", "a\nb\nc\nd\n", " a\n b\n c\n d", - "4 lines, no newline on first"); - mytest_diff("a\nb\nc\nd\n", "a\nb\nc\nd", " a\n b\n c\n d", - "4 lines, no newline on second"); - - /* Four lines, all combinations of differences */ - for (i = 0 ; i < (1U << 4) ; i++) { - char d2[4 + 1]; - char s2[4 * 2 + 1]; - char res[4 * 8 + 1]; - char *pd = d2; - char *ps = s2; - char *pr = res; - unsigned j = 0; - while (j < 4) { - unsigned k; - for (/* j */ ; j < 4 && !(i & (1U << j)) ; j++) { - *pd++ = "abcd"[j]; - ps += sprintf(ps, "%c\n", "abcd"[j]); - pr += sprintf(pr, " %c\n", "abcd"[j]); - } - for (k = j ; k < 4 && (i & (1U << k)) ; k++) { - *pd++ = "ABCD"[k]; - ps += sprintf(ps, "%c\n", "ABCD"[k]); - pr += sprintf(pr, "- %c\n", "abcd"[k]); - } - for (/* j */ ; j < k ; j++) { - pr += sprintf(pr, "+ %c\n", "ABCD"[j]); - } - } - *pd = '\0'; - *--pr = '\0'; /* strip last '\n' from expected result */ - mytest_diff("a\nb\nc\nd\n", s2, res, - "compare (abcd) with changed (%s)", d2); - } - - /* Subsets of four lines, do not test for empty subset */ - for (i = 1 ; i < (1U << 4) ; i++) { - char d2[4 + 1]; - char s2[4 * 2 + 1]; - char res[4 * 8 + 1]; - char *pd = d2; - char *ps = s2; - char *pr = res; - unsigned j = 0; - while (j < 4) { - for (/* j */ ; j < 4 && (i & (1U << j)) ; j++) { - *pd++ = "abcd"[j]; - ps += sprintf(ps, "%c\n", "abcd"[j]); - pr += sprintf(pr, " %c\n", "abcd"[j]); - } - for (/* j */; j < 4 && !(i & (1U << j)) ; j++) { - pr += sprintf(pr, "- %c\n", "abcd"[j]); - } - } - *pd = '\0'; - *--pr = '\0'; /* strip last '\n' from expected result */ - mytest_diff("a\nb\nc\nd\n", s2, res, - "compare (abcd) with subset (%s)", d2); - - for (pr = res ; *pr != '\0' ; pr++) - if (*pr == '-') - *pr = '+'; - mytest_diff(s2, "a\nb\nc\nd\n", res, - "compare subset (%s) with (abcd)", d2); - } -} - #define test_parse_error(function, name, variable, str) \ do { \ xbt_test_add(name); \