3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Copyright (c) 2004 Arnaud Legrand, Martin Quinson. All rights reserved. */
7 /* This program is free software; you can redistribute it and/or modify it
8 * under the terms of the license (GNU LGPL) which comes with this package. */
10 #include <ctype.h> /* isdigit */
12 #include "gras/DataDesc/datadesc_private.h"
13 #include "gras/DataDesc/ddt_parse.yy.h"
/* Declare the "ddt_parse" logging category under "datadesc". The same
 * category name is tested with XBT_LOG_ISENABLED() in parse_statement().
 * NOTE(review): presumably this also makes it the default category for the
 * DEBUGn/VERBn/ERRORn macros used in this file -- confirm against xbt/log.h. */
15 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
16 "Parsing C data structures to build GRAS data description");
/* Modifiers collected while parsing the type part of a C declaration.
 * NOTE(review): the member list is not visible in this excerpt. The rest of
 * the file reads and writes is_ref, is_unsigned, is_short, is_long, is_struct,
 * is_union and is_enum through this type; is_ref and is_long are used as
 * counters (they are incremented), the others are set to 1. */
18 typedef struct s_type_modifier{
28 } s_type_modifier_t,*type_modifier_t;
/* Record describing one parsed identifier.
 * NOTE(review): only the `type` member and no closing line are visible in
 * this excerpt. Presumably this is the struct behind s_identifier_t, which
 * the rest of the file uses with the members type, type_name, name and tm --
 * confirm against the full definition. */
30 typedef struct s_field {
/* Data description associated with the identifier. */
31 gras_datadesc_type_t type;
/* Text of the token currently under examination; the parsing functions below
 * compare it against C keywords and copy it with strdup() when it must be
 * kept. Defined outside this file. */
37 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/*
 * Fold the C type modifiers found at the current lexer position into
 * *type_modifier.
 *
 * The current token (gras_ddt_parse_tok_num / gras_ddt_parse_text) is
 * classified as follows: '*' increments is_ref; "unsigned", "short",
 * "struct", "union" and "enum" set the matching flag to 1; "long" increments
 * is_long so that "long long" ends up as 2. After a token has been handled
 * the lexer is advanced with gras_ddt_parse_lex_n_dump(), and processing
 * stops as soon as the new token is neither a WORD nor a STAR.
 *
 * NOTE(review): the loop construct and the final else/break lines are not
 * visible in this excerpt; the description above is inferred from the
 * "Done with modifiers" debug messages -- confirm against the full source.
 *
 * type_modifier: accumulator updated in place; it is never reset here, so the
 *                caller is expected to have zeroed it.
 */
40 static void parse_type_modifier(type_modifier_t type_modifier) {
43 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
44 /* This is only used when parsing e.g. 'short *', since this function returns as soon as int, float, double,... is encountered */
45 DEBUG0("This is a reference");
46 type_modifier->is_ref++;
48 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
49 DEBUG0("This is an unsigned");
50 type_modifier->is_unsigned = 1;
52 } else if (!strcmp(gras_ddt_parse_text,"short")) {
53 DEBUG0("This is short");
54 type_modifier->is_short = 1;
56 } else if (!strcmp(gras_ddt_parse_text,"long")) {
57 DEBUG0("This is long");
58 type_modifier->is_long++; /* handle "long long" */
60 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
61 DEBUG0("This is a struct");
62 type_modifier->is_struct = 1;
64 } else if (!strcmp(gras_ddt_parse_text,"union")) {
65 DEBUG0("This is an union");
66 type_modifier->is_union = 1;
68 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
69 DEBUG0("This is an enum");
70 type_modifier->is_enum = 1;
/* NOTE(review): the body of the EMPTY-token branch is not visible here;
   presumably such tokens are simply skipped -- confirm. */
72 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
/* Any other word is not a modifier: it is left as the current token for the
   caller to interpret (e.g. as the base type name). */
76 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Fetch the next token; only words and stars can still be modifiers. */
80 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
81 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
82 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
83 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
90 static void print_type_modifier(s_type_modifier_t tm) {
94 if (tm.is_unsigned) printf("(unsigned) ");
95 if (tm.is_short) printf("(short) ");
96 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
98 if(tm.is_struct) printf("(struct) ");
99 if(tm.is_enum) printf("(enum) ");
100 if(tm.is_union) printf("(union) ");
102 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
106 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
107 s_identifier_t former,array;
108 memset(&array,0,sizeof(array));
111 xbt_dynar_pop(dynar,&former);
112 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+20);
113 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
114 size,former.type_name);
115 sprintf(array.type_name,"%s[%ld]",former.type_name,size);
116 xbt_free(former.type_name);
118 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclaration are ignored */
119 array.name = former.name;
121 xbt_dynar_push(dynar,&array);
124 static void change_to_ref(xbt_dynar_t dynar) {
125 s_identifier_t former,ref;
126 memset(&ref,0,sizeof(ref));
129 xbt_dynar_pop(dynar,&former);
130 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
131 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
132 sprintf(ref.type_name,"%s*",former.type_name);
133 xbt_free(former.type_name);
135 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclaration are ignored */
136 ref.name = former.name;
138 xbt_dynar_push(dynar,&ref);
142 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
143 s_identifier_t former,ref;
144 memset(&ref,0,sizeof(ref));
147 xbt_dynar_pop(dynar,&former);
148 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclaration are ignored */
149 ref.type_name = (char*)strdup(ref.type->name);
150 ref.name = former.name;
152 xbt_free(former.type_name);
154 xbt_dynar_push(dynar,&ref);
/*
 * Parse one statement (declaration) inside a struct body.
 *
 * The statement is read token by token from the lexer. Its type part is
 * decoded first (modifiers, then base type, with "int" assumed when only
 * short/long/unsigned was given); then every declared symbol is pushed onto
 * `identifiers` as an s_identifier_t. Array suffixes and GRAS_ANNOTE(...)
 * annotations rewrite the entry that was pushed last.
 *
 * definition:     text being parsed; it is only used here to build a debug
 *                 message and is temporarily modified in place while doing so.
 * identifiers:    dynar of s_identifier_t receiving one entry per symbol.
 * fields_to_push: dynar of char* receiving the value of "size" annotations
 *                 that are handled through change_to_ref_pop_array().
 *
 * Returns mismatch_error when the first token is GRAS_DDT_PARSE_TOKEN_RA,
 * i.e. the end of the enclosing structure or union.
 * NOTE(review): the other return statements, several local declarations
 * (colon_pos, buffname, keyval, end, p) and a number of else/closing lines
 * are not visible in this excerpt; what the PARSE_ERRORn macros do after
 * reporting (return, abort, ...) is not visible either -- confirm.
 */
158 static xbt_error_t parse_statement(char *definition,
159 xbt_dynar_t identifiers,
160 xbt_dynar_t fields_to_push) {
163 s_identifier_t identifier;
165 int expect_id_separator = 0;
168 memset(&identifier,0,sizeof(identifier));
170 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
171 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
173 return mismatch_error; /* end of the enclosing structure or union */
/* Debug only: temporarily cut `definition` at the next ';' so that just the
   current statement gets printed, then restore the ';'.
   NOTE(review): the visible scan condition only stops on ';' and has no
   check for the end of the string -- confirm that a statement without a
   terminating ';' cannot reach this point. */
176 if (XBT_LOG_ISENABLED(ddt_parse,xbt_log_priority_debug)) {
178 for (colon_pos = gras_ddt_parse_col_pos;
179 definition[colon_pos] != ';';
181 definition[colon_pos] = '\0';
182 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
184 definition+gras_ddt_parse_col_pos,
185 gras_ddt_parse_col_pos);
186 definition[colon_pos] = ';';
189 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
190 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
191 gras_ddt_parse_text);
193 /**** get the type modifier of this statement ****/
194 parse_type_modifier(&identifier.tm);
196 /* FIXME: This does not detect recursive definitions at all? */
/* NOTE(review): this rejects every struct/union/enum member, so the
   is_union/is_enum/is_struct branches further down look unreachable if
   PARSE_ERROR0 does not fall through -- confirm. */
197 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
198 PARSE_ERROR0("Cannot handle recursive type definition yet");
200 /**** get the base type, giving "short a" the needed love ****/
/* A short/long/unsigned modifier followed by something that is not one of
   char, float, double or int means that the base type was left implicit. */
201 if (!identifier.tm.is_union &&
202 !identifier.tm.is_enum &&
203 !identifier.tm.is_struct &&
205 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
207 strcmp(gras_ddt_parse_text,"char") &&
208 strcmp(gras_ddt_parse_text,"float") &&
209 strcmp(gras_ddt_parse_text,"double") &&
210 strcmp(gras_ddt_parse_text,"int") ) {
212 /* the user omitted "int" (as in "unsigned a;"): the current token is not consumed */
213 identifier.type_name=(char*)strdup("int");
214 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Otherwise the current token is the base type name: keep a copy of it and
   move on to the next token. */
216 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
217 DEBUG1("the base type is '%s'",identifier.type_name);
218 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
221 /**** build the base type for latter use ****/
222 if (identifier.tm.is_union) {
223 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
225 } else if (identifier.tm.is_enum) {
226 PARSE_ERROR0("Cannot handle enum yet");
228 } else if (identifier.tm.is_struct) {
/* NOTE(review): unbounded sprintf() into buffname, whose declaration and
   size are not visible in this excerpt -- confirm that it cannot overflow. */
229 sprintf(buffname,"struct %s",identifier.type_name);
230 identifier.type = gras_datadesc_struct(buffname); /* Get created when does not exist */
232 } else if (identifier.tm.is_unsigned) {
233 if (!strcmp(identifier.type_name,"int")) {
234 if (identifier.tm.is_long == 2) {
235 identifier.type = gras_datadesc_by_name("unsigned long long int");
236 } else if (identifier.tm.is_long) {
237 identifier.type = gras_datadesc_by_name("unsigned long int");
238 } else if (identifier.tm.is_short) {
239 identifier.type = gras_datadesc_by_name("unsigned short int");
241 identifier.type = gras_datadesc_by_name("unsigned int");
244 } else if (!strcmp(identifier.type_name, "char")) {
245 identifier.type = gras_datadesc_by_name("unsigned char");
247 } else { /* should be impossible: the C compiler rejects such a declaration before we see it */
251 } else if (!strcmp(identifier.type_name, "float")) {
252 /* no modifier allowed by gcc */
253 identifier.type = gras_datadesc_by_name("float");
255 } else if (!strcmp(identifier.type_name, "double")) {
256 if (identifier.tm.is_long)
257 PARSE_ERROR0("long double not portable and thus not handled");
259 identifier.type = gras_datadesc_by_name("double");
261 } else { /* signed integer elemental */
262 if (!strcmp(identifier.type_name,"int")) {
263 if (identifier.tm.is_long == 2) {
264 identifier.type = gras_datadesc_by_name("signed long long int");
265 } else if (identifier.tm.is_long) {
266 identifier.type = gras_datadesc_by_name("signed long int");
267 } else if (identifier.tm.is_short) {
268 identifier.type = gras_datadesc_by_name("signed short int");
270 identifier.type = gras_datadesc_by_name("int");
273 } else if (!strcmp(identifier.type_name, "char")) {
274 identifier.type = gras_datadesc_by_name("char");
/* Anything else must be a type that was registered beforehand under that name. */
277 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
278 identifier.type = gras_datadesc_by_name(identifier.type_name);
279 if (!identifier.type)
280 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
283 /* Now identifier.type and identifier.name speak about the base type.
284 Stars are not eaten unless 'int' was omitted.
285 We will have to enhance it if we are in fact asked for array or reference */
287 /**** look for the symbols of this type ****/
/* Walk the tokens up to the ';'. expect_id_separator is 0 while a symbol name
   (possibly preceded by stars) is expected, and 1 once a symbol was pushed:
   then only a separator, an array suffix or an annotation may follow. */
288 for(expect_id_separator = 0;
290 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
291 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
293 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
295 if(expect_id_separator) {
/* GRAS_DDT_PARSE_TOKEN_COLON is the token that the error messages of this
   function call a comma (','): it separates the declared symbols. */
296 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
297 expect_id_separator = 0;
300 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
301 /* Handle fixed size arrays */
302 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
303 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
304 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
306 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The whole token must be a base-10 number: strtol() has to consume at least
   one character and stop on the terminating NUL. */
308 long int size=strtol(gras_ddt_parse_text, &end, 10);
310 if (end == gras_ddt_parse_text || *end != '\0')
311 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
313 /* replace the previously pushed type to an array of it */
314 change_to_fixed_array(identifiers,size);
316 /* eat the closing bracket */
317 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
318 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
319 PARSE_ERROR0("Unparsable size of array");
320 DEBUG1("Fixed size array, size=%ld",size);
323 PARSE_ERROR0("Unparsable size of array");
325 /* End of fixed size arrays handling */
327 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
328 /* Handle annotation */
/* Expected shape: GRAS_ANNOTE ( key , value ), where EMPTY tokens are skipped
   between the elements. */
329 s_identifier_t array;
330 char *keyname = NULL;
332 memset(&array,0,sizeof(array));
333 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
334 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
336 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
337 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
338 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
340 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
342 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
343 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
344 keyname = (char*)strdup(gras_ddt_parse_text);
346 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
348 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
349 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
351 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
353 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
354 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
355 keyval = (char*)strdup(gras_ddt_parse_text);
357 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
359 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
360 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
362 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
364 DEBUG2("Anotation: %s=%s",keyname,keyval);
365 if (!strcmp(keyname,"size")) {
/* Each size annotation accounts for one '*' of the declaration. */
367 if (!identifier.tm.is_ref)
368 PARSE_ERROR0("Size annotation for a field not being a reference");
369 identifier.tm.is_ref--;
/* size "1": the field is a plain reference to one element */
371 if (!strcmp(keyval,"1")) {
372 change_to_ref(identifiers);
/* NOTE(review): the body of this scan over keyval and the condition selecting
   between the two cases below are not visible in this excerpt. Presumably an
   all-digit value yields a reference to a fixed-size array, and any other
   value is taken as the name of the field holding the size -- confirm. */
378 for (p = keyval; *p != '\0'; p++)
382 change_to_fixed_array(identifiers,atoi(keyval));
383 change_to_ref(identifiers);
/* Here the keyval pointer itself is stored in fields_to_push. */
388 change_to_ref_pop_array(identifiers);
389 xbt_dynar_push(fields_to_push,&keyval);
396 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
400 /* End of annotation handling */
402 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
404 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
405 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
/* Stars in front of a symbol name are only counted here; the matching
   reference types are built when the annotations are processed. */
408 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
409 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
413 /* found a symbol name. Build the type and push it to dynar */
414 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
416 identifier.name=(char*)strdup(gras_ddt_parse_text);
417 DEBUG1("Found the identifier \"%s\"",identifier.name);
419 xbt_dynar_push(identifiers, &identifier);
420 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
421 expect_id_separator = 1;
425 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/*
 * Parse a struct definition and build the matching data description.
 *
 * On entry the current token is either the struct name (a WORD) or, for an
 * anonymous struct, whatever follows the "struct" keyword; anonymous structs
 * are named "anonymous struct <n>" using a function-static counter. The body
 * is then read one statement at a time with parse_statement(): every
 * identifier found is appended to the struct with
 * gras_datadesc_struct_append(), and every entry queued in fields_to_push is
 * registered with gras_datadesc_cb_field_push().
 *
 * definition: text being parsed, forwarded to parse_statement().
 *
 * Returns NULL when statement parsing stopped with anything else than
 * mismatch_error.
 * NOTE(review): the successful return statement, some local declarations
 * (errcode, i, name, buffname) and several closing/else lines are not visible
 * in this excerpt; presumably struct_type is returned on success -- confirm.
 */
432 static gras_datadesc_type_t parse_struct(char *definition) {
436 static int anonymous_struct=0;
438 xbt_dynar_t identifiers;
439 s_identifier_t field;
442 xbt_dynar_t fields_to_push;
445 gras_datadesc_type_t struct_type;
448 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
449 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
451 /* Create the struct descriptor */
452 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
453 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
454 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
455 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is post-incremented when the name is built, so
   the log line right below prints a number one above the one used in the
   name -- confirm whether that is intended. */
457 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
458 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
459 struct_type = gras_datadesc_struct(buffname);
462 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
463 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
464 gras_ddt_parse_text);
466 /* Parse the identifiers */
/* One iteration per statement, for as long as parse_statement() reports
   no_error. */
467 for (errcode=parse_statement(definition,identifiers,fields_to_push);
468 errcode == no_error ;
469 errcode=parse_statement(definition,identifiers,fields_to_push)) {
471 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
472 /* append the identifiers we've found */
473 xbt_dynar_foreach(identifiers,i, field) {
/* NOTE(review): the condition guarding this error is not visible here;
   presumably it fires when field.tm.is_ref is still non-zero. The message
   spells the keyword "GRAS_ANNOTATE" whereas parse_statement() expects
   "GRAS_ANNOTE" -- confirm which spelling is right. */
475 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
476 field.name,field.tm.is_ref);
478 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
479 gras_datadesc_struct_append(struct_type, field.name, field.type);
/* The strings of the identifier are released right after the append call. */
480 xbt_free(field.name);
481 xbt_free(field.type_name);
484 xbt_dynar_reset(identifiers);
485 DEBUG1("struct_type=%p",(void*)struct_type);
487 /* Make sure that all fields declaring a size push it into the cbps */
488 xbt_dynar_foreach(fields_to_push,i, name) {
489 DEBUG1("struct_type=%p",(void*)struct_type);
490 VERB2("Push field '%s' into size stack of %p", name, (void*)struct_type);
491 gras_datadesc_cb_field_push(struct_type, name);
494 xbt_dynar_reset(fields_to_push);
496 gras_datadesc_struct_close(struct_type);
/* mismatch_error is what parse_statement() returns on the closing token of
   the body; any other error code is treated as a failure. */
497 if (errcode != mismatch_error) {
499 return NULL; /* FIXME: LEAK! */
503 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
504 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
505 gras_ddt_parse_text);
507 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
509 xbt_dynar_free(&identifiers);
510 xbt_dynar_free(&fields_to_push);
/*
 * Parse a typedef statement.
 *
 * The visible code reads the modifiers of the aliased type, parses a struct
 * definition with parse_struct(), reads modifiers again, checks that the
 * next token is a WORD (the alias name) and finally reports that typedefs
 * are not handled yet.
 * NOTE(review): the conditions around parse_struct(), around the second
 * parse_type_modifier() call and around the "reference without annotation"
 * error, as well as the return statement, are not visible in this excerpt --
 * confirm against the full source.
 *
 * definition: text being parsed, forwarded to parse_struct().
 */
515 static gras_datadesc_type_t parse_typedef(char *definition) {
517 s_type_modifier_t tm;
519 gras_datadesc_type_t struct_desc=NULL;
520 gras_datadesc_type_t typedef_desc=NULL;
523 memset(&tm,0,sizeof(tm));
525 /* get the aliased type */
526 parse_type_modifier(&tm);
529 struct_desc = parse_struct(definition);
/* modifiers (e.g. stars) found between the aliased type and the alias name */
532 parse_type_modifier(&tm);
535 PARSE_ERROR0("Cannot handle reference without annotation");
537 /* get the aliasing name */
538 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
539 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
540 gras_ddt_parse_text);
542 /* (FIXME: should) build the alias */
543 PARSE_ERROR0("Cannot handle typedef yet");
551 * gras_datadesc_parse:
553 * Create a datadescription from the result of parsing the C type description
556 gras_datadesc_parse(const char *name,
557 const char *C_statement) {
559 gras_datadesc_type_t res=NULL;
561 int semicolon_count=0;
562 int def_count,C_count;
565 /* reput the \n in place for debug */
566 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
567 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
569 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
570 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
571 definition[def_count++] = C_statement[C_count];
572 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
573 definition[def_count++] = '\n';
576 definition[def_count] = '\0';
579 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
580 gras_ddt_parse_pointer_string_init(definition);
582 /* Do I have a typedef, or a raw struct ?*/
583 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
585 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
586 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
587 res = parse_struct(definition);
589 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
590 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
591 res = parse_typedef(definition);
594 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
598 gras_ddt_parse_pointer_string_close();
599 VERB0("end of _gras_ddt_type_parse()");
600 xbt_free(definition);
601 /* register it under the name provided as symbol */
602 if (strcmp(res->name,name)) {
603 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",