3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Authors: Arnaud Legrand, Martin Quinson */
6 /* Copyright (C) 2003, 2004 Martin Quinson. */
8 /* This program is free software; you can redistribute it and/or modify it
9 under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <ctype.h> /* isdigit */
13 #include "gras/DataDesc/datadesc_private.h"
14 #include "gras/DataDesc/ddt_parse.yy.h"
/* Declare the "ddt_parse" log subcategory under "datadesc". It is the category
   tested by XBT_LOG_ISENABLED(ddt_parse, ...) further down; presumably the
   DEBUGn/VERBn/ERRORn calls of this file log to it by default -- TODO confirm
   against the xbt log macros. */
16 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
17 "Parsing C data structures to build GRAS data description");
/* Modifiers collected in front of a base type while parsing a declaration.
   The member declarations are not visible in this extract. The code of this
   file uses the members is_ref and is_long as counters (they are incremented
   once per '*' / per "long") and is_unsigned, is_short, is_struct, is_union
   and is_enum as flags that are set to 1. */
19 typedef struct s_type_modifier{
29 } s_type_modifier_t,*type_modifier_t;
/* Description of one parsed identifier. Only the `type` member (the data
   description of the identifier) is visible in this extract.
   NOTE(review): the rest of this file manipulates s_identifier_t values with
   the members type, type_name, name and tm -- presumably this is the struct
   behind that typedef; confirm against the full file. */
31 typedef struct s_field {
32 gras_datadesc_type_t type;
38 extern char *gras_ddt_parse_text; /* text of the token being considered in the parser (defined elsewhere) */
/* parse_type_modifier: record in *type_modifier the modifier carried by the
 * current token.
 *
 * The current token is read from the lexer state gras_ddt_parse_tok_num /
 * gras_ddt_parse_text. A '*' token and the word "long" increment a counter
 * (is_ref, is_long); "unsigned", "short", "struct", "union" and "enum" set the
 * matching flag to 1. The next token is then fetched with
 * gras_ddt_parse_lex_n_dump() and stored in gras_ddt_parse_tok_num.
 *
 * NOTE(review): several lines of this function are not visible in this
 * extract (closing braces, and possibly an enclosing loop and else branches).
 * The exact control flow around the two "Done with modifiers" messages must
 * be checked against the full file.
 */
41 static void parse_type_modifier(type_modifier_t type_modifier) {
44 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
45 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
46 DEBUG0("This is a reference");
47 type_modifier->is_ref++;
49 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
50 DEBUG0("This is an unsigned");
51 type_modifier->is_unsigned = 1;
53 } else if (!strcmp(gras_ddt_parse_text,"short")) {
54 DEBUG0("This is short");
55 type_modifier->is_short = 1;
57 } else if (!strcmp(gras_ddt_parse_text,"long")) {
58 DEBUG0("This is long");
59 type_modifier->is_long++; /* a counter, to handle "long long" */
61 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
62 DEBUG0("This is a struct");
63 type_modifier->is_struct = 1;
65 } else if (!strcmp(gras_ddt_parse_text,"union")) {
66 DEBUG0("This is an union");
67 type_modifier->is_union = 1;
69 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
70 DEBUG0("This is an enum");
71 type_modifier->is_enum = 1;
73 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
/* NOTE(review): the lines between this test and the message below are not
   visible here; do not assume the message belongs to the EMPTY branch. */
77 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Advance to the next token. */
81 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* A token that is neither a word nor a '*' cannot be a modifier. */
82 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
83 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
84 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* print_type_modifier: print the content of a type modifier with printf().
 *
 * One tag is printed per modifier: "(unsigned) " and "(short) " when the flag
 * is set, "(long) " once per counted "long", then "(struct) ", "(enum) ",
 * "(union) " when set, and "(ref) " once per counted '*'. No newline is
 * printed by the visible lines.
 *
 * tm: the modifier to display, passed by value.
 *
 * NOTE(review): the declaration of the loop index `i` is not visible in this
 * extract.
 */
91 static void print_type_modifier(s_type_modifier_t tm) {
95 if (tm.is_unsigned) printf("(unsigned) ");
96 if (tm.is_short) printf("(short) ");
97 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
99 if(tm.is_struct) printf("(struct) ");
100 if(tm.is_enum) printf("(enum) ");
101 if(tm.is_union) printf("(union) ");
103 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
107 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
108 s_identifier_t former,array;
109 memset(&array,0,sizeof(array));
112 xbt_dynar_pop(dynar,&former);
113 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+20);
114 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
115 size,former.type_name);
116 sprintf(array.type_name,"%s[%ld]",former.type_name,size);
117 xbt_free(former.type_name);
119 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclaration are ignored */
120 array.name = former.name;
122 xbt_dynar_push(dynar,&array);
125 static void change_to_ref(xbt_dynar_t dynar) {
126 s_identifier_t former,ref;
127 memset(&ref,0,sizeof(ref));
130 xbt_dynar_pop(dynar,&former);
131 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
132 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
133 sprintf(ref.type_name,"%s*",former.type_name);
134 xbt_free(former.type_name);
136 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclaration are ignored */
137 ref.name = former.name;
139 xbt_dynar_push(dynar,&ref);
143 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
144 s_identifier_t former,ref;
145 memset(&ref,0,sizeof(ref));
148 xbt_dynar_pop(dynar,&former);
149 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclaration are ignored */
150 ref.type_name = (char*)strdup(ref.type->name);
151 ref.name = former.name;
153 xbt_free(former.type_name);
155 xbt_dynar_push(dynar,&ref);
/* parse_statement: parse one declaration statement of a struct body, i.e. a
 * base type followed by one or more identifiers up to the ';' token.
 *
 * definition:     the text being parsed; in the visible code it is only used
 *                 to build a debug message
 * identifiers:    dynar receiving one s_identifier_t per identifier found; the
 *                 last pushed entry is replaced in place when an array size or
 *                 a "size" annotation follows the identifier
 * fields_to_push: dynar receiving the annotation value (keyval) right after
 *                 change_to_ref_pop_array() is applied
 *
 * Returns mismatch_error when the first token is GRAS_DDT_PARSE_TOKEN_RA (end
 * of the enclosing structure or union). The other return statements are not
 * visible in this extract.
 *
 * NOTE(review): many short lines of this function (declarations, closing
 * braces, else branches) are missing from this extract, and the behavior of
 * the PARSE_ERRORn macros is defined elsewhere -- check both against the full
 * file. Also note that, judging from the error messages, the token named
 * GRAS_DDT_PARSE_TOKEN_COLON is what the messages call a comma (',').
 */
159 static xbt_error_t parse_statement(char *definition,
160 xbt_dynar_t identifiers,
161 xbt_dynar_t fields_to_push) {
164 s_identifier_t identifier;
166 int expect_id_separator = 0;
169 memset(&identifier,0,sizeof(identifier));
/* Fetch the first token of the statement. */
171 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
172 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
174 return mismatch_error; /* end of the enclosing structure or union */
/* Debug only: temporarily cut `definition` at the next ';' so that the
   statement being parsed can be logged, then restore the ';'. */
177 if (XBT_LOG_ISENABLED(ddt_parse,xbt_log_priority_debug)) {
179 for (colon_pos = gras_ddt_parse_col_pos;
180 definition[colon_pos] != ';';
182 definition[colon_pos] = '\0';
183 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
185 definition+gras_ddt_parse_col_pos,
186 gras_ddt_parse_col_pos);
187 definition[colon_pos] = ';';
/* A statement has to start with a word (modifier or type name). */
190 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
191 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
192 gras_ddt_parse_text);
194 /**** get the type modifier of this statement ****/
195 parse_type_modifier(&identifier.tm);
197 /* FIXME: This does not detect recursive definitions at all? */
198 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
199 PARSE_ERROR0("Cannot handle recursive type definition yet");
201 /**** get the base type, giving "short a" the needed love ****/
/* Modifiers were given (short/long/unsigned) but the current word is not one
   of char, float, double or int: the base type is then taken to be "int". */
202 if (!identifier.tm.is_union &&
203 !identifier.tm.is_enum &&
204 !identifier.tm.is_struct &&
206 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
208 strcmp(gras_ddt_parse_text,"char") &&
209 strcmp(gras_ddt_parse_text,"float") &&
210 strcmp(gras_ddt_parse_text,"double") &&
211 strcmp(gras_ddt_parse_text,"int") ) {
213 /* the user omitted "int" */
214 identifier.type_name=(char*)strdup("int");
215 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Here the current word is used as the base type name, and the token after
   it is fetched. */
217 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
218 DEBUG1("the base type is '%s'",identifier.type_name);
219 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
222 /**** build the base type for later use ****/
223 if (identifier.tm.is_union) {
224 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
226 } else if (identifier.tm.is_enum) {
227 PARSE_ERROR0("Cannot handle enum yet");
229 } else if (identifier.tm.is_struct) {
/* Struct descriptions are registered under the name "struct <name>". */
230 sprintf(buffname,"struct %s",identifier.type_name);
231 identifier.type = gras_datadesc_struct(buffname); /* Get created when does not exist */
233 } else if (identifier.tm.is_unsigned) {
/* Unsigned integers: pick the description matching the long/short modifiers. */
234 if (!strcmp(identifier.type_name,"int")) {
235 if (identifier.tm.is_long == 2) {
236 identifier.type = gras_datadesc_by_name("unsigned long long int");
237 } else if (identifier.tm.is_long) {
238 identifier.type = gras_datadesc_by_name("unsigned long int");
239 } else if (identifier.tm.is_short) {
240 identifier.type = gras_datadesc_by_name("unsigned short int");
242 identifier.type = gras_datadesc_by_name("unsigned int");
245 } else if (!strcmp(identifier.type_name, "char")) {
246 identifier.type = gras_datadesc_by_name("unsigned char");
248 } else { /* impossible: gcc parses this before us */
252 } else if (!strcmp(identifier.type_name, "float")) {
253 /* no modifier allowed by gcc */
254 identifier.type = gras_datadesc_by_name("float");
256 } else if (!strcmp(identifier.type_name, "double")) {
257 if (identifier.tm.is_long)
258 PARSE_ERROR0("long double not portable and thus not handled");
260 identifier.type = gras_datadesc_by_name("double");
262 } else { /* signed integer elemental */
263 if (!strcmp(identifier.type_name,"int")) {
264 if (identifier.tm.is_long == 2) {
265 identifier.type = gras_datadesc_by_name("signed long long int");
266 } else if (identifier.tm.is_long) {
267 identifier.type = gras_datadesc_by_name("signed long int");
268 } else if (identifier.tm.is_short) {
269 identifier.type = gras_datadesc_by_name("signed short int");
271 identifier.type = gras_datadesc_by_name("int");
274 } else if (!strcmp(identifier.type_name, "char")) {
275 identifier.type = gras_datadesc_by_name("char");
/* Any other name is looked up as an already declared description. */
278 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
279 identifier.type = gras_datadesc_by_name(identifier.type_name);
280 if (!identifier.type)
281 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
284 /* Now identifier.type and identifier.name speak about the base type.
285 Stars are not eaten unless 'int' was omitted.
286 We will have to enhance it if we are in fact asked for array or reference */
288 /**** look for the symbols of this type ****/
/* Consume tokens until the ';' ending the statement. expect_id_separator is
   set to 1 right after an identifier was pushed: what follows must then be a
   separator, an array size or an annotation. */
289 for(expect_id_separator = 0;
291 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
292 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
294 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
296 if(expect_id_separator) {
297 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
/* Separator found: another identifier may follow. */
298 expect_id_separator = 0;
301 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
302 /* Handle fixed size arrays */
303 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
304 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
305 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
307 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The whole token must be a base-10 number. */
309 long int size=strtol(gras_ddt_parse_text, &end, 10);
311 if (end == gras_ddt_parse_text || *end != '\0')
312 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
314 /* replace the previously pushed type to an array of it */
315 change_to_fixed_array(identifiers,size);
317 /* eat the closing bracket */
318 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
319 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
320 PARSE_ERROR0("Unparsable size of array");
321 DEBUG1("Fixed size array, size=%ld",size);
324 PARSE_ERROR0("Unparsable size of array");
326 /* End of fixed size arrays handling */
328 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
329 /* Handle annotation */
/* Expected token sequence: GRAS_ANNOTE ( keyname , keyval ), with EMPTY
   tokens skipped before the key name, the separator, the value and the
   closing parenthesis. */
330 s_identifier_t array;
331 char *keyname = NULL;
333 memset(&array,0,sizeof(array));
334 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
335 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
337 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
338 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
339 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
341 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
343 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
344 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
345 keyname = (char*)strdup(gras_ddt_parse_text);
347 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
349 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
350 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
352 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
354 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
355 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
356 keyval = (char*)strdup(gras_ddt_parse_text);
358 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
360 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
361 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
363 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
365 DEBUG2("Anotation: %s=%s",keyname,keyval);
366 if (!strcmp(keyname,"size")) {
/* Each size annotation consumes one level of reference ('*'). */
368 if (!identifier.tm.is_ref)
369 PARSE_ERROR0("Size annotation for a field not being a reference");
370 identifier.tm.is_ref--;
372 if (!strcmp(keyval,"1")) {
/* size "1": plain reference to the previously pushed type. */
373 change_to_ref(identifiers);
/* NOTE(review): the body of this scan of keyval and the tests selecting the
   branches below are not visible here. Presumably a numeric keyval gives a
   reference to a fixed-size array, and any other keyval is pushed to
   fields_to_push -- confirm against the full file. */
379 for (p = keyval; *p != '\0'; p++)
383 change_to_fixed_array(identifiers,atoi(keyval));
384 change_to_ref(identifiers);
389 change_to_ref_pop_array(identifiers);
390 xbt_dynar_push(fields_to_push,&keyval);
397 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
401 /* End of annotation handling */
403 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
405 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
406 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
409 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
410 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
414 /* found a symbol name. Build the type and push it to dynar */
415 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
417 identifier.name=(char*)strdup(gras_ddt_parse_text);
418 DEBUG1("Found the identifier \"%s\"",identifier.name);
/* The dynar receives a copy of `identifier`. */
420 xbt_dynar_push(identifiers, &identifier);
421 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
422 expect_id_separator = 1;
426 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* parse_struct: parse a struct definition and build its data description.
 *
 * On entry the current token is either the struct name (a word) or, for an
 * anonymous struct, the token that must be '{'. The description is created
 * with gras_datadesc_struct(), every statement of the body is parsed with
 * parse_statement(), each identifier found is appended as a field with
 * gras_datadesc_struct_append(), the names collected in fields_to_push are
 * registered with gras_datadesc_cb_field_push(), and the description is
 * closed with gras_datadesc_struct_close().
 *
 * definition: text being parsed, forwarded to parse_statement()
 *
 * Returns NULL when parse_statement() stopped on anything else than
 * mismatch_error. NOTE(review): the normal return statement is not visible in
 * this extract (presumably struct_type), and neither are several declarations
 * and closing braces; the nesting of the loops below must be checked against
 * the full file.
 */
433 static gras_datadesc_type_t parse_struct(char *definition) {
/* Counter used to give a distinct name to each anonymous struct. */
437 static int anonymous_struct=0;
439 xbt_dynar_t identifiers;
440 s_identifier_t field;
443 xbt_dynar_t fields_to_push;
446 gras_datadesc_type_t struct_type;
449 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
450 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
452 /* Create the struct descriptor */
453 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
454 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
455 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
456 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is incremented while building the name, so the
   number logged below is one more than the number used in the name. */
458 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
459 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
460 struct_type = gras_datadesc_struct(buffname);
463 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
464 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
465 gras_ddt_parse_text);
467 /* Parse the identifiers */
/* One iteration per statement, for as long as parse_statement() succeeds. */
468 for (errcode=parse_statement(definition,identifiers,fields_to_push);
469 errcode == no_error ;
470 errcode=parse_statement(definition,identifiers,fields_to_push)) {
472 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
473 /* append the identifiers we've found */
474 xbt_dynar_foreach(identifiers,i, field) {
/* NOTE(review): the test guarding this error is not visible; from the message
   it presumably fires when field.tm.is_ref is still non-zero. */
476 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
477 field.name,field.tm.is_ref);
479 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
480 gras_datadesc_struct_append(struct_type, field.name, field.type);
/* The strings allocated while parsing the statement are released here. */
481 xbt_free(field.name);
482 xbt_free(field.type_name);
485 xbt_dynar_reset(identifiers);
486 DEBUG1("struct_type=%p",(void*)struct_type);
488 /* Make sure that all fields declaring a size push it into the cbps */
489 xbt_dynar_foreach(fields_to_push,i, name) {
490 DEBUG1("struct_type=%p",(void*)struct_type);
491 VERB2("Push field '%s' into size stack of %p", name, (void*)struct_type);
492 gras_datadesc_cb_field_push(struct_type, name);
495 xbt_dynar_reset(fields_to_push);
497 gras_datadesc_struct_close(struct_type);
/* parse_statement() reports the closing '}' as mismatch_error; any other code
   is a real failure. */
498 if (errcode != mismatch_error) {
500 return NULL; /* FIXME: LEAK! (the two dynars are not freed on this path) */
504 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
505 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
506 gras_ddt_parse_text);
/* Step over the '}' so that the caller sees the token following the struct. */
508 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
510 xbt_dynar_free(&identifiers);
511 xbt_dynar_free(&fields_to_push);
/* parse_typedef: parse a typedef statement (not fully supported yet).
 *
 * The modifiers of the aliased type are read with parse_type_modifier(), the
 * struct definition is parsed with parse_struct(), modifiers are read again,
 * and the current token is then required to be a word (the alias name).
 * The last visible statement is PARSE_ERROR0("Cannot handle typedef yet").
 *
 * definition: text being parsed, forwarded to parse_struct()
 *
 * NOTE(review): the tests guarding the parse_struct() call and the
 * "reference without annotation" error, and the return statement, are not
 * visible in this extract.
 */
516 static gras_datadesc_type_t parse_typedef(char *definition) {
518 s_type_modifier_t tm;
520 gras_datadesc_type_t struct_desc=NULL;
521 gras_datadesc_type_t typedef_desc=NULL;
524 memset(&tm,0,sizeof(tm));
526 /* get the aliased type */
527 parse_type_modifier(&tm);
530 struct_desc = parse_struct(definition);
/* Modifiers found after the struct body are accumulated in the same tm. */
533 parse_type_modifier(&tm);
536 PARSE_ERROR0("Cannot handle reference without annotation");
538 /* get the aliasing name */
539 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
540 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
541 gras_ddt_parse_text);
543 /* (FIXME: should) build the alias */
544 PARSE_ERROR0("Cannot handle typedef yet");
552 * gras_datadesc_parse:
554 * Create a datadescription from the result of parsing the C type description
557 gras_datadesc_parse(const char *name,
558 const char *C_statement) {
560 gras_datadesc_type_t res=NULL;
562 int semicolon_count=0;
563 int def_count,C_count;
566 /* reput the \n in place for debug */
567 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
568 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
570 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
571 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
572 definition[def_count++] = C_statement[C_count];
573 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
574 definition[def_count++] = '\n';
577 definition[def_count] = '\0';
580 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
581 gras_ddt_parse_pointer_string_init(definition);
583 /* Do I have a typedef, or a raw struct ?*/
584 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
586 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
587 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
588 res = parse_struct(definition);
590 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
591 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
592 res = parse_typedef(definition);
595 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
599 gras_ddt_parse_pointer_string_close();
600 VERB0("end of _gras_ddt_type_parse()");
601 xbt_free(definition);
602 /* register it under the name provided as symbol */
603 if (strcmp(res->name,name)) {
604 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",