3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Copyright (c) 2003 Arnaud Legrand. */
6 /* Copyright (c) 2003, 2004 Martin Quinson. */
7 /* All rights reserved. */
9 /* This program is free software; you can redistribute it and/or modify it
10 * under the terms of the license (GNU LGPL) which comes with this package. */
12 #include <ctype.h> /* isdigit */
14 #include "gras/DataDesc/datadesc_private.h"
15 #include "gras/DataDesc/ddt_parse.yy.h"
/* Logging category for this file: ddt_parse, declared under the datadesc
 * category. The DEBUGn, VERBn and ERRORn calls below presumably log through
 * it (the macro definitions are not part of this file). */
17 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
18 "Parsing C data structures to build GRAS data description");
/* Set of C type modifiers collected while parsing one declaration.
 * The member list is not visible in this listing. The rest of the file uses
 * is_unsigned, is_short, is_struct, is_union and is_enum as flags set to 1,
 * and is_long and is_ref as counters incremented once per keyword or star
 * encountered. */
20 typedef struct s_type_modifier{
30 } s_type_modifier_t,*type_modifier_t;
/* Description of one parsed identifier. Only the type member is visible in
 * this listing.
 * NOTE(review): the code below reads and writes type_name, name and tm on
 * s_identifier_t values, which presumably is the typedef name given to this
 * struct -- confirm against the complete definition. */
32 typedef struct s_field {
33 gras_datadesc_type_t type;
/* Declared here, defined elsewhere. The parsing functions below compare it
 * against keywords with strcmp and copy it with strdup. */
39 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record the type modifiers found at the current parser position.
 *
 * Looks at the current token (gras_ddt_parse_tok_num and gras_ddt_parse_text)
 * and updates the pointed structure accordingly: a star increments is_ref,
 * the keyword long increments is_long, and unsigned, short, struct, union and
 * enum each set their flag to 1. The next token is then fetched with
 * gras_ddt_parse_lex_n_dump().
 *
 * NOTE(review): the looping construct and the exit statements are not
 * visible in this listing. Judging from the debug messages, scanning stops
 * at the first token that is neither a word nor a star -- confirm.
 *
 * type_modifier: structure updated in place. Flags are only ever set or
 *                incremented here, so it must be initialised by the caller
 *                (the callers in this file zero it with memset).
 */
42 static void parse_type_modifier(type_modifier_t type_modifier) {
45 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
46 /* This is only used when parsing 'short *' since this function returns when int, float, double,... is encountered */
47 DEBUG0("This is a reference");
48 type_modifier->is_ref++;
50 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
51 DEBUG0("This is an unsigned");
52 type_modifier->is_unsigned = 1;
54 } else if (!strcmp(gras_ddt_parse_text,"short")) {
55 DEBUG0("This is short");
56 type_modifier->is_short = 1;
58 } else if (!strcmp(gras_ddt_parse_text,"long")) {
59 DEBUG0("This is long");
60 type_modifier->is_long++; /* handle "long long" */
62 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
63 DEBUG0("This is a struct");
64 type_modifier->is_struct = 1;
66 } else if (!strcmp(gras_ddt_parse_text,"union")) {
67 DEBUG0("This is an union");
68 type_modifier->is_union = 1;
70 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
71 DEBUG0("This is an enum");
72 type_modifier->is_enum = 1;
74 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
78 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Advance to the next token and check whether it can still be a modifier. */
82 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
83 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
84 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
85 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Print the content of a type modifier set on standard output.
 *
 * Each flag that is set produces one parenthesised tag. The is_long and
 * is_ref counters produce their tag once per counted occurrence.
 *
 * tm: modifier set to display, passed by value.
 */
92 static void print_type_modifier(s_type_modifier_t tm) {
96 if (tm.is_unsigned) printf("(unsigned) ");
97 if (tm.is_short) printf("(short) ");
98 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
100 if(tm.is_struct) printf("(struct) ");
101 if(tm.is_enum) printf("(enum) ");
102 if(tm.is_union) printf("(union) ");
104 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/* Replace the last pushed identifier by a fixed-size array of it.
 *
 * Pops the last identifier from dynar, builds a new identifier whose
 * type_name is the former type_name followed by the size between brackets
 * and whose type comes from gras_datadesc_array_fixed(), keeps the former
 * symbol name, frees the former type_name and pushes the new identifier.
 *
 * dynar: identifiers parsed so far; its last element is replaced.
 * size:  number of elements of the array.
 *
 * NOTE(review): the buffer is sized from strlen(former.type->name)+20 but
 * filled by an unbounded sprintf of former.type_name. This is only safe if
 * type_name is never longer than type->name and the printed size, brackets
 * and terminator fit in 20 bytes -- confirm, or size the buffer from
 * former.type_name and use snprintf.
 */
108 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
109 s_identifier_t former,array;
110 memset(&array,0,sizeof(array));
113 xbt_dynar_pop(dynar,&former);
114 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+20);
115 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
116 size,former.type_name);
117 sprintf(array.type_name,"%s[%ld]",former.type_name,size);
118 free(former.type_name);
120 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclarations are ignored */
/* The symbol name string is handed over to the new identifier, not copied. */
121 array.name = former.name;
123 xbt_dynar_push(dynar,&array);
/* Replace the last pushed identifier by a reference (pointer) to it.
 *
 * Pops the last identifier from dynar, builds a new identifier whose
 * type_name is the former type_name followed by a star and whose type comes
 * from gras_datadesc_ref(), keeps the former symbol name, frees the former
 * type_name and pushes the new identifier.
 *
 * dynar: identifiers parsed so far; its last element is replaced.
 *
 * NOTE(review): the buffer is sized from strlen(former.type->name)+2 (room
 * for the star and the terminator) but filled by an unbounded sprintf of
 * former.type_name. This is only safe if type_name is never longer than
 * type->name -- confirm, or size the buffer from former.type_name.
 */
126 static void change_to_ref(xbt_dynar_t dynar) {
127 s_identifier_t former,ref;
128 memset(&ref,0,sizeof(ref));
131 xbt_dynar_pop(dynar,&former);
132 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
133 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
134 sprintf(ref.type_name,"%s*",former.type_name);
135 free(former.type_name);
137 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclarations are ignored */
/* The symbol name string is handed over to the new identifier, not copied. */
138 ref.name = former.name;
140 xbt_dynar_push(dynar,&ref);
/* Replace the last pushed identifier by the type returned by
 * gras_datadesc_ref_pop_arr() applied to its type.
 *
 * Pops the last identifier from dynar, builds the new type, copies the name
 * of that new type into type_name with strdup, keeps the former symbol name,
 * frees the former type_name and pushes the new identifier.
 *
 * dynar: identifiers parsed so far; its last element is replaced.
 *
 * NOTE(review): ref.type is dereferenced right after the call without any
 * visible NULL check -- confirm gras_datadesc_ref_pop_arr() cannot fail.
 */
144 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
145 s_identifier_t former,ref;
146 memset(&ref,0,sizeof(ref));
149 xbt_dynar_pop(dynar,&former);
150 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclarations are ignored */
151 ref.type_name = (char*)strdup(ref.type->name);
152 ref.name = former.name;
154 free(former.type_name);
156 xbt_dynar_push(dynar,&ref);
/* Parse one statement of a struct body: a base type followed by one or more
 * declared symbols, each optionally followed by a fixed array size between
 * brackets or by a GRAS_ANNOTE(key,value) annotation.
 *
 * One s_identifier_t is pushed into identifiers per declared symbol. Array
 * suffixes and size annotations then rewrite the last pushed identifier
 * through change_to_fixed_array(), change_to_ref() and
 * change_to_ref_pop_array().
 *
 * definition:     text being parsed. In the visible code it is only used to
 *                 print the current statement in debug mode; the semicolon
 *                 ending the statement is temporarily overwritten by a
 *                 terminator and then restored.
 * identifiers:    dynar of s_identifier_t receiving the parsed symbols.
 * fields_to_push: dynar of strings receiving the value of some size
 *                 annotations (the pushed pointer comes from strdup).
 *
 * Returns mismatch_error when the first token is the closing brace of the
 * enclosing body. The other return statements are not visible in this
 * listing.
 *
 * Token names as used by the messages of this file: LA and RA are braces,
 * LB and RB are square brackets, LP and RP are parentheses, and the token
 * called COLON is reported to the user as a comma.
 *
 * NOTE(review): many short lines (closing braces, else branches, local
 * declarations such as buffname, keyval, end, p and colon_pos) are missing
 * from this listing, so the exact nesting cannot be confirmed from here.
 */
160 static xbt_error_t parse_statement(char *definition,
161 xbt_dynar_t identifiers,
162 xbt_dynar_t fields_to_push) {
165 s_identifier_t identifier;
167 int expect_id_separator = 0;
170 memset(&identifier,0,sizeof(identifier));
/* Fetch the first token of the statement. */
172 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
173 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
175 return mismatch_error; /* end of the englobing structure or union */
/* Debug only: display the statement up to its semicolon. */
178 if (XBT_LOG_ISENABLED(ddt_parse,xbt_log_priority_debug)) {
180 for (colon_pos = gras_ddt_parse_col_pos;
181 definition[colon_pos] != ';';
183 definition[colon_pos] = '\0';
184 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
186 definition+gras_ddt_parse_col_pos,
187 gras_ddt_parse_col_pos);
188 definition[colon_pos] = ';';
191 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
192 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
193 gras_ddt_parse_text);
195 /**** get the type modifier of this statement ****/
196 parse_type_modifier(&identifier.tm);
198 /* FIXME: This does not detect recursive definitions at all? */
/* NOTE(review): if PARSE_ERROR0 does not return (its definition is not
 * visible here), the struct, union and enum branches further down are
 * unreachable -- confirm. */
199 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
200 PARSE_ERROR0("Cannot handle recursive type definition yet");
202 /**** get the base type, giving "short a" the needed love ****/
203 if (!identifier.tm.is_union &&
204 !identifier.tm.is_enum &&
205 !identifier.tm.is_struct &&
207 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
209 strcmp(gras_ddt_parse_text,"char") &&
210 strcmp(gras_ddt_parse_text,"float") &&
211 strcmp(gras_ddt_parse_text,"double") &&
212 strcmp(gras_ddt_parse_text,"int") ) {
214 /* the user omitted "int" */
215 identifier.type_name=(char*)strdup("int");
216 DEBUG0("the base type is 'int', which were omited (you vicious user)");
218 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
219 DEBUG1("the base type is '%s'",identifier.type_name);
220 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
223 /**** build the base type for later use ****/
224 if (identifier.tm.is_union) {
225 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
227 } else if (identifier.tm.is_enum) {
228 PARSE_ERROR0("Cannot handle enum yet");
230 } else if (identifier.tm.is_struct) {
/* NOTE(review): unbounded sprintf; the declaration and size of buffname are
 * not visible in this listing -- confirm it cannot overflow. */
231 sprintf(buffname,"struct %s",identifier.type_name);
232 identifier.type = gras_datadesc_struct(buffname); /* Get created when does not exist */
234 } else if (identifier.tm.is_unsigned) {
235 if (!strcmp(identifier.type_name,"int")) {
236 if (identifier.tm.is_long == 2) {
237 identifier.type = gras_datadesc_by_name("unsigned long long int");
238 } else if (identifier.tm.is_long) {
239 identifier.type = gras_datadesc_by_name("unsigned long int");
240 } else if (identifier.tm.is_short) {
241 identifier.type = gras_datadesc_by_name("unsigned short int");
243 identifier.type = gras_datadesc_by_name("unsigned int");
246 } else if (!strcmp(identifier.type_name, "char")) {
247 identifier.type = gras_datadesc_by_name("unsigned char");
249 } else { /* impossible, gcc parses this before us */
253 } else if (!strcmp(identifier.type_name, "float")) {
254 /* no modifier allowed by gcc */
255 identifier.type = gras_datadesc_by_name("float");
257 } else if (!strcmp(identifier.type_name, "double")) {
258 if (identifier.tm.is_long)
259 PARSE_ERROR0("long double not portable and thus not handled");
261 identifier.type = gras_datadesc_by_name("double");
263 } else { /* signed integer elemental */
264 if (!strcmp(identifier.type_name,"int")) {
265 if (identifier.tm.is_long == 2) {
266 identifier.type = gras_datadesc_by_name("signed long long int");
267 } else if (identifier.tm.is_long) {
268 identifier.type = gras_datadesc_by_name("signed long int");
269 } else if (identifier.tm.is_short) {
270 identifier.type = gras_datadesc_by_name("signed short int");
272 identifier.type = gras_datadesc_by_name("int");
275 } else if (!strcmp(identifier.type_name, "char")) {
276 identifier.type = gras_datadesc_by_name("char");
/* Any other word is looked up as an already declared type. */
279 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
280 identifier.type = gras_datadesc_by_name(identifier.type_name);
281 if (!identifier.type)
282 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
285 /* Now identifier.type and identifier.name speak about the base type.
286 Stars are not eaten unless 'int' was omitted.
287 We will have to enhance it if we are in fact asked for array or reference */
289 /**** look for the symbols of this type ****/
/* One token is consumed per iteration until the semicolon is reached.
 * expect_id_separator is set once a symbol name has been pushed and cleared
 * when the separator following it is found. */
290 for(expect_id_separator = 0;
292 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
293 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
295 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
297 if(expect_id_separator) {
298 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
299 expect_id_separator = 0;
302 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
303 /* Handle fixed size arrays */
304 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
305 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
306 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
308 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The whole token must be a base 10 number. */
310 long int size=strtol(gras_ddt_parse_text, &end, 10);
312 if (end == gras_ddt_parse_text || *end != '\0')
313 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
315 /* replace the previously pushed type to an array of it */
316 change_to_fixed_array(identifiers,size);
318 /* eat the closing bracket */
319 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
320 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
321 PARSE_ERROR0("Unparsable size of array");
322 DEBUG1("Fixed size array, size=%ld",size);
325 PARSE_ERROR0("Unparsable size of array");
327 /* End of fixed size arrays handling */
329 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
330 /* Handle annotation */
331 s_identifier_t array;
332 char *keyname = NULL;
334 memset(&array,0,sizeof(array));
335 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
336 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
338 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
339 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
340 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
/* Each of the empty-bodied loops below skips EMPTY tokens. */
342 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
344 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
345 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
/* NOTE(review): no free of keyname is visible in this listing -- confirm it
 * is released once the annotation has been handled. */
346 keyname = (char*)strdup(gras_ddt_parse_text);
348 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
350 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
351 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
353 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
355 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
356 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
357 keyval = (char*)strdup(gras_ddt_parse_text);
359 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
361 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
362 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
364 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
366 DEBUG2("Anotation: %s=%s",keyname,keyval);
367 if (!strcmp(keyname,"size")) {
/* A size annotation consumes one pending dereferencing level. */
369 if (!identifier.tm.is_ref)
370 PARSE_ERROR0("Size annotation for a field not being a reference");
371 identifier.tm.is_ref--;
/* Size 1: plain reference to a single element. */
373 if (!strcmp(keyval,"1")) {
374 change_to_ref(identifiers);
/* NOTE(review): the body of this scan and the conditions selecting between
 * the two cases below are not visible here. Presumably a fully numeric value
 * gives a reference to a fixed-size array, and anything else is taken as the
 * name of the field holding the size -- confirm. atoi reports no conversion
 * error. */
380 for (p = keyval; *p != '\0'; p++)
384 change_to_fixed_array(identifiers,atoi(keyval));
385 change_to_ref(identifiers);
390 change_to_ref_pop_array(identifiers);
/* The keyval pointer itself is stored in the dynar. */
391 xbt_dynar_push(fields_to_push,&keyval);
398 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
402 /* End of annotation handling */
404 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
406 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
407 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
410 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
411 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
415 /* found a symbol name. Build the type and push it to dynar */
416 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
418 identifier.name=(char*)strdup(gras_ddt_parse_text);
419 DEBUG1("Found the identifier \"%s\"",identifier.name);
/* The identifier is copied by value into the dynar. */
421 xbt_dynar_push(identifiers, &identifier);
422 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
423 expect_id_separator = 1;
427 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a struct definition and build the matching data description.
 *
 * On entry the current token is either the struct name or, for an anonymous
 * struct, directly the opening brace. The function then:
 *  - creates the struct descriptor with gras_datadesc_struct(), named after
 *    the current word or after a generated anonymous name;
 *  - requires the opening brace;
 *  - calls parse_statement() for as long as it returns no_error, appending
 *    each identifier found to the struct and registering each name left in
 *    fields_to_push through gras_datadesc_cb_field_push();
 *  - closes the struct descriptor, checks that the statement loop ended with
 *    mismatch_error, requires the closing brace and reads the next token;
 *  - frees both work dynars.
 *
 * definition: text being parsed, forwarded to parse_statement().
 *
 * Returns NULL when the statement loop ended with an error other than
 * mismatch_error. The final return statement is not visible in this listing
 * (presumably struct_type -- confirm).
 *
 * NOTE(review): declarations of buffname, errcode, i and name are missing
 * from this listing.
 */
434 static gras_datadesc_type_t parse_struct(char *definition) {
/* Counter used to generate names for anonymous structs; kept across calls. */
438 static int anonymous_struct=0;
440 xbt_dynar_t identifiers;
441 s_identifier_t field;
444 xbt_dynar_t fields_to_push;
447 gras_datadesc_type_t struct_type;
450 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
451 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
453 /* Create the struct descriptor */
454 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
455 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
456 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
457 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is post-incremented while building the name, so
 * the message on the next line shows a number one higher than the one used
 * in the name. The sprintf is unbounded; the size of buffname is not visible
 * here. */
459 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
460 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
461 struct_type = gras_datadesc_struct(buffname);
464 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
465 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
466 gras_ddt_parse_text);
468 /* Parse the identifiers */
469 for (errcode=parse_statement(definition,identifiers,fields_to_push);
470 errcode == no_error ;
471 errcode=parse_statement(definition,identifiers,fields_to_push)) {
473 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
474 /* append the identifiers we've found */
475 xbt_dynar_foreach(identifiers,i, field) {
/* The condition guarding this error is not visible in this listing; given
 * the message it presumably tests field.tm.is_ref. */
477 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
478 field.name,field.tm.is_ref);
480 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
481 gras_datadesc_struct_append(struct_type, field.name, field.type);
483 free(field.type_name);
/* Both dynars are emptied so that they can be reused for the next statement. */
486 xbt_dynar_reset(identifiers);
487 DEBUG1("struct_type=%p",(void*)struct_type);
489 /* Make sure that all fields declaring a size push it into the cbps */
490 xbt_dynar_foreach(fields_to_push,i, name) {
491 DEBUG1("struct_type=%p",(void*)struct_type);
492 VERB2("Push field '%s' into size stack of %p", name, (void*)struct_type);
493 gras_datadesc_cb_field_push(struct_type, name);
496 xbt_dynar_reset(fields_to_push);
498 gras_datadesc_struct_close(struct_type);
/* mismatch_error is what parse_statement() returns on the closing brace, so
 * any other value here is a real failure. On this path the two dynars
 * allocated above are not freed (the calls to xbt_dynar_free come later). */
499 if (errcode != mismatch_error) {
501 return NULL; /* FIXME: LEAK! */
505 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
506 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
507 gras_ddt_parse_text);
509 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
511 xbt_dynar_free(&identifiers);
512 xbt_dynar_free(&fields_to_push);
/* Parse a typedef statement (unfinished).
 *
 * Reads the modifiers of the aliased type, calls parse_struct(), reads
 * modifiers again, checks that the current token is a word (the alias name)
 * and then reports that typedefs are not handled yet, so no alias is built
 * in the visible code.
 *
 * definition: text being parsed, forwarded to parse_struct().
 *
 * NOTE(review): the conditions guarding the parse_struct() call and the
 * reference error, as well as the return statement, are not visible in this
 * listing. typedef_desc is initialised to NULL and never assigned in the
 * visible lines.
 */
517 static gras_datadesc_type_t parse_typedef(char *definition) {
519 s_type_modifier_t tm;
521 gras_datadesc_type_t struct_desc=NULL;
522 gras_datadesc_type_t typedef_desc=NULL;
525 memset(&tm,0,sizeof(tm));
527 /* get the aliased type */
528 parse_type_modifier(&tm);
531 struct_desc = parse_struct(definition);
/* Modifiers found after the struct body are accumulated into the same tm. */
534 parse_type_modifier(&tm);
537 PARSE_ERROR0("Cannot handle reference without annotation");
539 /* get the aliasing name */
540 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
541 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
542 gras_ddt_parse_text);
544 /* (FIXME: should) build the alias */
545 PARSE_ERROR0("Cannot handle typedef yet");
553 * gras_datadesc_parse:
555 * Create a data description from the result of parsing the C type description
/* Entry point of the parser.
 *
 * Copies C_statement into a freshly allocated buffer, adding a newline after
 * every semicolon and opening brace, initialises the lexer on that copy and
 * reads the first token. A leading struct keyword dispatches to
 * parse_struct(), a leading typedef keyword to parse_typedef(); anything
 * else is reported as an error. The lexer string is then closed, the name of
 * the resulting type is compared with the requested one (a difference is
 * reported as an error) and the lexer is destroyed.
 *
 * name:        symbol the resulting type is expected to be named after.
 * C_statement: text of the C declaration to parse; it is only read.
 *
 * NOTE(review): the return type, the declaration of definition, the line
 * incrementing semicolon_count and the return statement are missing from
 * this listing. res starts as NULL and parse_struct() can return NULL, yet
 * res->name is read without any visible NULL check -- confirm that every
 * failing path aborts before that point. No free of definition is visible
 * either.
 */
558 gras_datadesc_parse(const char *name,
559 const char *C_statement) {
561 gras_datadesc_type_t res=NULL;
563 int semicolon_count=0;
564 int def_count,C_count;
567 /* put the \n back in place for debug */
/* First pass: scan for the characters that will be followed by a newline, so
 * that the copy can be sized (the counting statement is not visible here). */
568 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
569 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
571 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
/* Second pass: copy, inserting the newlines. */
572 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
573 definition[def_count++] = C_statement[C_count];
574 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
575 definition[def_count++] = '\n';
578 definition[def_count] = '\0';
581 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
582 gras_ddt_parse_pointer_string_init(definition);
584 /* Do I have a typedef, or a raw struct ?*/
585 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
587 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
/* Skip the keyword so that the parser starts on the struct name or brace. */
588 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
589 res = parse_struct(definition);
591 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
592 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
593 res = parse_typedef(definition);
596 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
600 gras_ddt_parse_pointer_string_close();
601 VERB0("end of _gras_ddt_type_parse()");
603 /* register it under the name provided as symbol */
604 if (strcmp(res->name,name)) {
605 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",
609 gras_ddt_parse_lex_destroy();