3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Copyright (c) 2003 Arnaud Legrand. */
6 /* Copyright (c) 2003, 2004 Martin Quinson. */
7 /* All rights reserved. */
9 /* This program is free software; you can redistribute it and/or modify it
10 * under the terms of the license (GNU LGPL) which comes with this package. */
12 #include <ctype.h> /* isdigit */
15 #include "gras/DataDesc/datadesc_private.h"
16 #include "gras/DataDesc/ddt_parse.yy.h"
/* Declare the log sub-category gras_ddt_parse, child of gras_ddt.
 * It is the category tested by XBT_LOG_ISENABLED() in parse_statement();
 * presumably it is also the default category of the DEBUGn/VERBn/ERRORn
 * calls of this file -- confirm against the xbt log macros. */
18 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(gras_ddt_parse,gras_ddt,
19 "Parsing C data structures to build GRAS data description");
/* Modifiers seen in front of a base type.
 * The member declarations are not visible in this listing. The code of this
 * file uses the members is_unsigned, is_short, is_struct, is_union and
 * is_enum as flags, and is_long and is_ref as counters (they are
 * incremented, e.g. for "long long" or for several '*'). */
21 typedef struct s_type_modifier{
31 } s_type_modifier_t,*type_modifier_t;
/* Beginning of the record describing one parsed symbol. Only the 'type'
 * member is visible; the rest of the declaration, including the typedef
 * name, is missing from this listing.
 * NOTE(review): presumably this is the struct behind s_identifier_t, whose
 * members type, type_name, name and tm are used below -- confirm. */
33 typedef struct s_field {
34 gras_datadesc_type_t type;
/* Defined outside this file (presumably by the generated lexer whose header
 * ddt_parse.yy.h is included above -- confirm). The parsing functions below
 * compare it with keywords and copy it as type, field and annotation names. */
40 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record into *type_modifier the modifiers found at the current position.
 *
 * The visible branches inspect the current token (gras_ddt_parse_tok_num and
 * gras_ddt_parse_text): a '*' token increments is_ref; the words "unsigned",
 * "short", "struct", "union" and "enum" set the matching flag; "long"
 * increments is_long so that "long long" can be told apart from "long".
 * The next token is then read with gras_ddt_parse_lex_n_dump().
 *
 * NOTE(review): several original lines (loop construct, else/break lines,
 * closing braces) are missing from this listing, so the exact loop and exit
 * structure cannot be read from here.
 */
43 static void parse_type_modifier(type_modifier_t type_modifier) {
46 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
47 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
48 DEBUG0("This is a reference");
49 type_modifier->is_ref++;
51 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
52 DEBUG0("This is an unsigned");
53 type_modifier->is_unsigned = 1;
55 } else if (!strcmp(gras_ddt_parse_text,"short")) {
56 DEBUG0("This is short");
57 type_modifier->is_short = 1;
59 } else if (!strcmp(gras_ddt_parse_text,"long")) {
60 DEBUG0("This is long");
61 type_modifier->is_long++; /* handle "long long" */
63 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
64 DEBUG0("This is a struct");
65 type_modifier->is_struct = 1;
67 } else if (!strcmp(gras_ddt_parse_text,"union")) {
68 DEBUG0("This is an union");
69 type_modifier->is_union = 1;
71 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
72 DEBUG0("This is an enum");
73 type_modifier->is_enum = 1;
75 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
/* The body of the EMPTY branch and the head of the final branch are not
 * visible; the message below belongs to a branch reached when the token is
 * none of the modifiers above. */
79 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Read the next token; one that is neither a word nor a '*' is logged as the
 * end of the modifier list (the statement leaving the loop is not visible). */
83 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
84 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
85 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
86 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Print with printf() one parenthesised tag per modifier recorded in tm
 * (received by value). "(long) " is printed tm.is_long times and "(ref) "
 * tm.is_ref times; the other tags are printed at most once.
 * The declaration of the loop index i is not visible in this listing. */
93 static void print_type_modifier(s_type_modifier_t tm) {
97 if (tm.is_unsigned) printf("(unsigned) ");
98 if (tm.is_short) printf("(short) ");
99 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
101 if(tm.is_struct) printf("(struct) ");
102 if(tm.is_enum) printf("(enum) ");
103 if(tm.is_union) printf("(union) ");
105 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/* Replace the last identifier of dynar by a fixed-size array of it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped, and the array
 *        version is pushed back in its place.
 * size:  number of elements, passed to gras_datadesc_array_fixed().
 *
 * The array type name is built from a "u ", "s " and/or "l " prefix (one per
 * modifier set on the former identifier) followed by "[size]"; the former
 * type_name string is freed and the field name pointer is handed over to the
 * new element.
 *
 * NOTE(review): the buffer is sized from former.type->name (+48), while the
 * debug message uses former.type_name. The last sprintf() arguments are not
 * visible in this listing; if they print former.type_name, the two lengths
 * may differ -- confirm that the buffer is always large enough.
 */
109 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
110 s_identifier_t former,array;
111 memset(&array,0,sizeof(array));
114 xbt_dynar_pop(dynar,&former);
115 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+48);
116 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
117 size,former.type_name);
118 sprintf(array.type_name,"%s%s%s%s[%ld]",
119 (former.tm.is_unsigned?"u ":""),
120 (former.tm.is_short?"s ":""),
121 (former.tm.is_long?"l ":""),
124 free(former.type_name);
126 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclarations are ignored */
127 array.name = former.name;
129 xbt_dynar_push(dynar,&array);
/* Replace the last identifier of dynar by a reference (pointer) to it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped, and the
 *        reference version is pushed back in its place.
 *
 * The new type name is the former type_name followed by '*'. The former
 * type_name string is freed and the field name pointer is handed over to the
 * new element.
 *
 * NOTE(review): the buffer is sized from strlen(former.type->name)+2 but is
 * filled from former.type_name. If type_name can be longer than type->name
 * this sprintf() overflows -- consider sizing from former.type_name.
 */
132 static void change_to_ref(xbt_dynar_t dynar) {
133 s_identifier_t former,ref;
134 memset(&ref,0,sizeof(ref));
137 xbt_dynar_pop(dynar,&former);
138 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
139 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
140 sprintf(ref.type_name,"%s*",former.type_name);
141 free(former.type_name);
143 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclarations are ignored */
144 ref.name = former.name;
146 xbt_dynar_push(dynar,&ref);
/* Replace the last identifier of dynar by the type returned by
 * gras_datadesc_ref_pop_arr() for it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped, and the new
 *        version is pushed back in its place.
 *
 * The new type_name is a strdup() of the name of the created type; the
 * former type_name string is freed and the field name pointer is handed over
 * to the new element.
 */
150 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
151 s_identifier_t former,ref;
152 memset(&ref,0,sizeof(ref));
155 xbt_dynar_pop(dynar,&former);
156 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclaration are ignored */
157 ref.type_name = (char*)strdup(ref.type->name);
158 ref.name = former.name;
160 free(former.type_name);
162 xbt_dynar_push(dynar,&ref);
/* Parse one statement (one field declaration line) of a struct body.
 *
 * definition:     text being parsed. It is only used here to build a debug
 *                 message; the ';' ending the statement is temporarily
 *                 overwritten with '\0' and then restored.
 * identifiers:    dynar of s_identifier_t receiving one element per symbol
 *                 declared by the statement. Array and annotation handling
 *                 rewrite the last pushed element in place.
 * fields_to_push: dynar of char* receiving the field names given in "size"
 *                 annotations; names starting with '*' are multipliers.
 *
 * Throws mismatch_error when the first token read is
 * GRAS_DDT_PARSE_TOKEN_RA; parse_struct() checks for that category around
 * its call. Other problems are reported through the PARSE_ERRORn macros.
 *
 * Note that GRAS_DDT_PARSE_TOKEN_COLON is the token which the error messages
 * of this function describe as a comma (',').
 *
 * NOTE(review): many original lines (else branches, closing braces, some
 * declarations such as buffname, keyval, end, p, colon_pos) are missing from
 * this listing; the comments below only describe the visible statements.
 */
166 static void parse_statement(char *definition,
167 xbt_dynar_t identifiers,
168 xbt_dynar_t fields_to_push) {
171 s_identifier_t identifier;
173 int expect_id_separator = 0;
176 memset(&identifier,0,sizeof(identifier));
/* Read the first token of the statement. */
178 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
179 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
181 THROW0(mismatch_error,0,"End of the englobing structure or union");
/* Debug only: cut the definition at the next ';' to log just this statement,
 * then put the ';' back. */
184 if (XBT_LOG_ISENABLED(gras_ddt_parse,xbt_log_priority_debug)) {
186 for (colon_pos = gras_ddt_parse_col_pos;
187 definition[colon_pos] != ';';
189 definition[colon_pos] = '\0';
190 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
192 definition+gras_ddt_parse_col_pos,
193 gras_ddt_parse_col_pos);
194 definition[colon_pos] = ';';
/* A statement has to start with a word (type modifier or type name). */
197 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
198 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
199 gras_ddt_parse_text);
201 /**** get the type modifier of this statement ****/
202 parse_type_modifier(&identifier.tm);
204 /* FIXME: This does not detect recursive definitions at all? */
205 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
206 PARSE_ERROR0("Cannot handle recursive type definition yet");
208 /**** get the base type, giving "short a" the needed love ****/
/* The condition below holds when a short/long/unsigned modifier was seen and
 * the current word is none of char, float, double or int. */
209 if (!identifier.tm.is_union &&
210 !identifier.tm.is_enum &&
211 !identifier.tm.is_struct &&
213 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
215 strcmp(gras_ddt_parse_text,"char") &&
216 strcmp(gras_ddt_parse_text,"float") &&
217 strcmp(gras_ddt_parse_text,"double") &&
218 strcmp(gras_ddt_parse_text,"int") ) {
220 /* the declaration omitted the "int" keyword */
221 identifier.type_name=(char*)strdup("int");
222 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Otherwise (the else line is not visible) the current word is the base type
 * name; it is copied and the next token is read. */
224 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
225 DEBUG1("the base type is '%s'",identifier.type_name);
226 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
229 /**** build the base type for later use ****/
230 if (identifier.tm.is_union) {
231 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
233 } else if (identifier.tm.is_enum) {
234 PARSE_ERROR0("Cannot handle enum yet");
236 } else if (identifier.tm.is_struct) {
237 sprintf(buffname,"struct %s",identifier.type_name);
238 identifier.type = gras_datadesc_struct(buffname); /* Get created when does not exist */
240 } else if (identifier.tm.is_unsigned) {
/* Unsigned integers: pick the description matching the long/short modifiers. */
241 if (!strcmp(identifier.type_name,"int")) {
242 if (identifier.tm.is_long == 2) {
243 identifier.type = gras_datadesc_by_name("unsigned long long int");
244 } else if (identifier.tm.is_long) {
245 identifier.type = gras_datadesc_by_name("unsigned long int");
246 } else if (identifier.tm.is_short) {
247 identifier.type = gras_datadesc_by_name("unsigned short int");
249 identifier.type = gras_datadesc_by_name("unsigned int");
252 } else if (!strcmp(identifier.type_name, "char")) {
253 identifier.type = gras_datadesc_by_name("unsigned char");
255 } else { /* should be impossible: the compiler has already rejected such code */
259 } else if (!strcmp(identifier.type_name, "float")) {
260 /* no modifier allowed by gcc */
261 identifier.type = gras_datadesc_by_name("float");
263 } else if (!strcmp(identifier.type_name, "double")) {
264 if (identifier.tm.is_long)
265 PARSE_ERROR0("long double not portable and thus not handled");
267 identifier.type = gras_datadesc_by_name("double");
269 } else { /* signed integer elemental */
270 if (!strcmp(identifier.type_name,"int")) {
271 if (identifier.tm.is_long == 2) {
272 identifier.type = gras_datadesc_by_name("signed long long int");
273 } else if (identifier.tm.is_long) {
274 identifier.type = gras_datadesc_by_name("signed long int");
275 } else if (identifier.tm.is_short) {
276 identifier.type = gras_datadesc_by_name("signed short int");
278 identifier.type = gras_datadesc_by_name("int");
281 } else if (!strcmp(identifier.type_name, "char")) {
282 identifier.type = gras_datadesc_by_name("char");
/* Any other name is looked up as an already declared description. */
285 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
286 identifier.type = gras_datadesc_by_name(identifier.type_name);
287 if (!identifier.type)
288 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
291 /* Now identifier.type and identifier.name speak about the base type.
292 Stars are not eaten unless 'int' was omitted.
293 We will have to enhance it if we are in fact asked for array or reference */
295 /**** look for the symbols of this type ****/
/* Loop over the tokens up to the ';' ending the statement.
 * expect_id_separator is set once a symbol name was pushed: what may follow
 * is then a separator, an array size in brackets or an annotation. */
296 for(expect_id_separator = 0;
298 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
299 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
301 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
303 if(expect_id_separator) {
304 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
305 expect_id_separator = 0;
308 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
309 /* Handle fixed size arrays */
310 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
311 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
312 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
314 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* Try to read the size as a decimal number. */
316 long int size=strtol(gras_ddt_parse_text, &end, 10);
318 if (end == gras_ddt_parse_text || *end != '\0') {
319 /* Not a number. Get the constant value, if any */
320 int *storage=xbt_dict_get_or_null(gras_dd_constants,gras_ddt_parse_text);
/* The test on storage and the use of its value are not visible in this
 * listing; the error below is presumably for an unknown constant. */
324 PARSE_ERROR1("Unparsable size of array. Found '%s', expected number or known constant. Need to use gras_datadesc_set_const(), huh?",
325 gras_ddt_parse_text);
329 /* replace the previously pushed type to an array of it */
330 change_to_fixed_array(identifiers,size);
332 /* eat the closing bracket */
333 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
334 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
335 PARSE_ERROR0("Unparsable size of array");
336 DEBUG1("Fixed size array, size=%ld",size);
339 PARSE_ERROR0("Unparsable size of array");
341 /* End of fixed size arrays handling */
343 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
344 /* Handle annotation */
/* Expected form, as checked token by token below:
 * GRAS_ANNOTE ( keyname , keyval [ * field ]... ) */
345 s_identifier_t array;
346 char *keyname = NULL;
348 memset(&array,0,sizeof(array));
349 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
350 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
352 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
353 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
354 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
/* Each of the empty-bodied while loops below skips the EMPTY tokens. */
356 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
358 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
359 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
360 keyname = (char*)strdup(gras_ddt_parse_text);
362 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
364 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
365 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
367 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
371 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
372 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
373 keyval = (char*)strdup(gras_ddt_parse_text);
375 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
377 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
379 DEBUG2("Anotation: %s=%s",keyname,keyval);
380 if (!strcmp(keyname,"size")) {
/* A size annotation consumes one pending '*' of the field. */
382 if (!identifier.tm.is_ref)
383 PARSE_ERROR0("Size annotation for a field not being a reference");
384 identifier.tm.is_ref--;
/* Size "1": plain reference to one element. */
386 if (!strcmp(keyval,"1")) {
387 change_to_ref(identifiers);
/* The loop below walks the characters of keyval; its body and the test
 * selecting between the two following cases are not visible in this listing.
 * First case: keyval is converted with atoi() and the field becomes a
 * reference to a fixed-size array. Second case: the field becomes a
 * reference whose array size is popped, and keyval is handed to the caller
 * through fields_to_push. */
392 for (p = keyval; *p != '\0'; p++)
396 change_to_fixed_array(identifiers,atoi(keyval));
397 change_to_ref(identifiers);
401 change_to_ref_pop_array(identifiers);
402 xbt_dynar_push(fields_to_push,&keyval);
406 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
409 /* Get all the multipliers */
410 while (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
412 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
414 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
415 PARSE_ERROR1("Unparsable annotation: Expected field name after '*', got '%s'",gras_ddt_parse_text);
/* Build "*<field>": one extra byte for the '*' and one for the final '\0'. */
417 keyval = xbt_malloc(strlen(gras_ddt_parse_text)+2);
418 sprintf(keyval,"*%s",gras_ddt_parse_text);
420 /* ask caller to push field as a multiplier */
421 xbt_dynar_push(fields_to_push,&keyval);
423 /* skip blanks after this block*/
424 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump())
425 == GRAS_DDT_PARSE_TOKEN_EMPTY );
428 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
429 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",
430 gras_ddt_parse_text);
434 /* End of annotation handling */
436 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
438 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
439 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
/* A '*' in front of a symbol name adds one level of reference to solve. */
442 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
443 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
447 /* found a symbol name. Build the type and push it to dynar */
448 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
450 identifier.name=(char*)strdup(gras_ddt_parse_text);
451 DEBUG1("Found the identifier \"%s\"",identifier.name);
/* The element is copied into the dynar; a separator, an array size or an
 * annotation is expected next. */
453 xbt_dynar_push(identifiers, &identifier);
454 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
455 expect_id_separator = 1;
459 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a struct definition and build its data description.
 *
 * definition: text being parsed, forwarded to parse_statement().
 *
 * On entry the current token is either the struct name or what follows the
 * "struct" keyword of an anonymous struct. The description is created with
 * gras_datadesc_struct(), every identifier returned by parse_statement() is
 * appended as a field, the names collected in fields_to_push are registered
 * through gras_datadesc_cb_field_push() or, when they start with '*',
 * gras_datadesc_cb_field_push_multiplier(), and the description is closed.
 *
 * NOTE(review): the loop around parse_statement(), the exception handling
 * lines, the return statement and several declarations (buffname, e, i,
 * name) are missing from this listing.
 */
465 static gras_datadesc_type_t parse_struct(char *definition) {
/* Counter used to give a distinct name to each anonymous struct. */
470 static int anonymous_struct=0;
472 xbt_dynar_t identifiers;
473 s_identifier_t field;
477 xbt_dynar_t fields_to_push;
480 gras_datadesc_type_t struct_type;
/* Both dynars are created without a free function (NULL). */
483 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
484 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
486 /* Create the struct descriptor */
487 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
488 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
489 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
490 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* Anonymous struct (the else line is not visible).
 * NOTE(review): the counter is incremented by the sprintf() line, so the
 * number logged by VERB1 is one more than the number used in the name. */
492 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
493 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
494 struct_type = gras_datadesc_struct(buffname);
497 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
498 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
499 gras_ddt_parse_text);
501 /* Parse the identifiers */
505 parse_statement(definition,identifiers,fields_to_push);
/* mismatch_error is what parse_statement() throws on the closing token of
 * the body; what is done for the other categories is not visible here. */
507 if (e.category != mismatch_error)
513 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
514 /* append the identifiers we've found */
515 xbt_dynar_foreach(identifiers,i, field) {
/* Error for a field keeping unresolved '*' levels; the condition guarding
 * this error is not visible in this listing. */
517 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
518 field.name,field.tm.is_ref);
520 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
521 gras_datadesc_struct_append(struct_type, field.name, field.type);
523 free(field.type_name);
/* Empty the dynar once its identifiers are consumed. */
526 xbt_dynar_reset(identifiers);
527 DEBUG1("struct_type=%p",(void*)struct_type);
529 /* Make sure that all fields declaring a size push it into the cbps */
530 xbt_dynar_foreach(fields_to_push,i, name) {
531 DEBUG1("struct_type=%p",(void*)struct_type);
/* A leading '*' marks a multiplier; the field name starts at name+1. */
532 if (name[0] == '*') {
533 VERB2("Push field '%s' as a multiplier into size stack of %p",
534 name+1, (void*)struct_type);
535 gras_datadesc_cb_field_push_multiplier(struct_type, name+1);
537 VERB2("Push field '%s' into size stack of %p",
538 name, (void*)struct_type);
539 gras_datadesc_cb_field_push(struct_type, name);
543 xbt_dynar_reset(fields_to_push);
545 gras_datadesc_struct_close(struct_type);
/* The body has to end with the closing token; the following token is then
 * read for the caller. */
548 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
549 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
550 gras_ddt_parse_text);
552 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
554 xbt_dynar_free(&identifiers);
555 xbt_dynar_free(&fields_to_push);
/* Parse a typedef statement.
 *
 * definition: text being parsed, forwarded to parse_struct().
 *
 * The visible code reads the modifiers of the aliased type, parses a struct
 * definition, reads modifiers again, checks that the alias name is a word
 * and then always reports "Cannot handle typedef yet": building the alias is
 * not implemented (see the FIXME below).
 *
 * NOTE(review): the conditions guarding the parse_struct() call and the
 * "reference without annotation" error, as well as the return statement, are
 * missing from this listing.
 */
560 static gras_datadesc_type_t parse_typedef(char *definition) {
562 s_type_modifier_t tm;
564 gras_datadesc_type_t struct_desc=NULL;
565 gras_datadesc_type_t typedef_desc=NULL;
568 memset(&tm,0,sizeof(tm));
570 /* get the aliased type */
571 parse_type_modifier(&tm);
574 struct_desc = parse_struct(definition);
/* Second pass on the modifiers, for what sits between the struct body and
 * the alias name. */
577 parse_type_modifier(&tm);
580 PARSE_ERROR0("Cannot handle reference without annotation");
582 /* get the aliasing name */
583 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
584 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
585 gras_ddt_parse_text);
587 /* (FIXME: should) build the alias */
588 PARSE_ERROR0("Cannot handle typedef yet");
596 * gras_datadesc_parse:
598 * Create a datadescription from the result of parsing the C type description
/* Build a data description by parsing the C declaration of a type.
 *
 * name:        symbol under which the type is expected to be known; it is
 *              compared with the name of the parsed description.
 * C_statement: text of the C declaration. It is not modified: the parser
 *              works on a copy where a '\n' is added after each ';' and '{'.
 *
 * The first token selects the parser: "struct" leads to parse_struct() and
 * "typedef" to parse_typedef(); anything else is reported with ERROR1.
 *
 * NOTE(review): the return type line, the return statement, the release of
 * the copy and the declaration of 'definition' are missing from this
 * listing. res is initialised to NULL and dereferenced in the final name
 * check; whether the error branch leaves the function before that check
 * cannot be seen from here -- confirm.
 */
601 gras_datadesc_parse(const char *name,
602 const char *C_statement) {
604 gras_datadesc_type_t res=NULL;
606 int semicolon_count=0;
607 int def_count,C_count;
610 /* put the \n back in place for debug */
/* First pass: measure the text and look for the ';' and '{' characters (the
 * statement executed for them is not visible; presumably it increments
 * semicolon_count -- confirm). */
611 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
612 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
/* Room for the text, the added newlines and the final '\0'. */
614 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
/* Second pass: copy the text, adding a '\n' after each ';' and '{'. */
615 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
616 definition[def_count++] = C_statement[C_count];
617 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
618 definition[def_count++] = '\n';
621 definition[def_count] = '\0';
624 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
625 gras_ddt_parse_pointer_string_init(definition);
627 /* Do I have a typedef, or a raw struct ?*/
628 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
630 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
631 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
632 res = parse_struct(definition);
634 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
635 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
636 res = parse_typedef(definition);
639 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
643 gras_ddt_parse_pointer_string_close();
644 VERB0("end of _gras_ddt_type_parse()");
646 /* register it under the name provided as symbol */
/* The visible code only checks that the parsed name equals the given name
 * and reports an error when it does not. */
647 if (strcmp(res->name,name)) {
648 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",
652 gras_ddt_parse_lex_destroy();
/* Dictionary of the constants declared with gras_datadesc_set_const(),
 * mapping a name to a heap-allocated int. parse_statement() looks array
 * sizes up in it when they are not numbers. Its creation is not visible in
 * this listing. */
657 xbt_dict_t gras_dd_constants;
658 /** \brief Declare a constant to the parsing mechanism. See the "\#define and fixed size array" section */
/* name:  key under which the constant is stored in gras_dd_constants.
 * value: value of the constant.
 *
 * An int is allocated for the entry and registered with free() as its
 * release function.
 * NOTE(review): the line storing value into *stored is not visible in this
 * listing -- confirm that it is done before xbt_dict_set(). */
659 void gras_datadesc_set_const(const char*name, int value) {
660 int *stored = xbt_new(int, 1);
663 xbt_dict_set(gras_dd_constants,name, stored, free);