3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Copyright (c) 2003 Arnaud Legrand. */
6 /* Copyright (c) 2003, 2004 Martin Quinson. */
7 /* All rights reserved. */
9 /* This program is free software; you can redistribute it and/or modify it
10 * under the terms of the license (GNU LGPL) which comes with this package. */
12 #include <ctype.h> /* isdigit */
14 #include "gras/DataDesc/datadesc_private.h"
15 #include "gras/DataDesc/ddt_parse.yy.h"
/* Declares the "ddt_parse" log category as a sub-category of "datadesc".
 * It is the category tested by XBT_LOG_ISENABLED() in parse_statement();
 * presumably it is also the default category of the DEBUGn/VERBn/ERRORn
 * calls in this file -- confirm against the XBT logging headers. */
17 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
18 "Parsing C data structures to build GRAS data description");
/* Type modifiers gathered while parsing one declaration.
 * The member list is not visible in this excerpt; the code in this file reads
 * and writes is_unsigned, is_short, is_struct, is_union and is_enum (set to 1
 * as flags) plus is_long and is_ref (incremented, i.e. used as counters). */
20 typedef struct s_type_modifier{
30 } s_type_modifier_t,*type_modifier_t;
/* Record whose first member is a GRAS data description.
 * NOTE(review): the remaining members and the typedef name are not visible in
 * this excerpt; presumably this is the struct known as s_identifier_t
 * (members type, type_name, name, tm are used below) -- confirm. */
32 typedef struct s_field {
33 gras_datadesc_type_t type;
/* Defined outside this file. The functions below compare it against C
 * keywords with strcmp() and copy it with strdup() to keep token text. */
39 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record the type modifiers found at the current parsing position.
 *
 * type_modifier: structure updated in place. A STAR token increments is_ref;
 *   the words "unsigned", "short", "struct", "union" and "enum" set the
 *   matching member to 1; "long" increments is_long.
 *
 * The next token is obtained with gras_ddt_parse_lex_n_dump() and stored in
 * gras_ddt_parse_tok_num. A token that is neither WORD nor STAR is logged as
 * "Done with modifiers".
 *
 * NOTE(review): the loop construct, the body of the EMPTY-token branch and
 * the final branch are not visible in this excerpt, so the exact exit
 * conditions cannot be confirmed from here.
 */
42 static void parse_type_modifier(type_modifier_t type_modifier) {
45 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
46 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
47 DEBUG0("This is a reference");
48 type_modifier->is_ref++;
50 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
51 DEBUG0("This is an unsigned");
52 type_modifier->is_unsigned = 1;
54 } else if (!strcmp(gras_ddt_parse_text,"short")) {
55 DEBUG0("This is short");
56 type_modifier->is_short = 1;
58 } else if (!strcmp(gras_ddt_parse_text,"long")) {
59 DEBUG0("This is long");
60 type_modifier->is_long++; /* a counter, so that "long long" yields 2 */
62 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
63 DEBUG0("This is a struct");
64 type_modifier->is_struct = 1;
66 } else if (!strcmp(gras_ddt_parse_text,"union")) {
67 DEBUG0("This is an union");
68 type_modifier->is_union = 1;
70 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
71 DEBUG0("This is an enum");
72 type_modifier->is_enum = 1;
74 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
78 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Advance to the next token; only WORD and STAR tokens can still be modifiers. */
82 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
83 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
84 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
85 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Print a human-readable dump of a modifier set with printf().
 *
 * tm: modifier set, passed by value. One "(unsigned) ", "(short) ",
 *   "(struct) ", "(enum) " or "(union) " marker is printed for each non-zero
 *   flag; "(long) " is printed is_long times and "(ref) " is_ref times.
 */
92 static void print_type_modifier(s_type_modifier_t tm) {
96 if (tm.is_unsigned) printf("(unsigned) ");
97 if (tm.is_short) printf("(short) ");
/* is_long is a counter: "long long" prints the marker twice. */
98 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
100 if(tm.is_struct) printf("(struct) ");
101 if(tm.is_enum) printf("(enum) ");
102 if(tm.is_union) printf("(union) ");
/* One marker per level of indirection. */
104 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/* Replace the last identifier of `dynar` by a fixed-size array of it.
 *
 * dynar: dynar of identifiers; its last element is popped, converted and a
 *   new element is pushed back.
 * size:  number of elements, passed to gras_datadesc_array_fixed() and
 *   written between brackets in the generated type name.
 *
 * The new element keeps the popped element's `name` pointer; the popped
 * element's `type_name` is freed and replaced by a newly allocated string.
 */
108 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
109 s_identifier_t former,array;
110 memset(&array,0,sizeof(array));
113 xbt_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name, while the log line
   below prints former.type_name; the trailing sprintf arguments are not
   visible in this excerpt -- confirm that the size bounds what is written. */
114 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+48);
115 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
116 size,former.type_name);
117 sprintf(array.type_name,"%s%s%s%s[%ld]",
118 (former.tm.is_unsigned?"u ":""),
119 (former.tm.is_short?"s ":""),
120 (former.tm.is_long?"l ":""),
123 free(former.type_name);
125 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclarations are ignored */
/* Ownership of the name string moves to the new element. */
126 array.name = former.name;
128 xbt_dynar_push(dynar,&array);
/* Replace the last identifier of `dynar` by a reference (pointer) to it.
 *
 * dynar: dynar of identifiers; its last element is popped and a new element
 *   is pushed whose type comes from gras_datadesc_ref() and whose type name
 *   is the popped type name followed by '*'.
 *
 * The new element keeps the popped element's `name` pointer; the popped
 * element's `type_name` is freed.
 */
131 static void change_to_ref(xbt_dynar_t dynar) {
132 s_identifier_t former,ref;
133 memset(&ref,0,sizeof(ref));
136 xbt_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name, but sprintf below
   copies former.type_name; this is only safe if type_name is never longer than
   type->name -- confirm. */
137 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
138 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
139 sprintf(ref.type_name,"%s*",former.type_name);
140 free(former.type_name);
142 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclarations are ignored */
/* Ownership of the name string moves to the new element. */
143 ref.name = former.name;
145 xbt_dynar_push(dynar,&ref);
/* Replace the last identifier of `dynar` by the type returned by
 * gras_datadesc_ref_pop_arr() for it.
 *
 * dynar: dynar of identifiers; its last element is popped and a new element
 *   is pushed. The new type name is a strdup() of the created type's own
 *   name; the popped `type_name` is freed and the `name` pointer is reused.
 */
149 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
150 s_identifier_t former,ref;
151 memset(&ref,0,sizeof(ref));
154 xbt_dynar_pop(dynar,&former);
155 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclarations are ignored */
/* Here the type name is taken from the created type, not built by hand. */
156 ref.type_name = (char*)strdup(ref.type->name);
157 ref.name = former.name;
159 free(former.type_name);
161 xbt_dynar_push(dynar,&ref);
/* Parse one statement of a struct body.
 *
 * definition:     text being parsed; in the visible lines it is only used to
 *                 log the current statement when debug logging is enabled.
 * identifiers:    dynar receiving one s_identifier_t per declared symbol.
 *                 Array sizes and GRAS_ANNOTE annotations rewrite the last
 *                 pushed element through the change_to_*() helpers.
 * fields_to_push: dynar of char*; receives the value of a "size" annotation
 *                 in the branch that calls change_to_ref_pop_array().
 *
 * Returns mismatch_error when the statement starts with the RA token, which
 * parse_struct() treats as the end of the struct body. Other return paths are
 * not visible in this excerpt; parse_struct() keeps looping while the result
 * is no_error.
 *
 * Note: in this file the token called GRAS_DDT_PARSE_TOKEN_COLON is the one
 * reported to the user as a comma (',') in the error messages.
 *
 * NOTE(review): many lines of this function (declarations, closing braces,
 * several else branches) are missing from this excerpt.
 */
165 static xbt_error_t parse_statement(char *definition,
166 xbt_dynar_t identifiers,
167 xbt_dynar_t fields_to_push) {
170 s_identifier_t identifier;
172 int expect_id_separator = 0;
175 memset(&identifier,0,sizeof(identifier));
/* Fetch the first token of the statement. */
177 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
178 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
180 return mismatch_error; /* end of the enclosing structure or union */
/* Debug only: temporarily cut `definition` at the next ';' so that just this
   statement is logged, then put the ';' back. */
183 if (XBT_LOG_ISENABLED(ddt_parse,xbt_log_priority_debug)) {
185 for (colon_pos = gras_ddt_parse_col_pos;
186 definition[colon_pos] != ';';
188 definition[colon_pos] = '\0';
189 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
191 definition+gras_ddt_parse_col_pos,
192 gras_ddt_parse_col_pos);
193 definition[colon_pos] = ';';
/* A statement has to start with a word (modifier or type name). */
196 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
197 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
198 gras_ddt_parse_text);
200 /**** get the type modifier of this statement ****/
201 parse_type_modifier(&identifier.tm);
203 /* FIXME: This does not detect recursive definitions at all? */
204 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
205 PARSE_ERROR0("Cannot handle recursive type definition yet");
207 /**** get the base type, giving "short a" the needed love ****/
208 if (!identifier.tm.is_union &&
209 !identifier.tm.is_enum &&
210 !identifier.tm.is_struct &&
212 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
214 strcmp(gras_ddt_parse_text,"char") &&
215 strcmp(gras_ddt_parse_text,"float") &&
216 strcmp(gras_ddt_parse_text,"double") &&
217 strcmp(gras_ddt_parse_text,"int") ) {
219 /* the user omitted "int" after short/long/unsigned: it is implied */
220 identifier.type_name=(char*)strdup("int");
221 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Explicit base type: keep a copy of the token text and move to the next token. */
223 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
224 DEBUG1("the base type is '%s'",identifier.type_name);
225 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
228 /**** build the base type for later use ****/
229 if (identifier.tm.is_union) {
230 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
232 } else if (identifier.tm.is_enum) {
233 PARSE_ERROR0("Cannot handle enum yet");
235 } else if (identifier.tm.is_struct) {
236 sprintf(buffname,"struct %s",identifier.type_name);
237 identifier.type = gras_datadesc_struct(buffname); /* Gets created when it does not exist */
239 } else if (identifier.tm.is_unsigned) {
/* Unsigned integers: "long long" is tested first (is_long == 2), then long, then short. */
240 if (!strcmp(identifier.type_name,"int")) {
241 if (identifier.tm.is_long == 2) {
242 identifier.type = gras_datadesc_by_name("unsigned long long int");
243 } else if (identifier.tm.is_long) {
244 identifier.type = gras_datadesc_by_name("unsigned long int");
245 } else if (identifier.tm.is_short) {
246 identifier.type = gras_datadesc_by_name("unsigned short int");
248 identifier.type = gras_datadesc_by_name("unsigned int");
251 } else if (!strcmp(identifier.type_name, "char")) {
252 identifier.type = gras_datadesc_by_name("unsigned char");
254 } else { /* impossible, gcc rejects such code before us */
258 } else if (!strcmp(identifier.type_name, "float")) {
259 /* no modifier allowed by gcc */
260 identifier.type = gras_datadesc_by_name("float");
262 } else if (!strcmp(identifier.type_name, "double")) {
263 if (identifier.tm.is_long)
264 PARSE_ERROR0("long double not portable and thus not handled");
266 identifier.type = gras_datadesc_by_name("double");
268 } else { /* signed integer elemental */
269 if (!strcmp(identifier.type_name,"int")) {
270 if (identifier.tm.is_long == 2) {
271 identifier.type = gras_datadesc_by_name("signed long long int");
272 } else if (identifier.tm.is_long) {
273 identifier.type = gras_datadesc_by_name("signed long int");
274 } else if (identifier.tm.is_short) {
275 identifier.type = gras_datadesc_by_name("signed short int");
277 identifier.type = gras_datadesc_by_name("int");
280 } else if (!strcmp(identifier.type_name, "char")) {
281 identifier.type = gras_datadesc_by_name("char");
/* Any other name has to be an already declared data description. */
284 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
285 identifier.type = gras_datadesc_by_name(identifier.type_name);
286 if (!identifier.type)
287 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
290 /* Now identifier.type and identifier.name speak about the base type.
291 Stars are not eaten unless 'int' was omitted.
292 We will have to enhance it if we are in fact asked for array or reference */
294 /**** look for the symbols of this type ****/
295 for(expect_id_separator = 0;
297 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
298 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
300 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
/* After an identifier, only a separator, an array size or an annotation may follow. */
302 if(expect_id_separator) {
303 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
304 expect_id_separator = 0;
307 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
308 /* Handle fixed size arrays */
309 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
310 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
311 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
313 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The size must be a complete base-10 number: no digits or trailing text is an error. */
315 long int size=strtol(gras_ddt_parse_text, &end, 10);
317 if (end == gras_ddt_parse_text || *end != '\0')
318 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
320 /* replace the previously pushed type to an array of it */
321 change_to_fixed_array(identifiers,size);
323 /* eat the closing bracket */
324 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
325 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
326 PARSE_ERROR0("Unparsable size of array");
327 DEBUG1("Fixed size array, size=%ld",size);
330 PARSE_ERROR0("Unparsable size of array");
332 /* End of fixed size arrays handling */
334 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
335 /* Handle annotation */
336 s_identifier_t array;
337 char *keyname = NULL;
339 memset(&array,0,sizeof(array));
340 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
341 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
/* Expected shape, per the checks below: GRAS_ANNOTE ( keyname , keyval ) */
343 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
344 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
345 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
/* Each empty-bodied while loop below skips EMPTY tokens. */
347 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
349 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
350 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
351 keyname = (char*)strdup(gras_ddt_parse_text);
353 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
355 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
356 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
358 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
360 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
361 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
362 keyval = (char*)strdup(gras_ddt_parse_text);
364 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
366 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
367 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
369 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
371 DEBUG2("Anotation: %s=%s",keyname,keyval);
372 if (!strcmp(keyname,"size")) {
/* A size annotation accounts for one pending level of indirection. */
374 if (!identifier.tm.is_ref)
375 PARSE_ERROR0("Size annotation for a field not being a reference");
376 identifier.tm.is_ref--;
/* size "1": plain reference to a single element. */
378 if (!strcmp(keyval,"1")) {
379 change_to_ref(identifiers);
/* NOTE(review): the conditions selecting the two branches below are not
   visible in this excerpt; presumably a numeric keyval gives a reference to a
   fixed-size array, and any other keyval is queued in fields_to_push --
   confirm. */
385 for (p = keyval; *p != '\0'; p++)
389 change_to_fixed_array(identifiers,atoi(keyval));
390 change_to_ref(identifiers);
395 change_to_ref_pop_array(identifiers);
396 xbt_dynar_push(fields_to_push,&keyval);
403 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
407 /* End of annotation handling */
409 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
411 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
412 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
415 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
416 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
420 /* found a symbol name. Build the type and push it to dynar */
421 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
423 identifier.name=(char*)strdup(gras_ddt_parse_text);
424 DEBUG1("Found the identifier \"%s\"",identifier.name);
/* The dynar receives a copy of the whole identifier record. */
426 xbt_dynar_push(identifiers, &identifier);
427 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
428 expect_id_separator = 1;
432 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a struct definition and declare the matching GRAS data description.
 *
 * definition: text being parsed, forwarded to parse_statement().
 *
 * The descriptor is created with gras_datadesc_struct(), named after the
 * current WORD token or "anonymous struct <n>" otherwise. Statements are
 * parsed with parse_statement() while it returns no_error; every identifier
 * found is appended as a field with gras_datadesc_struct_append(), and every
 * name queued in fields_to_push is passed to gras_datadesc_cb_field_push().
 * The descriptor is then closed with gras_datadesc_struct_close().
 *
 * Returns NULL when statement parsing stopped on anything other than
 * mismatch_error (the two dynars are leaked on that path, see the FIXME).
 * NOTE(review): the final return statement and several closing braces are
 * not visible in this excerpt.
 */
439 static gras_datadesc_type_t parse_struct(char *definition) {
/* Counter used to give every anonymous struct a distinct name. */
443 static int anonymous_struct=0;
445 xbt_dynar_t identifiers;
446 s_identifier_t field;
449 xbt_dynar_t fields_to_push;
452 gras_datadesc_type_t struct_type;
455 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
456 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
458 /* Create the struct descriptor */
459 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
460 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
461 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
462 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is post-incremented in the sprintf below, so the
   number logged by VERB1 is one more than the one used in the type name. */
464 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
465 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
466 struct_type = gras_datadesc_struct(buffname);
469 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
470 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
471 gras_ddt_parse_text);
473 /* Parse the identifiers */
474 for (errcode=parse_statement(definition,identifiers,fields_to_push);
475 errcode == no_error ;
476 errcode=parse_statement(definition,identifiers,fields_to_push)) {
478 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
479 /* append the identifiers we've found */
480 xbt_dynar_foreach(identifiers,i, field) {
482 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
483 field.name,field.tm.is_ref);
485 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
486 gras_datadesc_struct_append(struct_type, field.name, field.type);
488 free(field.type_name);
/* Empty the dynar so the next statement starts from scratch. */
491 xbt_dynar_reset(identifiers);
492 DEBUG1("struct_type=%p",(void*)struct_type);
494 /* Make sure that all fields declaring a size push it into the cbps */
495 xbt_dynar_foreach(fields_to_push,i, name) {
496 DEBUG1("struct_type=%p",(void*)struct_type);
497 VERB2("Push field '%s' into size stack of %p", name, (void*)struct_type);
498 gras_datadesc_cb_field_push(struct_type, name);
501 xbt_dynar_reset(fields_to_push);
503 gras_datadesc_struct_close(struct_type);
/* mismatch_error is what parse_statement() returns on the RA token, i.e. the
   expected way to leave the loop; anything else is a real failure. */
504 if (errcode != mismatch_error) {
506 return NULL; /* FIXME: LEAK! */
510 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
511 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
512 gras_ddt_parse_text);
/* Move past the closing token before releasing the work dynars. */
514 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
516 xbt_dynar_free(&identifiers);
517 xbt_dynar_free(&fields_to_push);
/* Parse a typedef statement.
 *
 * definition: text being parsed, forwarded to parse_struct().
 *
 * The visible lines read the modifiers of the aliased type, parse a struct
 * body with parse_struct(), read modifiers again, and require a WORD token
 * as the alias name. Building the alias is not implemented: the function
 * ends on PARSE_ERROR0("Cannot handle typedef yet").
 *
 * NOTE(review): the conditions guarding parse_struct() and the "reference
 * without annotation" error, as well as the return statement, are not
 * visible in this excerpt.
 */
522 static gras_datadesc_type_t parse_typedef(char *definition) {
524 s_type_modifier_t tm;
526 gras_datadesc_type_t struct_desc=NULL;
527 gras_datadesc_type_t typedef_desc=NULL;
530 memset(&tm,0,sizeof(tm));
532 /* get the aliased type */
533 parse_type_modifier(&tm);
536 struct_desc = parse_struct(definition);
/* Modifiers found after the struct body (e.g. '*') are added to the same tm. */
539 parse_type_modifier(&tm);
542 PARSE_ERROR0("Cannot handle reference without annotation");
544 /* get the aliasing name */
545 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
546 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
547 gras_ddt_parse_text);
549 /* (FIXME: should) build the alias */
550 PARSE_ERROR0("Cannot handle typedef yet");
/* Entry point: build a data description by parsing a C type declaration.
 *
 * name:        symbol the caller expects the parsed type to be named after;
 *              it is compared with the name of the resulting description and
 *              a mismatch is reported with ERROR2.
 * C_statement: C source text of the declaration. It is copied into a private
 *              buffer with a '\n' added after every ';' and '{'.
 *
 * The first token selects the parser: "struct" goes to parse_struct(),
 * "typedef" to parse_typedef(); anything else is reported with ERROR1.
 *
 * NOTE(review): parse_struct() can return NULL and res starts as NULL; unless
 * lines missing from this excerpt guard against it, strcmp(res->name,name)
 * would dereference a NULL pointer -- confirm.
 * NOTE(review): the return statement and the release of `definition` are not
 * visible in this excerpt.
 */
558 * gras_datadesc_parse:
560 * Create a datadescription from the result of parsing the C type description
563 gras_datadesc_parse(const char *name,
564 const char *C_statement) {
566 gras_datadesc_type_t res=NULL;
568 int semicolon_count=0;
569 int def_count,C_count;
572 /* put the \n back in place for debugging */
/* First pass: measure the input (the statement under the if is not visible here). */
573 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
574 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
/* Room for the input, one extra character per counted ';' or '{', and the terminating NUL. */
576 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
/* Second pass: copy, adding a newline after each ';' and '{'. */
577 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
578 definition[def_count++] = C_statement[C_count];
579 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
580 definition[def_count++] = '\n';
583 definition[def_count] = '\0';
586 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
/* Hand the reformatted text to the lexer. */
587 gras_ddt_parse_pointer_string_init(definition);
589 /* Do I have a typedef, or a raw struct ?*/
590 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
592 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
593 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
594 res = parse_struct(definition);
596 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
597 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
598 res = parse_typedef(definition);
601 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
605 gras_ddt_parse_pointer_string_close();
606 VERB0("end of _gras_ddt_type_parse()");
608 /* register it under the name provided as symbol */
609 if (strcmp(res->name,name)) {
610 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",
614 gras_ddt_parse_lex_destroy();