3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Copyright (c) 2003 Arnaud Legrand. */
6 /* Copyright (c) 2003, 2004 Martin Quinson. */
7 /* All rights reserved. */
9 /* This program is free software; you can redistribute it and/or modify it
10 * under the terms of the license (GNU LGPL) which comes with this package. */
12 #include <ctype.h> /* isdigit */
15 #include "gras/DataDesc/datadesc_private.h"
16 #include "gras/DataDesc/ddt_parse.yy.h"
/* Declare the log category "ddt_parse" as a sub-category of "datadesc".
 * The same identifier is tested with XBT_LOG_ISENABLED() in parse_statement().
 * Presumably the DEBUGn/VERBn/ERRORn calls of this file log to it by default
 * -- confirm against the xbt log macros. */
18 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
19 "Parsing C data structures to build GRAS data description");
/* Modifiers collected in front of a base type while parsing a declaration.
 * The member list is not visible in this view. The functions of this file
 * use the members is_ref, is_unsigned, is_short, is_long, is_struct,
 * is_union and is_enum: is_ref and is_long are incremented (so they act as
 * counters), the others are set to 1. */
21 typedef struct s_type_modifier{
31 } s_type_modifier_t,*type_modifier_t;
/* Description of one parsed field. Only the `type` member (the GRAS data
 * description of the field) is visible in this view.
 * NOTE(review): the end of this typedef is not visible; presumably it is the
 * s_identifier_t used below, whose type_name, name and tm members are
 * accessed by the parsing functions -- confirm. */
33 typedef struct s_field {
34 gras_datadesc_type_t type;
/* Defined outside this file. The functions below compare it with strcmp()
 * against C keywords, copy it with strdup() and print it in error messages. */
40 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record into *type_modifier the modifier carried by the current token.
 *
 * The current token is read from gras_ddt_parse_tok_num and
 * gras_ddt_parse_text; the next one is fetched with
 * gras_ddt_parse_lex_n_dump().
 *
 *  - a STAR token increments is_ref;
 *  - "unsigned", "short", "struct", "union" and "enum" set the matching flag;
 *  - "long" increments is_long, so "long long" yields 2;
 *  - an EMPTY token has its own branch (its body is not visible here).
 *
 * NOTE(review): the looping construct and its exit statements are not visible
 * in this view. The two "Done with modifiers" messages suggest the function
 * stops at the first token that is not a modifier -- confirm. */
43 static void parse_type_modifier(type_modifier_t type_modifier) {
46 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
47 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
48 DEBUG0("This is a reference");
49 type_modifier->is_ref++;
51 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
52 DEBUG0("This is an unsigned");
53 type_modifier->is_unsigned = 1;
55 } else if (!strcmp(gras_ddt_parse_text,"short")) {
56 DEBUG0("This is short");
57 type_modifier->is_short = 1;
59 } else if (!strcmp(gras_ddt_parse_text,"long")) {
60 DEBUG0("This is long");
61 type_modifier->is_long++; /* handle "long long" */
63 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
64 DEBUG0("This is a struct");
65 type_modifier->is_struct = 1;
67 } else if (!strcmp(gras_ddt_parse_text,"union")) {
68 DEBUG0("This is an union");
69 type_modifier->is_union = 1;
71 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
72 DEBUG0("This is an enum");
73 type_modifier->is_enum = 1;
75 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
79 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Fetch the next token; anything other than a word or a star is reported as
 * the end of the modifiers. */
83 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
84 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
85 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
86 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Debugging helper: print on stdout one parenthesised tag per modifier
 * present in tm (passed by value, not modified).
 * is_long and is_ref are treated as counts: "(long) " and "(ref) " are
 * printed that many times. The declaration of the loop index i is not
 * visible in this view. */
93 static void print_type_modifier(s_type_modifier_t tm) {
97 if (tm.is_unsigned) printf("(unsigned) ");
98 if (tm.is_short) printf("(short) ");
99 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
101 if(tm.is_struct) printf("(struct) ");
102 if(tm.is_enum) printf("(enum) ");
103 if(tm.is_union) printf("(union) ");
105 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/* Replace the last identifier pushed on dynar by a fixed-size array of it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped and a new
 *        element describing the array is pushed in its place.
 * size:  number of elements of the array.
 *
 * The new type name is built from one-letter prefixes for the unsigned, short
 * and long modifiers followed by "[size]"; the array type is obtained from
 * gras_datadesc_array_fixed(). The field name pointer is carried over
 * unchanged, and the former type_name string is freed. */
109 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
110 s_identifier_t former,array;
111 memset(&array,0,sizeof(array));
114 xbt_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name (+48 for the
 * prefixes, brackets and digits), while former.type_name is the string
 * logged just below. The trailing sprintf arguments are not visible here; if
 * former.type_name is what gets formatted and it can be longer than
 * former.type->name, this overflows -- confirm. */
115 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+48);
116 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
117 size,former.type_name);
118 sprintf(array.type_name,"%s%s%s%s[%ld]",
119 (former.tm.is_unsigned?"u ":""),
120 (former.tm.is_short?"s ":""),
121 (former.tm.is_long?"l ":""),
124 free(former.type_name);
126 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclarations are ignored */
127 array.name = former.name;
129 xbt_dynar_push(dynar,&array);
/* Replace the last identifier pushed on dynar by a reference to it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped and a new
 *        element describing the reference is pushed in its place.
 *
 * The new type name is the former type_name followed by '*'; the reference
 * type is obtained from gras_datadesc_ref(). The field name pointer is
 * carried over unchanged, and the former type_name string is freed. */
132 static void change_to_ref(xbt_dynar_t dynar) {
133 s_identifier_t former,ref;
134 memset(&ref,0,sizeof(ref));
137 xbt_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name (+2 for '*' and
 * the terminating NUL), but the sprintf below writes former.type_name. This
 * overflows if former.type_name can be longer than former.type->name --
 * confirm the two always agree, or size the buffer from type_name. */
138 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
139 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
140 sprintf(ref.type_name,"%s*",former.type_name);
141 free(former.type_name);
143 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclarations are ignored */
144 ref.name = former.name;
146 xbt_dynar_push(dynar,&ref);
/* Replace the last identifier pushed on dynar by the type returned by
 * gras_datadesc_ref_pop_arr() for it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped and the new
 *        element is pushed in its place.
 *
 * Unlike the two helpers above, the type name is not composed here: it is a
 * strdup() of the name of the type returned by gras_datadesc_ref_pop_arr().
 * The field name pointer is carried over unchanged, and the former type_name
 * string is freed.
 * NOTE(review): judging by its name and by the caller, the size of the
 * referenced array is presumably popped from the callback stack at run time
 * -- confirm in the datadesc API. */
150 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
151 s_identifier_t former,ref;
152 memset(&ref,0,sizeof(ref));
155 xbt_dynar_pop(dynar,&former);
156 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclarations are ignored */
157 ref.type_name = (char*)strdup(ref.type->name);
158 ref.name = former.name;
160 free(former.type_name);
162 xbt_dynar_push(dynar,&ref);
/* Parse one statement of a struct body, i.e. the tokens up to the next ';'.
 *
 * definition:     text being parsed. In this function it is only used to
 *                 build a debug message; it is modified temporarily and
 *                 restored while doing so.
 * identifiers:    dynar of s_identifier_t receiving one element per symbol
 *                 declared by the statement.
 * fields_to_push: dynar of char* receiving the field names given in size
 *                 annotations; a name prefixed with '*' denotes a multiplier.
 *
 * Throws mismatch_error (THROW0) when the first token is
 * GRAS_DDT_PARSE_TOKEN_RA, which parse_struct() expects as the closing '}'.
 * Other problems are reported through the PARSE_ERRORn macros.
 *
 * Token names, as far as the error messages of this file show:
 * COLON is the comma separator, LP/RP are parentheses, LA/RA are the braces
 * of a struct body. LB/RB are presumably the square brackets -- confirm in
 * the lexer.
 *
 * NOTE(review): several locals (buffname, colon_pos, end, keyval, p) are
 * declared on lines not visible in this view, as are some else branches and
 * closing braces. */
166 static void parse_statement(char *definition,
167 xbt_dynar_t identifiers,
168 xbt_dynar_t fields_to_push) {
171 s_identifier_t identifier;
173 int expect_id_separator = 0;
176 memset(&identifier,0,sizeof(identifier));
178 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
179 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
181 THROW0(mismatch_error,0,"End of the englobing structure or union");
/* Debug output only: temporarily cut the definition at the next ';' so that
 * just the current statement is logged, then put the ';' back. */
184 if (XBT_LOG_ISENABLED(ddt_parse,xbt_log_priority_debug)) {
186 for (colon_pos = gras_ddt_parse_col_pos;
187 definition[colon_pos] != ';';
189 definition[colon_pos] = '\0';
190 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
192 definition+gras_ddt_parse_col_pos,
193 gras_ddt_parse_col_pos);
194 definition[colon_pos] = ';';
197 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
198 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
199 gras_ddt_parse_text);
201 /**** get the type modifier of this statement ****/
202 parse_type_modifier(&identifier.tm);
/* NOTE(review): every struct/union/enum modifier is rejected here. Unless
 * PARSE_ERROR0 returns normally, the is_union/is_enum/is_struct branches
 * further down cannot be reached -- confirm the macro semantics. */
204 /* FIXME: This does not detect recursive definitions at all? */
205 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
206 PARSE_ERROR0("Cannot handle recursive type definition yet");
208 /**** get the base type, giving "short a" the needed love ****/
/* A short/long/unsigned modifier followed by a word that is none of char,
 * float, double, int means the base type was left implicit: use "int" and do
 * not consume the current token (it belongs to what follows). */
209 if (!identifier.tm.is_union &&
210 !identifier.tm.is_enum &&
211 !identifier.tm.is_struct &&
213 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
215 strcmp(gras_ddt_parse_text,"char") &&
216 strcmp(gras_ddt_parse_text,"float") &&
217 strcmp(gras_ddt_parse_text,"double") &&
218 strcmp(gras_ddt_parse_text,"int") ) {
220 /* the user omitted "int" */
221 identifier.type_name=(char*)strdup("int");
222 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Presumably the else branch (its keyword is not visible): the current word
 * is the base type name, and the next token is fetched. */
224 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
225 DEBUG1("the base type is '%s'",identifier.type_name);
226 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
229 /**** build the base type for latter use ****/
230 if (identifier.tm.is_union) {
231 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
233 } else if (identifier.tm.is_enum) {
234 PARSE_ERROR0("Cannot handle enum yet");
236 } else if (identifier.tm.is_struct) {
/* NOTE(review): unbounded sprintf into buffname, whose declaration and size
 * are not visible here -- confirm it cannot overflow with a long type name. */
237 sprintf(buffname,"struct %s",identifier.type_name);
238 identifier.type = gras_datadesc_struct(buffname); /* Get created when does not exist */
/* Elemental types are looked up by their full C name; is_long == 2 selects
 * the "long long" variants. */
240 } else if (identifier.tm.is_unsigned) {
241 if (!strcmp(identifier.type_name,"int")) {
242 if (identifier.tm.is_long == 2) {
243 identifier.type = gras_datadesc_by_name("unsigned long long int");
244 } else if (identifier.tm.is_long) {
245 identifier.type = gras_datadesc_by_name("unsigned long int");
246 } else if (identifier.tm.is_short) {
247 identifier.type = gras_datadesc_by_name("unsigned short int");
249 identifier.type = gras_datadesc_by_name("unsigned int");
252 } else if (!strcmp(identifier.type_name, "char")) {
253 identifier.type = gras_datadesc_by_name("unsigned char");
255 } else { /* presumably unreachable: the C compiler has already parsed this declaration */
259 } else if (!strcmp(identifier.type_name, "float")) {
260 /* no modifier allowed by gcc */
261 identifier.type = gras_datadesc_by_name("float");
263 } else if (!strcmp(identifier.type_name, "double")) {
264 if (identifier.tm.is_long)
265 PARSE_ERROR0("long double not portable and thus not handled");
267 identifier.type = gras_datadesc_by_name("double");
269 } else { /* signed integer elemental */
270 if (!strcmp(identifier.type_name,"int")) {
271 if (identifier.tm.is_long == 2) {
272 identifier.type = gras_datadesc_by_name("signed long long int");
273 } else if (identifier.tm.is_long) {
274 identifier.type = gras_datadesc_by_name("signed long int");
275 } else if (identifier.tm.is_short) {
276 identifier.type = gras_datadesc_by_name("signed short int");
278 identifier.type = gras_datadesc_by_name("int");
281 } else if (!strcmp(identifier.type_name, "char")) {
282 identifier.type = gras_datadesc_by_name("char");
/* Any other name must be a type that is already registered. */
285 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
286 identifier.type = gras_datadesc_by_name(identifier.type_name);
287 if (!identifier.type)
288 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
291 /* Now identifier.type and identifier.name speak about the base type.
292 Stars are not eaten unless 'int' was omitted.
293 We will have to enhance it if we are in fact asked for array or reference */
295 /**** look for the symbols of this type ****/
/* Consume tokens until the ';' ending the statement. expect_id_separator is
 * set once a symbol name has been pushed: from then on only a comma, an
 * array size or an annotation is accepted. */
296 for(expect_id_separator = 0;
298 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
299 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
301 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
303 if(expect_id_separator) {
304 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
305 expect_id_separator = 0;
308 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
309 /* Handle fixed size arrays */
310 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
311 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
312 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
314 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The size must be a plain decimal number: reject an empty conversion and
 * any trailing character. */
316 long int size=strtol(gras_ddt_parse_text, &end, 10);
318 if (end == gras_ddt_parse_text || *end != '\0')
319 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
321 /* replace the previously pushed type to an array of it */
322 change_to_fixed_array(identifiers,size);
324 /* eat the closing bracket */
325 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
326 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
327 PARSE_ERROR0("Unparsable size of array");
328 DEBUG1("Fixed size array, size=%ld",size);
331 PARSE_ERROR0("Unparsable size of array");
333 /* End of fixed size arrays handling */
335 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
336 /* Handle annotation */
/* Expected form: GRAS_ANNOTE(key, value). EMPTY tokens between the elements
 * are skipped by the empty-bodied while loops below. */
337 s_identifier_t array;
338 char *keyname = NULL;
340 memset(&array,0,sizeof(array));
341 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
342 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
344 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
345 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
346 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
348 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
350 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
351 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
352 keyname = (char*)strdup(gras_ddt_parse_text);
354 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
356 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
357 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
359 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
363 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
364 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
365 keyval = (char*)strdup(gras_ddt_parse_text);
367 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
369 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
371 DEBUG2("Anotation: %s=%s",keyname,keyval);
372 if (!strcmp(keyname,"size")) {
/* A size annotation uses up one pending level of reference ('*'). */
374 if (!identifier.tm.is_ref)
375 PARSE_ERROR0("Size annotation for a field not being a reference");
376 identifier.tm.is_ref--;
/* size "1": plain reference to a single element. */
378 if (!strcmp(keyval,"1")) {
379 change_to_ref(identifiers);
/* The conditions selecting between the two cases below are not visible in
 * this view. Presumably the loop over keyval checks whether it is all digits:
 * a numeric size gives a reference to a fixed-size array, anything else is
 * taken as the name of the field holding the size -- confirm.
 * NOTE(review): atoi() reports no conversion error, whereas the array-size
 * path above uses strtol() with an end-pointer check. */
384 for (p = keyval; *p != '\0'; p++)
388 change_to_fixed_array(identifiers,atoi(keyval));
389 change_to_ref(identifiers);
393 change_to_ref_pop_array(identifiers);
394 xbt_dynar_push(fields_to_push,&keyval);
398 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
401 /* Get all the multipliers */
/* Each '*' must be followed by a field name; the name is stored with a
 * leading '*' so that parse_struct() can tell it is a multiplier. */
402 while (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
404 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
406 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
407 PARSE_ERROR1("Unparsable annotation: Expected field name after '*', got '%s'",gras_ddt_parse_text);
409 keyval = xbt_malloc(strlen(gras_ddt_parse_text)+2);
410 sprintf(keyval,"*%s",gras_ddt_parse_text);
412 /* ask caller to push field as a multiplier */
413 xbt_dynar_push(fields_to_push,&keyval);
415 /* skip blanks after this block*/
416 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump())
417 == GRAS_DDT_PARSE_TOKEN_EMPTY );
420 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
421 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",
422 gras_ddt_parse_text);
426 /* End of annotation handling */
428 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
430 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
431 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
/* A star in front of a symbol name adds one level of reference, to be
 * resolved later by a size annotation. */
434 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
435 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
439 /* found a symbol name. Build the type and push it to dynar */
440 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
442 identifier.name=(char*)strdup(gras_ddt_parse_text);
443 DEBUG1("Found the identifier \"%s\"",identifier.name);
445 xbt_dynar_push(identifiers, &identifier);
446 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
447 expect_id_separator = 1;
451 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a struct definition and build its GRAS data description.
 *
 * definition: text being parsed; it is handed to parse_statement().
 *
 * On entry the current token is either the struct name (WORD) or the token
 * following the struct keyword of an anonymous struct. Each statement of the
 * body is parsed by parse_statement(); the fields found are appended to the
 * struct, then the fields named in size annotations are registered through
 * the gras_datadesc_cb_field_push* functions.
 *
 * NOTE(review): the return statement, the loop and try/catch around
 * parse_statement(), and the declarations of buffname, e, i and name are not
 * visible in this view. */
457 static gras_datadesc_type_t parse_struct(char *definition) {
/* Counter used to give each anonymous struct a distinct name. */
462 static int anonymous_struct=0;
464 xbt_dynar_t identifiers;
465 s_identifier_t field;
469 xbt_dynar_t fields_to_push;
472 gras_datadesc_type_t struct_type;
475 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
476 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
478 /* Create the struct descriptor */
479 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
480 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
481 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
482 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is post-incremented by the sprintf call, so the
 * number logged by VERB1 is one more than the number used in the name. */
484 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
485 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
486 struct_type = gras_datadesc_struct(buffname);
489 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
490 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
491 gras_ddt_parse_text);
493 /* Parse the identifiers */
/* parse_statement() throws mismatch_error on the closing token of the body;
 * the test on e.category below singles out the other categories (what is
 * done with them is not visible here). */
497 parse_statement(definition,identifiers,fields_to_push);
499 if (e.category != mismatch_error)
505 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
506 /* append the identifiers we've found */
507 xbt_dynar_foreach(identifiers,i, field) {
/* The condition guarding this error is not visible; judging by the message
 * it fires when field.tm.is_ref is still non-zero -- confirm. */
509 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
510 field.name,field.tm.is_ref);
512 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
513 gras_datadesc_struct_append(struct_type, field.name, field.type);
515 free(field.type_name);
518 xbt_dynar_reset(identifiers);
519 DEBUG1("struct_type=%p",(void*)struct_type);
521 /* Make sure that all fields declaring a size push it into the cbps */
522 xbt_dynar_foreach(fields_to_push,i, name) {
523 DEBUG1("struct_type=%p",(void*)struct_type);
/* A leading '*' was added by parse_statement() to mark a multiplier; it is
 * skipped when passing the field name on. */
524 if (name[0] == '*') {
525 VERB2("Push field '%s' as a multiplier into size stack of %p",
526 name+1, (void*)struct_type);
527 gras_datadesc_cb_field_push_multiplier(struct_type, name+1);
529 VERB2("Push field '%s' into size stack of %p",
530 name, (void*)struct_type);
531 gras_datadesc_cb_field_push(struct_type, name);
535 xbt_dynar_reset(fields_to_push);
537 gras_datadesc_struct_close(struct_type);
540 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
541 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
542 gras_ddt_parse_text);
544 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
546 xbt_dynar_free(&identifiers);
547 xbt_dynar_free(&fields_to_push);
/* Parse a typedef statement.
 *
 * definition: text being parsed; it is handed to parse_struct().
 *
 * The modifiers of the aliased type are read, a struct definition is parsed
 * with parse_struct(), further modifiers are read, and a WORD token is
 * required as the alias name.
 * NOTE(review): the last visible statement is PARSE_ERROR0("Cannot handle
 * typedef yet"), so building the alias is not implemented. The conditions
 * guarding the parse_struct() call and the reference error, as well as the
 * return statement, are not visible in this view. */
552 static gras_datadesc_type_t parse_typedef(char *definition) {
554 s_type_modifier_t tm;
556 gras_datadesc_type_t struct_desc=NULL;
557 gras_datadesc_type_t typedef_desc=NULL;
560 memset(&tm,0,sizeof(tm));
562 /* get the aliased type */
563 parse_type_modifier(&tm);
566 struct_desc = parse_struct(definition);
/* Modifiers found after the struct body, e.g. stars. */
569 parse_type_modifier(&tm);
572 PARSE_ERROR0("Cannot handle reference without annotation");
574 /* get the aliasing name */
575 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
576 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
577 gras_ddt_parse_text);
579 /* (FIXME: should) build the alias */
580 PARSE_ERROR0("Cannot handle typedef yet");
588 * gras_datadesc_parse:
590 * Create a datadescription from the result of parsing the C type description
/* name:        symbol the resulting description is expected to be named
 *              after; it is compared with the name of the parsed type.
 * C_statement: C source text of the type declaration; it is not modified, a
 *              working copy is made.
 *
 * Only statements starting with "struct" or "typedef" are handled; anything
 * else is reported with ERROR1.
 * NOTE(review): the return type line, the declaration of `definition`, the
 * return statement and any release of `definition` are not visible in this
 * view. */
593 gras_datadesc_parse(const char *name,
594 const char *C_statement) {
596 gras_datadesc_type_t res=NULL;
598 int semicolon_count=0;
599 int def_count,C_count;
602 /* put the \n back in place for debug */
/* First pass: measure the statement and look for ';' and '{' (the counting
 * statement itself is not visible; presumably semicolon_count is incremented
 * for each of them). */
603 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
604 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
/* Room for the text, one extra '\n' per counted character, and the NUL. */
606 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
/* Second pass: copy the text, adding a newline after each ';' and '{'. */
607 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
608 definition[def_count++] = C_statement[C_count];
609 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
610 definition[def_count++] = '\n';
613 definition[def_count] = '\0';
616 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
617 gras_ddt_parse_pointer_string_init(definition);
619 /* Do I have a typedef, or a raw struct ?*/
620 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* In both cases the keyword is consumed before the sub-parser is called. */
622 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
623 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
624 res = parse_struct(definition);
626 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
627 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
628 res = parse_typedef(definition);
631 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
635 gras_ddt_parse_pointer_string_close();
636 VERB0("end of _gras_ddt_type_parse()");
638 /* register it under the name provided as symbol */
/* NOTE(review): res starts as NULL and the failure branch above only logs in
 * the visible code, so res->name would dereference NULL unless a line not
 * shown here aborts first -- confirm. */
639 if (strcmp(res->name,name)) {
640 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",
644 gras_ddt_parse_lex_destroy();