3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Copyright (c) 2003 Arnaud Legrand. */
6 /* Copyright (c) 2003, 2004 Martin Quinson. */
7 /* All rights reserved. */
9 /* This program is free software; you can redistribute it and/or modify it
10 * under the terms of the license (GNU LGPL) which comes with this package. */
12 #include <ctype.h> /* isdigit */
15 #include "gras/DataDesc/datadesc_private.h"
16 #include "gras/DataDesc/ddt_parse.yy.h"
/* Declare the "ddt_parse" logging category as a sub-category of "datadesc".
 * The same category name is tested with XBT_LOG_ISENABLED() further down;
 * presumably the DEBUGn/VERBn/ERRORn calls of this file log to it as well
 * (suggested by the DEFAULT in the macro name -- TODO confirm). */
18 XBT_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
19 "Parsing C data structures to build GRAS data description");
/* Set of qualifiers collected in front of a base type while parsing.
 * The member declarations are not visible in this excerpt; the code in this
 * file accesses is_ref, is_unsigned, is_short, is_long, is_struct, is_union
 * and is_enum through this type. is_ref and is_long are used as counters
 * (they are incremented and looped over), the others as flags. */
21 typedef struct s_type_modifier{
31 } s_type_modifier_t,*type_modifier_t;
/* Description of one parsed identifier. Only the `type` member (the data
 * description of the identifier) is visible in this excerpt.
 * NOTE(review): the functions below manipulate s_identifier_t values having
 * type, type_name, name and tm members -- presumably that is the typedef name
 * of this struct; confirm against the full declaration. */
33 typedef struct s_field {
34 gras_datadesc_type_t type;
/* Declared extern, so the definition lives outside this file (presumably in
 * the lexer -- TODO confirm). The parsing functions below compare and copy
 * it as a NUL-terminated string. */
40 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record in *type_modifier the qualifiers found in front of a base type.
 *
 * The current token (gras_ddt_parse_tok_num / gras_ddt_parse_text) is
 * examined:
 *   - a '*' token increments is_ref;
 *   - "long" increments is_long (so that "long long" ends up as 2);
 *   - "unsigned", "short", "struct", "union" and "enum" set their flag to 1;
 *   - an EMPTY token has its own branch, whose body is not visible here.
 * The lexer is then advanced with gras_ddt_parse_lex_n_dump(), and a token
 * that is neither a WORD nor a STAR is reported as the end of the modifiers.
 *
 * type_modifier: structure updated in place; the caller is expected to have
 *                initialised it (the callers in this file zero it first).
 *
 * NOTE(review): the enclosing loop, the final else-branch owning the first
 * "Done with modifiers" message, the break statements and the closing braces
 * are not visible in this excerpt -- presumably the function iterates until a
 * non-modifier token is met and leaves that token as the current one;
 * confirm against the full file. */
43 static void parse_type_modifier(type_modifier_t type_modifier) {
46 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
47 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
48 DEBUG0("This is a reference");
49 type_modifier->is_ref++;
51 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
52 DEBUG0("This is an unsigned");
53 type_modifier->is_unsigned = 1;
55 } else if (!strcmp(gras_ddt_parse_text,"short")) {
56 DEBUG0("This is short");
57 type_modifier->is_short = 1;
59 } else if (!strcmp(gras_ddt_parse_text,"long")) {
60 DEBUG0("This is long");
61 type_modifier->is_long++; /* handle "long long" */
63 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
64 DEBUG0("This is a struct");
65 type_modifier->is_struct = 1;
67 } else if (!strcmp(gras_ddt_parse_text,"union")) {
68 DEBUG0("This is an union");
69 type_modifier->is_union = 1;
71 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
72 DEBUG0("This is an enum");
73 type_modifier->is_enum = 1;
75 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
79 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Move on to the next token and check whether it can still be a modifier. */
83 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
84 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
85 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
86 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Debugging helper: print the qualifiers recorded in tm on stdout.
 *
 * One "(keyword) " marker is written with printf() per flag that is set;
 * "(long) " is written is_long times and "(ref) " is_ref times.
 *
 * tm: the modifiers to display, received by value.
 *
 * NOTE(review): the declaration of the counter `i` and the end of the
 * function are not visible in this excerpt. */
93 static void print_type_modifier(s_type_modifier_t tm) {
97 if (tm.is_unsigned) printf("(unsigned) ");
98 if (tm.is_short) printf("(short) ");
99 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
101 if(tm.is_struct) printf("(struct) ");
102 if(tm.is_enum) printf("(enum) ");
103 if(tm.is_union) printf("(union) ");
105 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/* Replace the last identifier pushed on dynar by a fixed-size array of it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped and a new
 *        element is pushed in its place.
 * size:  number of elements of the array, forwarded to
 *        gras_datadesc_array_fixed().
 *
 * The new element reuses the popped element's name pointer, gets a freshly
 * allocated type_name ending in "[size]" and a type built by
 * gras_datadesc_array_fixed(). Its tm member stays zeroed by the memset.
 * The popped element's type_name is freed here. */
109 static void change_to_fixed_array(xbt_dynar_t dynar, long int size) {
110 s_identifier_t former,array;
111 memset(&array,0,sizeof(array));
114 xbt_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name, whereas the
 * string logged and freed around it is former.type_name. The trailing
 * sprintf() arguments are not visible here; if former.type_name is what
 * gets formatted, the buffer is only large enough while type_name is not
 * longer than type->name -- confirm, or size it from the formatted string.
 * The extra 48 bytes have to cover the "u "/"s "/"l " prefixes, the
 * brackets, the digits of size and the terminating NUL. */
115 array.type_name=(char*)xbt_malloc(strlen(former.type->name)+48);
116 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
117 size,former.type_name);
118 sprintf(array.type_name,"%s%s%s%s[%ld]",
119 (former.tm.is_unsigned?"u ":""),
120 (former.tm.is_short?"s ":""),
121 (former.tm.is_long?"l ":""),
124 free(former.type_name);
126 array.type = gras_datadesc_array_fixed(array.type_name, former.type, size); /* redeclarations are ignored */
127 array.name = former.name;
129 xbt_dynar_push(dynar,&array);
/* Replace the last identifier pushed on dynar by a reference (pointer) to it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped and a new
 *        element is pushed in its place.
 *
 * The new element reuses the popped element's name pointer, is given the
 * type name "<former type_name>*" and a type built by gras_datadesc_ref().
 * Its tm member stays zeroed by the memset. The popped element's type_name
 * is freed here. */
132 static void change_to_ref(xbt_dynar_t dynar) {
133 s_identifier_t former,ref;
134 memset(&ref,0,sizeof(ref));
137 xbt_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name (+2 for '*' and
 * the NUL) but the sprintf() below formats former.type_name. This overflows
 * if type_name is longer than type->name -- confirm both strings always have
 * the same length, or size the buffer from former.type_name. */
138 ref.type_name=(char*)xbt_malloc(strlen(former.type->name)+2);
139 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
140 sprintf(ref.type_name,"%s*",former.type_name);
141 free(former.type_name);
143 ref.type = gras_datadesc_ref(ref.type_name, former.type); /* redeclarations are ignored */
144 ref.name = former.name;
146 xbt_dynar_push(dynar,&ref);
/* Replace the last identifier pushed on dynar by the type returned by
 * gras_datadesc_ref_pop_arr() for it.
 *
 * dynar: dynar of s_identifier_t; its last element is popped and a new
 *        element is pushed in its place.
 *
 * Unlike change_to_ref(), the type name is not composed here: it is a
 * strdup() of the name carried by the freshly built type. The new element
 * reuses the popped element's name pointer and its tm member stays zeroed by
 * the memset. The popped element's type_name is freed here. */
150 static void change_to_ref_pop_array(xbt_dynar_t dynar) {
151 s_identifier_t former,ref;
152 memset(&ref,0,sizeof(ref));
155 xbt_dynar_pop(dynar,&former);
156 ref.type = gras_datadesc_ref_pop_arr(former.type); /* redeclarations are ignored */
157 ref.type_name = (char*)strdup(ref.type->name);
158 ref.name = former.name;
160 free(former.type_name);
162 xbt_dynar_push(dynar,&ref);
/* Parse one statement of a structure body -- a base type followed by one or
 * more declarators, up to the ';' -- and push one s_identifier_t per declared
 * name onto `identifiers`.
 *
 * definition:     text being parsed; only used here (and temporarily patched
 *                 in place) to log the statement when debug logging is on.
 * identifiers:    dynar of s_identifier_t receiving the parsed declarators;
 *                 array and annotation handling rewrite its last element
 *                 through the change_to_*() helpers.
 * fields_to_push: dynar of char* receiving the value of a "size" annotation
 *                 (see the push next to change_to_ref_pop_array()).
 *
 * Raises mismatch_error (THROW0) when the first token read is
 * GRAS_DDT_PARSE_TOKEN_RA, which is how the end of the enclosing structure is
 * signalled. Syntax problems are reported through the PARSE_ERRORn macros.
 *
 * Naming quirk: the token called GRAS_DDT_PARSE_TOKEN_COLON is the one the
 * error messages below describe as a comma (',').
 *
 * NOTE(review): several declarations (colon_pos, buffname, keyval, end, p),
 * else-branches and closing braces are not visible in this excerpt, so the
 * exact nesting of some branches below cannot be confirmed from here. */
166 static void parse_statement(char *definition,
167 xbt_dynar_t identifiers,
168 xbt_dynar_t fields_to_push) {
171 s_identifier_t identifier;
173 int expect_id_separator = 0;
176 memset(&identifier,0,sizeof(identifier));
178 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
179 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
181 THROW0(mismatch_error,0,"End of the englobing structure or union");
/* Debug only: cut the definition at the next ';' so that just the current
 * statement is logged, then put the ';' back.
 * NOTE(review): the visible loop condition only stops on ';' and does not
 * test for the end of the string. */
184 if (XBT_LOG_ISENABLED(ddt_parse,xbt_log_priority_debug)) {
186 for (colon_pos = gras_ddt_parse_col_pos;
187 definition[colon_pos] != ';';
189 definition[colon_pos] = '\0';
190 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
192 definition+gras_ddt_parse_col_pos,
193 gras_ddt_parse_col_pos);
194 definition[colon_pos] = ';';
197 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
198 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
199 gras_ddt_parse_text);
201 /**** get the type modifier of this statement ****/
202 parse_type_modifier(&identifier.tm);
204 /* FIXME: This does not detect recursive definitions at all? */
/* NOTE(review): if PARSE_ERROR0 does not return, the is_union / is_enum /
 * is_struct branches further down can never be reached -- confirm. */
205 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
206 PARSE_ERROR0("Cannot handle recursive type definition yet");
208 /**** get the base type, giving "short a" the needed love ****/
/* A size/sign qualifier followed by something that is not char, float,
 * double or int means that "int" was left implicit (as in "unsigned x"). */
209 if (!identifier.tm.is_union &&
210 !identifier.tm.is_enum &&
211 !identifier.tm.is_struct &&
213 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
215 strcmp(gras_ddt_parse_text,"char") &&
216 strcmp(gras_ddt_parse_text,"float") &&
217 strcmp(gras_ddt_parse_text,"double") &&
218 strcmp(gras_ddt_parse_text,"int") ) {
220 /* the user omitted "int": the current token is not consumed here */
221 identifier.type_name=(char*)strdup("int");
222 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Otherwise the current token is the base type name; copy it and advance. */
224 identifier.type_name=(char*)strdup(gras_ddt_parse_text);
225 DEBUG1("the base type is '%s'",identifier.type_name);
226 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
229 /**** build the base type for later use ****/
230 if (identifier.tm.is_union) {
231 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
233 } else if (identifier.tm.is_enum) {
234 PARSE_ERROR0("Cannot handle enum yet");
236 } else if (identifier.tm.is_struct) {
/* NOTE(review): unbounded sprintf() into buffname, whose declaration (and
 * size) is not visible in this excerpt. */
237 sprintf(buffname,"struct %s",identifier.type_name);
238 identifier.type = gras_datadesc_struct(buffname); /* Get created when does not exist */
240 } else if (identifier.tm.is_unsigned) {
241 if (!strcmp(identifier.type_name,"int")) {
242 if (identifier.tm.is_long == 2) {
243 identifier.type = gras_datadesc_by_name("unsigned long long int");
244 } else if (identifier.tm.is_long) {
245 identifier.type = gras_datadesc_by_name("unsigned long int");
246 } else if (identifier.tm.is_short) {
247 identifier.type = gras_datadesc_by_name("unsigned short int");
249 identifier.type = gras_datadesc_by_name("unsigned int");
252 } else if (!strcmp(identifier.type_name, "char")) {
253 identifier.type = gras_datadesc_by_name("unsigned char");
255 } else { /* impossible: gcc would have rejected this before us */
259 } else if (!strcmp(identifier.type_name, "float")) {
260 /* no modifier allowed by gcc */
261 identifier.type = gras_datadesc_by_name("float");
263 } else if (!strcmp(identifier.type_name, "double")) {
264 if (identifier.tm.is_long)
265 PARSE_ERROR0("long double not portable and thus not handled");
267 identifier.type = gras_datadesc_by_name("double");
269 } else { /* signed integer elemental */
270 if (!strcmp(identifier.type_name,"int")) {
271 if (identifier.tm.is_long == 2) {
272 identifier.type = gras_datadesc_by_name("signed long long int");
273 } else if (identifier.tm.is_long) {
274 identifier.type = gras_datadesc_by_name("signed long int");
275 } else if (identifier.tm.is_short) {
276 identifier.type = gras_datadesc_by_name("signed short int");
278 identifier.type = gras_datadesc_by_name("int");
281 } else if (!strcmp(identifier.type_name, "char")) {
282 identifier.type = gras_datadesc_by_name("char");
/* Anything else has to be a type already registered under that name. */
285 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
286 identifier.type = gras_datadesc_by_name(identifier.type_name);
287 if (!identifier.type)
288 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
291 /* Now identifier.type and identifier.name speak about the base type.
292 Stars are not eaten unless 'int' was omitted.
293 We will have to enhance it if we are in fact asked for array or reference */
295 /**** look for the symbols of this type ****/
/* One token per iteration, until the ';' closing the statement.
 * expect_id_separator is set once a name has been pushed: from then on only
 * a separator, a '[' or a GRAS_ANNOTE(...) may follow. */
296 for(expect_id_separator = 0;
298 (/*(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME*/
299 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
301 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
303 if(expect_id_separator) {
304 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
305 expect_id_separator = 0;
308 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
309 /* Handle fixed size arrays */
310 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
311 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
312 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
314 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The whole token must be a base-10 number: strtol() has to consume at least
 * one character and stop on the terminating NUL. Range errors (ERANGE) are
 * not checked in the visible code. */
316 long int size=strtol(gras_ddt_parse_text, &end, 10);
318 if (end == gras_ddt_parse_text || *end != '\0')
319 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
321 /* replace the previously pushed type to an array of it */
322 change_to_fixed_array(identifiers,size);
324 /* eat the closing bracket */
325 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
326 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
327 PARSE_ERROR0("Unparsable size of array");
328 DEBUG1("Fixed size array, size=%ld",size);
331 PARSE_ERROR0("Unparsable size of array");
333 /* End of fixed size arrays handling */
335 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
336 /* Handle annotation */
/* Expected shape: GRAS_ANNOTE ( <key name> , <key value> ). EMPTY tokens
 * are skipped between the elements. */
337 s_identifier_t array;
338 char *keyname = NULL;
340 memset(&array,0,sizeof(array));
341 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
342 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
344 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
345 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
346 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
348 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
350 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
351 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
/* NOTE(review): keyname is heap-allocated here; no matching free() is
 * visible in this excerpt -- confirm it is released. */
352 keyname = (char*)strdup(gras_ddt_parse_text);
354 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
356 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
357 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
359 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
361 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
362 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
363 keyval = (char*)strdup(gras_ddt_parse_text);
365 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
367 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
368 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
370 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
372 DEBUG2("Anotation: %s=%s",keyname,keyval);
373 if (!strcmp(keyname,"size")) {
/* Each "size" annotation consumes one pending '*' of the declarator. */
375 if (!identifier.tm.is_ref)
376 PARSE_ERROR0("Size annotation for a field not being a reference");
377 identifier.tm.is_ref--;
/* size 1: a plain reference to a single element. */
379 if (!strcmp(keyval,"1")) {
380 change_to_ref(identifiers);
/* NOTE(review): the body of this scan and the test that follows it are not
 * visible; presumably they decide whether keyval is a number (fixed array
 * behind a reference) or a field name (dynamic size pushed below).
 * atoi() reports neither overflow nor malformed input. */
386 for (p = keyval; *p != '\0'; p++)
390 change_to_fixed_array(identifiers,atoi(keyval));
391 change_to_ref(identifiers);
/* The address of keyval is handed to the dynar, which stores the char*
 * value itself; the string is therefore not freed here. */
396 change_to_ref_pop_array(identifiers);
397 xbt_dynar_push(fields_to_push,&keyval);
404 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
408 /* End of annotation handling */
410 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
412 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
413 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
416 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
417 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
421 /* found a symbol name. Build the type and push it to dynar */
422 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The dynar receives a copy of `identifier`, so the strdup()ed name and the
 * type_name pointer are from now on reachable through the pushed element. */
424 identifier.name=(char*)strdup(gras_ddt_parse_text);
425 DEBUG1("Found the identifier \"%s\"",identifier.name);
427 xbt_dynar_push(identifiers, &identifier);
428 DEBUG1("Dynar_len=%lu",xbt_dynar_length(identifiers));
429 expect_id_separator = 1;
433 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a structure definition and build the matching data description.
 *
 * On entry the current token is expected to be either the structure name or
 * the opening brace. The descriptor is created with gras_datadesc_struct(),
 * the statements are parsed with parse_statement(), every identifier found
 * is appended as a field, the names collected in fields_to_push are passed to
 * gras_datadesc_cb_field_push(), and the descriptor is closed with
 * gras_datadesc_struct_close().
 *
 * definition: text being parsed, forwarded to parse_statement().
 *
 * NOTE(review): the statement loop, the TRY/CATCH structure around
 * parse_statement(), a few declarations (buffname, i, name, e) and the
 * return statement are not visible in this excerpt; presumably struct_type
 * is what gets returned -- confirm against the full file. */
439 static gras_datadesc_type_t parse_struct(char *definition) {
/* Counter used to give each anonymous structure a distinct name; being
 * static, it keeps its value across calls. */
444 static int anonymous_struct=0;
446 xbt_dynar_t identifiers;
447 s_identifier_t field;
451 xbt_dynar_t fields_to_push;
454 gras_datadesc_type_t struct_type;
457 identifiers = xbt_dynar_new(sizeof(s_identifier_t),NULL);
458 fields_to_push = xbt_dynar_new(sizeof(char*),NULL);
460 /* Create the struct descriptor */
461 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
462 struct_type = gras_datadesc_struct(gras_ddt_parse_text);
463 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
464 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* No name given: make one up. The counter is post-incremented inside the
 * sprintf(), so the number logged just below is one more than the number
 * used in the generated name. */
466 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
467 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
468 struct_type = gras_datadesc_struct(buffname);
471 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
472 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
473 gras_ddt_parse_text);
475 /* Parse the identifiers */
/* parse_statement() raises mismatch_error on the closing brace; the
 * category test below tells that case apart from other exceptions
 * (presumably the latter are re-thrown -- the handler body is not visible). */
479 parse_statement(definition,identifiers,fields_to_push);
481 if (e.category != mismatch_error)
487 DEBUG1("This statement contained %lu identifiers",xbt_dynar_length(identifiers));
488 /* append the identifiers we've found */
489 xbt_dynar_foreach(identifiers,i, field) {
/* NOTE(review): the condition guarding this error is not visible;
 * presumably it fires when field.tm.is_ref is still non-zero. */
491 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
492 field.name,field.tm.is_ref);
494 VERB2("Append field '%s' to %p",field.name, (void*)struct_type);
495 gras_datadesc_struct_append(struct_type, field.name, field.type);
497 free(field.type_name);
/* Empty the dynar so that it can be reused. */
500 xbt_dynar_reset(identifiers);
501 DEBUG1("struct_type=%p",(void*)struct_type);
503 /* Make sure that all fields declaring a size push it into the cbps */
504 xbt_dynar_foreach(fields_to_push,i, name) {
505 DEBUG1("struct_type=%p",(void*)struct_type);
506 VERB2("Push field '%s' into size stack of %p", name, (void*)struct_type);
507 gras_datadesc_cb_field_push(struct_type, name);
510 xbt_dynar_reset(fields_to_push);
512 gras_datadesc_struct_close(struct_type);
515 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
516 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
517 gras_ddt_parse_text);
/* Step over the closing brace, then release both work dynars. */
519 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
521 xbt_dynar_free(&identifiers);
522 xbt_dynar_free(&fields_to_push);
/* Parse a typedef statement. Not functional yet: in the visible code the
 * function ends on an unconditional "Cannot handle typedef yet" parse error.
 *
 * What the visible code does before that: collect the modifiers of the
 * aliased type, parse a structure definition with parse_struct(), collect
 * the modifiers that follow it, and check that the next token is a WORD
 * (the alias name).
 *
 * definition: text being parsed, forwarded to parse_struct().
 *
 * NOTE(review): the conditions guarding parse_struct() and the "reference
 * without annotation" error, as well as the return statement, are not
 * visible in this excerpt. */
527 static gras_datadesc_type_t parse_typedef(char *definition) {
529 s_type_modifier_t tm;
531 gras_datadesc_type_t struct_desc=NULL;
532 gras_datadesc_type_t typedef_desc=NULL;
535 memset(&tm,0,sizeof(tm));
537 /* get the aliased type */
538 parse_type_modifier(&tm);
541 struct_desc = parse_struct(definition);
/* Modifiers located between the aliased type and the alias name. */
544 parse_type_modifier(&tm);
547 PARSE_ERROR0("Cannot handle reference without annotation");
549 /* get the aliasing name */
550 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
551 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
552 gras_ddt_parse_text);
554 /* (FIXME: should) build the alias */
555 PARSE_ERROR0("Cannot handle typedef yet");
563 * gras_datadesc_parse:
565 * Create a datadescription from the result of parsing the C type description
/* name:        symbol under which the caller expects the type to be known;
 *              compared with the name of the parsed type at the end.
 * C_statement: C source text of the type definition; it is not modified,
 *              a working copy is parsed instead.
 *
 * Steps: copy C_statement into a fresh buffer with a '\n' added after every
 * ';' and '{', hand that copy to the lexer, read the first token and dispatch
 * to parse_struct() ("struct") or parse_typedef() ("typedef"), then shut the
 * lexer down and check the resulting type name against `name`.
 *
 * NOTE(review): the return type line, the declaration of `definition`, the
 * else-branches and the return statement are not visible in this excerpt;
 * presumably `res` is what gets returned. */
568 gras_datadesc_parse(const char *name,
569 const char *C_statement) {
571 gras_datadesc_type_t res=NULL;
573 int semicolon_count=0;
574 int def_count,C_count;
577 /* reput the \n in place for debug */
/* First pass: measure the input and count the characters after which a
 * newline will be inserted (the counting statement itself is not visible;
 * presumably it increments semicolon_count). */
578 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
579 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
/* Room for the text, one '\n' per counted character and the final NUL.
 * NOTE(review): no free() of this buffer is visible in this excerpt. */
581 definition = (char*)xbt_malloc(C_count + semicolon_count + 1);
/* Second pass: copy, adding the newlines. */
582 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
583 definition[def_count++] = C_statement[C_count];
584 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
585 definition[def_count++] = '\n';
588 definition[def_count] = '\0';
591 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
592 gras_ddt_parse_pointer_string_init(definition);
594 /* Do I have a typedef, or a raw struct ?*/
595 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
597 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
598 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
599 res = parse_struct(definition);
601 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
602 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
603 res = parse_typedef(definition);
606 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
610 gras_ddt_parse_pointer_string_close();
611 VERB0("end of _gras_ddt_type_parse()");
613 /* register it under the name provided as symbol */
/* NOTE(review): res is still NULL here when neither branch above assigned
 * it; whether the error branch aborts before reaching this dereference is
 * not visible -- confirm. */
614 if (strcmp(res->name,name)) {
615 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",
619 gras_ddt_parse_lex_destroy();