3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Authors: Arnaud Legrand, Martin Quinson */
6 /* Copyright (C) 2003, 2004 Martin Quinson. */
8 /* This program is free software; you can redistribute it and/or modify it
9 under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <ctype.h> /* isdigit */
13 #include "DataDesc/datadesc_private.h"
14 #include "DataDesc/ddt_parse.yy.h"
/* Logging category used by the DEBUGn/VERBn/ERRORn calls of this file:
 * 'ddt_parse', declared as a subcategory of 'datadesc'. */
16 GRAS_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc);
/* Modifiers attached to a parsed type. The members used in this file are
 * is_ref, is_unsigned, is_short, is_long, is_struct, is_union and is_enum
 * (the member list itself is not visible in this excerpt). */
18 typedef struct s_type_modifier{
/* One parsed symbol. NOTE(review): presumably this is the struct behind
 * identifier_t, whose type, type_name, name and tm members are used below --
 * confirm against the full definition. */
30 typedef struct s_field {
31 gras_datadesc_type_t *type;
37 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record into *type_modifier the modifier carried by the current token.
 *
 * The current token is read from gras_ddt_parse_tok_num and
 * gras_ddt_parse_text:
 *   - a '*' token increments is_ref;
 *   - "long" increments is_long (so that "long long" gives 2);
 *   - "unsigned", "short", "struct", "union" and "enum" set their flag to 1.
 * The lexer is then advanced with gras_ddt_parse_lex_n_dump(), and a token
 * that is neither a word nor a '*' ends the modifier list.
 *
 * NOTE(review): the enclosing loop and the exits of the "done" branches are
 * not visible in this excerpt; check them before relying on this summary. */
40 static void parse_type_modifier(type_modifier_t *type_modifier) {
43 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
44 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
45 DEBUG0("This is a reference");
46 type_modifier->is_ref++;
48 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
49 DEBUG0("This is an unsigned");
50 type_modifier->is_unsigned = 1;
52 } else if (!strcmp(gras_ddt_parse_text,"short")) {
53 DEBUG0("This is short");
54 type_modifier->is_short = 1;
56 } else if (!strcmp(gras_ddt_parse_text,"long")) {
57 DEBUG0("This is long");
58 type_modifier->is_long++; /* handle "long long" */
60 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
61 DEBUG0("This is a struct");
62 type_modifier->is_struct = 1;
64 } else if (!strcmp(gras_ddt_parse_text,"union")) {
65 DEBUG0("This is an union");
66 type_modifier->is_union = 1;
68 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
69 DEBUG0("This is an enum");
70 type_modifier->is_enum = 1;
72 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
76 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Fetch the next token; only a word or a '*' can still be a modifier */
80 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
81 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
82 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
83 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Debugging helper: print on stdout a "(tag) " marker for every modifier set
 * in tm. "(long) " is printed tm.is_long times and "(ref) " tm.is_ref times,
 * so "long long" and multiple indirection levels show up as repeated tags. */
90 static void print_type_modifier(type_modifier_t tm) {
94 if (tm.is_unsigned) printf("(unsigned) ");
95 if (tm.is_short) printf("(short) ");
96 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
98 if(tm.is_struct) printf("(struct) ");
99 if(tm.is_enum) printf("(enum) ");
100 if(tm.is_union) printf("(union) ");
102 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
106 static gras_error_t change_to_fixed_array(gras_dynar_t *dynar, long int size) {
107 gras_error_t errcode;
108 identifier_t former,array;
109 memset(&array,0,sizeof(array));
112 gras_dynar_pop(dynar,&former);
113 array.type_name=malloc(strlen(former.type->name)+20);
114 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
115 size,former.type_name);
116 sprintf(array.type_name,"%s[%ld]",former.type_name,size);
117 free(former.type_name);
119 TRY(gras_datadesc_array_fixed(array.type_name, former.type, size, &array.type)); /* redeclaration are ignored */
121 array.name = former.name;
123 TRY(gras_dynar_push(dynar,&array));
127 static gras_error_t change_to_ref(gras_dynar_t *dynar) {
128 gras_error_t errcode;
129 identifier_t former,ref;
130 memset(&ref,0,sizeof(ref));
133 gras_dynar_pop(dynar,&former);
134 ref.type_name=malloc(strlen(former.type->name)+2);
135 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
136 sprintf(ref.type_name,"%s*",former.type_name);
137 free(former.type_name);
139 TRY(gras_datadesc_ref(ref.type_name, former.type, &ref.type)); /* redeclaration are ignored */
141 ref.name = former.name;
143 TRY(gras_dynar_push(dynar,&ref));
/* Replace the identifier on top of `dynar` by the type built by
 * gras_datadesc_ref_pop_arr() from the popped type.
 *
 * Unlike the two helpers above, the name of the new type is not composed
 * here: it is copied (strdup) from the descriptor returned by
 * gras_datadesc_ref_pop_arr(). The field name string of the popped element
 * is handed over to the new element and the old type_name is freed.
 *
 * NOTE(review): the strdup() result is not checked, and the tail of the
 * function is not visible in this excerpt. */
148 static gras_error_t change_to_ref_pop_array(gras_dynar_t *dynar) {
149 gras_error_t errcode;
150 identifier_t former,ref;
151 memset(&ref,0,sizeof(ref));
154 gras_dynar_pop(dynar,&former);
155 TRY(gras_datadesc_ref_pop_arr(former.type,&ref.type)); /* redeclarations are ignored */
156 ref.type_name = strdup(ref.type->name);
157 ref.name = former.name;
159 free(former.type_name);
161 TRY(gras_dynar_push(dynar,&ref));
/* Parse one statement of a structure body: a base type followed by the
 * symbols declared with it, up to the ';'.
 *
 * definition:     text being parsed; in the visible code it is only used to
 *                 log the current statement.
 * identifiers:    dynar of identifier_t receiving one element per symbol
 *                 found; array sizes and annotations rewrite the last pushed
 *                 element in place (change_to_* helpers).
 * fields_to_push: dynar of char* receiving the key value of "size"
 *                 annotations handled by the change_to_ref_pop_array() case.
 *
 * Returns mismatch_error when the first token is GRAS_DDT_PARSE_TOKEN_RA,
 * i.e. when the end of the enclosing struct or union is reached instead of a
 * statement. Errors of the helpers are propagated through TRY().
 *
 * NOTE(review): several lines (declarations, else branches, closing braces,
 * final return) are not visible in this excerpt. */
166 static gras_error_t parse_statement(char *definition,
167 gras_dynar_t *identifiers,
168 gras_dynar_t *fields_to_push) {
169 gras_error_t errcode;
172 identifier_t identifier;
174 int expect_id_separator = 0;
177 memset(&identifier,0,sizeof(identifier));
/* Fetch the first token of the statement */
179 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
180 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
182 return mismatch_error; /* end of the enclosing structure or union */
/* Debug only: temporarily cut `definition` at the next ';' so that just the
 * current statement is logged, then put the ';' back */
185 if (GRAS_LOG_ISENABLED(ddt_parse,gras_log_priority_debug)) {
187 for (colon_pos = gras_ddt_parse_col_pos;
188 definition[colon_pos] != ';';
190 definition[colon_pos] = '\0';
191 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
193 definition+gras_ddt_parse_col_pos,
194 gras_ddt_parse_col_pos);
195 definition[colon_pos] = ';';
/* A statement has to begin with a word (type modifier or type name) */
198 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
199 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
200 gras_ddt_parse_text);
202 /**** get the type modifier of this statement ****/
203 parse_type_modifier(&identifier.tm);
205 /* FIXME: This does not detect recursive definitions at all? */
/* NOTE(review): if PARSE_ERROR0 leaves the function (macro not visible
 * here), the is_struct / is_union / is_enum branches further down cannot be
 * reached -- confirm. */
206 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
207 PARSE_ERROR0("Cannot handle recursive type definition yet");
209 /**** get the base type, giving "short a" the needed love ****/
/* A short/long/unsigned modifier followed by something else than char,
 * float, double or int means that "int" was left implicit */
210 if (!identifier.tm.is_union &&
211 !identifier.tm.is_enum &&
212 !identifier.tm.is_struct &&
214 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
216 strcmp(gras_ddt_parse_text,"char") &&
217 strcmp(gras_ddt_parse_text,"float") &&
218 strcmp(gras_ddt_parse_text,"double") &&
219 strcmp(gras_ddt_parse_text,"int") ) {
221 /* the user omitted "int" */
222 identifier.type_name=strdup("int");
223 DEBUG0("the base type is 'int', which were omited (you vicious user)");
225 identifier.type_name=strdup(gras_ddt_parse_text);
226 DEBUG1("the base type is '%s'",identifier.type_name);
227 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
230 /**** build the base type for latter use ****/
231 if (identifier.tm.is_union) {
232 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
234 } else if (identifier.tm.is_enum) {
235 PARSE_ERROR0("Cannot handle enum yet");
237 } else if (identifier.tm.is_struct) {
238 sprintf(buffname,"struct %s",identifier.type_name);
239 TRY(gras_datadesc_struct(buffname,&identifier.type)); /* Get created when does not exist */
/* Unsigned elemental types: look up the predefined descriptor by name */
241 } else if (identifier.tm.is_unsigned) {
242 if (!strcmp(identifier.type_name,"int")) {
243 if (identifier.tm.is_long == 2) {
244 identifier.type = gras_datadesc_by_name("unsigned long long int");
245 } else if (identifier.tm.is_long) {
246 identifier.type = gras_datadesc_by_name("unsigned long int");
247 } else if (identifier.tm.is_short) {
248 identifier.type = gras_datadesc_by_name("unsigned short int");
250 identifier.type = gras_datadesc_by_name("unsigned int");
253 } else if (!strcmp(identifier.type_name, "char")) {
254 identifier.type = gras_datadesc_by_name("unsigned char");
256 } else { /* impossible, gcc rejects such code before us */
260 } else if (!strcmp(identifier.type_name, "float")) {
261 /* no modificator allowed by gcc */
262 identifier.type = gras_datadesc_by_name("float");
264 } else if (!strcmp(identifier.type_name, "double")) {
265 if (identifier.tm.is_long)
266 PARSE_ERROR0("long double not portable and thus not handled");
268 identifier.type = gras_datadesc_by_name("double");
270 } else { /* signed integer elemental */
271 if (!strcmp(identifier.type_name,"int")) {
272 if (identifier.tm.is_long == 2) {
273 identifier.type = gras_datadesc_by_name("signed long long int");
274 } else if (identifier.tm.is_long) {
275 identifier.type = gras_datadesc_by_name("signed long int");
276 } else if (identifier.tm.is_short) {
277 identifier.type = gras_datadesc_by_name("signed short int");
279 identifier.type = gras_datadesc_by_name("int");
282 } else if (!strcmp(identifier.type_name, "char")) {
283 identifier.type = gras_datadesc_by_name("char");
285 } else { /* impossible */
286 PARSE_ERROR0("The Impossible Did Happen (once again)");
289 /* Now identifier.type and identifier.name speak about the base type.
290 Stars are not eaten unless 'int' was omitted.
291 We will have to enhance it if we are in fact asked for array or reference */
293 /**** look for the symbols of this type ****/
/* Consume tokens until the ';' closing the statement */
294 for(expect_id_separator = 0;
296 (//(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME
297 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
299 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
/* A symbol name was just read: only a ',', a "[size]" or a GRAS_ANNOTE(...)
 * annotation may follow. Note that the token called COLON is the one the
 * error messages below describe as a comma (','). */
301 if(expect_id_separator) {
302 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
303 expect_id_separator = 0;
306 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
307 /* Handle fixed size arrays */
308 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
309 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
310 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
312 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The whole token has to be a base-10 number */
314 long int size=strtol(gras_ddt_parse_text, &end, 10);
316 if (end == gras_ddt_parse_text || *end != '\0')
317 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
319 /* replace the previously pushed type to an array of it */
320 TRY(change_to_fixed_array(identifiers,size));
322 /* eat the closing bracket */
323 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
324 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
325 PARSE_ERROR0("Unparsable size of array");
326 DEBUG1("Fixed size array, size=%ld",size);
329 PARSE_ERROR0("Unparsable size of array");
331 /* End of fixed size arrays handling */
333 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
334 /* Handle annotation */
/* Expected form: GRAS_ANNOTE ( keyname , keyval ), EMPTY tokens being
 * skipped between the elements */
336 char *keyname = NULL;
338 memset(&array,0,sizeof(array));
339 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
340 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
342 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
343 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
344 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
346 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
348 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
349 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
350 keyname = strdup(gras_ddt_parse_text);
352 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
354 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
355 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
357 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
359 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
360 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
361 keyval = strdup(gras_ddt_parse_text);
363 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
365 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
366 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
368 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
370 DEBUG2("Anotation: %s=%s",keyname,keyval);
371 if (!strcmp(keyname,"size")) {
/* A size annotation consumes one pending '*' of the declaration */
373 if (!identifier.tm.is_ref)
374 PARSE_ERROR0("Size annotation for a field not being a reference");
375 identifier.tm.is_ref--;
/* size "1": plain reference to a single element */
377 if (!strcmp(keyval,"1")) {
378 TRY(change_to_ref(identifiers));
/* NOTE(review): the test performed by this scan of keyval is not visible;
 * presumably it separates numeric sizes (reference to a fixed-size array)
 * from field names (reference to an array sized by that field, whose name is
 * remembered in fields_to_push). atoi() reports no conversion error; strtol()
 * as used for "[size]" above would. */
384 for (p = keyval; *p != '\0'; p++)
388 TRY(change_to_fixed_array(identifiers,atoi(keyval)));
389 TRY(change_to_ref(identifiers));
394 TRY(change_to_ref_pop_array(identifiers));
395 TRY(gras_dynar_push(fields_to_push,&keyval));
402 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
406 /* End of annotation handling */
408 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
410 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
411 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
414 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
415 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
419 /* found a symbol name. Build the type and push it to dynar */
420 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
422 identifier.name=strdup(gras_ddt_parse_text);
423 DEBUG1("Found the identifier \"%s\"",identifier.name);
425 TRY(gras_dynar_push(identifiers, &identifier));
426 DEBUG1("Dynar_len=%d",gras_dynar_length(identifiers));
427 expect_id_separator = 1;
431 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a structure definition and build its data description.
 *
 * On entry the current token is either the struct name (a word) or, for an
 * anonymous struct, whatever follows the 'struct' keyword; anonymous structs
 * get the name "anonymous struct <n>" from a static counter. The body is then
 * read statement by statement with parse_statement(): every identifier found
 * is appended as a field, and every field name collected in fields_to_push is
 * registered with gras_datadesc_cb_field_push().
 *
 * The statement loop ends on the first error; mismatch_error is the normal
 * end (closing of the struct), any other error makes the function return
 * NULL.
 *
 * NOTE(review): parts of the function (including the final return) are not
 * visible in this excerpt. */
438 static gras_datadesc_type_t *parse_struct(char *definition) {
440 gras_error_t errcode;
442 static int anonymous_struct=0;
444 gras_dynar_t *identifiers;
448 gras_dynar_t *fields_to_push;
451 gras_datadesc_type_t *struct_type;
/* NOTE(review): the error code of the first gras_dynar_new() is overwritten
 * by the second call before being tested, so a failure to create
 * `identifiers` goes unnoticed when the second creation succeeds. */
454 errcode=gras_dynar_new(&identifiers,sizeof(identifier_t),NULL);
455 errcode=gras_dynar_new(&fields_to_push,sizeof(char*),NULL);
456 if (errcode != no_error) {
461 /* Create the struct descriptor */
462 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
463 TRYFAIL(gras_datadesc_struct(gras_ddt_parse_text,&struct_type));
464 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
465 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is post-incremented while building the name, so
 * the number logged on the next line is one more than the one in the name. */
467 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
468 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
469 TRYFAIL(gras_datadesc_struct(buffname,&struct_type));
472 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
473 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
474 gras_ddt_parse_text);
476 /* Parse the identifiers */
477 for (errcode=parse_statement(definition,identifiers,fields_to_push);
478 errcode == no_error ;
479 errcode=parse_statement(definition,identifiers,fields_to_push)) {
481 DEBUG1("This statement contained %d identifiers",gras_dynar_length(identifiers));
482 /* append the identifiers we've found */
483 gras_dynar_foreach(identifiers,i, field) {
485 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
486 field.name,field.tm.is_ref);
488 VERB2("Append field '%s' to %p",field.name, struct_type);
489 TRYFAIL(gras_datadesc_struct_append(struct_type, field.name, field.type));
491 free(field.type_name);
/* Both dynars are emptied so that they can be reused for the next statement */
494 gras_dynar_reset(identifiers);
495 DEBUG1("struct_type=%p",struct_type);
497 /* Make sure that all fields declaring a size push it into the cbps */
498 gras_dynar_foreach(fields_to_push,i, name) {
499 DEBUG1("struct_type=%p",struct_type);
500 VERB2("Push field '%s' into size stack of %p", name, struct_type);
501 gras_datadesc_cb_field_push(struct_type, name);
504 gras_dynar_reset(fields_to_push);
506 gras_datadesc_struct_close(struct_type);
/* mismatch_error is how parse_statement() reports the closing '}' */
507 if (errcode != mismatch_error) {
509 return NULL; /* FIXME: LEAK! */
513 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
514 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
515 gras_ddt_parse_text);
517 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
519 gras_dynar_free(identifiers);
520 gras_dynar_free(fields_to_push);
/* Parse a typedef, the 'typedef' keyword having been consumed by the caller.
 *
 * Visible steps: read the modifiers of the aliased type, parse a struct
 * definition with parse_struct(), read the modifiers again, refuse references
 * ("Cannot handle reference without annotation") and check that the alias
 * name is a word. The last visible statement then reports "Cannot handle
 * typedef yet", i.e. the alias itself is not built.
 *
 * NOTE(review): the conditions guarding parse_struct() and the reference
 * error, as well as the return statements, are not visible in this excerpt. */
525 static gras_datadesc_type_t * parse_typedef(char *definition) {
529 gras_datadesc_type_t *struct_desc=NULL;
530 gras_datadesc_type_t *typedef_desc=NULL;
533 memset(&tm,0,sizeof(tm));
535 /* get the aliased type */
536 parse_type_modifier(&tm);
539 struct_desc = parse_struct(definition);
542 parse_type_modifier(&tm);
545 PARSE_ERROR0("Cannot handle reference without annotation");
547 /* get the aliasing name */
548 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
549 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
550 gras_ddt_parse_text);
552 /* (FIXME: should) build the alias */
553 PARSE_ERROR0("Cannot handle typedef yet");
561 * gras_datadesc_parse:
563 * Create a datadescription from the result of parsing the C type description
565 gras_datadesc_type_t *
566 gras_datadesc_parse(const char *name,
567 const char *C_statement) {
569 gras_datadesc_type_t * res=NULL;
571 int semicolon_count=0;
572 int def_count,C_count;
575 /* reput the \n in place for debug */
576 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
577 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
579 definition = malloc(C_count + semicolon_count + 1);
580 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
581 definition[def_count++] = C_statement[C_count];
582 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
583 definition[def_count++] = '\n';
586 definition[def_count] = '\0';
589 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
590 gras_ddt_parse_pointer_string_init(definition);
592 /* Do I have a typedef, or a raw struct ?*/
593 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
595 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
596 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
597 res = parse_struct(definition);
599 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
600 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
601 res = parse_typedef(definition);
604 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
608 gras_ddt_parse_pointer_string_close();
609 VERB0("end of _gras_ddt_type_parse()");
611 /* register it under the name provided as symbol */
612 if (strcmp(res->name,name)) {
613 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",