3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Authors: Arnaud Legrand, Martin Quinson */
6 /* Copyright (C) 2003, 2004 Martin Quinson. */
8 /* This program is free software; you can redistribute it and/or modify it
9 under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <ctype.h> /* isdigit */
13 #include "DataDesc/datadesc_private.h"
14 #include "DataDesc/ddt_parse.yy.h"
16 GRAS_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc);
/* Set of C type modifiers collected while parsing a declaration.
   NOTE(review): the member list is not visible in this listing. Judging from the
   accesses made through type_modifier_t further down, it presumably holds is_ref,
   is_unsigned, is_short, is_long, is_struct, is_union and is_enum -- confirm
   against the full file. */
18 typedef struct s_type_modifier{
/* One parsed declarator of a statement.
   NOTE(review): only the first member is visible in this listing, and the typedef
   name closing the declaration is not shown. The code below manipulates
   identifier_t values through type, type_name, name and tm members; presumably
   this is that structure -- confirm against the full file. */
30 typedef struct s_field {
/* datatype description attached to this declarator */
31 gras_datadesc_type_t *type;
37 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/* Record the type modifiers found at the current parser position into *type_modifier.
 *
 * The current token is read from the globals gras_ddt_parse_tok_num and
 * gras_ddt_parse_text:
 *   - a star token              -> is_ref is incremented
 *   - "unsigned" / "short"      -> is_unsigned / is_short is set to 1
 *   - "long"                    -> is_long is incremented ("long long" gives 2)
 *   - "struct", "union", "enum" -> the matching flag is set to 1
 * The next token is then fetched with gras_ddt_parse_lex_n_dump().
 *
 * Counters are only ever incremented here, so *type_modifier has to be zeroed
 * by the caller (both callers visible in this file memset it first).
 *
 * NOTE(review): several original lines (loop construct, else branch, closing
 * braces) are not visible in this listing. Presumably the body is repeated until
 * a token which is not a modifier is met -- confirm against the full file.
 */
40 static void parse_type_modifier(type_modifier_t *type_modifier) {
43 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
44 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
45 DEBUG0("This is a reference");
46 type_modifier->is_ref++;
48 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
49 DEBUG0("This is an unsigned");
50 type_modifier->is_unsigned = 1;
52 } else if (!strcmp(gras_ddt_parse_text,"short")) {
53 DEBUG0("This is short");
54 type_modifier->is_short = 1;
56 } else if (!strcmp(gras_ddt_parse_text,"long")) {
57 DEBUG0("This is long");
58 type_modifier->is_long++; /* handle "long long" */
60 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
61 DEBUG0("This is a struct");
62 type_modifier->is_struct = 1;
64 } else if (!strcmp(gras_ddt_parse_text,"union")) {
65 DEBUG0("This is an union");
66 type_modifier->is_union = 1;
68 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
69 DEBUG0("This is an enum");
70 type_modifier->is_enum = 1;
72 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
/* NOTE(review): original lines 73-75 are not visible in this listing, so the
   log statement below may belong to a later branch (a word which is not a
   modifier) rather than to the EMPTY case -- confirm against the full file. */
76 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Read the next token; anything other than a word or a star ends the modifiers. */
80 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
81 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
82 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
83 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/* Print on stdout one textual tag per modifier recorded in tm.
 *
 * Tags are written in this order: "(unsigned) ", "(short) ", one "(long) " per
 * long keyword counted in tm.is_long, "(struct) ", "(enum) ", "(union) ", then
 * one "(ref) " per indirection level counted in tm.is_ref. The visible lines
 * print no trailing newline.
 *
 * NOTE(review): the declaration of the loop counter i is on a line not visible
 * in this listing.
 */
90 static void print_type_modifier(type_modifier_t tm) {
94 if (tm.is_unsigned) printf("(unsigned) ");
95 if (tm.is_short) printf("(short) ");
96 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
98 if(tm.is_struct) printf("(struct) ");
99 if(tm.is_enum) printf("(enum) ");
100 if(tm.is_union) printf("(union) ");
102 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/* Replace the last identifier pushed on dynar by a fixed-size array of it.
 *
 * dynar: dynar of identifier_t; its last element is popped and a new one is pushed.
 * size:  number of elements of the array.
 *
 * The new entry is named "<former type_name>[<size>]" and keeps the symbol name
 * of the popped entry. Since it starts from a zeroed structure and only
 * type_name, type and name are assigned here, its other members stay zero.
 *
 * Calls wrapped in TRY presumably make the function return the error code on
 * failure (macro defined elsewhere -- confirm); the final return statement is
 * not visible in this listing.
 */
106 static gras_error_t change_to_fixed_array(gras_dynar_t *dynar, long int size) {
107 gras_error_t errcode;
108 identifier_t former,array;
109 memset(&array,0,sizeof(array));
112 gras_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name but filled below
   from former.type_name. If both strings can differ in length the sprintf may
   overflow -- confirm they are always identical, or size from type_name.
   The malloc result is also used without a NULL check. */
113 array.type_name=malloc(strlen(former.type->name)+20);
114 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
115 size,former.type_name);
116 sprintf(array.type_name,"%s[%ld]",former.type_name,size);
/* The former type name was only needed to build the new one. */
117 free(former.type_name);
119 TRY(gras_datadesc_array_fixed(array.type_name, former.type, size, &array.type)); /* redeclaration are ignored */
/* The symbol name string is handed over to the new entry, not duplicated. */
121 array.name = former.name;
123 TRY(gras_dynar_push(dynar,&array));
/* Replace the last identifier pushed on dynar by a reference (pointer) to it.
 *
 * dynar: dynar of identifier_t; its last element is popped and a new one is pushed.
 *
 * The new entry is named after the former type_name followed by a star and keeps
 * the symbol name of the popped entry. Since it starts from a zeroed structure
 * and only type_name, type and name are assigned here, its other members stay zero.
 *
 * Calls wrapped in TRY presumably make the function return the error code on
 * failure (macro defined elsewhere -- confirm); the final return statement is
 * not visible in this listing.
 */
127 static gras_error_t change_to_ref(gras_dynar_t *dynar) {
128 gras_error_t errcode;
129 identifier_t former,ref;
130 memset(&ref,0,sizeof(ref));
133 gras_dynar_pop(dynar,&former);
/* +2: one byte for the star, one for the terminating NUL.
   NOTE(review): the buffer is sized from former.type->name but filled below
   from former.type_name. If both strings can differ in length the sprintf may
   overflow -- confirm they are always identical, or size from type_name.
   The malloc result is also used without a NULL check. */
134 ref.type_name=malloc(strlen(former.type->name)+2);
135 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
136 sprintf(ref.type_name,"%s*",former.type_name);
/* The former type name was only needed to build the new one. */
137 free(former.type_name);
139 TRY(gras_datadesc_ref(ref.type_name, former.type, &ref.type)); /* redeclaration are ignored */
/* The symbol name string is handed over to the new entry, not duplicated. */
141 ref.name = former.name;
143 TRY(gras_dynar_push(dynar,&ref));
/* Replace the last identifier pushed on dynar by the type built by
 * gras_datadesc_ref_pop_arr() from it.
 *
 * dynar: dynar of identifier_t; its last element is popped and a new one is pushed.
 *
 * Unlike the two sibling helpers, the type name is not composed here: it is
 * duplicated from the name of the descriptor returned by the library. The new
 * entry keeps the symbol name of the popped entry; its other members stay zero.
 *
 * Calls wrapped in TRY presumably make the function return the error code on
 * failure (macro defined elsewhere -- confirm); the final return statement is
 * not visible in this listing.
 */
148 static gras_error_t change_to_ref_pop_array(gras_dynar_t *dynar) {
149 gras_error_t errcode;
150 identifier_t former,ref;
151 memset(&ref,0,sizeof(ref));
154 gras_dynar_pop(dynar,&former);
155 TRY(gras_datadesc_ref_pop_arr(former.type,&ref.type)); /* redeclaration are ignored */
/* NOTE(review): the strdup result is not checked for NULL. */
156 ref.type_name = strdup(ref.type->name);
/* The symbol name string is handed over to the new entry, not duplicated. */
157 ref.name = former.name;
159 free(former.type_name);
161 TRY(gras_dynar_push(dynar,&ref));
/* Parse one declaration statement of a struct body, e.g. "unsigned long int a, b[10];".
 *
 * definition:     text being parsed. In the visible lines it is only used to log
 *                 the current statement (its ';' is temporarily overwritten with
 *                 NUL and then restored).
 * identifiers:    dynar of identifier_t receiving one entry per declared symbol.
 *                 Array sizes and annotations replace the last pushed entry.
 * fields_to_push: dynar receiving the key value of "size" annotations handled
 *                 through change_to_ref_pop_array().
 *
 * Returns mismatch_error when the first token is GRAS_DDT_PARSE_TOKEN_RA, i.e.
 * at the end of the enclosing structure or union. The other return statements
 * are not visible in this listing.
 *
 * Steps: read the modifiers, determine the base type name (defaulting to "int"
 * when only short/long/unsigned was given), look the base type up, then loop
 * over the tokens up to the semicolon to collect the identifiers.
 *
 * Despite its name, GRAS_DDT_PARSE_TOKEN_COLON is reported as a comma (',') by
 * the error messages below: it is used as the separator between identifiers
 * and between the key and the value of an annotation.
 *
 * NOTE(review): many original lines (else branches, closing braces, the
 * declarations of buffname, colon_pos, end, keyval, p and array) are not
 * visible in this listing; comments below only describe the visible statements.
 */
166 static gras_error_t parse_statement(char *definition,
167 gras_dynar_t *identifiers,
168 gras_dynar_t *fields_to_push) {
169 gras_error_t errcode;
172 identifier_t identifier;
174 int expect_id_separator = 0;
177 memset(&identifier,0,sizeof(identifier));
/* Fetch the first token of the statement. */
179 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
180 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
182 return mismatch_error; /* end of the englobing structure or union */
/* Debug only: log the text of the statement by temporarily cutting the
   definition string at the next ';' and restoring it afterwards. */
185 if (GRAS_LOG_ISENABLED(ddt_parse,gras_log_priority_debug)) {
187 for (colon_pos = gras_ddt_parse_col_pos;
188 definition[colon_pos] != ';';
190 definition[colon_pos] = '\0';
191 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
193 definition+gras_ddt_parse_col_pos,
194 gras_ddt_parse_col_pos);
195 definition[colon_pos] = ';';
/* A statement has to start with a word (a modifier or a type name). */
198 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
199 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
200 gras_ddt_parse_text);
202 /**** get the type modifier of this statement ****/
203 parse_type_modifier(&identifier.tm);
205 /* FIXME: This does not detect recursive definitions at all? */
206 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
207 PARSE_ERROR0("Cannot handle recursive type definition yet");
209 /**** get the base type, giving "short a" the needed love ****/
/* A short/long/unsigned modifier followed by a word which is none of char,
   float, double or int means that the base type was left implicit. */
210 if (!identifier.tm.is_union &&
211 !identifier.tm.is_enum &&
212 !identifier.tm.is_struct &&
214 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
216 strcmp(gras_ddt_parse_text,"char") &&
217 strcmp(gras_ddt_parse_text,"float") &&
218 strcmp(gras_ddt_parse_text,"double") &&
219 strcmp(gras_ddt_parse_text,"int") ) {
221 /* the user omitted "int": assume it */
222 identifier.type_name=strdup("int");
223 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* Explicit base type (presumably the else branch, whose 'else' line is not
   visible): remember its name and read the next token. */
225 identifier.type_name=strdup(gras_ddt_parse_text);
226 DEBUG1("the base type is '%s'",identifier.type_name);
227 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
230 /**** build the base type for latter use ****/
/* NOTE(review): union, enum and struct were already rejected by the
   "recursive type definition" check above, so if PARSE_ERROR0 leaves the
   function the next three branches cannot be reached -- confirm. */
231 if (identifier.tm.is_union) {
232 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
234 } else if (identifier.tm.is_enum) {
235 PARSE_ERROR0("Cannot handle enum yet");
237 } else if (identifier.tm.is_struct) {
/* NOTE(review): buffname is declared on a line not visible here and this
   sprintf does not bound the write -- check the buffer size against the
   longest possible type name. */
238 sprintf(buffname,"struct %s",identifier.type_name);
239 TRY(gras_datadesc_struct(buffname,&identifier.type)); /* Get created when does not exist */
/* Unsigned integer types: pick the descriptor matching the length modifiers. */
241 } else if (identifier.tm.is_unsigned) {
242 if (!strcmp(identifier.type_name,"int")) {
243 if (identifier.tm.is_long == 2) {
244 identifier.type = gras_datadesc_by_name("unsigned long long int");
245 } else if (identifier.tm.is_long) {
246 identifier.type = gras_datadesc_by_name("unsigned long int");
247 } else if (identifier.tm.is_short) {
248 identifier.type = gras_datadesc_by_name("unsigned short int");
250 identifier.type = gras_datadesc_by_name("unsigned int");
253 } else if (!strcmp(identifier.type_name, "char")) {
254 identifier.type = gras_datadesc_by_name("unsigned char");
256 } else { /* cannot happen: gcc parses (and rejects) such declarations before us */
260 } else if (!strcmp(identifier.type_name, "float")) {
261 /* no modificator allowed by gcc */
262 identifier.type = gras_datadesc_by_name("float");
264 } else if (!strcmp(identifier.type_name, "double")) {
265 if (identifier.tm.is_long)
266 PARSE_ERROR0("long double not portable and thus not handled");
268 identifier.type = gras_datadesc_by_name("double");
270 } else { /* signed integer elemental */
271 if (!strcmp(identifier.type_name,"int")) {
272 if (identifier.tm.is_long == 2) {
273 identifier.type = gras_datadesc_by_name("signed long long int");
274 } else if (identifier.tm.is_long) {
275 identifier.type = gras_datadesc_by_name("signed long int");
276 } else if (identifier.tm.is_short) {
277 identifier.type = gras_datadesc_by_name("signed short int");
279 identifier.type = gras_datadesc_by_name("int");
282 } else if (!strcmp(identifier.type_name, "char")) {
283 identifier.type = gras_datadesc_by_name("char");
/* Any other name has to be a type which was already declared to the library. */
286 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
287 identifier.type = gras_datadesc_by_name(identifier.type_name);
288 if (!identifier.type)
289 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
292 /* Now identifier.type and identifier.name speak about the base type.
293 Stars are not eaten unless 'int' was omitted.
294 We will have to enhance it if we are in fact asked for array or reference */
296 /**** look for the symbols of this type ****/
297 for(expect_id_separator = 0;
299 (//(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME
300 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
302 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
/* expect_id_separator is set once an identifier was pushed: from then on only
   a separator, an array size or an annotation may follow it. */
304 if(expect_id_separator) {
305 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
306 expect_id_separator = 0;
309 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
310 /* Handle fixed size arrays */
311 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
312 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
313 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
315 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* The whole word has to be a base-10 number. */
317 long int size=strtol(gras_ddt_parse_text, &end, 10);
319 if (end == gras_ddt_parse_text || *end != '\0')
320 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
322 /* replace the previously pushed type to an array of it */
323 TRY(change_to_fixed_array(identifiers,size));
325 /* eat the closing bracket */
326 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
327 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
328 PARSE_ERROR0("Unparsable size of array");
329 DEBUG1("Fixed size array, size=%ld",size);
332 PARSE_ERROR0("Unparsable size of array");
334 /* End of fixed size arrays handling */
336 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
337 /* Handle annotation */
339 char *keyname = NULL;
341 memset(&array,0,sizeof(array));
/* Expected shape: GRAS_ANNOTE ( keyname , keyvalue ) where empty tokens
   between the elements are skipped. */
342 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
343 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
345 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
346 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
347 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
/* Skip empty tokens, then read the key name. */
349 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
351 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
352 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
353 keyname = strdup(gras_ddt_parse_text);
/* Skip empty tokens, then expect the separator between key and value. */
355 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
357 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
358 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
/* Skip empty tokens, then read the key value. */
360 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
362 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
363 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
364 keyval = strdup(gras_ddt_parse_text);
/* Skip empty tokens, then expect the closing parenthesis. */
366 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
368 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
369 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
371 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
373 DEBUG2("Anotation: %s=%s",keyname,keyval);
374 if (!strcmp(keyname,"size")) {
/* A size annotation consumes one pending indirection level (one star). */
376 if (!identifier.tm.is_ref)
377 PARSE_ERROR0("Size annotation for a field not being a reference");
378 identifier.tm.is_ref--;
/* Size "1": the field becomes a plain reference to its former type. */
380 if (!strcmp(keyval,"1")) {
381 TRY(change_to_ref(identifiers));
/* NOTE(review): the test applied to each character of keyval is on lines not
   visible here (presumably isdigit, given the ctype.h include). A numeric
   value apparently leads to a fixed-size array wrapped into a reference.
   atoi() reports no conversion error, unlike the strtol() check used for
   the [N] syntax above. */
387 for (p = keyval; *p != '\0'; p++)
391 TRY(change_to_fixed_array(identifiers,atoi(keyval)));
392 TRY(change_to_ref(identifiers));
/* Remaining case (presumably a key value which is not a number): build the
   type with change_to_ref_pop_array() and hand the key value string over to
   fields_to_push. */
397 TRY(change_to_ref_pop_array(identifiers));
398 TRY(gras_dynar_push(fields_to_push,&keyval));
405 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
409 /* End of annotation handling */
411 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
/* From here on no separator is pending: a star or an identifier is expected. */
413 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
414 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
417 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
418 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
422 /* found a symbol name. Build the type and push it to dynar */
423 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
425 identifier.name=strdup(gras_ddt_parse_text);
426 DEBUG1("Found the identifier \"%s\"",identifier.name);
/* The dynar receives a copy of the structure in its current state. */
428 TRY(gras_dynar_push(identifiers, &identifier));
429 DEBUG1("Dynar_len=%d",gras_dynar_length(identifiers));
430 expect_id_separator = 1;
434 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/* Parse a struct definition and build the matching datatype description.
 *
 * definition: text being parsed, handed over to parse_statement().
 *
 * On entry the current token is either the name of the struct (a word) or
 * directly the opening token GRAS_DDT_PARSE_TOKEN_LA ('{' according to the error
 * message). A struct without name is registered as "anonymous struct <N>",
 * N being a counter kept in a static variable.
 *
 * Statements are parsed one after the other while parse_statement() returns
 * no_error. Each identifier found is appended to the struct as a field, and
 * each entry of fields_to_push is registered with gras_datadesc_cb_field_push().
 *
 * Returns NULL when statement parsing ended on anything but mismatch_error.
 * NOTE(review): the final return statement is not visible in this listing
 * (presumably struct_type), nor are the declarations of buffname, i, field and
 * name, nor most closing braces; nesting is therefore not described here.
 */
441 static gras_datadesc_type_t *parse_struct(char *definition) {
443 gras_error_t errcode;
445 static int anonymous_struct=0;
/* identifier_t entries produced by the statement being parsed */
447 gras_dynar_t *identifiers;
/* char* entries: key values of size annotations collected by parse_statement() */
451 gras_dynar_t *fields_to_push;
454 gras_datadesc_type_t *struct_type;
/* NOTE(review): the error code of the first allocation is overwritten by the
   second one before being tested, so a failure to create identifiers goes
   unnoticed when the second call succeeds. */
457 errcode=gras_dynar_new(&identifiers,sizeof(identifier_t),NULL);
458 errcode=gras_dynar_new(&fields_to_push,sizeof(char*),NULL);
459 if (errcode != no_error) {
464 /* Create the struct descriptor */
465 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
466 TRYFAIL(gras_datadesc_struct(gras_ddt_parse_text,&struct_type));
467 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
468 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is post-incremented inside the sprintf call, so
   the number logged on the next line is one more than the number used in the
   generated name. The sprintf does not bound the write into buffname. */
470 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
471 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
472 TRYFAIL(gras_datadesc_struct(buffname,&struct_type));
475 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
476 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
477 gras_ddt_parse_text);
479 /* Parse the identifiers */
480 for (errcode=parse_statement(definition,identifiers,fields_to_push);
481 errcode == no_error ;
482 errcode=parse_statement(definition,identifiers,fields_to_push)) {
484 DEBUG1("This statement contained %d identifiers",gras_dynar_length(identifiers));
485 /* append the identifiers we've found */
486 gras_dynar_foreach(identifiers,i, field) {
/* NOTE(review): the condition guarding this error is on a line not visible
   here (presumably a test on field.tm.is_ref being non-zero). */
488 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
489 field.name,field.tm.is_ref);
491 VERB2("Append field '%s' to %p",field.name, struct_type);
492 TRYFAIL(gras_datadesc_struct_append(struct_type, field.name, field.type));
/* Only the type name is released here; no free of field.name is visible. */
494 free(field.type_name);
/* Empty the dynar so that it can be filled again by the next statement. */
497 gras_dynar_reset(identifiers);
498 DEBUG1("struct_type=%p",struct_type);
500 /* Make sure that all fields declaring a size push it into the cbps */
501 gras_dynar_foreach(fields_to_push,i, name) {
502 DEBUG1("struct_type=%p",struct_type);
503 VERB2("Push field '%s' into size stack of %p", name, struct_type);
504 gras_datadesc_cb_field_push(struct_type, name);
507 gras_dynar_reset(fields_to_push);
509 gras_datadesc_struct_close(struct_type);
/* mismatch_error is the normal way out of the statement loop: it is what
   parse_statement() returns on the closing token of the struct. */
510 if (errcode != mismatch_error) {
512 return NULL; /* FIXME: LEAK! */
516 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
517 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
518 gras_ddt_parse_text);
/* Move past the closing token before giving control back to the caller. */
520 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
522 gras_dynar_free(identifiers);
523 gras_dynar_free(fields_to_push);
/* Parse a typedef statement (not functional yet).
 *
 * definition: text being parsed, handed over to parse_struct().
 *
 * The visible lines read the modifiers of the aliased type, parse a struct
 * definition, read modifiers again, check that the current token is a word (the
 * alias name) and finally raise the error "Cannot handle typedef yet".
 *
 * NOTE(review): the declaration of tm, the conditions guarding the parse_struct()
 * call and the "reference without annotation" error, and the return statement
 * are on lines not visible in this listing. typedef_desc is never assigned in
 * the visible lines.
 */
528 static gras_datadesc_type_t * parse_typedef(char *definition) {
532 gras_datadesc_type_t *struct_desc=NULL;
533 gras_datadesc_type_t *typedef_desc=NULL;
/* parse_type_modifier() only increments or sets members: start from zero. */
536 memset(&tm,0,sizeof(tm));
538 /* get the aliased type */
539 parse_type_modifier(&tm);
542 struct_desc = parse_struct(definition);
/* Read what follows the aliased type (stars, in particular). */
545 parse_type_modifier(&tm);
548 PARSE_ERROR0("Cannot handle reference without annotation");
550 /* get the aliasing name */
551 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
552 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
553 gras_ddt_parse_text);
555 /* (FIXME: should) build the alias */
556 PARSE_ERROR0("Cannot handle typedef yet");
564 * gras_datadesc_parse:
566 * Create a data description from the result of parsing the C type description
568 gras_datadesc_type_t *
569 gras_datadesc_parse(const char *name,
570 const char *C_statement) {
572 gras_datadesc_type_t * res=NULL;
574 int semicolon_count=0;
575 int def_count,C_count;
578 /* reput the \n in place for debug */
579 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
580 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
582 definition = malloc(C_count + semicolon_count + 1);
583 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
584 definition[def_count++] = C_statement[C_count];
585 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
586 definition[def_count++] = '\n';
589 definition[def_count] = '\0';
592 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
593 gras_ddt_parse_pointer_string_init(definition);
595 /* Do I have a typedef, or a raw struct ?*/
596 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
598 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
599 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
600 res = parse_struct(definition);
602 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
603 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
604 res = parse_typedef(definition);
607 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
611 gras_ddt_parse_pointer_string_close();
612 VERB0("end of _gras_ddt_type_parse()");
614 /* register it under the name provided as symbol */
615 if (strcmp(res->name,name)) {
616 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",