3 /* DataDesc/ddt_parse.c -- automatic parsing of data structures */
5 /* Authors: Arnaud Legrand, Martin Quinson */
6 /* Copyright (C) 2003, 2004 Martin Quinson. */
8 /* This program is free software; you can redistribute it and/or modify it
9 under the terms of the license (GNU LGPL) which comes with this package. */
11 #include <ctype.h> /* isdigit */
13 #include "gras/DataDesc/datadesc_private.h"
14 #include "gras/DataDesc/ddt_parse.yy.h"
16 GRAS_LOG_NEW_DEFAULT_SUBCATEGORY(ddt_parse,datadesc,
17 "Parsing C data structures to build GRAS data description");
/* Set of modifiers met in front of a base type. The members are declared on
 * lines that are not part of this excerpt; the code below reads and writes
 * is_ref, is_unsigned, is_short, is_long, is_struct, is_union and is_enum
 * through a type_modifier_t (presumably the typedef of this struct -- TODO
 * confirm). */
19 typedef struct s_type_modifier{
/* NOTE(review): only the 'type' member of s_field is visible in this excerpt;
 * presumably the structure describes one parsed field -- confirm against the
 * full declaration. */
31 typedef struct s_field {
32 gras_datadesc_type_t *type;
38 extern char *gras_ddt_parse_text; /* text being considered in the parser */
/*
 * parse_type_modifier:
 *
 * Record in *type_modifier the modifiers found at the current position of the
 * token stream. The current token is read from the globals
 * gras_ddt_parse_tok_num / gras_ddt_parse_text:
 *   - a star token increments is_ref,
 *   - "unsigned", "short", "struct", "union" and "enum" set the matching flag,
 *   - "long" increments is_long, so that "long long" ends up as 2.
 * The next token is then fetched with gras_ddt_parse_lex_n_dump().
 *
 * NOTE(review): the enclosing loop and the statements leaving it are on lines
 * that are not part of this excerpt; presumably the function returns as soon
 * as a token which is not a modifier is met (see the "Done with modifiers"
 * traces) -- confirm against the full source.
 */
41 static void parse_type_modifier(type_modifier_t *type_modifier) {
44 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
45 /* This is only used when parsing 'short *', since this function returns when int, float, double,... is encountered */
46 DEBUG0("This is a reference");
47 type_modifier->is_ref++;
49 } else if (!strcmp(gras_ddt_parse_text,"unsigned")) {
50 DEBUG0("This is an unsigned");
51 type_modifier->is_unsigned = 1;
53 } else if (!strcmp(gras_ddt_parse_text,"short")) {
54 DEBUG0("This is short");
55 type_modifier->is_short = 1;
57 } else if (!strcmp(gras_ddt_parse_text,"long")) {
58 DEBUG0("This is long");
59 type_modifier->is_long++; /* handle "long long" */
61 } else if (!strcmp(gras_ddt_parse_text,"struct")) {
62 DEBUG0("This is a struct");
63 type_modifier->is_struct = 1;
65 } else if (!strcmp(gras_ddt_parse_text,"union")) {
66 DEBUG0("This is an union");
67 type_modifier->is_union = 1;
69 } else if (!strcmp(gras_ddt_parse_text,"enum")) {
70 DEBUG0("This is an enum");
71 type_modifier->is_enum = 1;
73 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_EMPTY) {
77 DEBUG1("Done with modifiers (got %s)",gras_ddt_parse_text);
/* Fetch the next token; anything but a word or a star is traced as the end
   of the modifier list. */
81 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
82 if((gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD) &&
83 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_STAR)) {
84 DEBUG2("Done with modifiers (got %s,%d)",gras_ddt_parse_text,gras_ddt_parse_tok_num);
/*
 * print_type_modifier:
 *
 * Print on stdout (printf) one tag per modifier set in tm, which is received
 * by value. "(long) " is printed is_long times and "(ref) " is_ref times, so
 * "long long" and multiple levels of indirection show up as repeated tags.
 * The order of the tags is: unsigned, short, long, struct, enum, union, ref.
 */
91 static void print_type_modifier(type_modifier_t tm) {
95 if (tm.is_unsigned) printf("(unsigned) ");
96 if (tm.is_short) printf("(short) ");
97 for (i=0 ; i<tm.is_long ; i++) printf("(long) ");
99 if(tm.is_struct) printf("(struct) ");
100 if(tm.is_enum) printf("(enum) ");
101 if(tm.is_union) printf("(union) ");
103 for (i=0 ; i<tm.is_ref ; i++) printf("(ref) ");
/*
 * change_to_fixed_array:
 *
 * Replace the last identifier pushed on dynar by a fixed-size array of it.
 *
 * dynar: dynar of identifier_t; its last element is popped, rewritten and
 *        pushed back.
 * size:  number of elements of the array.
 *
 * The new element is named "<former type_name>[<size>]", its type comes from
 * gras_datadesc_array_fixed() and it keeps the symbol name of the former one.
 * The type_name string of the former element is released here.
 *
 * NOTE(review): TRY is defined elsewhere; presumably it returns errcode when
 * the call fails, in which case array.type_name would not be released on that
 * path -- confirm. The final return is on a line not shown in this excerpt.
 */
107 static gras_error_t change_to_fixed_array(gras_dynar_t *dynar, long int size) {
108 gras_error_t errcode;
109 identifier_t former,array;
110 memset(&array,0,sizeof(array));
113 gras_dynar_pop(dynar,&former);
/* NOTE(review): the buffer is sized from former.type->name, but the sprintf
   below formats former.type_name. This is only safe if type_name is never
   longer than type->name -- confirm. The 20 extra bytes must hold '[', ']',
   the decimal form of size and the final NUL, which is tight for a 64-bit
   long. */
114 array.type_name=(char*)gras_malloc(strlen(former.type->name)+20);
115 DEBUG2("Array specification (size=%ld, elm='%s'), change pushed type",
116 size,former.type_name);
117 sprintf(array.type_name,"%s[%ld]",former.type_name,size);
118 gras_free(former.type_name);
120 TRY(gras_datadesc_array_fixed(array.type_name, former.type, size, &array.type)); /* redeclarations are ignored */
/* the symbol name string is handed over to the new element, not copied */
122 array.name = former.name;
124 TRY(gras_dynar_push(dynar,&array));
/*
 * change_to_ref:
 *
 * Replace the last identifier pushed on dynar by a reference to it.
 *
 * dynar: dynar of identifier_t; its last element is popped, rewritten and
 *        pushed back.
 *
 * The new element is named "<former type_name>*", its type comes from
 * gras_datadesc_ref() and it keeps the symbol name of the former one.
 * The type_name string of the former element is released here.
 *
 * NOTE(review): TRY is defined elsewhere; presumably it returns errcode when
 * the call fails, in which case ref.type_name would not be released on that
 * path -- confirm. The final return is on a line not shown in this excerpt.
 */
128 static gras_error_t change_to_ref(gras_dynar_t *dynar) {
129 gras_error_t errcode;
130 identifier_t former,ref;
131 memset(&ref,0,sizeof(ref));
134 gras_dynar_pop(dynar,&former);
/* +2: one byte for the '*' and one for the final NUL.
   NOTE(review): sized from former.type->name but filled from
   former.type_name below; only safe if type_name is never the longer of the
   two -- confirm. */
135 ref.type_name=(char*)gras_malloc(strlen(former.type->name)+2);
136 DEBUG1("Ref specification (elm='%s'), change pushed type", former.type_name);
137 sprintf(ref.type_name,"%s*",former.type_name);
138 gras_free(former.type_name);
140 TRY(gras_datadesc_ref(ref.type_name, former.type, &ref.type)); /* redeclarations are ignored */
/* the symbol name string is handed over to the new element, not copied */
142 ref.name = former.name;
144 TRY(gras_dynar_push(dynar,&ref));
/*
 * change_to_ref_pop_array:
 *
 * Replace the last identifier pushed on dynar by the type built by
 * gras_datadesc_ref_pop_arr() from it.
 *
 * dynar: dynar of identifier_t; its last element is popped, rewritten and
 *        pushed back.
 *
 * Unlike the two helpers above, the type name is not composed here: it is a
 * copy (strdup) of the name of the type returned by
 * gras_datadesc_ref_pop_arr(). The symbol name of the former element is kept
 * and its type_name string is released.
 *
 * NOTE(review): the result of strdup() is not checked in the visible code,
 * and the final return is on a line not shown in this excerpt.
 */
149 static gras_error_t change_to_ref_pop_array(gras_dynar_t *dynar) {
150 gras_error_t errcode;
151 identifier_t former,ref;
152 memset(&ref,0,sizeof(ref));
155 gras_dynar_pop(dynar,&former);
156 TRY(gras_datadesc_ref_pop_arr(former.type,&ref.type)); /* redeclarations are ignored */
157 ref.type_name = strdup(ref.type->name);
158 ref.name = former.name;
160 gras_free(former.type_name);
162 TRY(gras_dynar_push(dynar,&ref));
/*
 * parse_statement:
 *
 * Parse one statement of a structure body, i.e. the tokens up to the next ';'.
 *
 * definition:     text being parsed; it is only used here to trace the
 *                 statement when debug logging is enabled (it is patched and
 *                 restored in place while doing so).
 * identifiers:    dynar of identifier_t receiving one element per declared
 *                 symbol; the last pushed element is rewritten when an array
 *                 size or an annotation follows the symbol.
 * fields_to_push: dynar of char* receiving the value of the "size"
 *                 annotations handled through change_to_ref_pop_array().
 *
 * Returns mismatch_error when the first token is GRAS_DDT_PARSE_TOKEN_RA,
 * which is treated as the end of the enclosing structure or union. The other
 * return paths depend on the TRY / PARSE_ERROR* macros and on lines that are
 * not part of this excerpt.
 *
 * NOTE(review): several locals used below (buffname, colon_pos, end, keyval,
 * p, array) are declared on lines not shown here.
 */
167 static gras_error_t parse_statement(char *definition,
168 gras_dynar_t *identifiers,
169 gras_dynar_t *fields_to_push) {
170 gras_error_t errcode;
173 identifier_t identifier;
175 int expect_id_separator = 0;
178 memset(&identifier,0,sizeof(identifier));
180 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
181 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RA) {
183 return mismatch_error; /* end of the enclosing structure or union */
/* Debug trace only: temporarily cut the definition at the next ';' so that
   the statement can be printed, then put the ';' back. */
186 if (GRAS_LOG_ISENABLED(ddt_parse,gras_log_priority_debug)) {
188 for (colon_pos = gras_ddt_parse_col_pos;
189 definition[colon_pos] != ';';
191 definition[colon_pos] = '\0';
192 DEBUG3("Parse the statement \"%s%s;\" (col_pos=%d)",
194 definition+gras_ddt_parse_col_pos,
195 gras_ddt_parse_col_pos);
196 definition[colon_pos] = ';';
/* a statement has to begin with a word (modifier or type name) */
199 if(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
200 PARSE_ERROR1("Unparsable symbol: found a typeless statement (got '%s' instead)",
201 gras_ddt_parse_text);
203 /**** get the type modifier of this statement ****/
204 parse_type_modifier(&identifier.tm);
206 /* FIXME: This does not detect recursive definitions at all? */
/* NOTE(review): any struct/union/enum member is refused here, so the
   is_union / is_enum / is_struct branches further down can only be reached
   if PARSE_ERROR0 does not leave the function; its definition is not
   visible in this excerpt. */
207 if (identifier.tm.is_union || identifier.tm.is_enum || identifier.tm.is_struct)
208 PARSE_ERROR0("Cannot handle recursive type definition yet");
210 /**** get the base type, giving "short a" the needed love ****/
/* A short/long/unsigned modifier followed by a word which is none of char,
   float, double or int means that "int" was left implicit. */
211 if (!identifier.tm.is_union &&
212 !identifier.tm.is_enum &&
213 !identifier.tm.is_struct &&
215 (identifier.tm.is_short || identifier.tm.is_long || identifier.tm.is_unsigned) &&
217 strcmp(gras_ddt_parse_text,"char") &&
218 strcmp(gras_ddt_parse_text,"float") &&
219 strcmp(gras_ddt_parse_text,"double") &&
220 strcmp(gras_ddt_parse_text,"int") ) {
222 /* the user omitted "int": the current token is kept for the symbol loop */
223 identifier.type_name=strdup("int");
224 DEBUG0("the base type is 'int', which were omited (you vicious user)");
/* explicit base type: remember the current word and fetch the next token
   (presumably the 'else' of the test above, whose line is not shown) */
226 identifier.type_name=strdup(gras_ddt_parse_text);
227 DEBUG1("the base type is '%s'",identifier.type_name);
228 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
231 /**** build the base type for later use ****/
232 if (identifier.tm.is_union) {
233 PARSE_ERROR0("Cannot handle union yet (get callback from annotation?)");
235 } else if (identifier.tm.is_enum) {
236 PARSE_ERROR0("Cannot handle enum yet");
238 } else if (identifier.tm.is_struct) {
/* NOTE(review): unbounded sprintf; the size of buffname is declared on a
   line not shown here -- check that it can hold "struct " + type_name. */
239 sprintf(buffname,"struct %s",identifier.type_name);
240 TRY(gras_datadesc_struct(buffname,&identifier.type)); /* Get created when does not exist */
242 } else if (identifier.tm.is_unsigned) {
/* unsigned integer types: the description is looked up by its full name */
243 if (!strcmp(identifier.type_name,"int")) {
244 if (identifier.tm.is_long == 2) {
245 identifier.type = gras_datadesc_by_name("unsigned long long int");
246 } else if (identifier.tm.is_long) {
247 identifier.type = gras_datadesc_by_name("unsigned long int");
248 } else if (identifier.tm.is_short) {
249 identifier.type = gras_datadesc_by_name("unsigned short int");
251 identifier.type = gras_datadesc_by_name("unsigned int");
254 } else if (!strcmp(identifier.type_name, "char")) {
255 identifier.type = gras_datadesc_by_name("unsigned char");
257 } else { /* should be impossible: gcc parses this before us */
261 } else if (!strcmp(identifier.type_name, "float")) {
262 /* no modifier allowed by gcc */
263 identifier.type = gras_datadesc_by_name("float");
265 } else if (!strcmp(identifier.type_name, "double")) {
266 if (identifier.tm.is_long)
267 PARSE_ERROR0("long double not portable and thus not handled");
269 identifier.type = gras_datadesc_by_name("double");
271 } else { /* signed integer elemental */
272 if (!strcmp(identifier.type_name,"int")) {
273 if (identifier.tm.is_long == 2) {
274 identifier.type = gras_datadesc_by_name("signed long long int");
275 } else if (identifier.tm.is_long) {
276 identifier.type = gras_datadesc_by_name("signed long int");
277 } else if (identifier.tm.is_short) {
278 identifier.type = gras_datadesc_by_name("signed short int");
280 identifier.type = gras_datadesc_by_name("int");
283 } else if (!strcmp(identifier.type_name, "char")) {
284 identifier.type = gras_datadesc_by_name("char");
/* any other word has to be the name of an already declared description */
287 DEBUG1("Base type is a constructed one (%s)",identifier.type_name);
288 identifier.type = gras_datadesc_by_name(identifier.type_name);
289 if (!identifier.type)
290 PARSE_ERROR1("Unknown base type '%s'",identifier.type_name);
293 /* Now identifier.type and identifier.name speak about the base type.
294 Stars are not eaten unless 'int' was omitted.
295 We will have to enhance it if we are in fact asked for array or reference */
297 /**** look for the symbols of this type ****/
/* One iteration per token until the ';' ending the statement.
   expect_id_separator is set once a symbol name was read: from then on only
   a separator, an array size or an annotation may follow. */
298 for(expect_id_separator = 0;
300 (//(gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_EMPTY) && FIXME
301 (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_SEMI_COLON)) ;
303 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump() ) {
305 if(expect_id_separator) {
/* NOTE(review): the token is called COLON but the error messages below
   describe it as a comma (','). */
306 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
307 expect_id_separator = 0;
310 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_LB) {
311 /* Handle fixed size arrays */
312 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
313 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_RB) {
314 PARSE_ERROR0("Cannot deal with [] constructs (yet)");
316 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
/* the size must be a complete base-10 number: strtol() has to consume the
   whole token */
318 long int size=strtol(gras_ddt_parse_text, &end, 10);
320 if (end == gras_ddt_parse_text || *end != '\0')
321 PARSE_ERROR1("Unparsable size of array (found '%c', expected number)",*end);
323 /* replace the previously pushed type to an array of it */
324 TRY(change_to_fixed_array(identifiers,size));
326 /* eat the closing bracket */
327 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
328 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RB)
329 PARSE_ERROR0("Unparsable size of array");
330 DEBUG1("Fixed size array, size=%ld",size);
333 PARSE_ERROR0("Unparsable size of array");
335 /* End of fixed size arrays handling */
337 } else if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
338 /* Handle annotation */
/* Expected form: GRAS_ANNOTE ( <key> , <value> ), empty tokens being skipped
   between the elements. */
340 char *keyname = NULL;
342 memset(&array,0,sizeof(array));
343 if (strcmp(gras_ddt_parse_text,"GRAS_ANNOTE"))
344 PARSE_ERROR1("Unparsable symbol: Expected 'GRAS_ANNOTE', got '%s'",gras_ddt_parse_text);
346 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
347 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LP)
348 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
350 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
352 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
353 PARSE_ERROR1("Unparsable annotation: Expected key name, got '%s'",gras_ddt_parse_text);
/* NOTE(review): the release of this copy is not visible in this excerpt */
354 keyname = strdup(gras_ddt_parse_text);
356 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
358 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_COLON)
359 PARSE_ERROR1("Unparsable annotation: expected ',' after the key name, got '%s'",gras_ddt_parse_text);
361 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
363 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
364 PARSE_ERROR1("Unparsable annotation: Expected key value, got '%s'",gras_ddt_parse_text);
365 keyval = strdup(gras_ddt_parse_text);
367 while ( (gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump()) == GRAS_DDT_PARSE_TOKEN_EMPTY );
369 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RP)
370 PARSE_ERROR1("Unparsable annotation: Expected parenthesis, got '%s'",gras_ddt_parse_text);
372 /* Done with parsing the annotation. Now deal with it by replacing previously pushed type with the right one */
374 DEBUG2("Anotation: %s=%s",keyname,keyval);
375 if (!strcmp(keyname,"size")) {
/* a "size" annotation consumes one of the '*' counted for this symbol */
377 if (!identifier.tm.is_ref)
378 PARSE_ERROR0("Size annotation for a field not being a reference");
379 identifier.tm.is_ref--;
/* size 1: plain reference to the previously pushed type */
381 if (!strcmp(keyval,"1")) {
382 TRY(change_to_ref(identifiers));
/* Scan of the value, character by character. The loop body and the tests
   selecting one of the two cases below are on lines not shown here. */
388 for (p = keyval; *p != '\0'; p++)
/* numeric value: reference to a fixed-size array of the pushed type.
   NOTE(review): atoi() reports no conversion error, so the validation has to
   come from the scan above -- confirm. */
392 TRY(change_to_fixed_array(identifiers,atoi(keyval)));
393 TRY(change_to_ref(identifiers));
/* other value: the type comes from gras_datadesc_ref_pop_arr() and the value
   string is recorded in fields_to_push */
398 TRY(change_to_ref_pop_array(identifiers));
399 TRY(gras_dynar_push(fields_to_push,&keyval));
406 PARSE_ERROR1("Unknown annotation type: '%s'",keyname);
410 /* End of annotation handling */
412 PARSE_ERROR1("Unparsable symbol: Got '%s' instead of expected comma (',')",gras_ddt_parse_text);
414 } else if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_COLON) {
415 PARSE_ERROR0("Unparsable symbol: Unexpected comma (',')");
418 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_STAR) {
419 identifier.tm.is_ref++; /* We indeed deal with multiple references with multiple annotations */
423 /* found a symbol name. Build the type and push it to dynar */
424 if(gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
426 identifier.name=strdup(gras_ddt_parse_text);
427 DEBUG1("Found the identifier \"%s\"",identifier.name);
429 TRY(gras_dynar_push(identifiers, &identifier));
430 DEBUG1("Dynar_len=%d",gras_dynar_length(identifiers));
431 expect_id_separator = 1;
435 PARSE_ERROR0("Unparasable symbol (maybe a def struct in a def struct or a parser bug ;)");
/*
 * parse_struct:
 *
 * Build the data description of a structure from the token stream.
 *
 * definition: text being parsed, passed on to parse_statement().
 *
 * The descriptor is named after the current token when it is a word, and
 * "anonymous struct <n>" otherwise (n being a counter kept across calls).
 * Statements are then parsed one after the other with parse_statement();
 * every identifier found is appended as a field of the structure, and every
 * name collected in fields_to_push is registered with
 * gras_datadesc_cb_field_push(). The structure is closed once
 * parse_statement() stops returning no_error.
 *
 * Returns NULL when parse_statement() stopped on anything but
 * mismatch_error. The successful return is on a line not shown in this
 * excerpt (presumably struct_type -- confirm).
 *
 * NOTE(review): locals such as buffname, i, field and name are declared on
 * lines not shown here.
 */
442 static gras_datadesc_type_t *parse_struct(char *definition) {
444 gras_error_t errcode;
/* counter used to name the structures declared without a tag */
446 static int anonymous_struct=0;
448 gras_dynar_t *identifiers;
452 gras_dynar_t *fields_to_push;
455 gras_datadesc_type_t *struct_type;
/* NOTE(review): the result of the first gras_dynar_new() is overwritten by
   the second call before being tested, so a failure to create 'identifiers'
   goes unnoticed. */
458 errcode=gras_dynar_new(&identifiers,sizeof(identifier_t),NULL);
459 errcode=gras_dynar_new(&fields_to_push,sizeof(char*),NULL);
460 if (errcode != no_error) {
465 /* Create the struct descriptor */
466 if (gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) {
467 TRYFAIL(gras_datadesc_struct(gras_ddt_parse_text,&struct_type));
468 VERB1("Parse the struct '%s'", gras_ddt_parse_text);
469 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
/* NOTE(review): the counter is incremented while building the name, so the
   trace below shows a number one above the one used in the name. The sprintf
   is unbounded; the size of buffname is not visible in this excerpt. */
471 sprintf(buffname,"anonymous struct %d",anonymous_struct++);
472 VERB1("Parse the anonymous struct nb %d", anonymous_struct);
473 TRYFAIL(gras_datadesc_struct(buffname,&struct_type));
476 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_LA)
477 PARSE_ERROR1("Unparasable symbol: Expecting struct definition, but got %s instead of '{'",
478 gras_ddt_parse_text);
480 /* Parse the identifiers */
481 for (errcode=parse_statement(definition,identifiers,fields_to_push);
482 errcode == no_error ;
483 errcode=parse_statement(definition,identifiers,fields_to_push)) {
485 DEBUG1("This statement contained %d identifiers",gras_dynar_length(identifiers));
486 /* append the identifiers we've found */
/* The strings owned by each identifier are released once its field has been
   appended. The test guarding the error below is on a line not shown here;
   judging by its message it fires when some '*' were not consumed by an
   annotation. */
487 gras_dynar_foreach(identifiers,i, field) {
489 PARSE_ERROR2("Not enough GRAS_ANNOTATE to deal with all dereferencing levels of %s (%d '*' left)",
490 field.name,field.tm.is_ref);
492 VERB2("Append field '%s' to %p",field.name, struct_type);
493 TRYFAIL(gras_datadesc_struct_append(struct_type, field.name, field.type));
494 gras_free(field.name);
495 gras_free(field.type_name);
/* empty the dynar so that it can be reused for the next statement */
498 gras_dynar_reset(identifiers);
499 DEBUG1("struct_type=%p",struct_type);
501 /* Make sure that all fields declaring a size push it into the cbps */
502 gras_dynar_foreach(fields_to_push,i, name) {
503 DEBUG1("struct_type=%p",struct_type);
504 VERB2("Push field '%s' into size stack of %p", name, struct_type);
505 gras_datadesc_cb_field_push(struct_type, name);
508 gras_dynar_reset(fields_to_push);
510 gras_datadesc_struct_close(struct_type);
/* parse_statement() returns mismatch_error on the closing '}'; any other
   error code means that the parsing of a statement went wrong */
511 if (errcode != mismatch_error) {
513 return NULL; /* FIXME: LEAK! */
517 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_RA)
518 PARSE_ERROR1("Unparasable symbol: Expected '}' at the end of struct definition, got '%s'",
519 gras_ddt_parse_text);
521 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
523 gras_dynar_free(identifiers);
524 gras_dynar_free(fields_to_push);
/*
 * parse_typedef:
 *
 * Parse a typedef: the modifiers of the aliased type, the structure
 * definition (through parse_struct()), the modifiers in front of the alias
 * and finally the alias name, which has to be a word token.
 *
 * definition: text being parsed, passed on to parse_struct().
 *
 * NOTE(review): the alias is never built; the last visible statement is an
 * unconditional PARSE_ERROR0("Cannot handle typedef yet"). The tests guarding
 * the parse_struct() call and the "reference without annotation" error, the
 * declaration of tm and the return statement are on lines not shown in this
 * excerpt.
 */
529 static gras_datadesc_type_t * parse_typedef(char *definition) {
533 gras_datadesc_type_t *struct_desc=NULL;
534 gras_datadesc_type_t *typedef_desc=NULL;
537 memset(&tm,0,sizeof(tm));
539 /* get the aliased type */
540 parse_type_modifier(&tm);
543 struct_desc = parse_struct(definition);
/* modifiers (such as '*') standing between the definition and the alias */
546 parse_type_modifier(&tm);
549 PARSE_ERROR0("Cannot handle reference without annotation");
551 /* get the aliasing name */
552 if (gras_ddt_parse_tok_num != GRAS_DDT_PARSE_TOKEN_WORD)
553 PARSE_ERROR1("Unparsable typedef: Expected the alias name, and got '%s'",
554 gras_ddt_parse_text);
556 /* (FIXME: should) build the alias */
557 PARSE_ERROR0("Cannot handle typedef yet");
565 * gras_datadesc_parse:
567 * Create a datadescription from the result of parsing the C type description
569 gras_datadesc_type_t *
570 gras_datadesc_parse(const char *name,
571 const char *C_statement) {
573 gras_datadesc_type_t * res=NULL;
575 int semicolon_count=0;
576 int def_count,C_count;
579 /* reput the \n in place for debug */
580 for (C_count=0; C_statement[C_count] != '\0'; C_count++)
581 if (C_statement[C_count] == ';' || C_statement[C_count] == '{')
583 definition = (char*)gras_malloc(C_count + semicolon_count + 1);
584 for (C_count=0,def_count=0; C_statement[C_count] != '\0'; C_count++) {
585 definition[def_count++] = C_statement[C_count];
586 if (C_statement[C_count] == ';' || C_statement[C_count] == '{') {
587 definition[def_count++] = '\n';
590 definition[def_count] = '\0';
593 VERB2("_gras_ddt_type_parse(%s) -> %d chars",definition, def_count);
594 gras_ddt_parse_pointer_string_init(definition);
596 /* Do I have a typedef, or a raw struct ?*/
597 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
599 if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"struct"))) {
600 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
601 res = parse_struct(definition);
603 } else if ((gras_ddt_parse_tok_num == GRAS_DDT_PARSE_TOKEN_WORD) && (!strcmp(gras_ddt_parse_text,"typedef"))) {
604 gras_ddt_parse_tok_num = gras_ddt_parse_lex_n_dump();
605 res = parse_typedef(definition);
608 ERROR1("Failed to parse the following symbol (not a struct neither a typedef) :\n%s",definition);
612 gras_ddt_parse_pointer_string_close();
613 VERB0("end of _gras_ddt_type_parse()");
614 gras_free(definition);
615 /* register it under the name provided as symbol */
616 if (strcmp(res->name,name)) {
617 ERROR2("In GRAS_DEFINE_TYPE, the provided symbol (here %s) must be the C type name (here %s)",