diff options
-rw-r--r-- | .gitignore | 9 | ||||
-rw-r--r-- | CMakeLists.txt | 18 | ||||
-rw-r--r-- | GnuLdLexer.g4 | 209 | ||||
-rw-r--r-- | GnuLdParser.g4 | 1345 | ||||
-rw-r--r-- | README.md | 23 | ||||
-rw-r--r-- | elfinfo.cpp | 271 | ||||
-rw-r--r-- | ld.cpp | 267 | ||||
-rw-r--r-- | ld.h | 11 |
8 files changed, 2153 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..85f656f --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.idea +*.iws +*.jar +*.tokens +build +/antlr4 +/antlr4-build +/antlr4-install +/target diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..2749ba8 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.2) + +project(elfinfo CXX ASM) + +# If this fails, check out README.md +find_package(Antlr4) + +antlr4_add_target(TARGET GnuLd STATIC LEXER GnuLdLexer.g4 PARSER GnuLdParser.g4) + +add_executable(elfinfo elfinfo.cpp ld.cpp ld.h) +target_compile_options(elfinfo PUBLIC "--std=c++14") +target_link_libraries(elfinfo elf GnuLd Antlr4::antlr4_shared) + +INSTALL(TARGETS elfinfo + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib +) diff --git a/GnuLdLexer.g4 b/GnuLdLexer.g4 new file mode 100644 index 0000000..fba488e --- /dev/null +++ b/GnuLdLexer.g4 @@ -0,0 +1,209 @@ +lexer grammar GnuLdLexer; + + +//%token <token> +LBRACE : '{'; +RBRACE : '}'; + +// <integer> +// <token> assign_op atype attributes_opt sect_constraint opt_align_with_input +// <name> filename +//%type <versyms> vers_defns +//%type <versnode> vers_tag +//%type <deflist> verdep + +ABSOLUTE : 'ABSOLUTE'; +ADDR : 'ADDR'; +AFTER : 'AFTER'; +ALIAS : 'ALIAS'; +//ALIGN : 'ALIGN'; +ALIGN_K : 'ALIGN'; +ALIGNMOD : 'ALIGNMOD'; +ALIGNOF : 'ALIGNOF'; +ALIGN_WITH_INPUT : 'ALIGN_WITH_INPUT'; +AS_NEEDED : 'AS_NEEDED'; +ASSERT_K : 'ASSERT'; +AT : 'AT'; +BASE : 'BASE'; +BEFORE : 'BEFORE'; +BIND : 'BIND'; +BLOCK : 'BLOCK'; +BYTE : 'BYTE'; +CASE : 'CASE'; +CHIP : 'CHIP'; +CONSTANT : 'CONSTANT'; +CONSTRUCTORS : 'CONSTRUCTORS'; +COPY : 'COPY'; +CREATE_OBJECT_SYMBOLS : 'CREATE_OBJECT_SYMBOLS'; +DATA_SEGMENT_ALIGN : 'DATA_SEGMENT_ALIGN'; +DATA_SEGMENT : 'DATA_SEGMENT'; +DATA_SEGMENT_END : 'DATA_SEGMENT_END'; +DATA_SEGMENT_RELRO_END : 'DATA_SEGMENT_RELRO_END'; +DEFINED : 'DEFINED'; +DEFSYMEND : 'DEFSYMEND'; +DSECT : 
'DSECT'; +ENDWORD : 'ENDWORD'; +ENTRY : 'ENTRY'; +EXCLUDE_FILE : 'EXCLUDE_FILE'; +EXTERN : 'EXTERN'; +FILL : 'FILL'; +FLOAT : 'FLOAT'; +FORCE_COMMON_ALLOCATION : 'FORCE_COMMON_ALLOCATION'; +FORMAT : 'FORMAT'; +GLOBAL : 'GLOBAL'; +GROUP : 'GROUP'; +HIDDEN_ : 'HIDDEN'; +HLL : 'HLL'; +INCLUDE : 'INCLUDE'; +INFO : 'INFO'; +INHIBIT_COMMON_ALLOCATION : 'INHIBIT_COMMON_ALLOCATION'; +//INPUT_DEFSYM : 'INPUT_DEFSYM'; +//INPUT_DYNAMIC_LIST : 'INPUT_DYNAMIC_LIST'; +INPUT : 'INPUT'; +//INPUT_MRI_SCRIPT : 'INPUT_MRI_SCRIPT'; +//INPUT_SCRIPT : 'INPUT_SCRIPT'; +//INPUT_SECTION_FLAGS : 'INPUT_SECTION_FLAGS'; +//INPUT_VERSION_SCRIPT : 'INPUT_VERSION_SCRIPT'; +INSERT_K : 'INSERT'; +KEEP : 'KEEP'; +LD_FEATURE : 'LD_FEATURE'; +LENGTH : 'LENGTH'; +LIST : 'LIST'; +LOADADDR : 'LOADADDR'; +LOAD : 'LOAD'; +LOCAL : 'LOCAL'; +LOG2CEIL : 'LOG2CEIL'; +LONG : 'LONG'; +MAP : 'MAP'; +MAX_K : 'MAX'; +MEMORY : 'MEMORY'; +MIN_K : 'MIN'; +NAMEWORD : 'NAMEWORD'; +NEWLINE : 'NEWLINE'; +NEXT : 'NEXT'; +NOCROSSREFS : 'NOCROSSREFS'; +NOCROSSREFS_TO : 'NOCROSSREFS_TO'; +NOFLOAT : 'NOFLOAT'; +NOLOAD : 'NOLOAD'; +ONLY_IF_RO : 'ONLY_IF_RO'; +ONLY_IF_RW : 'ONLY_IF_RW'; +ORDER : 'ORDER'; +ORIGIN : 'ORIGIN'; // TODO: or 'org' or 'o'. 
+OUTPUT_ARCH : 'OUTPUT_ARCH';
+OUTPUT_FORMAT : 'OUTPUT_FORMAT';
+OUTPUT : 'OUTPUT';
+OVERLAY : 'OVERLAY';
+PHDRS : 'PHDRS';
+PROVIDE_HIDDEN : 'PROVIDE_HIDDEN';
+PROVIDE : 'PROVIDE';
+PUBLIC : 'PUBLIC';
+QUAD : 'QUAD';
+REGION_ALIAS : 'REGION_ALIAS';
+REL : 'REL';
+SEARCH_DIR : 'SEARCH_DIR';
+SECTIONS : 'SECTIONS';
+SECT : 'SECT';
+SEGMENT_START : 'SEGMENT_START';
+SHORT : 'SHORT';
+SIZEOF_HEADERS : 'SIZEOF_HEADERS';
+SIZEOF : 'SIZEOF';
+SORT_BY_ALIGNMENT : 'SORT_BY_ALIGNMENT';
+SORT_BY_INIT_PRIORITY : 'SORT_BY_INIT_PRIORITY';
+SORT_BY_NAME : 'SORT_BY_NAME';
+SORT_NONE : 'SORT_NONE';
+SPECIAL : 'SPECIAL';
+SQUAD : 'SQUAD';
+START : 'START';
+STARTUP : 'STARTUP';
+SUBALIGN : 'SUBALIGN';
+SYSLIB : 'SYSLIB';
+TARGET_K : 'TARGET';
+TRUNCATE : 'TRUNCATE';
+VERS_IDENTIFIER : 'VERS_IDENTIFIER';
+VERSIONK : 'VERSION'; // the script keyword is VERSION; VERSIONK is only the token name the parser's `version` rule refers to
+VERS_TAG : 'VERS_TAG';
+
+/*
+Names are very liberal, they can be full strings and start with a dot.
+*/
+
+QUOTE : '"' -> skip, pushMode(STRING);
+
+//SPACE : ' ';
+//SPACES : ' '+;
+
+//name :
+// '"' (NAME | SPACE | SPACES)+ '"' # nameQuoted
+// | NAME # namePlain;
+
+//NAME : [\._a-zA-Z][\._a-zA-Z0-9]*;
+NAME : [*\._a-zA-Z][*\.\/_a-zA-Z0-9]*;
+
+// TODO: ld supports some really fancy expressions here, like "0101010b", "ffH", "ffx", "$Aa" etc
+//INT : '0x' [0-9a-fA-F]+
+// | [0-9]+;
+INT : INT_NUMBER INT_SIZE?; // a number with an optional K/M size suffix
+fragment
+INT_NUMBER : INT_HEX
+    | INT_DECIMAL;
+fragment
+INT_HEX : '0x' [0-9a-fA-F]+;
+fragment
+INT_DECIMAL : [0-9]+;
+fragment
+INT_SIZE : 'M' | 'm' | 'K' | 'k';
+
+END : 'END'; // NOTE(review): declared after NAME, so on an equal-length match the text END lexes as NAME, not END - confirm this is intended
+
+LNAME : '-l' NAME;
+PLUSEQ : '+=';
+MINUSEQ : '-=';
+MULTEQ : '*=';
+DIVEQ : '/=';
+LSHIFTEQ : '<<=';
+RSHIFTEQ : '>>=';
+ANDEQ : '&=';
+OREQ : '|=';
+LSHIFT : '<<';
+RSHIFT : '>>';
+EQEQ : '==';
+EQ : '=';
+NE : '!=';
+LE : '<=';
+GE : '>=';
+ANDAND : '&&';
+OROR : '||';
+
+// Extra tokens
+COLON : ':';
+EXLAMATION : '!';
+DASH : '-';
+PLUS : '+';
+TILDE : '~';
+SLASH : '/';
+MOD : '%';
+LT : '<';
+GT : '>';
+HAT : '^';
+BAR : '|';
+COMMA : ',';
+SEMICOLON : ';';
+LPAREN : '('; +RPAREN : ')'; +STAR : '*'; +QUESTION : '?'; +AMPERSAND : '&'; +LBRACKET : '['; +RBRACKET : ']'; + +BlockComment + : '/*' .*? '*/' -> skip + ; + +WS + : [ \t\r\n]+ -> skip + ; + +mode STRING; +STRING_ANY : ~'"'; +STRING_END_QUOTE : '"' -> skip, popMode; diff --git a/GnuLdParser.g4 b/GnuLdParser.g4 new file mode 100644 index 0000000..1ed1119 --- /dev/null +++ b/GnuLdParser.g4 @@ -0,0 +1,1345 @@ +parser grammar GnuLdParser; + +options { + tokenVocab = GnuLdLexer; +} + +/* +TODO: check right associative annotations +*/ + +/* +%token <bigint> INT +%token <name> NAME LNAME +*/ + +/* +%right UNARY +%token END +%left <token> LPAREN +*/ + + +file: + /*INPUT_SCRIPT*/ script_file + | /*INPUT_MRI_SCRIPT*/ mri_script_file + | /*INPUT_VERSION_SCRIPT*/ version_script_file + | /*INPUT_DYNAMIC_LIST*/ dynamic_list_file + | /*INPUT_DEFSYM*/ defsym_expr + ; + +filename: NAME; + + +defsym_expr: +// { ldlex_defsym(); } + NAME EQ exp +// { +// ldlex_popstate(); +// lang_add_assignment (exp_defsym ($2, $4)); +// } + ; + +/* SYNTAX WITHIN AN MRI SCRIPT FILE */ +mri_script_file: +// { +// ldlex_mri_script (); +// PUSH_ERROR (_("MRI style script")); +// } + mri_script_lines +// { +// ldlex_popstate (); +// mri_draw_tree (); +// POP_ERROR (); +// } + ; + +mri_script_lines: + mri_script_lines mri_script_command NEWLINE + | + ; + +mri_script_command: + CHIP exp + | CHIP exp COMMA exp + | NAME /*{ + einfo(_("%P%F: unrecognised keyword in MRI style script '%s'\n"),$1); + }*/ + | LIST /*{ + config.map_filename = "-"; + }*/ + | ORDER ordernamelist + | ENDWORD + | PUBLIC NAME EQ exp + // { mri_public($2, $4); } + | PUBLIC NAME COMMA exp + // { mri_public($2, $4); } + | PUBLIC NAME exp + // { mri_public($2, $3); } + | FORMAT NAME + // { mri_format($2); } + | SECT NAME COMMA exp + // { mri_output_section($2, $4);} + | SECT NAME exp + // { mri_output_section($2, $3);} + | SECT NAME EQ exp + // { mri_output_section($2, $4);} + | ALIGN_K NAME EQ exp + // { mri_align($2,$4); } + | ALIGN_K 
NAME COMMA exp + // { mri_align($2,$4); } + | ALIGNMOD NAME EQ exp + // { mri_alignmod($2,$4); } + | ALIGNMOD NAME COMMA exp + // { mri_alignmod($2,$4); } + | ABSOLUTE mri_abs_name_list + | LOAD mri_load_name_list + | NAMEWORD NAME + // { mri_name($2); } + | ALIAS NAME COMMA NAME + // { mri_alias($2,$4,0);} + | ALIAS NAME COMMA INT + // { mri_alias ($2, 0, (int) $4.integer); } + | BASE exp + // { mri_base($2); } + | TRUNCATE INT + // { mri_truncate ((unsigned int) $2.integer); } + | CASE casesymlist + | EXTERN extern_name_list + | INCLUDE filename + // { ldlex_script (); ldfile_open_command_file($2); } + mri_script_lines END + // { ldlex_popstate (); } + | START NAME + // { lang_add_entry ($2, FALSE); } + | + ; + +ordernamelist: + ordernamelist COMMA NAME // { mri_order($3); } + | ordernamelist NAME // { mri_order($2); } + | + ; + +mri_load_name_list: + NAME + // { mri_load($1); } + | mri_load_name_list COMMA NAME // { mri_load($3); } + ; + +mri_abs_name_list: + NAME + // { mri_only_load($1); } + | mri_abs_name_list COMMA NAME + // { mri_only_load($3); } + ; + +casesymlist: + /* empty */ // { $$ = NULL; } + | NAME + | casesymlist COMMA NAME + ; + +/* Parsed as expressions so that commas separate entries */ +extern_name_list: + // { ldlex_expression (); } + extern_name_list_body + // { ldlex_popstate (); } + ; + +extern_name_list_body: + NAME + // { ldlang_add_undef ($1, FALSE); } + | extern_name_list_body NAME + // { ldlang_add_undef ($2, FALSE); } + | extern_name_list_body COMMA NAME + // { ldlang_add_undef ($3, FALSE); } + ; + +script_file: + // { ldlex_both(); } + ifile_list + // { ldlex_popstate(); } + ; + +ifile_list: + ifile_list ifile_p1 + | + ; + + +ifile_p1: + memory + | sections + | phdrs + | startup + | high_level_library + | low_level_library + | floating_point_support + | statement_anywhere + | version + | SEMICOLON + | TARGET_K LPAREN NAME RPAREN + // { lang_add_target($3); } + | SEARCH_DIR LPAREN filename RPAREN + // { ldfile_add_library_path ($3, 
FALSE); } + | OUTPUT LPAREN filename RPAREN + // { lang_add_output($3, 1); } + | OUTPUT_FORMAT LPAREN NAME RPAREN + // { lang_add_output_format ($3, (char *) NULL, + // (char *) NULL, 1); } + | OUTPUT_FORMAT LPAREN NAME COMMA NAME COMMA NAME RPAREN + // { lang_add_output_format ($3, $5, $7, 1); } + | OUTPUT_ARCH LPAREN NAME RPAREN + // { ldfile_set_output_arch ($3, bfd_arch_unknown); } + | FORCE_COMMON_ALLOCATION + // { command_line.force_common_definition = TRUE ; } + | INHIBIT_COMMON_ALLOCATION + // { command_line.inhibit_common_definition = TRUE ; } + | INPUT LPAREN input_list RPAREN + | GROUP + // { lang_enter_group (); } + LPAREN input_list RPAREN + // { lang_leave_group (); } + | MAP LPAREN filename RPAREN + // { lang_add_map($3); } + | INCLUDE filename + // { ldlex_script (); ldfile_open_command_file($2); } + ifile_list END + // { ldlex_popstate (); } + | NOCROSSREFS LPAREN nocrossref_list RPAREN + // { + // lang_add_nocrossref ($3); + // } + | NOCROSSREFS_TO LPAREN nocrossref_list RPAREN + // { + // lang_add_nocrossref_to ($3); + // } + | EXTERN LPAREN extern_name_list RPAREN + | INSERT_K AFTER NAME + // { lang_add_insert ($3, 0); } + | INSERT_K BEFORE NAME + // { lang_add_insert ($3, 1); } + | REGION_ALIAS LPAREN NAME COMMA NAME RPAREN + // { lang_memory_region_alias ($3, $5); } + | LD_FEATURE LPAREN NAME RPAREN + // { lang_ld_feature ($3); } + ; + +input_list: + // { ldlex_inputlist(); } + input_list1 + // { ldlex_popstate(); } + ; + +input_list1: + NAME + // { lang_add_input_file($1,lang_input_file_is_search_file_enum, + // (char *)NULL); } + | input_list1 COMMA NAME + // { lang_add_input_file($3,lang_input_file_is_search_file_enum, + // (char *)NULL); } + | input_list1 NAME + // { lang_add_input_file($2,lang_input_file_is_search_file_enum, + // (char *)NULL); } + | LNAME + // { lang_add_input_file($1,lang_input_file_is_l_enum, + // (char *)NULL); } + | input_list1 COMMA LNAME + // { lang_add_input_file($3,lang_input_file_is_l_enum, + // (char *)NULL); } 
+    | input_list1 LNAME
+    // { lang_add_input_file($2,lang_input_file_is_l_enum,
+    // (char *)NULL); }
+    | AS_NEEDED LPAREN
+    // { $<integer>$ = input_flags.add_DT_NEEDED_for_regular;
+    // input_flags.add_DT_NEEDED_for_regular = TRUE; }
+    input_list1 RPAREN // grammar continuation, not an action: the files inside AS_NEEDED ( ... ) and its closing paren
+    // { input_flags.add_DT_NEEDED_for_regular = $<integer>3; }
+    | input_list1 COMMA AS_NEEDED LPAREN
+    // { $<integer>$ = input_flags.add_DT_NEEDED_for_regular;
+    // input_flags.add_DT_NEEDED_for_regular = TRUE; }
+    input_list1 RPAREN // grammar continuation: nested list and closing paren
+    // { input_flags.add_DT_NEEDED_for_regular = $<integer>5; }
+    | input_list1 AS_NEEDED LPAREN
+    // { $<integer>$ = input_flags.add_DT_NEEDED_for_regular;
+    // input_flags.add_DT_NEEDED_for_regular = TRUE; }
+    input_list1 RPAREN // grammar continuation: nested list and closing paren
+    // { input_flags.add_DT_NEEDED_for_regular = $<integer>4; }
+    ;
+
+sections:
+    SECTIONS LBRACE sec_or_group_p1 RBRACE
+    ;
+
+sec_or_group_p1:
+    sec_or_group_p1 section
+    | sec_or_group_p1 statement_anywhere
+    |
+    ;
+
+statement_anywhere:
+    ENTRY LPAREN NAME RPAREN
+    // { lang_add_entry ($3, FALSE); }
+    | assignment end
+    | ASSERT_K /*{ldlex_expression ();}*/ LPAREN exp COMMA string RPAREN
+    // { ldlex_popstate ();
+    // lang_add_assignment (exp_assert ($4, $6)); }
+    ;
+
+/* The '*' and '?' cases are there because the lexer returns them as
+   separate tokens rather than as name.
*/ +wildcard_name: + NAME + // { + // $$ = $1; + // } + | STAR + // { + // $$ = "*"; + // } + | QUESTION + // { + // $$ = "?"; + // } + ; + +wildcard_spec: + wildcard_name +// { +// $$.name = $1; +// $$.sorted = none; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | EXCLUDE_FILE LPAREN exclude_name_list RPAREN wildcard_name +// { +// $$.name = $5; +// $$.sorted = none; +// $$.exclude_name_list = $3; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_name; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_ALIGNMENT LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_alignment; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_NONE LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_none; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN SORT_BY_ALIGNMENT LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_name_alignment; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN SORT_BY_NAME LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_name; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_ALIGNMENT LPAREN SORT_BY_NAME LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_alignment_name; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_ALIGNMENT LPAREN SORT_BY_ALIGNMENT LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_alignment; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN EXCLUDE_FILE LPAREN exclude_name_list RPAREN wildcard_name RPAREN +// { +// $$.name = $7; +// $$.sorted = by_name; +// $$.exclude_name_list = $5; +// $$.section_flag_list = NULL; +// } 
+ | SORT_BY_INIT_PRIORITY LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_init_priority; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + ; + +sect_flag_list: NAME +// { +// struct flag_info_list *n; +// n = ((struct flag_info_list *) xmalloc (sizeof *n)); +// if ($1[0] == '!') +// { +// n->with = without_flags; +// n->name = &$1[1]; +// } +// else +// { +// n->with = with_flags; +// n->name = $1; +// } +// n->valid = FALSE; +// n->next = NULL; +// $$ = n; +// } + | sect_flag_list AMPERSAND NAME +// { +// struct flag_info_list *n; +// n = ((struct flag_info_list *) xmalloc (sizeof *n)); +// if ($3[0] == '!') +// { +// n->with = without_flags; +// n->name = &$3[1]; +// } +// else +// { +// n->with = with_flags; +// n->name = $3; +// } +// n->valid = FALSE; +// n->next = $1; +// $$ = n; +// } + ; + +sect_flags: + /*not used by antlr: INPUT_SECTION_FLAGS*/ LPAREN sect_flag_list RPAREN +// { +// struct flag_info *n; +// n = ((struct flag_info *) xmalloc (sizeof *n)); +// n->flag_list = $3; +// n->flags_initialized = FALSE; +// n->not_with_flags = 0; +// n->only_with_flags = 0; +// $$ = n; +// } + ; + +exclude_name_list: + exclude_name_list wildcard_name +// { +// struct name_list *tmp; +// tmp = (struct name_list *) xmalloc (sizeof *tmp); +// tmp->name = $2; +// tmp->next = $1; +// $$ = tmp; +// } + | + wildcard_name +// { +// struct name_list *tmp; +// tmp = (struct name_list *) xmalloc (sizeof *tmp); +// tmp->name = $1; +// tmp->next = NULL; +// $$ = tmp; +// } + ; + +file_name_list: + file_name_list opt_comma wildcard_spec +// { +// struct wildcard_list *tmp; +// tmp = (struct wildcard_list *) xmalloc (sizeof *tmp); +// tmp->next = $1; +// tmp->spec = $3; +// $$ = tmp; +// } + | + wildcard_spec +// { +// struct wildcard_list *tmp; +// tmp = (struct wildcard_list *) xmalloc (sizeof *tmp); +// tmp->next = NULL; +// tmp->spec = $1; +// $$ = tmp; +// } + ; + +input_section_spec_no_keep: + NAME +// { +// struct 
wildcard_spec tmp; +// tmp.name = $1; +// tmp.exclude_name_list = NULL; +// tmp.sorted = none; +// tmp.section_flag_list = NULL; +// lang_add_wild (&tmp, NULL, ldgram_had_keep); +// } + | sect_flags NAME +// { +// struct wildcard_spec tmp; +// tmp.name = $2; +// tmp.exclude_name_list = NULL; +// tmp.sorted = none; +// tmp.section_flag_list = $1; +// lang_add_wild (&tmp, NULL, ldgram_had_keep); +// } + | LBRACKET file_name_list RBRACKET +// { +// lang_add_wild (NULL, $2, ldgram_had_keep); +// } + | sect_flags LBRACKET file_name_list RBRACKET +// { +// struct wildcard_spec tmp; +// tmp.name = NULL; +// tmp.exclude_name_list = NULL; +// tmp.sorted = none; +// tmp.section_flag_list = $1; +// lang_add_wild (&tmp, $3, ldgram_had_keep); +// } + | wildcard_spec LPAREN file_name_list RPAREN +// { +// lang_add_wild (&$1, $3, ldgram_had_keep); +// } + | sect_flags wildcard_spec LPAREN file_name_list RPAREN +// { +// $2.section_flag_list = $1; +// lang_add_wild (&$2, $4, ldgram_had_keep); +// } + ; + +input_section_spec: + input_section_spec_no_keep + | KEEP LPAREN +// { ldgram_had_keep = TRUE; } + input_section_spec_no_keep RPAREN +// { ldgram_had_keep = FALSE; } + ; + +statement: + assignment end + | CREATE_OBJECT_SYMBOLS +// { +// lang_add_attribute(lang_object_symbols_statement_enum); +// } + | SEMICOLON + | CONSTRUCTORS +// { +// +// lang_add_attribute(lang_constructors_statement_enum); +// } + | SORT_BY_NAME LPAREN CONSTRUCTORS RPAREN +// { +// constructors_sorted = TRUE; +// lang_add_attribute (lang_constructors_statement_enum); +// } + | input_section_spec + | length LPAREN mustbe_exp RPAREN +// { +// lang_add_data ((int) $1, $3); +// } + + | FILL LPAREN fill_exp RPAREN +// { +// lang_add_fill ($3); +// } + | ASSERT_K /*{ldlex_expression ();}*/ LPAREN exp COMMA NAME RPAREN end +// { ldlex_popstate (); +// lang_add_assignment (exp_assert ($4, $6)); } + | INCLUDE filename +// { ldlex_script (); ldfile_open_command_file($2); } + statement_list_opt END +// { ldlex_popstate 
(); } + ; + +statement_list: + statement_list statement + | statement + ; + +statement_list_opt: + /* empty */ + | statement_list + ; + +length: + QUAD +// { $$ = $1; } + | SQUAD +// { $$ = $1; } + | LONG +// { $$ = $1; } + | SHORT +// { $$ = $1; } + | BYTE +// { $$ = $1; } + ; + +fill_exp: + mustbe_exp +// { +// $$ = exp_get_fill ($1, 0, "fill value"); +// } + ; + +fill_opt: + EQ fill_exp + // { $$ = $2; } + | // { $$ = (fill_type *) 0; } + ; + +assign_op: + PLUSEQ +// { $$ = '+'; } + | MINUSEQ +// { $$ = '-'; } + | MULTEQ +// { $$ = '*'; } + | DIVEQ +// { $$ = '/'; } + | LSHIFTEQ +// { $$ = LSHIFT; } + | RSHIFTEQ +// { $$ = RSHIFT; } + | ANDEQ +// { $$ = '&'; } + | OREQ +// { $$ = '|'; } + + ; + +end: SEMICOLON | COMMA + ; + + +assignment: + NAME EQ mustbe_exp +// { +// lang_add_assignment (exp_assign ($1, $3, FALSE)); +// } + | NAME assign_op mustbe_exp +// { +// lang_add_assignment (exp_assign ($1, +// exp_binop ($2, +// exp_nameop (NAME, +// $1), +// $3), FALSE)); +// } + | HIDDEN_ LPAREN NAME EQ mustbe_exp RPAREN +// { +// lang_add_assignment (exp_assign ($3, $5, TRUE)); +// } + | PROVIDE LPAREN NAME EQ mustbe_exp RPAREN +// { +// lang_add_assignment (exp_provide ($3, $5, FALSE)); +// } + | PROVIDE_HIDDEN LPAREN NAME EQ mustbe_exp RPAREN +// { +// lang_add_assignment (exp_provide ($3, $5, TRUE)); +// } + ; + + +opt_comma: + COMMA | ; + + +memory: + MEMORY LBRACE memory_spec_list_opt RBRACE + ; + +memory_spec_list_opt: memory_spec_list | ; + +memory_spec_list: + memory_spec_list opt_comma memory_spec + | memory_spec + ; + + +memory_spec: NAME +// { region = lang_memory_region_lookup ($1, TRUE); } + attributes_opt COLON + origin_spec opt_comma length_spec +// {} + | INCLUDE filename +// { ldlex_script (); ldfile_open_command_file($2); } + memory_spec_list_opt END +// { ldlex_popstate (); } + ; + +origin_spec: + ORIGIN EQ mustbe_exp +// { +// region->origin_exp = $3; +// region->current = region->origin; +// } + ; + +length_spec: + LENGTH EQ mustbe_exp +// { +// 
region->length_exp = $3; +// } + ; + +attributes_opt: + /* empty */ + /* { *//* dummy action to avoid bison 1.25 error message *//* } + |*/ LPAREN attributes_list RPAREN + ; + +attributes_list: + attributes_string + | attributes_list attributes_string + ; + +attributes_string: + NAME # attributeNormal +// { lang_set_flags (region, $1, 0); } + | EXLAMATION NAME # attributeInverted +// { lang_set_flags (region, $2, 1); } + ; + +/* +This would be best but the tokenizer would have to be made context sensitive which is too much work given how +easy it is to check the flags after parsing. + +attributes_string : + attribute + | '!' attribute # attributeInverted + ; + +attribute: 'r' | 'w' | 'x' | 'a' | 'i' | 'l' ; +*/ +startup: + STARTUP LPAREN filename RPAREN +// { lang_startup($3); } + ; + +high_level_library: + HLL LPAREN high_level_library_NAME_list RPAREN + | HLL LPAREN RPAREN +// { ldemul_hll((char *)NULL); } + ; + +high_level_library_NAME_list: + high_level_library_NAME_list opt_comma filename +// { ldemul_hll($3); } + | filename +// { ldemul_hll($1); } + + ; + +low_level_library: + SYSLIB LPAREN low_level_library_NAME_list RPAREN + ; low_level_library_NAME_list: + low_level_library_NAME_list opt_comma filename +// { ldemul_syslib($3); } + | + ; + +floating_point_support: + FLOAT +// { lang_float(TRUE); } + | NOFLOAT +// { lang_float(FALSE); } + ; + +nocrossref_list: + /* empty */ +// { +// $$ = NULL; +// } + | NAME nocrossref_list +// { +// struct lang_nocrossref *n; +// +// n = (struct lang_nocrossref *) xmalloc (sizeof *n); +// n->name = $1; +// n->next = $2; +// $$ = n; +// } + | NAME COMMA nocrossref_list +// { +// struct lang_nocrossref *n; +// +// n = (struct lang_nocrossref *) xmalloc (sizeof *n); +// n->name = $1; +// n->next = $3; +// $$ = n; +// } + ; + +mustbe_exp: // { ldlex_expression (); } + exp + // { ldlex_popstate (); $$=$2;} + ; + +exp : + DASH exp # expNegate // TODO: %prec UNARY +// { $$ = exp_unop ('-', $2); } + | LPAREN exp RPAREN # expParen 
+// { $$ = $2; } + | NEXT LPAREN exp RPAREN # expNextParen // TODO: %prec UNARY +// { $$ = exp_unop ((int) $1,$3); } + | EXLAMATION exp # expInvert // TODO: %prec UNARY +// { $$ = exp_unop ('!', $2); } + | PLUS exp # expPlus // TODO: %prec UNARY +// { $$ = $2; } + | TILDE exp # expMinus // TODO: %prec UNARY +// { $$ = exp_unop ('~', $2);} + | exp STAR exp # expMul +// { $$ = exp_binop ('*', $1, $3); } + | exp SLASH exp # expDiv +// { $$ = exp_binop ('/', $1, $3); } + | exp MOD exp # expMod +// { $$ = exp_binop ('%', $1, $3); } + | exp PLUS exp # expAdd +// { $$ = exp_binop ('+', $1, $3); } + | exp DASH exp # expSub +// { $$ = exp_binop ('-' , $1, $3); } + | exp LSHIFT exp # expLshift +// { $$ = exp_binop (LSHIFT , $1, $3); } + | exp RSHIFT exp # expRshift +// { $$ = exp_binop (RSHIFT , $1, $3); } + | exp EQEQ exp # expEq +// { $$ = exp_binop (EQ , $1, $3); } + | exp NE exp # expNe +// { $$ = exp_binop (NE , $1, $3); } + | exp LE exp # expLe +// { $$ = exp_binop (LE , $1, $3); } + | exp GE exp # expGe +// { $$ = exp_binop (GE , $1, $3); } + | exp LT exp # expLt +// { $$ = exp_binop ('<' , $1, $3); } + | exp GT exp # expGt +// { $$ = exp_binop ('>' , $1, $3); } + | exp AMPERSAND exp # expAnd +// { $$ = exp_binop ('&' , $1, $3); } + | exp HAT exp # expXor +// { $$ = exp_binop ('^' , $1, $3); } + | exp BAR exp # expOr +// { $$ = exp_binop ('|' , $1, $3); } + | exp QUESTION exp COLON exp # expTrinary +// { $$ = exp_trinop ('?' 
, $1, $3, $5); } + | exp ANDAND exp # expAndand +// { $$ = exp_binop (ANDAND , $1, $3); } + | exp OROR exp # expOror +// { $$ = exp_binop (OROR , $1, $3); } + | DEFINED LPAREN NAME RPAREN # expDefined +// { $$ = exp_nameop (DEFINED, $3); } + | INT # expInt +// { $$ = exp_bigintop ($1.integer, $1.str); } + | SIZEOF_HEADERS # expSizeofHeaders +// { $$ = exp_nameop (SIZEOF_HEADERS,0); } + | ALIGNOF LPAREN NAME RPAREN # expAlignof +// { $$ = exp_nameop (ALIGNOF,$3); } + | SIZEOF LPAREN NAME RPAREN # expSizeof +// { $$ = exp_nameop (SIZEOF,$3); } + | ADDR LPAREN NAME RPAREN # expAddr +// { $$ = exp_nameop (ADDR,$3); } + | LOADADDR LPAREN NAME RPAREN # expLoadaddr +// { $$ = exp_nameop (LOADADDR,$3); } + | CONSTANT LPAREN NAME RPAREN # expConstant +// { $$ = exp_nameop (CONSTANT,$3); } + | ABSOLUTE LPAREN exp RPAREN # expAbsolute +// { $$ = exp_unop (ABSOLUTE, $3); } + | ALIGN_K LPAREN exp RPAREN # expAlign +// { $$ = exp_unop (ALIGN_K,$3); } + | ALIGN_K LPAREN exp COMMA exp RPAREN # expAlignK +// { $$ = exp_binop (ALIGN_K,$3,$5); } + | DATA_SEGMENT_ALIGN LPAREN exp COMMA exp RPAREN # expDataSegmentAlign +// { $$ = exp_binop (DATA_SEGMENT_ALIGN, $3, $5); } + | DATA_SEGMENT_RELRO_END LPAREN exp COMMA exp RPAREN # expDataSegmentRelRoEnd +// { $$ = exp_binop (DATA_SEGMENT_RELRO_END, $5, $3); + | DATA_SEGMENT_END LPAREN exp RPAREN # expDataSegmentEnd +// { $$ = exp_unop (DATA_SEGMENT_END, $3); } + | SEGMENT_START LPAREN NAME COMMA exp RPAREN # expSegmentStart +// { /* The operands to the expression node are +// placed in the opposite order from the way +// in which they appear in the script as +// that allows us to reuse more code in +// fold_binary. 
*/ +// $$ = exp_binop (SEGMENT_START, +// $5, +// exp_nameop (NAME, $3)); } + | BLOCK LPAREN exp RPAREN # expBlock +// { $$ = exp_unop (ALIGN_K,$3); } + | NAME # expName +// { $$ = exp_nameop (NAME,$1); } + | MAX_K LPAREN exp COMMA exp RPAREN # expMax +// { $$ = exp_binop (MAX_K, $3, $5 ); } + | MIN_K LPAREN exp COMMA exp RPAREN # expMin +// { $$ = exp_binop (MIN_K, $3, $5 ); } + | ASSERT_K LPAREN exp COMMA NAME RPAREN # expAssert +// { $$ = exp_assert ($3, $5); } + | ORIGIN LPAREN NAME RPAREN # expOrigin +// { $$ = exp_nameop (ORIGIN, $3); } + | LENGTH LPAREN NAME RPAREN # expLengthExp +// { $$ = exp_nameop (LENGTH, $3); } + | LOG2CEIL LPAREN exp RPAREN # expLog2ceil +// { $$ = exp_unop (LOG2CEIL, $3); } + ; + + +memspec_at_opt: + AT GT NAME // { $$ = $3; } + | // { $$ = 0; } + ; + +opt_at: + AT LPAREN exp RPAREN // { $$ = $3; } + | // { $$ = 0; } + ; + +opt_align: + ALIGN_K LPAREN exp RPAREN // { $$ = $3; } + | // { $$ = 0; } + ; + +opt_align_with_input: + ALIGN_WITH_INPUT // { $$ = ALIGN_WITH_INPUT; } + | // { $$ = 0; } + ; + +opt_subalign: + SUBALIGN LPAREN exp RPAREN // { $$ = $3; } + | // { $$ = 0; } + ; + +sect_constraint: + ONLY_IF_RO // { $$ = ONLY_IF_RO; } + | ONLY_IF_RW // { $$ = ONLY_IF_RW; } + | SPECIAL // { $$ = SPECIAL; } + | // { $$ = 0; } + ; + +section: NAME // { ldlex_expression(); } + opt_exp_with_type + opt_at + opt_align + opt_align_with_input + opt_subalign // { ldlex_popstate (); ldlex_script (); } + sect_constraint + LBRACE +// { +// lang_enter_output_section_statement($1, $3, +// sectype, +// $5, $7, $4, $9, $6); +// } + statement_list_opt + RBRACE // { ldlex_popstate (); ldlex_expression (); } + memspec_opt memspec_at_opt phdr_opt fill_opt +// { +// ldlex_popstate (); +// lang_leave_output_section_statement ($18, $15, $17, $16); +// } + opt_comma + {} + | OVERLAY +// { ldlex_expression (); } + opt_exp_without_type opt_nocrossrefs opt_at opt_subalign +// { ldlex_popstate (); ldlex_script (); } + LBRACE +// { +// lang_enter_overlay ($3, 
$6); +// } + overlay_section + RBRACE +// { ldlex_popstate (); ldlex_expression (); } + memspec_opt memspec_at_opt phdr_opt fill_opt +// { +// ldlex_popstate (); +// lang_leave_overlay ($5, (int) $4, +// $16, $13, $15, $14); +// } + opt_comma + | /* The GROUP case is just enough to support the gcc + svr3.ifile script. It is not intended to be full + support. I'm not even sure what GROUP is supposed + to mean. */ + GROUP // { ldlex_expression (); } + opt_exp_with_type +// { +// ldlex_popstate (); +// lang_add_assignment (exp_assign (".", $3, FALSE)); +// } + LBRACE sec_or_group_p1 RBRACE + | INCLUDE filename + // { ldlex_script (); ldfile_open_command_file($2); } + sec_or_group_p1 END + // { ldlex_popstate (); } + ; + +type: + NOLOAD // { sectype = noload_section; } + | DSECT // { sectype = noalloc_section; } + | COPY // { sectype = noalloc_section; } + | INFO // { sectype = noalloc_section; } + | OVERLAY // { sectype = noalloc_section; } + ; + +atype: + LPAREN type RPAREN + | /* EMPTY */ // { sectype = normal_section; } + | LPAREN RPAREN // { sectype = normal_section; } + ; + +opt_exp_with_type: + exp atype COLON // { $$ = $1; } + | atype COLON // { $$ = (etree_type *)NULL; } + | /* The BIND cases are to support the gcc svr3.ifile + script. They aren't intended to implement full + support for the BIND keyword. I'm not even sure + what BIND is supposed to mean. 
*/ + BIND LPAREN exp RPAREN atype COLON // { $$ = $3; } + | BIND LPAREN exp RPAREN BLOCK LPAREN exp RPAREN atype COLON + // { $$ = $3; } + ; + +opt_exp_without_type: + exp COLON // { $$ = $1; } + | COLON // { $$ = (etree_type *) NULL; } + ; + +opt_nocrossrefs: + /* empty */ +// { $$ = 0; } + | NOCROSSREFS +// { $$ = 1; } + ; + +memspec_opt: + GT NAME + // { $$ = $2; } + | // { $$ = DEFAULT_MEMORY_REGION; } + ; + +phdr_opt: + /* empty */ +// { +// $$ = NULL; +// } + | phdr_opt COLON NAME +// { +// struct lang_output_section_phdr_list *n; +// +// n = ((struct lang_output_section_phdr_list *) +// xmalloc (sizeof *n)); +// n->name = $3; +// n->used = FALSE; +// n->next = $1; +// $$ = n; +// } + ; + +overlay_section: + /* empty */ + | overlay_section + NAME +// { +// ldlex_script (); +// lang_enter_overlay_section ($2); +// } + LBRACE statement_list_opt RBRACE +// { ldlex_popstate (); ldlex_expression (); } + phdr_opt fill_opt +// { +// ldlex_popstate (); +// lang_leave_overlay_section ($9, $8); +// } + opt_comma + ; + +phdrs: + PHDRS LBRACE phdr_list RBRACE + ; + +phdr_list: + /* empty */ + | phdr_list phdr + ; + +phdr: + NAME // { ldlex_expression (); } + phdr_type phdr_qualifiers // { ldlex_popstate (); } + SEMICOLON +// { +// lang_new_phdr ($1, $3, $4.filehdr, $4.phdrs, $4.at, +// $4.flags); +// } + ; + +phdr_type: + exp +// { +// $$ = $1; +// +// if ($1->type.node_class == etree_name +// && $1->type.node_code ==name) +// { +// const char *s; +// unsigned int i; +// static const char * const phdr_types[] = +// { +// "PT_NULL", "PT_LOAD", "PT_DYNAMIC", +// "PT_INTERP", "PT_NOTE", "PT_SHLIB", +// "PT_PHDR", "PT_TLS" +// }; +// +// s = $1->name.name; +// for (i = 0; +// i < sizeof phdr_types / sizeof phdr_types[0]; +// i++) +// if (strcmp (s, phdr_types[i]) == 0) +// { +// $$ = exp_intop (i); +// break; +// } +// if (i == sizeof phdr_types / sizeof phdr_types[0]) +// { +// if (strcmp (s, "PT_GNU_EH_FRAME") == 0) +// $$ = exp_intop (0x6474e550); +// else if (strcmp (s, 
"PT_GNU_STACK") == 0) +// $$ = exp_intop (0x6474e551); +// else +// { +// einfo (_("\ +//%X%P:%S: unknown phdr type `%s' (try integer literal)\n"), +// NULL, s); +// $$ = exp_intop (0); +// } +// } +// } +// } + ; + +phdr_qualifiers: + /* empty */ +// { +// memset (&$$, 0, sizeof (struct phdr_info)); +// } + | NAME phdr_val phdr_qualifiers +// { +// $$ = $3; +// if (strcmp ($1, "FILEHDR") == 0 && $2 == NULL) +// $$.filehdr = TRUE; +// else if (strcmp ($1, "PHDRS") == 0 && $2 == NULL) +// $$.phdrs = TRUE; +// else if (strcmp ($1, "FLAGS") == 0 && $2 != NULL) +// $$.flags = $2; +// else +// einfo (_("%X%P:%S: PHDRS syntax error at `%s'\n"), +// NULL, $1); +// } + | AT LPAREN exp RPAREN phdr_qualifiers +// { +// $$ = $5; +// $$.at = $3; +// } + ; + +phdr_val: + /* empty */ +// { +// $$ = NULL; +// } + | LPAREN exp RPAREN +// { +// $$ = $2; +// } + ; + +dynamic_list_file: +// { +// ldlex_version_file (); +// PUSH_ERROR (_("dynamic list")); +// } + dynamic_list_nodes +// { +// ldlex_popstate (); +// POP_ERROR (); +// } + ; + +dynamic_list_nodes: + dynamic_list_node + | dynamic_list_nodes dynamic_list_node + ; + +dynamic_list_node: + LBRACE dynamic_list_tag RBRACE SEMICOLON + ; + +dynamic_list_tag: + vers_defns SEMICOLON +// { +// lang_append_dynamic_list ($1); +// } + ; + +/* This syntax is used within an external version script file. */ + +version_script_file: +// { +// ldlex_version_file (); +// PUSH_ERROR (_("VERSION script")); +// } + vers_nodes +// { +// ldlex_popstate (); +// POP_ERROR (); +// } + ; + +/* This is used within a normal linker script file. 
*/ + +version: +// { +// ldlex_version_script (); +// } + VERSIONK LBRACE vers_nodes RBRACE +// { +// ldlex_popstate (); +// } + ; + +vers_nodes: + vers_node + | vers_nodes vers_node + ; + +vers_node: + LBRACE vers_tag RBRACE SEMICOLON +// { +// lang_register_vers_node (NULL, $2, NULL); +// } + | VERS_TAG LBRACE vers_tag RBRACE SEMICOLON +// { +// lang_register_vers_node ($1, $3, NULL); +// } + | VERS_TAG LBRACE vers_tag RBRACE verdep SEMICOLON +// { +// lang_register_vers_node ($1, $3, $5); +// } + ; + +verdep: + VERS_TAG +// { +// $$ = lang_add_vers_depend (NULL, $1); +// } + | verdep VERS_TAG +// { +// $$ = lang_add_vers_depend ($1, $2); +// } + ; + +vers_tag: + /* empty */ +// { +// $$ = lang_new_vers_node (NULL, NULL); +// } + | vers_defns SEMICOLON +// { +// $$ = lang_new_vers_node ($1, NULL); +// } + | GLOBAL COLON vers_defns SEMICOLON +// { +// $$ = lang_new_vers_node ($3, NULL); +// } + | LOCAL COLON vers_defns SEMICOLON +// { +// $$ = lang_new_vers_node (NULL, $3); +// } + | GLOBAL COLON vers_defns SEMICOLON LOCAL COLON vers_defns SEMICOLON +// { +// $$ = lang_new_vers_node ($3, $7); +// } + ; + +vers_defns: + VERS_IDENTIFIER +// { +// $$ = lang_new_vers_pattern (NULL, $1, ldgram_vers_current_lang, FALSE); +// } + | NAME +// { +// $$ = lang_new_vers_pattern (NULL, $1, ldgram_vers_current_lang, TRUE); +// } + | vers_defns SEMICOLON VERS_IDENTIFIER +// { +// $$ = lang_new_vers_pattern ($1, $3, ldgram_vers_current_lang, FALSE); +// } + | vers_defns SEMICOLON NAME +// { +// $$ = lang_new_vers_pattern ($1, $3, ldgram_vers_current_lang, TRUE); +// } + | vers_defns SEMICOLON EXTERN NAME LBRACE +// { +// $<name>$ = ldgram_vers_current_lang; +// ldgram_vers_current_lang = $4; +// } + vers_defns opt_semicolon RBRACE +// { +// struct bfd_elf_version_expr *pat; +// for (pat = $7; pat->next != NULL; pat = pat->next); +// pat->next = $1; +// $$ = $7; +// ldgram_vers_current_lang = $<name>6; +// } + | EXTERN NAME LBRACE +// { +// $<name>$ = ldgram_vers_current_lang; +// 
This code currently depends on experimental patches for Antlr4's C++ runtime, which has to be built first. This should
more or less do it:
/// Kind of memory region that a loadable ELF segment is matched against.
enum class SectionType {
    TEXT, DATA
};

/// Returns the lower-case display name of a section type.
const char *to_str(const SectionType &type) {
    switch (type) {
        case SectionType::TEXT:
            return "text";
        case SectionType::DATA:
            return "data";
    }
    // Only reachable for a value outside the enumerators; without this the
    // function would fall off its end, which is undefined behaviour.
    return "unknown";
}

/// A memory region covering the half-open address range [start, end).
struct Section {
    SectionType type;
    const Elf64_Addr start;
    const Elf64_Addr end;    // one past the last address, i.e. start + size
    const Elf64_Xword size;
    Elf64_Addr used;         // running total maintained by the code that fills the section

    // The initializers are listed in declaration order, which is the order the
    // members are actually initialized in. `start` and `size` in `end(...)`
    // name the constructor parameters, not the members.
    Section(SectionType type, Elf64_Addr start, Elf64_Xword size)
            : type(type), start(start), end(start + size), size(size), used(0) {
    }

    /// True when the whole range [address, address + size) lies inside the section.
    ///
    /// A range that ends exactly at `end` is contained. The comparison is done on
    /// the remaining room (`end - address`) so that `address + size` can never wrap.
    bool contains(Elf64_Addr address, Elf64_Xword size) const {
        return contains(address) && size <= end - address;
    }

    /// True when `address` lies inside [start, end).
    bool contains(Elf64_Addr address) const {
        return start <= address && address < end;
    }
};
/**
 * Formats a byte count with a binary-multiple suffix.
 *
 * Values of at least 1024 * 1024 are divided by 1024 * 1024 and suffixed with
 * "M"; values of at least 1024 are divided by 1024 and suffixed with "k";
 * smaller values are printed unchanged. The division truncates.
 *
 * The thresholds are inclusive so that exactly 1024 prints as "1k" and exactly
 * 1 MiB prints as "1M" (previously "1024" and "1024k").
 *
 * @param i   value to format
 * @param buf destination for the NUL-terminated result; the caller must supply
 *            room for the digits, one suffix character and the terminator
 *            (16 bytes is enough for any 64-bit value)
 */
void to_iso(Elf64_Addr i, char *buf) {
    const char *suffix;
    if (i >= 1024 * 1024) {
        i /= 1024 * 1024;
        suffix = "M";
    } else if (i >= 1024) {
        i /= 1024;
        suffix = "k";
    } else {
        suffix = "";
    }
    sprintf(buf, "%" PRIu64 "%s", i, suffix);
}
open(filename, O_RDONLY, 0)) < 0) + err(EX_NOINPUT, "open \"%s\" failed", argv[1]); + + Elf *e; + if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) + errx(EX_SOFTWARE, "elf_begin() failed: %s.", elf_errmsg(-1)); + if (elf_kind(e) != ELF_K_ELF) + errx(EX_DATAERR, "%s is not an ELF object.", argv[1]); + + size_t shstrndx; + if (elf_getshdrstrndx(e, &shstrndx) != 0) + errx(EX_SOFTWARE, "elf_getshdrstrndx() failed: %s.", elf_errmsg(-1)); + + size_t program_header_count; + if (elf_getphdrnum(e, &program_header_count) != 0) + errx(EX_DATAERR, "elf_getphdrnum() failed: %s.", elf_errmsg(-1)); + + size_t text_size = 0, data_size = 0, bss_size = 0; + for (int i = 0; i < program_header_count; i++) { + GElf_Phdr phdr; + + if (gelf_getphdr(e, i, &phdr) != &phdr) + errx(EX_SOFTWARE, "getphdr() failed: %s.", elf_errmsg(-1)); + + if (phdr.p_type == PT_LOAD) { + SectionType expectedType; + size_t *size; + + if (phdr.p_flags == (PF_X | PF_W | PF_R) || phdr.p_flags == (PF_X | PF_R)) { + if (debug) { + printf("Adding PH #%d as text\n", i); + } + + expectedType = SectionType::TEXT; + size = &text_size; + } else if (phdr.p_flags == (PF_R | PF_W)) { + expectedType = SectionType::DATA; + if (phdr.p_filesz > 0) { + if (debug) { + printf("Adding PH #%d as data\n", i); + } + size = &data_size; + } + else { + if (debug) { + printf("Adding PH #%d as bss\n", i); + } + size = &bss_size; + } + } else { + warnx("Unknown flag combination: 0x%02x", phdr.p_flags); + warnx("Unknown flag combination: 0x%02x", PF_X | PF_R); + continue; + } + + auto s = std::find_if(sections.begin(), sections.end(), [&](Section §ion) { + return section.type == expectedType && section.contains(phdr.p_vaddr, phdr.p_memsz); + }); + + if (s == sections.end()) { + fprintf(stderr, + "Could not find a section for elf header #%d of type %s, at address 0x%08" PRIx64 " with size %" PRId64 "\n", + i, to_str(expectedType), phdr.p_vaddr, phdr.p_memsz); + } + else { + (*s).used += phdr.p_memsz; + + *size += phdr.p_memsz; + } + } else 
/// Access attribute that can be attached to a memory area.
enum class MemoryAttribute {
    R, W, X
};

/// One named memory area: its origin, length and attribute set.
class MemoryArea {
public:
    std::string name;
    // Zero-initialized so a default-constructed area never exposes indeterminate values.
    uint64_t origin = 0;
    uint64_t length = 0;
    std::set<MemoryAttribute> attributes;
};

/**
 * Maps an attribute letter to its MemoryAttribute, ignoring case.
 *
 * @param c one of 'r', 'w', 'x' in either case
 * @return the matching attribute
 * @throws std::domain_error for any other character; the message names the character
 */
static MemoryAttribute valueOf(char c) {
    switch (c) {
        case 'r':
        case 'R':
            return MemoryAttribute::R;
        case 'w':
        case 'W':
            return MemoryAttribute::W;
        case 'x':
        case 'X':
            return MemoryAttribute::X;
        default:
            // Build a std::string first: `"literal" + c` is pointer arithmetic on
            // the literal, not concatenation, and yields a garbage message.
            throw std::domain_error(std::string("Invalid memory attribute: ") + c);
    }
}

/**
 * Tells whether `a` ends with `b`.
 *
 * @return true when `b` is a suffix of `a` (an empty `b` is a suffix of everything)
 */
static bool endsWith(const std::string &a, const std::string &b) {
    // compare() returns 0 on equality, so the result has to be tested against 0;
    // returning it directly inverts the answer.
    return b.length() <= a.length() && a.compare(a.length() - b.length(), b.length(), b) == 0;
}
return _annotations.erase(node); + } + +protected: + std::map<ParseTree *, V> _annotations; + +private: +}; + +class ElfinfoGnuLdBaseListener : public GnuLdParserBaseListener { +private: +public: + vector<MemoryArea> memoryAreas; + + ParseTreeProperty<uint64_t> expr; +// map<ParserRuleContext*, uint64_t> expr; + + static uint64_t parseInt(const string &s) { + string str; + transform(begin(s), end(s), begin(str), ::tolower); + int base = 10; + if (str.compare(0, 2, "0x")) { + base = 16; + str = str.substr(0, 2); + } + + int factor = 1; + if (endsWith(str, "k")) { + factor = 1024; + str = str.substr(0, str.length() - 1); + } else if (endsWith(str, "k")) { + factor = 1024 * 1024; + str = str.substr(0, str.length() - 1); + } + + unsigned long long i = strtoull(str.c_str(), NULL, base); + + if (factor > 1) { + i = i * factor; + } + return i; + } + + virtual void exitExpAlign(GnuLdParser::ExpAlignContext *ctx) override { + expr.put(ctx, 0); + } + + void exitExpInt(GnuLdParser::ExpIntContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitExpInt: ctx->INT()->getText() = " + ctx->INT()->getText()); + uint64_t i = parseInt(ctx->INT()->getText()); + expr.put(ctx, i); + } + + void exitExpSub(GnuLdParser::ExpSubContext *ctx) override { + uint64_t a = expr.get(ctx->exp(0).get()); + uint64_t b = expr.get(ctx->exp(1).get()); + uint64_t x = a - b; + expr.put(ctx, x); + } + + void exitExpAdd(GnuLdParser::ExpAddContext *ctx) override { + uint64_t a = expr.get(ctx->exp(0).get()); + uint64_t b = expr.get(ctx->exp(1).get()); + uint64_t x = a + b; + expr.put(ctx, x); + } + + void exitExpName(GnuLdParser::ExpNameContext *ctx) override { + expr.put(ctx, 0); + } + + void exitExpAddr(GnuLdParser::ExpAddrContext *ctx) override { + expr.put(ctx, 0); + } + + void exitExpSizeof(GnuLdParser::ExpSizeofContext *ctx) override { + expr.put(ctx, 0); + } + + void exitExpLengthExp(GnuLdParser::ExpLengthExpContext *ctx) override { + MemoryArea + ma = 
getMemoryArea(ctx->NAME()->getText()); +// System.out.println("ma.length = " + ma.length); + expr.put(ctx, ma.length); + } + + void exitExpOrigin(GnuLdParser::ExpOriginContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitExpOrigin: " + ctx->getText()); + MemoryArea + ma = getMemoryArea(ctx->NAME()->getText()); +// System.out.println("ma.origin = " + ma.origin); + expr.put(ctx, ma.origin); + } + + MemoryArea getMemoryArea(const string &name) { + for (MemoryArea &ma : memoryAreas) { + if (ma.name == name) { + return ma; + } + } + throw new RuntimeException("No such memory area: " + name); + } + + void enterMustbe_exp(GnuLdParser::Mustbe_expContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.enterMustbe_exp"); + } + + void exitMustbe_exp(GnuLdParser::Mustbe_expContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitMustbe_exp"); + + expr.put(ctx, expr.get(ctx->exp().get())); + } + + + void enterOrigin_spec(GnuLdParser::Origin_specContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.enterOrigin_spec"); + } + + void exitOrigin_spec(GnuLdParser::Origin_specContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitOrigin_spec"); + } + + void enterLength_spec(GnuLdParser::Length_specContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.enterLength_spec"); + } + + void exitLength_spec(GnuLdParser::Length_specContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitLength_spec"); + } + + void enterMemory_spec(GnuLdParser::Memory_specContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.enterMemory_spec"); + } + + void exitMemory_spec(GnuLdParser::Memory_specContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitMemory_spec"); + MemoryArea ma; + ma.name = ctx->NAME()->getText(); + ma.attributes = attributes; +// System.out.println("ctx->origin_spec() = " + ctx->origin_spec()); + ma.origin = 
expr.get(ctx->origin_spec().get()->mustbe_exp().get()); + ma.length = expr.get(ctx->length_spec().get()->mustbe_exp().get()); + memoryAreas.push_back(ma); + } + + MemoryAttribute attribute; + bool attributesInverted; + set<MemoryAttribute> attributes; + + void exitAttributes_opt(GnuLdParser::Attributes_optContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.exitAttributes_opt"); + attributes.clear(); + } + + void enterAttributeInverted(GnuLdParser::AttributeInvertedContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.enterAttributeInverted"); + + if (!attributes.empty()) { + throw new RuntimeException( + "Attributes for memory areas can only be attributesInverted (with '!') as the first character in a specification; foo(!rw), not foo(x!rw)."); + } +// const string& name = ctx->name()->getText(); + const string &name = ctx->NAME()->getText(); +// System.out.println("ctx->ATTRIBUTE()->getText() = " + name); + + attributesInverted = true; + } + + void enterAttributeNormal(GnuLdParser::AttributeNormalContext *ctx) override { +// System.out.println("ElfinfoGnuLdListener.enterAttributeNormal"); + + const string &name = ctx->NAME()->getText(); +// System.out.println("ctx->ATTRIBUTE()->getText() = " + name); + for (int i = 0; i < name.length(); i++) { + attribute = valueOf(name[i]); + attributes.insert(attribute); + } + attributesInverted = false; + } +}; + +ld_file ld_file_loader::load(std::string path) { + ANTLRFileStream input(path); + GnuLdLexer lexer(&input); + CommonTokenStream tokens(&lexer); + tokens.fill(); + + for (auto token : tokens.getTokens()) { + std::cout << token->toString() << std::endl; + } + + GnuLdParser parser(&tokens); + ElfinfoGnuLdBaseListener listener; + parser.addParseListener(&listener); + auto file = parser.file(); + std::cout << file->toStringTree(&parser) << std::endl << std::endl; + return {}; +} @@ -0,0 +1,11 @@ +#pragma once + +#include <string> + +class ld_file { +}; + +class ld_file_loader { +public: + 
static ld_file load(std::string path); +}; |