From 4675ec55f6bdb8f826cc94a9585ff9229c277983 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Sat, 16 Jul 2016 13:31:03 +0200 Subject: o Initial import of code from intel d2000 playground. --- .gitignore | 9 + CMakeLists.txt | 18 + GnuLdLexer.g4 | 209 +++++++++ GnuLdParser.g4 | 1345 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 23 + elfinfo.cpp | 271 ++++++++++++ ld.cpp | 267 +++++++++++ ld.h | 11 + 8 files changed, 2153 insertions(+) create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 GnuLdLexer.g4 create mode 100644 GnuLdParser.g4 create mode 100644 README.md create mode 100644 elfinfo.cpp create mode 100644 ld.cpp create mode 100644 ld.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..85f656f --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.idea +*.iws +*.jar +*.tokens +build +/antlr4 +/antlr4-build +/antlr4-install +/target diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..2749ba8 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.2) + +project(elfinfo CXX ASM) + +# If this fails, check out README.md +find_package(Antlr4) + +antlr4_add_target(TARGET GnuLd STATIC LEXER GnuLdLexer.g4 PARSER GnuLdParser.g4) + +add_executable(elfinfo elfinfo.cpp ld.cpp ld.h) +target_compile_options(elfinfo PUBLIC "--std=c++14") +target_link_libraries(elfinfo elf GnuLd Antlr4::antlr4_shared) + +INSTALL(TARGETS elfinfo + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib +) diff --git a/GnuLdLexer.g4 b/GnuLdLexer.g4 new file mode 100644 index 0000000..fba488e --- /dev/null +++ b/GnuLdLexer.g4 @@ -0,0 +1,209 @@ +lexer grammar GnuLdLexer; + + +//%token +LBRACE : '{'; +RBRACE : '}'; + +// +// assign_op atype attributes_opt sect_constraint opt_align_with_input +// filename +//%type vers_defns +//%type vers_tag +//%type verdep + +ABSOLUTE : 'ABSOLUTE'; +ADDR : 'ADDR'; +AFTER : 'AFTER'; +ALIAS : 'ALIAS'; +//ALIGN : 
'ALIGN'; +ALIGN_K : 'ALIGN'; +ALIGNMOD : 'ALIGNMOD'; +ALIGNOF : 'ALIGNOF'; +ALIGN_WITH_INPUT : 'ALIGN_WITH_INPUT'; +AS_NEEDED : 'AS_NEEDED'; +ASSERT_K : 'ASSERT'; +AT : 'AT'; +BASE : 'BASE'; +BEFORE : 'BEFORE'; +BIND : 'BIND'; +BLOCK : 'BLOCK'; +BYTE : 'BYTE'; +CASE : 'CASE'; +CHIP : 'CHIP'; +CONSTANT : 'CONSTANT'; +CONSTRUCTORS : 'CONSTRUCTORS'; +COPY : 'COPY'; +CREATE_OBJECT_SYMBOLS : 'CREATE_OBJECT_SYMBOLS'; +DATA_SEGMENT_ALIGN : 'DATA_SEGMENT_ALIGN'; +DATA_SEGMENT : 'DATA_SEGMENT'; +DATA_SEGMENT_END : 'DATA_SEGMENT_END'; +DATA_SEGMENT_RELRO_END : 'DATA_SEGMENT_RELRO_END'; +DEFINED : 'DEFINED'; +DEFSYMEND : 'DEFSYMEND'; +DSECT : 'DSECT'; +ENDWORD : 'ENDWORD'; +ENTRY : 'ENTRY'; +EXCLUDE_FILE : 'EXCLUDE_FILE'; +EXTERN : 'EXTERN'; +FILL : 'FILL'; +FLOAT : 'FLOAT'; +FORCE_COMMON_ALLOCATION : 'FORCE_COMMON_ALLOCATION'; +FORMAT : 'FORMAT'; +GLOBAL : 'GLOBAL'; +GROUP : 'GROUP'; +HIDDEN_ : 'HIDDEN'; +HLL : 'HLL'; +INCLUDE : 'INCLUDE'; +INFO : 'INFO'; +INHIBIT_COMMON_ALLOCATION : 'INHIBIT_COMMON_ALLOCATION'; +//INPUT_DEFSYM : 'INPUT_DEFSYM'; +//INPUT_DYNAMIC_LIST : 'INPUT_DYNAMIC_LIST'; +INPUT : 'INPUT'; +//INPUT_MRI_SCRIPT : 'INPUT_MRI_SCRIPT'; +//INPUT_SCRIPT : 'INPUT_SCRIPT'; +//INPUT_SECTION_FLAGS : 'INPUT_SECTION_FLAGS'; +//INPUT_VERSION_SCRIPT : 'INPUT_VERSION_SCRIPT'; +INSERT_K : 'INSERT'; +KEEP : 'KEEP'; +LD_FEATURE : 'LD_FEATURE'; +LENGTH : 'LENGTH'; +LIST : 'LIST'; +LOADADDR : 'LOADADDR'; +LOAD : 'LOAD'; +LOCAL : 'LOCAL'; +LOG2CEIL : 'LOG2CEIL'; +LONG : 'LONG'; +MAP : 'MAP'; +MAX_K : 'MAX'; +MEMORY : 'MEMORY'; +MIN_K : 'MIN'; +NAMEWORD : 'NAMEWORD'; +NEWLINE : 'NEWLINE'; +NEXT : 'NEXT'; +NOCROSSREFS : 'NOCROSSREFS'; +NOCROSSREFS_TO : 'NOCROSSREFS_TO'; +NOFLOAT : 'NOFLOAT'; +NOLOAD : 'NOLOAD'; +ONLY_IF_RO : 'ONLY_IF_RO'; +ONLY_IF_RW : 'ONLY_IF_RW'; +ORDER : 'ORDER'; +ORIGIN : 'ORIGIN'; // TODO: or 'org' or 'o'. 
+OUTPUT_ARCH : 'OUTPUT_ARCH';
+OUTPUT_FORMAT : 'OUTPUT_FORMAT';
+OUTPUT : 'OUTPUT';
+OVERLAY : 'OVERLAY';
+PHDRS : 'PHDRS';
+PROVIDE_HIDDEN : 'PROVIDE_HIDDEN';
+PROVIDE : 'PROVIDE';
+PUBLIC : 'PUBLIC';
+QUAD : 'QUAD';
+REGION_ALIAS : 'REGION_ALIAS';
+REL : 'REL';
+SEARCH_DIR : 'SEARCH_DIR';
+SECTIONS : 'SECTIONS';
+SECT : 'SECT';
+SEGMENT_START : 'SEGMENT_START';
+SHORT : 'SHORT';
+SIZEOF_HEADERS : 'SIZEOF_HEADERS';
+SIZEOF : 'SIZEOF';
+SORT_BY_ALIGNMENT : 'SORT_BY_ALIGNMENT';
+SORT_BY_INIT_PRIORITY : 'SORT_BY_INIT_PRIORITY';
+SORT_BY_NAME : 'SORT_BY_NAME';
+SORT_NONE : 'SORT_NONE';
+SPECIAL : 'SPECIAL';
+SQUAD : 'SQUAD';
+START : 'START';
+STARTUP : 'STARTUP';
+SUBALIGN : 'SUBALIGN';
+SYSLIB : 'SYSLIB';
+TARGET_K : 'TARGET';
+TRUNCATE : 'TRUNCATE';
+VERS_IDENTIFIER : 'VERS_IDENTIFIER';
+VERSIONK : 'VERSIONK';
+VERS_TAG : 'VERS_TAG';
+// END must be declared ahead of NAME: both match the text "END" at the same length, and on a tie the earlier lexer rule wins.
+END : 'END';
+
+/*
+Names are very liberal, they can be full strings and start with a dot.
+*/
+
+QUOTE : '"' -> skip, pushMode(STRING); // the opening quote itself is dropped; what follows is lexed in mode STRING
+
+//SPACE : ' ';
+//SPACES : ' '+;
+
+//name :
+// '"' (NAME | SPACE | SPACES)+ '"' # nameQuoted
+// | NAME # namePlain;
+
+//NAME : [\._a-zA-Z][\._a-zA-Z0-9]*;
+NAME : [*\._a-zA-Z][*\.\/_a-zA-Z0-9]*; // keyword rules have to stay above this line, NAME matches their text too
+
+// TODO: ld supports some really fancy expressions here, like "0101010b", "ffH", "ffx", "$Aa" etc
+//INT : '0x' [0-9a-fA-F]+
+// | [0-9]+;
+INT : INT_NUMBER INT_SIZE?; // hex or decimal digits with an optional M/m/K/k suffix
+fragment
+INT_NUMBER : INT_HEX
+    | INT_DECIMAL;
+fragment
+INT_HEX : '0x' [0-9a-fA-F]+;
+fragment
+INT_DECIMAL : [0-9]+;
+fragment
+INT_SIZE : 'M' | 'm' | 'K' | 'k';
+
+LNAME : '-l' NAME; // longer than the one-character DASH match, so "-lfoo" stays a single token
+PLUSEQ : '+=';
+MINUSEQ : '-=';
+MULTEQ : '*=';
+DIVEQ : '/=';
+LSHIFTEQ : '<<=';
+RSHIFTEQ : '>>=';
+ANDEQ : '&=';
+OREQ : '|=';
+LSHIFT : '<<';
+RSHIFT : '>>';
+EQEQ : '==';
+EQ : '=';
+NE : '!=';
+LE : '<=';
+GE : '>=';
+ANDAND : '&&';
+OROR : '||';
+
+// Extra tokens
+COLON : ':';
+EXLAMATION : '!';
+DASH : '-';
+PLUS : '+';
+TILDE : '~';
+SLASH : '/';
+MOD : '%';
+LT : '<';
+GT : '>';
+HAT : '^';
+BAR : '|';
+COMMA : ',';
+SEMICOLON : ';';
+LPAREN : '('; +RPAREN : ')'; +STAR : '*'; +QUESTION : '?'; +AMPERSAND : '&'; +LBRACKET : '['; +RBRACKET : ']'; + +BlockComment + : '/*' .*? '*/' -> skip + ; + +WS + : [ \t\r\n]+ -> skip + ; + +mode STRING; +STRING_ANY : ~'"'; +STRING_END_QUOTE : '"' -> skip, popMode; diff --git a/GnuLdParser.g4 b/GnuLdParser.g4 new file mode 100644 index 0000000..1ed1119 --- /dev/null +++ b/GnuLdParser.g4 @@ -0,0 +1,1345 @@ +parser grammar GnuLdParser; + +options { + tokenVocab = GnuLdLexer; +} + +/* +TODO: check right associative annotations +*/ + +/* +%token INT +%token NAME LNAME +*/ + +/* +%right UNARY +%token END +%left LPAREN +*/ + + +file: + /*INPUT_SCRIPT*/ script_file + | /*INPUT_MRI_SCRIPT*/ mri_script_file + | /*INPUT_VERSION_SCRIPT*/ version_script_file + | /*INPUT_DYNAMIC_LIST*/ dynamic_list_file + | /*INPUT_DEFSYM*/ defsym_expr + ; + +filename: NAME; + + +defsym_expr: +// { ldlex_defsym(); } + NAME EQ exp +// { +// ldlex_popstate(); +// lang_add_assignment (exp_defsym ($2, $4)); +// } + ; + +/* SYNTAX WITHIN AN MRI SCRIPT FILE */ +mri_script_file: +// { +// ldlex_mri_script (); +// PUSH_ERROR (_("MRI style script")); +// } + mri_script_lines +// { +// ldlex_popstate (); +// mri_draw_tree (); +// POP_ERROR (); +// } + ; + +mri_script_lines: + mri_script_lines mri_script_command NEWLINE + | + ; + +mri_script_command: + CHIP exp + | CHIP exp COMMA exp + | NAME /*{ + einfo(_("%P%F: unrecognised keyword in MRI style script '%s'\n"),$1); + }*/ + | LIST /*{ + config.map_filename = "-"; + }*/ + | ORDER ordernamelist + | ENDWORD + | PUBLIC NAME EQ exp + // { mri_public($2, $4); } + | PUBLIC NAME COMMA exp + // { mri_public($2, $4); } + | PUBLIC NAME exp + // { mri_public($2, $3); } + | FORMAT NAME + // { mri_format($2); } + | SECT NAME COMMA exp + // { mri_output_section($2, $4);} + | SECT NAME exp + // { mri_output_section($2, $3);} + | SECT NAME EQ exp + // { mri_output_section($2, $4);} + | ALIGN_K NAME EQ exp + // { mri_align($2,$4); } + | ALIGN_K NAME COMMA exp + // { 
mri_align($2,$4); } + | ALIGNMOD NAME EQ exp + // { mri_alignmod($2,$4); } + | ALIGNMOD NAME COMMA exp + // { mri_alignmod($2,$4); } + | ABSOLUTE mri_abs_name_list + | LOAD mri_load_name_list + | NAMEWORD NAME + // { mri_name($2); } + | ALIAS NAME COMMA NAME + // { mri_alias($2,$4,0);} + | ALIAS NAME COMMA INT + // { mri_alias ($2, 0, (int) $4.integer); } + | BASE exp + // { mri_base($2); } + | TRUNCATE INT + // { mri_truncate ((unsigned int) $2.integer); } + | CASE casesymlist + | EXTERN extern_name_list + | INCLUDE filename + // { ldlex_script (); ldfile_open_command_file($2); } + mri_script_lines END + // { ldlex_popstate (); } + | START NAME + // { lang_add_entry ($2, FALSE); } + | + ; + +ordernamelist: + ordernamelist COMMA NAME // { mri_order($3); } + | ordernamelist NAME // { mri_order($2); } + | + ; + +mri_load_name_list: + NAME + // { mri_load($1); } + | mri_load_name_list COMMA NAME // { mri_load($3); } + ; + +mri_abs_name_list: + NAME + // { mri_only_load($1); } + | mri_abs_name_list COMMA NAME + // { mri_only_load($3); } + ; + +casesymlist: + /* empty */ // { $$ = NULL; } + | NAME + | casesymlist COMMA NAME + ; + +/* Parsed as expressions so that commas separate entries */ +extern_name_list: + // { ldlex_expression (); } + extern_name_list_body + // { ldlex_popstate (); } + ; + +extern_name_list_body: + NAME + // { ldlang_add_undef ($1, FALSE); } + | extern_name_list_body NAME + // { ldlang_add_undef ($2, FALSE); } + | extern_name_list_body COMMA NAME + // { ldlang_add_undef ($3, FALSE); } + ; + +script_file: + // { ldlex_both(); } + ifile_list + // { ldlex_popstate(); } + ; + +ifile_list: + ifile_list ifile_p1 + | + ; + + +ifile_p1: + memory + | sections + | phdrs + | startup + | high_level_library + | low_level_library + | floating_point_support + | statement_anywhere + | version + | SEMICOLON + | TARGET_K LPAREN NAME RPAREN + // { lang_add_target($3); } + | SEARCH_DIR LPAREN filename RPAREN + // { ldfile_add_library_path ($3, FALSE); } + | OUTPUT 
LPAREN filename RPAREN + // { lang_add_output($3, 1); } + | OUTPUT_FORMAT LPAREN NAME RPAREN + // { lang_add_output_format ($3, (char *) NULL, + // (char *) NULL, 1); } + | OUTPUT_FORMAT LPAREN NAME COMMA NAME COMMA NAME RPAREN + // { lang_add_output_format ($3, $5, $7, 1); } + | OUTPUT_ARCH LPAREN NAME RPAREN + // { ldfile_set_output_arch ($3, bfd_arch_unknown); } + | FORCE_COMMON_ALLOCATION + // { command_line.force_common_definition = TRUE ; } + | INHIBIT_COMMON_ALLOCATION + // { command_line.inhibit_common_definition = TRUE ; } + | INPUT LPAREN input_list RPAREN + | GROUP + // { lang_enter_group (); } + LPAREN input_list RPAREN + // { lang_leave_group (); } + | MAP LPAREN filename RPAREN + // { lang_add_map($3); } + | INCLUDE filename + // { ldlex_script (); ldfile_open_command_file($2); } + ifile_list END + // { ldlex_popstate (); } + | NOCROSSREFS LPAREN nocrossref_list RPAREN + // { + // lang_add_nocrossref ($3); + // } + | NOCROSSREFS_TO LPAREN nocrossref_list RPAREN + // { + // lang_add_nocrossref_to ($3); + // } + | EXTERN LPAREN extern_name_list RPAREN + | INSERT_K AFTER NAME + // { lang_add_insert ($3, 0); } + | INSERT_K BEFORE NAME + // { lang_add_insert ($3, 1); } + | REGION_ALIAS LPAREN NAME COMMA NAME RPAREN + // { lang_memory_region_alias ($3, $5); } + | LD_FEATURE LPAREN NAME RPAREN + // { lang_ld_feature ($3); } + ; + +input_list: + // { ldlex_inputlist(); } + input_list1 + // { ldlex_popstate(); } + ; + +input_list1: + NAME + // { lang_add_input_file($1,lang_input_file_is_search_file_enum, + // (char *)NULL); } + | input_list1 COMMA NAME + // { lang_add_input_file($3,lang_input_file_is_search_file_enum, + // (char *)NULL); } + | input_list1 NAME + // { lang_add_input_file($2,lang_input_file_is_search_file_enum, + // (char *)NULL); } + | LNAME + // { lang_add_input_file($1,lang_input_file_is_l_enum, + // (char *)NULL); } + | input_list1 COMMA LNAME + // { lang_add_input_file($3,lang_input_file_is_l_enum, + // (char *)NULL); } + | input_list1 
LNAME
+    // { lang_add_input_file($2,lang_input_file_is_l_enum,
+    // (char *)NULL); }
+    | AS_NEEDED LPAREN // AS_NEEDED ( list ): the nested list and the closing paren below belong to this alternative
+    // { $$ = input_flags.add_DT_NEEDED_for_regular;
+    // input_flags.add_DT_NEEDED_for_regular = TRUE; }
+    input_list1 RPAREN
+    // { input_flags.add_DT_NEEDED_for_regular = $3; }
+    | input_list1 COMMA AS_NEEDED LPAREN // same construct after a comma-separated entry
+    // { $$ = input_flags.add_DT_NEEDED_for_regular;
+    // input_flags.add_DT_NEEDED_for_regular = TRUE; }
+    input_list1 RPAREN
+    // { input_flags.add_DT_NEEDED_for_regular = $5; }
+    | input_list1 AS_NEEDED LPAREN // same construct after a whitespace-separated entry
+    // { $$ = input_flags.add_DT_NEEDED_for_regular;
+    // input_flags.add_DT_NEEDED_for_regular = TRUE; }
+    input_list1 RPAREN
+    // { input_flags.add_DT_NEEDED_for_regular = $4; }
+    ;
+
+sections: // SECTIONS { ... }
+    SECTIONS LBRACE sec_or_group_p1 RBRACE
+    ;
+
+sec_or_group_p1: // possibly empty sequence of output sections and free-standing statements
+    sec_or_group_p1 section
+    | sec_or_group_p1 statement_anywhere
+    |
+    ;
+
+statement_anywhere: // statements accepted both at file level (ifile_p1) and inside SECTIONS
+    ENTRY LPAREN NAME RPAREN
+    // { lang_add_entry ($3, FALSE); }
+    | assignment end
+    | ASSERT_K /*{ldlex_expression ();}*/ LPAREN exp COMMA string RPAREN
+    // { ldlex_popstate ();
+    // lang_add_assignment (exp_assert ($4, $6)); }
+    ;
+
+/* The '*' and '?' cases are there because the lexer returns them as
+   separate tokens rather than as name.
*/ +wildcard_name: + NAME + // { + // $$ = $1; + // } + | STAR + // { + // $$ = "*"; + // } + | QUESTION + // { + // $$ = "?"; + // } + ; + +wildcard_spec: + wildcard_name +// { +// $$.name = $1; +// $$.sorted = none; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | EXCLUDE_FILE LPAREN exclude_name_list RPAREN wildcard_name +// { +// $$.name = $5; +// $$.sorted = none; +// $$.exclude_name_list = $3; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_name; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_ALIGNMENT LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_alignment; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_NONE LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_none; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN SORT_BY_ALIGNMENT LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_name_alignment; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN SORT_BY_NAME LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_name; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_ALIGNMENT LPAREN SORT_BY_NAME LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_alignment_name; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_ALIGNMENT LPAREN SORT_BY_ALIGNMENT LPAREN wildcard_name RPAREN RPAREN +// { +// $$.name = $5; +// $$.sorted = by_alignment; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + | SORT_BY_NAME LPAREN EXCLUDE_FILE LPAREN exclude_name_list RPAREN wildcard_name RPAREN +// { +// $$.name = $7; +// $$.sorted = by_name; +// $$.exclude_name_list = $5; +// $$.section_flag_list = NULL; +// } 
+ | SORT_BY_INIT_PRIORITY LPAREN wildcard_name RPAREN +// { +// $$.name = $3; +// $$.sorted = by_init_priority; +// $$.exclude_name_list = NULL; +// $$.section_flag_list = NULL; +// } + ; + +sect_flag_list: NAME +// { +// struct flag_info_list *n; +// n = ((struct flag_info_list *) xmalloc (sizeof *n)); +// if ($1[0] == '!') +// { +// n->with = without_flags; +// n->name = &$1[1]; +// } +// else +// { +// n->with = with_flags; +// n->name = $1; +// } +// n->valid = FALSE; +// n->next = NULL; +// $$ = n; +// } + | sect_flag_list AMPERSAND NAME +// { +// struct flag_info_list *n; +// n = ((struct flag_info_list *) xmalloc (sizeof *n)); +// if ($3[0] == '!') +// { +// n->with = without_flags; +// n->name = &$3[1]; +// } +// else +// { +// n->with = with_flags; +// n->name = $3; +// } +// n->valid = FALSE; +// n->next = $1; +// $$ = n; +// } + ; + +sect_flags: + /*not used by antlr: INPUT_SECTION_FLAGS*/ LPAREN sect_flag_list RPAREN +// { +// struct flag_info *n; +// n = ((struct flag_info *) xmalloc (sizeof *n)); +// n->flag_list = $3; +// n->flags_initialized = FALSE; +// n->not_with_flags = 0; +// n->only_with_flags = 0; +// $$ = n; +// } + ; + +exclude_name_list: + exclude_name_list wildcard_name +// { +// struct name_list *tmp; +// tmp = (struct name_list *) xmalloc (sizeof *tmp); +// tmp->name = $2; +// tmp->next = $1; +// $$ = tmp; +// } + | + wildcard_name +// { +// struct name_list *tmp; +// tmp = (struct name_list *) xmalloc (sizeof *tmp); +// tmp->name = $1; +// tmp->next = NULL; +// $$ = tmp; +// } + ; + +file_name_list: + file_name_list opt_comma wildcard_spec +// { +// struct wildcard_list *tmp; +// tmp = (struct wildcard_list *) xmalloc (sizeof *tmp); +// tmp->next = $1; +// tmp->spec = $3; +// $$ = tmp; +// } + | + wildcard_spec +// { +// struct wildcard_list *tmp; +// tmp = (struct wildcard_list *) xmalloc (sizeof *tmp); +// tmp->next = NULL; +// tmp->spec = $1; +// $$ = tmp; +// } + ; + +input_section_spec_no_keep: + NAME +// { +// struct 
wildcard_spec tmp; +// tmp.name = $1; +// tmp.exclude_name_list = NULL; +// tmp.sorted = none; +// tmp.section_flag_list = NULL; +// lang_add_wild (&tmp, NULL, ldgram_had_keep); +// } + | sect_flags NAME +// { +// struct wildcard_spec tmp; +// tmp.name = $2; +// tmp.exclude_name_list = NULL; +// tmp.sorted = none; +// tmp.section_flag_list = $1; +// lang_add_wild (&tmp, NULL, ldgram_had_keep); +// } + | LBRACKET file_name_list RBRACKET +// { +// lang_add_wild (NULL, $2, ldgram_had_keep); +// } + | sect_flags LBRACKET file_name_list RBRACKET +// { +// struct wildcard_spec tmp; +// tmp.name = NULL; +// tmp.exclude_name_list = NULL; +// tmp.sorted = none; +// tmp.section_flag_list = $1; +// lang_add_wild (&tmp, $3, ldgram_had_keep); +// } + | wildcard_spec LPAREN file_name_list RPAREN +// { +// lang_add_wild (&$1, $3, ldgram_had_keep); +// } + | sect_flags wildcard_spec LPAREN file_name_list RPAREN +// { +// $2.section_flag_list = $1; +// lang_add_wild (&$2, $4, ldgram_had_keep); +// } + ; + +input_section_spec: + input_section_spec_no_keep + | KEEP LPAREN +// { ldgram_had_keep = TRUE; } + input_section_spec_no_keep RPAREN +// { ldgram_had_keep = FALSE; } + ; + +statement: + assignment end + | CREATE_OBJECT_SYMBOLS +// { +// lang_add_attribute(lang_object_symbols_statement_enum); +// } + | SEMICOLON + | CONSTRUCTORS +// { +// +// lang_add_attribute(lang_constructors_statement_enum); +// } + | SORT_BY_NAME LPAREN CONSTRUCTORS RPAREN +// { +// constructors_sorted = TRUE; +// lang_add_attribute (lang_constructors_statement_enum); +// } + | input_section_spec + | length LPAREN mustbe_exp RPAREN +// { +// lang_add_data ((int) $1, $3); +// } + + | FILL LPAREN fill_exp RPAREN +// { +// lang_add_fill ($3); +// } + | ASSERT_K /*{ldlex_expression ();}*/ LPAREN exp COMMA NAME RPAREN end +// { ldlex_popstate (); +// lang_add_assignment (exp_assert ($4, $6)); } + | INCLUDE filename +// { ldlex_script (); ldfile_open_command_file($2); } + statement_list_opt END +// { ldlex_popstate 
(); } + ; + +statement_list: + statement_list statement + | statement + ; + +statement_list_opt: + /* empty */ + | statement_list + ; + +length: + QUAD +// { $$ = $1; } + | SQUAD +// { $$ = $1; } + | LONG +// { $$ = $1; } + | SHORT +// { $$ = $1; } + | BYTE +// { $$ = $1; } + ; + +fill_exp: + mustbe_exp +// { +// $$ = exp_get_fill ($1, 0, "fill value"); +// } + ; + +fill_opt: + EQ fill_exp + // { $$ = $2; } + | // { $$ = (fill_type *) 0; } + ; + +assign_op: + PLUSEQ +// { $$ = '+'; } + | MINUSEQ +// { $$ = '-'; } + | MULTEQ +// { $$ = '*'; } + | DIVEQ +// { $$ = '/'; } + | LSHIFTEQ +// { $$ = LSHIFT; } + | RSHIFTEQ +// { $$ = RSHIFT; } + | ANDEQ +// { $$ = '&'; } + | OREQ +// { $$ = '|'; } + + ; + +end: SEMICOLON | COMMA + ; + + +assignment: + NAME EQ mustbe_exp +// { +// lang_add_assignment (exp_assign ($1, $3, FALSE)); +// } + | NAME assign_op mustbe_exp +// { +// lang_add_assignment (exp_assign ($1, +// exp_binop ($2, +// exp_nameop (NAME, +// $1), +// $3), FALSE)); +// } + | HIDDEN_ LPAREN NAME EQ mustbe_exp RPAREN +// { +// lang_add_assignment (exp_assign ($3, $5, TRUE)); +// } + | PROVIDE LPAREN NAME EQ mustbe_exp RPAREN +// { +// lang_add_assignment (exp_provide ($3, $5, FALSE)); +// } + | PROVIDE_HIDDEN LPAREN NAME EQ mustbe_exp RPAREN +// { +// lang_add_assignment (exp_provide ($3, $5, TRUE)); +// } + ; + + +opt_comma: + COMMA | ; + + +memory: + MEMORY LBRACE memory_spec_list_opt RBRACE + ; + +memory_spec_list_opt: memory_spec_list | ; + +memory_spec_list: + memory_spec_list opt_comma memory_spec + | memory_spec + ; + + +memory_spec: NAME +// { region = lang_memory_region_lookup ($1, TRUE); } + attributes_opt COLON + origin_spec opt_comma length_spec +// {} + | INCLUDE filename +// { ldlex_script (); ldfile_open_command_file($2); } + memory_spec_list_opt END +// { ldlex_popstate (); } + ; + +origin_spec: + ORIGIN EQ mustbe_exp +// { +// region->origin_exp = $3; +// region->current = region->origin; +// } + ; + +length_spec: + LENGTH EQ mustbe_exp +// { +// 
region->length_exp = $3;
+// }
+    ;
+
+attributes_opt: // optional "(attrs)" between a MEMORY region name and the colon
+    /* empty */
+    /* { *//* dummy action to avoid bison 1.25 error message *//* } */
+    | LPAREN attributes_list RPAREN // the '|' has to live outside the comment, otherwise the empty alternative above is lost
+    ;
+
+attributes_list: // one or more attribute words
+    attributes_string
+    | attributes_list attributes_string
+    ;
+
+attributes_string: // a plain attribute word, or one prefixed with '!'
+    NAME # attributeNormal
+// { lang_set_flags (region, $1, 0); }
+    | EXLAMATION NAME # attributeInverted
+// { lang_set_flags (region, $2, 1); }
+    ;
+
+/*
+This would be best but the tokenizer would have to be made context sensitive which is too much work given how
+easy it is to check the flags after parsing.
+
+attributes_string :
+    attribute
+    | '!' attribute # attributeInverted
+    ;
+
+attribute: 'r' | 'w' | 'x' | 'a' | 'i' | 'l' ;
+*/
+startup: // STARTUP ( filename )
+    STARTUP LPAREN filename RPAREN
+// { lang_startup($3); }
+    ;
+
+high_level_library: // HLL ( [filename [,] filename ...] )
+    HLL LPAREN high_level_library_NAME_list RPAREN
+    | HLL LPAREN RPAREN
+// { ldemul_hll((char *)NULL); }
+    ;
+
+high_level_library_NAME_list: // at least one filename, commas optional
+    high_level_library_NAME_list opt_comma filename
+// { ldemul_hll($3); }
+    | filename
+// { ldemul_hll($1); }
+
+    ;
+
+low_level_library: // SYSLIB ( [filename ...] )
+    SYSLIB LPAREN low_level_library_NAME_list RPAREN
+    ; low_level_library_NAME_list: // possibly empty filename list, commas optional
+    low_level_library_NAME_list opt_comma filename
+// { ldemul_syslib($3); }
+    |
+    ;
+
+floating_point_support: // FLOAT or NOFLOAT keyword
+    FLOAT
+// { lang_float(TRUE); }
+    | NOFLOAT
+// { lang_float(FALSE); }
+    ;
+
+nocrossref_list: // possibly empty list of names, commas optional
+    /* empty */
+// {
+// $$ = NULL;
+// }
+    | NAME nocrossref_list
+// {
+// struct lang_nocrossref *n;
+//
+// n = (struct lang_nocrossref *) xmalloc (sizeof *n);
+// n->name = $1;
+// n->next = $2;
+// $$ = n;
+// }
+    | NAME COMMA nocrossref_list
+// {
+// struct lang_nocrossref *n;
+//
+// n = (struct lang_nocrossref *) xmalloc (sizeof *n);
+// n->name = $1;
+// n->next = $3;
+// $$ = n;
+// }
+    ;
+
+mustbe_exp: // { ldlex_expression (); }
+    exp
+    // { ldlex_popstate (); $$=$2;}
+    ;
+
+exp :
+    DASH exp # expNegate // TODO: %prec UNARY
+// { $$ = exp_unop ('-', $2); }
+    | LPAREN exp RPAREN # expParen
+// { $$ = $2; } + | NEXT LPAREN exp RPAREN # expNextParen // TODO: %prec UNARY +// { $$ = exp_unop ((int) $1,$3); } + | EXLAMATION exp # expInvert // TODO: %prec UNARY +// { $$ = exp_unop ('!', $2); } + | PLUS exp # expPlus // TODO: %prec UNARY +// { $$ = $2; } + | TILDE exp # expMinus // TODO: %prec UNARY +// { $$ = exp_unop ('~', $2);} + | exp STAR exp # expMul +// { $$ = exp_binop ('*', $1, $3); } + | exp SLASH exp # expDiv +// { $$ = exp_binop ('/', $1, $3); } + | exp MOD exp # expMod +// { $$ = exp_binop ('%', $1, $3); } + | exp PLUS exp # expAdd +// { $$ = exp_binop ('+', $1, $3); } + | exp DASH exp # expSub +// { $$ = exp_binop ('-' , $1, $3); } + | exp LSHIFT exp # expLshift +// { $$ = exp_binop (LSHIFT , $1, $3); } + | exp RSHIFT exp # expRshift +// { $$ = exp_binop (RSHIFT , $1, $3); } + | exp EQEQ exp # expEq +// { $$ = exp_binop (EQ , $1, $3); } + | exp NE exp # expNe +// { $$ = exp_binop (NE , $1, $3); } + | exp LE exp # expLe +// { $$ = exp_binop (LE , $1, $3); } + | exp GE exp # expGe +// { $$ = exp_binop (GE , $1, $3); } + | exp LT exp # expLt +// { $$ = exp_binop ('<' , $1, $3); } + | exp GT exp # expGt +// { $$ = exp_binop ('>' , $1, $3); } + | exp AMPERSAND exp # expAnd +// { $$ = exp_binop ('&' , $1, $3); } + | exp HAT exp # expXor +// { $$ = exp_binop ('^' , $1, $3); } + | exp BAR exp # expOr +// { $$ = exp_binop ('|' , $1, $3); } + | exp QUESTION exp COLON exp # expTrinary +// { $$ = exp_trinop ('?' 
, $1, $3, $5); } + | exp ANDAND exp # expAndand +// { $$ = exp_binop (ANDAND , $1, $3); } + | exp OROR exp # expOror +// { $$ = exp_binop (OROR , $1, $3); } + | DEFINED LPAREN NAME RPAREN # expDefined +// { $$ = exp_nameop (DEFINED, $3); } + | INT # expInt +// { $$ = exp_bigintop ($1.integer, $1.str); } + | SIZEOF_HEADERS # expSizeofHeaders +// { $$ = exp_nameop (SIZEOF_HEADERS,0); } + | ALIGNOF LPAREN NAME RPAREN # expAlignof +// { $$ = exp_nameop (ALIGNOF,$3); } + | SIZEOF LPAREN NAME RPAREN # expSizeof +// { $$ = exp_nameop (SIZEOF,$3); } + | ADDR LPAREN NAME RPAREN # expAddr +// { $$ = exp_nameop (ADDR,$3); } + | LOADADDR LPAREN NAME RPAREN # expLoadaddr +// { $$ = exp_nameop (LOADADDR,$3); } + | CONSTANT LPAREN NAME RPAREN # expConstant +// { $$ = exp_nameop (CONSTANT,$3); } + | ABSOLUTE LPAREN exp RPAREN # expAbsolute +// { $$ = exp_unop (ABSOLUTE, $3); } + | ALIGN_K LPAREN exp RPAREN # expAlign +// { $$ = exp_unop (ALIGN_K,$3); } + | ALIGN_K LPAREN exp COMMA exp RPAREN # expAlignK +// { $$ = exp_binop (ALIGN_K,$3,$5); } + | DATA_SEGMENT_ALIGN LPAREN exp COMMA exp RPAREN # expDataSegmentAlign +// { $$ = exp_binop (DATA_SEGMENT_ALIGN, $3, $5); } + | DATA_SEGMENT_RELRO_END LPAREN exp COMMA exp RPAREN # expDataSegmentRelRoEnd +// { $$ = exp_binop (DATA_SEGMENT_RELRO_END, $5, $3); + | DATA_SEGMENT_END LPAREN exp RPAREN # expDataSegmentEnd +// { $$ = exp_unop (DATA_SEGMENT_END, $3); } + | SEGMENT_START LPAREN NAME COMMA exp RPAREN # expSegmentStart +// { /* The operands to the expression node are +// placed in the opposite order from the way +// in which they appear in the script as +// that allows us to reuse more code in +// fold_binary. 
*/ +// $$ = exp_binop (SEGMENT_START, +// $5, +// exp_nameop (NAME, $3)); } + | BLOCK LPAREN exp RPAREN # expBlock +// { $$ = exp_unop (ALIGN_K,$3); } + | NAME # expName +// { $$ = exp_nameop (NAME,$1); } + | MAX_K LPAREN exp COMMA exp RPAREN # expMax +// { $$ = exp_binop (MAX_K, $3, $5 ); } + | MIN_K LPAREN exp COMMA exp RPAREN # expMin +// { $$ = exp_binop (MIN_K, $3, $5 ); } + | ASSERT_K LPAREN exp COMMA NAME RPAREN # expAssert +// { $$ = exp_assert ($3, $5); } + | ORIGIN LPAREN NAME RPAREN # expOrigin +// { $$ = exp_nameop (ORIGIN, $3); } + | LENGTH LPAREN NAME RPAREN # expLengthExp +// { $$ = exp_nameop (LENGTH, $3); } + | LOG2CEIL LPAREN exp RPAREN # expLog2ceil +// { $$ = exp_unop (LOG2CEIL, $3); } + ; + + +memspec_at_opt: + AT GT NAME // { $$ = $3; } + | // { $$ = 0; } + ; + +opt_at: + AT LPAREN exp RPAREN // { $$ = $3; } + | // { $$ = 0; } + ; + +opt_align: + ALIGN_K LPAREN exp RPAREN // { $$ = $3; } + | // { $$ = 0; } + ; + +opt_align_with_input: + ALIGN_WITH_INPUT // { $$ = ALIGN_WITH_INPUT; } + | // { $$ = 0; } + ; + +opt_subalign: + SUBALIGN LPAREN exp RPAREN // { $$ = $3; } + | // { $$ = 0; } + ; + +sect_constraint: + ONLY_IF_RO // { $$ = ONLY_IF_RO; } + | ONLY_IF_RW // { $$ = ONLY_IF_RW; } + | SPECIAL // { $$ = SPECIAL; } + | // { $$ = 0; } + ; + +section: NAME // { ldlex_expression(); } + opt_exp_with_type + opt_at + opt_align + opt_align_with_input + opt_subalign // { ldlex_popstate (); ldlex_script (); } + sect_constraint + LBRACE +// { +// lang_enter_output_section_statement($1, $3, +// sectype, +// $5, $7, $4, $9, $6); +// } + statement_list_opt + RBRACE // { ldlex_popstate (); ldlex_expression (); } + memspec_opt memspec_at_opt phdr_opt fill_opt +// { +// ldlex_popstate (); +// lang_leave_output_section_statement ($18, $15, $17, $16); +// } + opt_comma + {} + | OVERLAY +// { ldlex_expression (); } + opt_exp_without_type opt_nocrossrefs opt_at opt_subalign +// { ldlex_popstate (); ldlex_script (); } + LBRACE +// { +// lang_enter_overlay ($3, 
$6); +// } + overlay_section + RBRACE +// { ldlex_popstate (); ldlex_expression (); } + memspec_opt memspec_at_opt phdr_opt fill_opt +// { +// ldlex_popstate (); +// lang_leave_overlay ($5, (int) $4, +// $16, $13, $15, $14); +// } + opt_comma + | /* The GROUP case is just enough to support the gcc + svr3.ifile script. It is not intended to be full + support. I'm not even sure what GROUP is supposed + to mean. */ + GROUP // { ldlex_expression (); } + opt_exp_with_type +// { +// ldlex_popstate (); +// lang_add_assignment (exp_assign (".", $3, FALSE)); +// } + LBRACE sec_or_group_p1 RBRACE + | INCLUDE filename + // { ldlex_script (); ldfile_open_command_file($2); } + sec_or_group_p1 END + // { ldlex_popstate (); } + ; + +type: + NOLOAD // { sectype = noload_section; } + | DSECT // { sectype = noalloc_section; } + | COPY // { sectype = noalloc_section; } + | INFO // { sectype = noalloc_section; } + | OVERLAY // { sectype = noalloc_section; } + ; + +atype: + LPAREN type RPAREN + | /* EMPTY */ // { sectype = normal_section; } + | LPAREN RPAREN // { sectype = normal_section; } + ; + +opt_exp_with_type: + exp atype COLON // { $$ = $1; } + | atype COLON // { $$ = (etree_type *)NULL; } + | /* The BIND cases are to support the gcc svr3.ifile + script. They aren't intended to implement full + support for the BIND keyword. I'm not even sure + what BIND is supposed to mean. 
*/
+    BIND LPAREN exp RPAREN atype COLON // { $$ = $3; }
+    | BIND LPAREN exp RPAREN BLOCK LPAREN exp RPAREN atype COLON
+    // { $$ = $3; }
+    ;
+
+opt_exp_without_type: // an optional expression, always closed by COLON
+    exp COLON // { $$ = $1; }
+    | COLON // { $$ = (etree_type *) NULL; }
+    ;
+
+opt_nocrossrefs: // the optional NOCROSSREFS keyword
+    /* empty */
+// { $$ = 0; }
+    | NOCROSSREFS
+// { $$ = 1; }
+    ;
+
+memspec_opt: // optional "GT NAME"; the commented-out action defaulted to DEFAULT_MEMORY_REGION
+    GT NAME
+    // { $$ = $2; }
+    | // { $$ = DEFAULT_MEMORY_REGION; }
+    ;
+
+phdr_opt: // zero or more "COLON NAME" pairs, written left-recursively
+    /* empty */
+// {
+// $$ = NULL;
+// }
+    | phdr_opt COLON NAME
+// {
+// struct lang_output_section_phdr_list *n;
+//
+// n = ((struct lang_output_section_phdr_list *)
+// xmalloc (sizeof *n));
+// n->name = $3;
+// n->used = FALSE;
+// n->next = $1;
+// $$ = n;
+// }
+    ;
+
+overlay_section: // zero or more "NAME LBRACE statement_list_opt RBRACE phdr_opt fill_opt opt_comma" entries
+    /* empty */
+    | overlay_section
+    NAME
+// {
+// ldlex_script ();
+// lang_enter_overlay_section ($2);
+// }
+    LBRACE statement_list_opt RBRACE
+// { ldlex_popstate (); ldlex_expression (); }
+    phdr_opt fill_opt
+// {
+// ldlex_popstate ();
+// lang_leave_overlay_section ($9, $8);
+// }
+    opt_comma
+    ;
+
+phdrs: // the PHDRS command: a braced phdr_list
+    PHDRS LBRACE phdr_list RBRACE
+    ;
+
+phdr_list: // zero or more phdr entries
+    /* empty */
+    | phdr_list phdr
+    ;
+
+phdr: // one entry: NAME, type, qualifiers, SEMICOLON
+    NAME // { ldlex_expression (); }
+    phdr_type phdr_qualifiers // { ldlex_popstate (); }
+    SEMICOLON
+// {
+// lang_new_phdr ($1, $3, $4.filehdr, $4.phdrs, $4.at,
+// $4.flags);
+// }
+    ;
+
+phdr_type: // any exp; the commented-out action below mapped PT_* names to numbers
+    exp
+// {
+// $$ = $1;
+//
+// if ($1->type.node_class == etree_name
+// && $1->type.node_code ==name)
+// {
+// const char *s;
+// unsigned int i;
+// static const char * const phdr_types[] =
+// {
+// "PT_NULL", "PT_LOAD", "PT_DYNAMIC",
+// "PT_INTERP", "PT_NOTE", "PT_SHLIB",
+// "PT_PHDR", "PT_TLS"
+// };
+//
+// s = $1->name.name;
+// for (i = 0;
+// i < sizeof phdr_types / sizeof phdr_types[0];
+// i++)
+// if (strcmp (s, phdr_types[i]) == 0)
+// {
+// $$ = exp_intop (i);
+// break;
+// }
+// if (i == sizeof phdr_types / sizeof phdr_types[0])
+// {
+// if (strcmp (s, "PT_GNU_EH_FRAME") == 0)
+// $$ = exp_intop (0x6474e550);
+// else if (strcmp (s, "PT_GNU_STACK") == 0)
+// $$ = exp_intop (0x6474e551);
+// else
+// {
+// einfo (_("\
+//%X%P:%S: unknown phdr type `%s' (try integer literal)\n"),
+// NULL, s);
+// $$ = exp_intop (0);
+// }
+// }
+// }
+// }
+    ;
+
+phdr_qualifiers: // zero or more "NAME phdr_val" or "AT LPAREN exp RPAREN" qualifiers
+    /* empty */
+// {
+// memset (&$$, 0, sizeof (struct phdr_info));
+// }
+    | NAME phdr_val phdr_qualifiers
+// {
+// $$ = $3;
+// if (strcmp ($1, "FILEHDR") == 0 && $2 == NULL)
+// $$.filehdr = TRUE;
+// else if (strcmp ($1, "PHDRS") == 0 && $2 == NULL)
+// $$.phdrs = TRUE;
+// else if (strcmp ($1, "FLAGS") == 0 && $2 != NULL)
+// $$.flags = $2;
+// else
+// einfo (_("%X%P:%S: PHDRS syntax error at `%s'\n"),
+// NULL, $1);
+// }
+    | AT LPAREN exp RPAREN phdr_qualifiers
+// {
+// $$ = $5;
+// $$.at = $3;
+// }
+    ;
+
+phdr_val: // an optional parenthesised expression
+    /* empty */
+// {
+// $$ = NULL;
+// }
+    | LPAREN exp RPAREN
+// {
+// $$ = $2;
+// }
+    ;
+
+dynamic_list_file: // one or more dynamic_list_node entries
+// {
+// ldlex_version_file ();
+// PUSH_ERROR (_("dynamic list"));
+// }
+    dynamic_list_nodes
+// {
+// ldlex_popstate ();
+// POP_ERROR ();
+// }
+    ;
+
+dynamic_list_nodes:
+    dynamic_list_node
+    | dynamic_list_nodes dynamic_list_node
+    ;
+
+dynamic_list_node: // a braced dynamic_list_tag followed by SEMICOLON
+    LBRACE dynamic_list_tag RBRACE SEMICOLON
+    ;
+
+dynamic_list_tag:
+    vers_defns SEMICOLON
+// {
+// lang_append_dynamic_list ($1);
+// }
+    ;
+
+/* This syntax is used within an external version script file. */
+
+version_script_file:
+// {
+// ldlex_version_file ();
+// PUSH_ERROR (_("VERSION script"));
+// }
+    vers_nodes
+// {
+// ldlex_popstate ();
+// POP_ERROR ();
+// }
+    ;
+
+/* This is used within a normal linker script file. */
+
+version:
+// {
+// ldlex_version_script ();
+// }
+    VERSIONK LBRACE vers_nodes RBRACE
+// {
+// ldlex_popstate ();
+// }
+    ;
+
+vers_nodes: // one or more vers_node entries
+    vers_node
+    | vers_nodes vers_node
+    ;
+
+vers_node: // a braced vers_tag, optionally named by VERS_TAG and followed by a verdep list
+    LBRACE vers_tag RBRACE SEMICOLON
+// {
+// lang_register_vers_node (NULL, $2, NULL);
+// }
+    | VERS_TAG LBRACE vers_tag RBRACE SEMICOLON
+// {
+// lang_register_vers_node ($1, $3, NULL);
+// }
+    | VERS_TAG LBRACE vers_tag RBRACE verdep SEMICOLON
+// {
+// lang_register_vers_node ($1, $3, $5);
+// }
+    ;
+
+verdep: // one or more VERS_TAG tokens
+    VERS_TAG
+// {
+// $$ = lang_add_vers_depend (NULL, $1);
+// }
+    | verdep VERS_TAG
+// {
+// $$ = lang_add_vers_depend ($1, $2);
+// }
+    ;
+
+vers_tag: // empty, a bare vers_defns list, or lists introduced by "GLOBAL COLON" / "LOCAL COLON"
+    /* empty */
+// {
+// $$ = lang_new_vers_node (NULL, NULL);
+// }
+    | vers_defns SEMICOLON
+// {
+// $$ = lang_new_vers_node ($1, NULL);
+// }
+    | GLOBAL COLON vers_defns SEMICOLON
+// {
+// $$ = lang_new_vers_node ($3, NULL);
+// }
+    | LOCAL COLON vers_defns SEMICOLON
+// {
+// $$ = lang_new_vers_node (NULL, $3);
+// }
+    | GLOBAL COLON vers_defns SEMICOLON LOCAL COLON vers_defns SEMICOLON
+// {
+// $$ = lang_new_vers_node ($3, $7);
+// }
+    ;
+
+vers_defns: // SEMICOLON-separated patterns; GLOBAL, LOCAL and EXTERN are also accepted as plain patterns
+    VERS_IDENTIFIER
+// {
+// $$ = lang_new_vers_pattern (NULL, $1, ldgram_vers_current_lang, FALSE);
+// }
+    | NAME
+// {
+// $$ = lang_new_vers_pattern (NULL, $1, ldgram_vers_current_lang, TRUE);
+// }
+    | vers_defns SEMICOLON VERS_IDENTIFIER
+// {
+// $$ = lang_new_vers_pattern ($1, $3, ldgram_vers_current_lang, FALSE);
+// }
+    | vers_defns SEMICOLON NAME
+// {
+// $$ = lang_new_vers_pattern ($1, $3, ldgram_vers_current_lang, TRUE);
+// }
+    | vers_defns SEMICOLON EXTERN NAME LBRACE
+// {
+// $$ = ldgram_vers_current_lang;
+// ldgram_vers_current_lang = $4;
+// }
+    vers_defns opt_semicolon RBRACE
+// {
+// struct bfd_elf_version_expr *pat;
+// for (pat = $7; pat->next != NULL; pat = pat->next);
+// pat->next = $1;
+// $$ = $7;
+// ldgram_vers_current_lang = $6;
+// }
+    | EXTERN NAME LBRACE
+// {
+// $$ = ldgram_vers_current_lang;
+// ldgram_vers_current_lang = $2;
+// }
+    vers_defns opt_semicolon RBRACE
+// {
+// $$ = $5;
+// ldgram_vers_current_lang = $4;
+// }
+    | GLOBAL
+// {
+// $$ = lang_new_vers_pattern (NULL, "global", ldgram_vers_current_lang, FALSE);
+// }
+    | vers_defns SEMICOLON GLOBAL
+// {
+// $$ = lang_new_vers_pattern ($1, "global", ldgram_vers_current_lang, FALSE);
+// }
+    | LOCAL
+// {
+// $$ = lang_new_vers_pattern (NULL, "local", ldgram_vers_current_lang, FALSE);
+// }
+    | vers_defns SEMICOLON LOCAL
+// {
+// $$ = lang_new_vers_pattern ($1, "local", ldgram_vers_current_lang, FALSE);
+// }
+    | EXTERN
+// {
+// $$ = lang_new_vers_pattern (NULL, "extern", ldgram_vers_current_lang, FALSE);
+// }
+    | vers_defns SEMICOLON EXTERN
+// {
+// $$ = lang_new_vers_pattern ($1, "extern", ldgram_vers_current_lang, FALSE);
+// }
+    ;
+
+opt_semicolon: // an optional SEMICOLON
+    /* empty */
+    | SEMICOLON
+    ;
+
+//name
+// : NAME;
+
+string: STRING_ANY*; // zero or more STRING_ANY tokens
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6744d1e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,23 @@
+# elfinfo - Extract info from ELF files
+
+# Building
+
+This code currently depends on experimental patches for Antlr4's C++ runtime, which has to be built first. This should
+more or less do it:
+
+    git clone https://github.com/trygvis/antlr4
+    cd antlr4
+    mkdir build
+    cd build
+    cmake -DCMAKE_INSTALL_PREFIX=$HOME/opt/antlr-cpp ..
+    make
+    make install
+
+This will build and install Antlr4 into $HOME/opt/antlr-cpp.
+
+To build this code follow a similar approach:
+
+    mkdir build
+    cmake ..
-DAntlr4_DIR=$HOME/opt/antlr-cpp/lib/cmake/Antlr4
+    make
+    make install
diff --git a/elfinfo.cpp b/elfinfo.cpp
new file mode 100644
index 0000000..a0138e2
--- /dev/null
+++ b/elfinfo.cpp
@@ -0,0 +1,296 @@
+// NOTE(review): the header names on the system #include lines were stripped
+// when this patch was exported; the list below is reconstructed from the
+// identifiers this file uses. Confirm it against the repository.
+#include <algorithm>
+#include <cerrno>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+#include <err.h>
+#include <fcntl.h>
+#include <gelf.h>
+#include <libelf.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "ld.h"
+
+using std::vector;
+
+/// Kind of memory region a section describes.
+enum class SectionType {
+    TEXT, DATA
+};
+
+/// Returns the lower-case display name of a section type.
+const char *to_str(const SectionType &type) {
+    switch (type) {
+        case SectionType::TEXT:
+            return "text";
+        case SectionType::DATA:
+            return "data";
+    }
+    // Only reached for a value outside the enumeration; without this return
+    // control would flow off the end of a non-void function.
+    return "unknown";
+}
+
+/// A memory region [start, end) given on the command line, and the number of
+/// bytes of it that PT_LOAD program headers have been matched to.
+struct Section {
+    SectionType type;
+    const Elf64_Addr start;
+    const Elf64_Addr end;
+    const Elf64_Xword size;
+    Elf64_Addr used;
+
+    // Initialisers are written in declaration order, the order they run in.
+    Section(SectionType type, Elf64_Addr start, Elf64_Xword size)
+            : type(type), start(start), end(start + size), size(size), used(0) {
+    }
+
+    /// True when the whole range [address, address + size) lies inside the
+    /// section. The range end is exclusive, so it may equal the section end;
+    /// comparing size with the room left avoids wrapping address + size.
+    bool contains(Elf64_Addr address, Elf64_Xword size) const {
+        return contains(address) && size <= end - address;
+    }
+
+    /// True when address lies in [start, end).
+    bool contains(Elf64_Addr address) const {
+        return start <= address && address < end;
+    }
+};
+
+vector<Section> sections;
+
+char *filename = nullptr;
+
+char *ld_filename = nullptr;
+
+char *program;
+
+/// Prints the optional reason and the usage text, then exits with EX_USAGE.
+__attribute__((noreturn))
+void usage(const char *reason = nullptr) {
+    if (reason) {
+        fprintf(stderr, "%s\n", reason);
+    }
+    fprintf(stderr, "usage: %s -f file [-D] [-l ld] [-t start:size] [-d start:size]\n", program);
+    fprintf(stderr, "  -t/-d: add text/data section\n");
+    fprintf(stderr, "  -D: print debug output\n");
+    fprintf(stderr, "At least one section or a ld file has to be specified\n");
+    exit(EX_USAGE);
+}
+
+/// Parses "start:size" in place (the ':' is overwritten with a NUL). Both
+/// numbers may be decimal, 0x-prefixed hex or 0-prefixed octal; size may end
+/// in k/K (x 1024) or m/M (x 1024 * 1024). Malformed input ends in usage().
+void parse_start_size(char *input, Elf64_Addr &start, Elf64_Xword &size) {
+    char *str_size = strchr(input, ':');
+
+    if (!str_size) {
+        usage("bad section specification, missing ':'");
+    }
+
+    *str_size = '\0';
+    str_size++;
+
+    char *rest = nullptr;
+    errno = 0;
+    start = strtoull(input, &rest, 0);
+    if (rest == input || *rest != '\0' || errno != 0) {
+        usage("bad section specification, could not parse start number");
+    }
+
+    if (*str_size == '\0') {
+        usage("bad section specification");
+    }
+
+    errno = 0;
+    size = strtoull(str_size, &rest, 0);
+    if (rest == str_size || errno != 0) {
+        usage("bad section specification, could not parse size number");
+    }
+
+    // The modifier is whatever follows the number, so a hex size that ends
+    // in a letter (0x1f) is not mistaken for an unknown modifier.
+    Elf64_Xword modifier = 1;
+    switch (*rest) {
+        case '\0':
+            break;
+        case 'k':
+        case 'K':
+            modifier = 1024;
+            rest++;
+            break;
+        case 'm':
+        case 'M':
+            modifier = 1024 * 1024;
+            rest++;
+            break;
+        default:
+            usage("bad size modifier, only 'k' and 'M' are allowed");
+    }
+    if (*rest != '\0') {
+        usage("bad size modifier, only 'k' and 'M' are allowed");
+    }
+    size = size * modifier;
+}
+
+bool debug = false;
+
+/// Fills the option globals from the command line. Ends in usage() when an
+/// option is invalid, -f is missing, or neither a section nor -l was given.
+void parse_args(int argc, char **argv) {
+    int c;
+
+    while ((c = getopt(argc, argv, "Df:l:t:d:")) != -1) {
+        switch (c) {
+            case 'D':
+                debug = true;
+                break;
+            case 't':
+            case 'd': {
+                Elf64_Addr start;
+                Elf64_Xword size;
+                parse_start_size(optarg, start, size);
+                SectionType type = c == 't' ? SectionType::TEXT : SectionType::DATA;
+                sections.push_back(Section(type, start, size));
+                break;
+            }
+            case 'f':
+                filename = optarg;
+                break;
+            case 'l':
+                ld_filename = optarg;
+                break;
+            default:
+                // '?': getopt() has already reported the unknown option or
+                // the missing argument on stderr.
+                usage();
+        }
+    }
+
+    if (!filename || (sections.empty() && !ld_filename)) {
+        usage();
+    }
+}
+
+/// Writes i to buf in decimal, scaled down to whole k (1024) or M
+/// (1024 * 1024) units when it is at least that large. buf must have room
+/// for 21 bytes (20 digits and the terminating NUL).
+void to_iso(Elf64_Addr i, char *buf) {
+    const char *suffix = "";
+    if (i >= 1024 * 1024) {
+        i /= 1024 * 1024;
+        suffix = "M";
+    } else if (i >= 1024) {
+        i /= 1024;
+        suffix = "k";
+    }
+    sprintf(buf, "%" PRIu64 "%s", i, suffix);
+}
+
+/// Matches every PT_LOAD program header of the ELF file given with -f to the
+/// declared sections and prints how much of each section is used.
+int main(int argc, char **argv) {
+    program = argv[0];
+    parse_args(argc, argv);
+
+    if (ld_filename) {
+        // The result is not used yet; this only runs the script parser.
+        ld_file_loader::load(ld_filename);
+    }
+
+    if (elf_version(EV_CURRENT) == EV_NONE)
+        errx(EX_SOFTWARE, "ELF library initialization failed: %s", elf_errmsg(-1));
+
+    // Diagnostics name the -f argument; argv[1] is usually an option flag.
+    int fd = open(filename, O_RDONLY, 0);
+    if (fd < 0)
+        err(EX_NOINPUT, "open \"%s\" failed", filename);
+
+    Elf *e = elf_begin(fd, ELF_C_READ, NULL);
+    if (e == NULL)
+        errx(EX_SOFTWARE, "elf_begin() failed: %s.", elf_errmsg(-1));
+    if (elf_kind(e) != ELF_K_ELF)
+        errx(EX_DATAERR, "%s is not an ELF object.", filename);
+
+    size_t shstrndx;
+    if (elf_getshdrstrndx(e, &shstrndx) != 0)
+        errx(EX_SOFTWARE, "elf_getshdrstrndx() failed: %s.", elf_errmsg(-1));
+
+    size_t program_header_count;
+    if (elf_getphdrnum(e, &program_header_count) != 0)
+        errx(EX_DATAERR, "elf_getphdrnum() failed: %s.", elf_errmsg(-1));
+
+    size_t text_size = 0, data_size = 0, bss_size = 0;
+    for (size_t i = 0; i < program_header_count; i++) {
+        GElf_Phdr phdr;
+
+        if (gelf_getphdr(e, static_cast<int>(i), &phdr) != &phdr)
+            errx(EX_SOFTWARE, "getphdr() failed: %s.", elf_errmsg(-1));
+
+        if (phdr.p_type != PT_LOAD) {
+            continue;
+        }
+
+        // Classified by the exact permission bits: R+X (with or without W)
+        // is text; R+W is data, or bss when no bytes come from the file.
+        SectionType expectedType;
+        size_t *size;
+        const char *kind;
+
+        if (phdr.p_flags == (PF_X | PF_W | PF_R) || phdr.p_flags == (PF_X | PF_R)) {
+            expectedType = SectionType::TEXT;
+            size = &text_size;
+            kind = "text";
+        } else if (phdr.p_flags == (PF_R | PF_W)) {
+            expectedType = SectionType::DATA;
+            size = phdr.p_filesz > 0 ? &data_size : &bss_size;
+            kind = phdr.p_filesz > 0 ? "data" : "bss";
+        } else {
+            warnx("Unknown flag combination: 0x%02x", phdr.p_flags);
+            continue;
+        }
+
+        if (debug) {
+            printf("Adding PH #%zu as %s\n", i, kind);
+        }
+
+        auto s = std::find_if(sections.begin(), sections.end(), [&](Section &section) {
+            return section.type == expectedType && section.contains(phdr.p_vaddr, phdr.p_memsz);
+        });
+
+        if (s == sections.end()) {
+            fprintf(stderr,
+                    "Could not find a section for elf header #%zu of type %s, at address 0x%08" PRIx64 " with size %" PRIu64 "\n",
+                    i, to_str(expectedType), phdr.p_vaddr, phdr.p_memsz);
+        } else {
+            s->used += phdr.p_memsz;
+            *size += phdr.p_memsz;
+        }
+    }
+
+    printf("Size by sections\n");
+    printf("Type    Start      End  Size     Used\n");
+    for (const Section &s : sections) {
+        char size[100];
+        to_iso(s.size, size);
+        // A zero-sized section cannot be filled; do not divide by zero.
+        int used_pct = s.size == 0 ? 0 : static_cast<int>(double(s.used) / double(s.size) * 100.0);
+        printf("%4s %08" PRIx64 " %08" PRIx64 " %5s %8" PRIu64 " %3d%%\n", to_str(s.type), s.start, s.end, size, s.used,
+               used_pct);
+    }
+
+    printf("\n");
+    printf("Size by type\n");
+    printf("text=%zu, data=%zu, bss=%zu\n", text_size, data_size, bss_size);
+
+    elf_end(e);
+    close(fd);
+    return EXIT_SUCCESS;
+}
diff --git a/ld.cpp b/ld.cpp
new file mode 100644
index 0000000..6118f3a
--- /dev/null
+++ b/ld.cpp
@@ -0,0 +1,267 @@
+#include "ld.h"
+#include "GnuLdLexer.h"
+#include "GnuLdParser.h"
+#include "GnuLdParserBaseListener.h"
+#include "antlr4-runtime.h"
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <set>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+using antlr4::ANTLRFileStream;
+using namespace std;
+
+enum class MemoryAttribute {
+    R, W, X
+};
+
+/// One region declared in a linker script MEMORY command.
+class MemoryArea {
+public:
+    string name;
+    uint64_t origin = 0;
+    uint64_t length = 0;
+    set<MemoryAttribute> attributes;
+};
+
+/// Maps one attribute letter (r, w or x, in either case) to its enumerator.
+/// Throws std::domain_error for any other character.
+static MemoryAttribute valueOf(char c) {
+    switch (c) {
+        case 'r':
+        case 'R':
+            return MemoryAttribute::R;
+        case 'w':
+        case 'W':
+            return MemoryAttribute::W;
+        case 'x':
+        case 'X':
+            return MemoryAttribute::X;
+        default:
+            // Concatenate through std::string: adding a char to a string
+            // literal is pointer arithmetic, not concatenation.
+            throw std::domain_error(std::string("Invalid memory attribute: ") + c);
+    }
+}
+
+/// True when a ends with b.
+static bool endsWith(const string &a, const string &b) {
+    // compare() returns 0 when the compared ranges are equal.
+    return b.length() <= a.length() && a.compare(a.length() - b.length(), b.length(), b) == 0;
+}
+
+using ParseTree = antlr4::tree::ParseTree;
+
+/// Associates a value of type V with parse tree nodes, keyed by node address.
+template<typename V>
+class ParseTreeProperty {
+public:
+    virtual ~ParseTreeProperty() = default;
+
+    /// Returns the value stored for node.
+    /// Throws std::out_of_range when nothing was stored for it.
+    virtual V get(ParseTree *const node) {
+        auto it = _annotations.find(node);
+        if (it == _annotations.end()) {
+            throw std::out_of_range("no value recorded for node: " + node->getText());
+        }
+        return it->second;
+    }
+
+    /// Stores value for node, replacing any earlier value.
+    virtual void put(ParseTree *const node, V value) {
+        _annotations[node] = value;
+    }
+
+    /// Removes the entry for node and returns the value it held, or a
+    /// value-initialised V when there was no entry.
+    virtual V removeFrom(ParseTree *const node) {
+        V value{};
+        auto it = _annotations.find(node);
+        if (it != _annotations.end()) {
+            value = it->second;
+            _annotations.erase(it);
+        }
+        return value;
+    }
+
+protected:
+    std::map<ParseTree *, V> _annotations;
+};
+
+/// Listener that evaluates constant expressions and collects the regions of
+/// the MEMORY command from a linker script.
+class ElfinfoGnuLdBaseListener : public GnuLdParserBaseListener {
+public:
+    /// Regions collected so far, in the order their memory_spec rule exited.
+    vector<MemoryArea> memoryAreas;
+
+    /// Value computed for each expression node that has been exited.
+    ParseTreeProperty<uint64_t> expr;
+
+    /// Parses an integer literal: decimal, or hexadecimal with a 0x prefix,
+    /// optionally followed by k (x 1024) or m (x 1024 * 1024). Letters are
+    /// accepted in either case.
+    static uint64_t parseInt(const string &s) {
+        // Lower-case a copy; the destination must already have s's length.
+        string str = s;
+        transform(str.begin(), str.end(), str.begin(),
+                  [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+
+        int base = 10;
+        if (str.compare(0, 2, "0x") == 0) {
+            base = 16;
+            str = str.substr(2);
+        }
+
+        uint64_t factor = 1;
+        if (endsWith(str, "k")) {
+            factor = 1024;
+            str = str.substr(0, str.length() - 1);
+        } else if (endsWith(str, "m")) {
+            factor = 1024 * 1024;
+            str = str.substr(0, str.length() - 1);
+        }
+
+        return strtoull(str.c_str(), NULL, base) * factor;
+    }
+
+    // The Align, Name, Addr and Sizeof alternatives are not evaluated; they
+    // are recorded as 0 so that expr.get() on them does not throw.
+    void exitExpAlign(GnuLdParser::ExpAlignContext *ctx) override {
+        expr.put(ctx, 0);
+    }
+
+    void exitExpName(GnuLdParser::ExpNameContext *ctx) override {
+        expr.put(ctx, 0);
+    }
+
+    void exitExpAddr(GnuLdParser::ExpAddrContext *ctx) override {
+        expr.put(ctx, 0);
+    }
+
+    void exitExpSizeof(GnuLdParser::ExpSizeofContext *ctx) override {
+        expr.put(ctx, 0);
+    }
+
+    /// Stores the parsed value of the INT token.
+    void exitExpInt(GnuLdParser::ExpIntContext *ctx) override {
+        uint64_t i = parseInt(ctx->INT()->getText());
+        expr.put(ctx, i);
+    }
+
+    /// Stores the difference of the two operand values.
+    void exitExpSub(GnuLdParser::ExpSubContext *ctx) override {
+        uint64_t a = expr.get(ctx->exp(0).get());
+        uint64_t b = expr.get(ctx->exp(1).get());
+        expr.put(ctx, a - b);
+    }
+
+    /// Stores the sum of the two operand values.
+    void exitExpAdd(GnuLdParser::ExpAddContext *ctx) override {
+        uint64_t a = expr.get(ctx->exp(0).get());
+        uint64_t b = expr.get(ctx->exp(1).get());
+        expr.put(ctx, a + b);
+    }
+
+    /// Stores the length of the already collected area named by NAME.
+    void exitExpLengthExp(GnuLdParser::ExpLengthExpContext *ctx) override {
+        MemoryArea ma = getMemoryArea(ctx->NAME()->getText());
+        expr.put(ctx, ma.length);
+    }
+
+    /// Stores the origin of the already collected area named by NAME.
+    void exitExpOrigin(GnuLdParser::ExpOriginContext *ctx) override {
+        MemoryArea ma = getMemoryArea(ctx->NAME()->getText());
+        expr.put(ctx, ma.origin);
+    }
+
+    /// A mustbe_exp takes the value of the expression it wraps.
+    void exitMustbe_exp(GnuLdParser::Mustbe_expContext *ctx) override {
+        expr.put(ctx, expr.get(ctx->exp().get()));
+    }
+
+    /// Returns a copy of the collected area called name.
+    /// Throws std::runtime_error when no area with that name was collected.
+    MemoryArea getMemoryArea(const string &name) {
+        for (MemoryArea &ma : memoryAreas) {
+            if (ma.name == name) {
+                return ma;
+            }
+        }
+        // Thrown by value; `throw new ...` throws a pointer, which handlers
+        // for the exception type do not match and which is never freed.
+        throw std::runtime_error("No such memory area: " + name);
+    }
+
+    /// Records the finished memory_spec with the attributes collected for it.
+    void exitMemory_spec(GnuLdParser::Memory_specContext *ctx) override {
+        MemoryArea ma;
+        ma.name = ctx->NAME()->getText();
+        ma.attributes = attributes;
+        ma.origin = expr.get(ctx->origin_spec().get()->mustbe_exp().get());
+        ma.length = expr.get(ctx->length_spec().get()->mustbe_exp().get());
+        memoryAreas.push_back(ma);
+
+        // The per-entry attribute state is reset only after it was copied.
+        // NOTE(review): presumably attributes_opt is a sub-rule of
+        // memory_spec and so exits first; clearing when attributes_opt exits
+        // would then always leave ma.attributes empty. Confirm in the grammar.
+        attributes.clear();
+        attributesInverted = false;
+    }
+
+    MemoryAttribute attribute = MemoryAttribute::R;
+    bool attributesInverted = false;
+    set<MemoryAttribute> attributes;
+
+    /// Marks the pending attributes as inverted. Throws std::runtime_error
+    /// when attribute letters were already collected for the current entry.
+    void enterAttributeInverted(GnuLdParser::AttributeInvertedContext *ctx) override {
+        if (!attributes.empty()) {
+            throw std::runtime_error(
+                    "Attributes for memory areas can only be attributesInverted (with '!') as the first character in a specification; foo(!rw), not foo(x!rw).");
+        }
+
+        attributesInverted = true;
+    }
+
+    /// Adds every letter of the attribute string to the pending attributes.
+    // NOTE(review): load() registers this object with addParseListener(), so
+    // this enter event fires while the rule is still being parsed; ANTLR's
+    // documentation warns that contexts are not fully built at that point.
+    // Confirm that ctx->NAME() is already set here.
+    void enterAttributeNormal(GnuLdParser::AttributeNormalContext *ctx) override {
+        const string name = ctx->NAME()->getText();
+        for (char c : name) {
+            attribute = valueOf(c);
+            attributes.insert(attribute);
+        }
+        attributesInverted = false;
+    }
+};
+
+/// Lexes and parses the linker script at path, printing every token and the
+/// resulting parse tree to stdout. The returned ld_file is empty: nothing
+/// the listener collects is copied into it.
+ld_file ld_file_loader::load(std::string path) {
+    ANTLRFileStream input(path);
+    GnuLdLexer lexer(&input);
+    antlr4::CommonTokenStream tokens(&lexer);
+    tokens.fill();
+
+    for (auto token : tokens.getTokens()) {
+        std::cout << token->toString() << std::endl;
+    }
+
+    GnuLdParser parser(&tokens);
+    ElfinfoGnuLdBaseListener listener;
+    parser.addParseListener(&listener);
+    auto file = parser.file();
+    std::cout << file->toStringTree(&parser) << std::endl << std::endl;
+    return {};
+}
diff --git a/ld.h b/ld.h
new file mode 100644
index 0000000..b25a24c
--- /dev/null
+++ b/ld.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <string>
+
+class ld_file {
+};
+
+class ld_file_loader {
+public:
+    static ld_file load(std::string path);
+};
-- 
cgit v1.2.3