From ea58b3887a7deba05e3fc85eb536038a547f8871 Mon Sep 17 00:00:00 2001 From: Trygve Laugstøl Date: Sun, 17 Jul 2016 18:56:29 +0200 Subject: lexer/parser: o Renaming the ASSERT_K token to ASSERT. o Fixing a bug in the NAME regex where underscores were not allowed in certain names. o All ASSERT statements use string instead of NAME. o Improving STRING_ANY, allow many non-quote tokens instead of just one. Can probably replace the entire string rule with STRING_ANY now. o Fixing naming of '!', "~" and "?" operators. Ld: o Implement support for multiplication and division operators. o Better error messages. --- GnuLdLexer.g4 | 18 ++---------- GnuLdParser.g4 | 28 +++++++++--------- Ld.cpp | 68 +++++++++++++++++++++++++++++++++---------- includes/trygvis/elfinfo/Ld.h | 9 +++++- 4 files changed, 76 insertions(+), 47 deletions(-) diff --git a/GnuLdLexer.g4 b/GnuLdLexer.g4 index 10475d8..4b4b202 100644 --- a/GnuLdLexer.g4 +++ b/GnuLdLexer.g4 @@ -44,7 +44,7 @@ ALIGNMOD : 'ALIGNMOD'; ALIGNOF : 'ALIGNOF'; ALIGN_WITH_INPUT : 'ALIGN_WITH_INPUT'; AS_NEEDED : 'AS_NEEDED'; -ASSERT_K : 'ASSERT'; +ASSERT : 'ASSERT'; AT : 'AT'; BASE : 'BASE'; BEFORE : 'BEFORE'; @@ -146,21 +146,9 @@ VERS_IDENTIFIER : 'VERS_IDENTIFIER'; VERSIONK : 'VERSIONK'; VERS_TAG : 'VERS_TAG'; -/* -Names are very liberal, they can be full strings and start with a dot. 
-*/ - QUOTE : '"' -> skip, pushMode(STRING); -//SPACE : ' '; -//SPACES : ' '+; - -//name : -// '"' (NAME | SPACE | SPACES)+ '"' # nameQuoted -// | NAME # namePlain; - -//NAME : [\._a-zA-Z][\._a-zA-Z0-9]*; -NAME : [*\._a-zA-Z][*-\.\/_a-zA-Z0-9]* | '/DISCARD/'; +NAME : [*\._a-zA-Z][*\-\.\/_a-zA-Z0-9]* | '/DISCARD/'; // TODO: ld supports some really fancy expressions here, like "0101010b", "ffH", "ffx", "$Aa" etc //INT : '0x' [0-9a-fA-F]+ @@ -228,5 +216,5 @@ WS ; mode STRING; -STRING_ANY : ~'"'; +STRING_ANY : ~'"'+; STRING_END_QUOTE : '"' -> skip, popMode; diff --git a/GnuLdParser.g4 b/GnuLdParser.g4 index 0d2bf4b..974eba2 100644 --- a/GnuLdParser.g4 +++ b/GnuLdParser.g4 @@ -122,13 +122,13 @@ casesymlist: /* Parsed as expressions so that commas separate entries */ extern_name_list: - extern_name_list_body + extern_name_list_body ; extern_name_list_body: - NAME - | extern_name_list_body NAME - | extern_name_list_body COMMA NAME + NAME + | extern_name_list_body NAME + | extern_name_list_body COMMA NAME ; script_file: @@ -151,7 +151,7 @@ ifile_p1: | floating_point_support | statement_anywhere | version - | SEMICOLON + | SEMICOLON | TARGET_K LPAREN NAME RPAREN | SEARCH_DIR LPAREN filename RPAREN | OUTPUT LPAREN filename RPAREN @@ -202,7 +202,7 @@ sec_or_group_p1: statement_anywhere: ENTRY LPAREN NAME RPAREN | assignment end - | ASSERT_K LPAREN exp COMMA string RPAREN + | ASSERT LPAREN exp COMMA string RPAREN ; /* The '*' and '?' 
cases are there because the lexer returns them as @@ -275,7 +275,7 @@ statement: | input_section_spec | length LPAREN mustbe_exp RPAREN | FILL LPAREN fill_exp RPAREN - | ASSERT_K LPAREN exp COMMA NAME RPAREN end + | ASSERT LPAREN exp COMMA string RPAREN end | INCLUDE filename statement_list_opt END ; @@ -435,9 +435,9 @@ exp: DASH exp # expNegate // TODO: %prec UNARY | LPAREN exp RPAREN # expParen | NEXT LPAREN exp RPAREN # expNextParen // TODO: %prec UNARY - | EXLAMATION exp # expInvert // TODO: %prec UNARY + | EXLAMATION exp # expNot // TODO: %prec UNARY | PLUS exp # expPlus // TODO: %prec UNARY - | TILDE exp # expMinus // TODO: %prec UNARY + | TILDE exp # expInvert // TODO: %prec UNARY | exp STAR exp # expMul | exp SLASH exp # expDiv | exp MOD exp # expMod @@ -454,7 +454,7 @@ exp: | exp AMPERSAND exp # expAnd | exp HAT exp # expXor | exp BAR exp # expOr - | exp QUESTION exp COLON exp # expTrinary + | exp QUESTION exp COLON exp # expTernary | exp ANDAND exp # expAndand | exp OROR exp # expOror | DEFINED LPAREN NAME RPAREN # expDefined @@ -476,7 +476,7 @@ exp: | NAME # expName | MAX_K LPAREN exp COMMA exp RPAREN # expMax | MIN_K LPAREN exp COMMA exp RPAREN # expMin - | ASSERT_K LPAREN exp COMMA NAME RPAREN # expAssert + | ASSERT LPAREN exp COMMA string RPAREN # expAssert | ORIGIN LPAREN NAME RPAREN # expOrigin | LENGTH LPAREN NAME RPAREN # expLengthExp | LOG2CEIL LPAREN exp RPAREN # expLog2ceil @@ -678,10 +678,8 @@ vers_defns: | NAME | vers_defns SEMICOLON VERS_IDENTIFIER | vers_defns SEMICOLON NAME - | vers_defns SEMICOLON EXTERN NAME LBRACE - vers_defns opt_semicolon RBRACE - | EXTERN NAME LBRACE - vers_defns opt_semicolon RBRACE + | vers_defns SEMICOLON EXTERN NAME LBRACE vers_defns opt_semicolon RBRACE + | EXTERN NAME LBRACE vers_defns opt_semicolon RBRACE | GLOBAL | vers_defns SEMICOLON GLOBAL | LOCAL diff --git a/Ld.cpp b/Ld.cpp index 8bacaea..16c40fa 100644 --- a/Ld.cpp +++ b/Ld.cpp @@ -7,10 +7,9 @@ namespace trygvis { namespace elfinfo { using 
antlr4::ANTLRFileStream; +using antlr4::tree::ParseTree; using namespace std; -using ParseTree = antlr4::tree::ParseTree; - static MemoryAttribute valueOf(char c) { switch (c) { case 'r': @@ -31,7 +30,7 @@ static bool endsWith(const string &a, const string &b) { return b.length() <= a.length() && a.compare(a.length() - b.length(), b.length(), b) == 0; } -template +template class ParseTreeProperty { public: virtual V get(Ref node) { @@ -39,21 +38,27 @@ public: } virtual V get(ParseTree *const node) { - return _annotations.at(node); - } + if (!debug) { + return _annotations.at(node); + } -// virtual V get(ParseTree *const node) { -// try { -// cout << "node = " << node->getText() << endl; -// return _annotations.at(node); -// } catch (std::out_of_range &e) { -// cout << "out of range: " << node->getText() << endl; -// throw e; -// } -// } + try { +// cout << "node = " << node->getText() << endl; + return _annotations.at(node); + } catch (std::out_of_range &e) { + cout << "get(" << node << "), text=" << node->getText() << endl; + stringstream buf; + buf << "out of range: " << node << ", text=" << node->getText(); + auto msg = buf.str(); + cout << msg << endl; + throw LdInternalErrorException(msg); + } + } virtual void put(ParseTree *const node, V value) { - // cout << "put(" << node << ", " << value << ")" << endl; + if (debug) { + cout << "put(" << node << ", " << value << "), text: " << node->getText() << endl; + } _annotations[node] = value; } @@ -131,6 +136,10 @@ public: expr.put(ctx, 0); } + virtual void exitExpDefined(GnuLdParser::ExpDefinedContext *ctx) override { + expr.put(ctx, 0); + } + void exitExpInt(GnuLdParser::ExpIntContext *ctx) override { uint64_t i = parseInt(ctx->INT()->getText()); expr.put(ctx, i); @@ -150,6 +159,20 @@ public: expr.put(ctx, x); } + void exitExpMul(GnuLdParser::ExpMulContext *ctx) override { + uint64_t a = expr.get(ctx->exp(0)); + uint64_t b = expr.get(ctx->exp(1)); + uint64_t x = a * b; + expr.put(ctx, x); + } + + void 
exitExpDiv(GnuLdParser::ExpDivContext *ctx) override { + uint64_t a = expr.get(ctx->exp(0)); + uint64_t b = expr.get(ctx->exp(1)); + uint64_t x = b > 0 ? a / b : 0; + expr.put(ctx, x); + } + void exitExpName(GnuLdParser::ExpNameContext *ctx) override { expr.put(ctx, 0); } @@ -176,11 +199,23 @@ public: expr.put(ctx, expr.get(ctx->exp())); } + void exitExpParen(GnuLdParser::ExpParenContext *ctx) override { + expr.put(ctx, expr.get(ctx->exp())); + } + void exitExpLoadaddr(GnuLdParser::ExpLoadaddrContext *ctx) override { auto &section = getSection(ctx->NAME()->getText()); expr.put(ctx, 0); } + void exitExpTernary(GnuLdParser::ExpTernaryContext *ctx) override { + uint64_t a = expr.get(ctx->exp(0)); + uint64_t b = expr.get(ctx->exp(1)); + uint64_t c = expr.get(ctx->exp(2)); + uint64_t x = a ? b : c; + expr.put(ctx, x); + } + void exitExpConstant(GnuLdParser::ExpConstantContext *ctx) override { expr.put(ctx, expr.get(ctx->NAME())); } @@ -229,7 +264,7 @@ public: void syntaxError(IRecognizer *recognizer, Token *offendingSymbol, size_t line, int charPositionInLine, const std::string &msg, std::exception_ptr e) override { - messages.push_back(msg); + messages.push_back("line " + to_string(line) + ":" + to_string(charPositionInLine) + ": " + msg); } }; @@ -266,6 +301,7 @@ LdScript LdScriptLoader::load(std::string path) { parser.addParseListener(&listener); LdErrorListener ldErrorListener; + parser.removeErrorListeners(); parser.addErrorListener(&ldErrorListener); auto file = parser.file(); diff --git a/includes/trygvis/elfinfo/Ld.h b/includes/trygvis/elfinfo/Ld.h index 09dcaec..03559a2 100644 --- a/includes/trygvis/elfinfo/Ld.h +++ b/includes/trygvis/elfinfo/Ld.h @@ -10,11 +10,17 @@ namespace elfinfo { class LdParseException : public std::runtime_error { public: - explicit LdParseException(const std::vector messages) : runtime_error("Parse error"), messages(messages) {} + explicit LdParseException(const std::vector &messages) : + runtime_error("Parse error"), messages(messages) {} 
const std::vector messages; }; +class LdInternalErrorException : public std::runtime_error { +public: + explicit LdInternalErrorException(const std::string &what) : runtime_error(what) {} +}; + enum class MemoryAttribute { R, W, X }; @@ -69,6 +75,7 @@ public: LdScript load(std::string path); void setDebug(bool debug); + private: bool debug_; }; -- cgit v1.2.3