aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrygve Laugstøl <trygvis@inamo.no>2016-07-17 18:56:29 +0200
committerTrygve Laugstøl <trygvis@inamo.no>2016-07-17 18:56:29 +0200
commitea58b3887a7deba05e3fc85eb536038a547f8871 (patch)
treeb686e8985eb420f343636656711bfc24eab12613
parentc543a2f713184ccf6a5e9077dbfb985bdc7c5b4b (diff)
downloadelfinfo-ea58b3887a7deba05e3fc85eb536038a547f8871.tar.gz
elfinfo-ea58b3887a7deba05e3fc85eb536038a547f8871.tar.bz2
elfinfo-ea58b3887a7deba05e3fc85eb536038a547f8871.tar.xz
elfinfo-ea58b3887a7deba05e3fc85eb536038a547f8871.zip
lexer/parser:
o Renaming the ASSERT_K token to ASSERT. o Fixing a bug in the NAME regex where underscores were not allowed in certain names. o All ASSERT statements use string instead of NAME. o Improving STRING_ANY, allow many non-quote tokens instead of just one. Can probably replace the entire string rule with STRING_ANY now. o Fixing naming of '!', "~" and "?" operators. Ld: o Implement support for multiplication and division operators. o Better error messages.
-rw-r--r--GnuLdLexer.g418
-rw-r--r--GnuLdParser.g428
-rw-r--r--Ld.cpp68
-rw-r--r--includes/trygvis/elfinfo/Ld.h9
4 files changed, 76 insertions, 47 deletions
diff --git a/GnuLdLexer.g4 b/GnuLdLexer.g4
index 10475d8..4b4b202 100644
--- a/GnuLdLexer.g4
+++ b/GnuLdLexer.g4
@@ -44,7 +44,7 @@ ALIGNMOD : 'ALIGNMOD';
ALIGNOF : 'ALIGNOF';
ALIGN_WITH_INPUT : 'ALIGN_WITH_INPUT';
AS_NEEDED : 'AS_NEEDED';
-ASSERT_K : 'ASSERT';
+ASSERT : 'ASSERT';
AT : 'AT';
BASE : 'BASE';
BEFORE : 'BEFORE';
@@ -146,21 +146,9 @@ VERS_IDENTIFIER : 'VERS_IDENTIFIER';
VERSIONK : 'VERSIONK';
VERS_TAG : 'VERS_TAG';
-/*
-Names are very liberal, they can be full strings and start with a dot.
-*/
-
QUOTE : '"' -> skip, pushMode(STRING);
-//SPACE : ' ';
-//SPACES : ' '+;
-
-//name :
-// '"' (NAME | SPACE | SPACES)+ '"' # nameQuoted
-// | NAME # namePlain;
-
-//NAME : [\._a-zA-Z][\._a-zA-Z0-9]*;
-NAME : [*\._a-zA-Z][*-\.\/_a-zA-Z0-9]* | '/DISCARD/';
+NAME : [*\._a-zA-Z][*\-\.\/_a-zA-Z0-9]* | '/DISCARD/';
// TODO: ld supports some really fancy expressions here, like "0101010b", "ffH", "ffx", "$Aa" etc
//INT : '0x' [0-9a-fA-F]+
@@ -228,5 +216,5 @@ WS
;
mode STRING;
-STRING_ANY : ~'"';
+STRING_ANY : ~'"'+;
STRING_END_QUOTE : '"' -> skip, popMode;
diff --git a/GnuLdParser.g4 b/GnuLdParser.g4
index 0d2bf4b..974eba2 100644
--- a/GnuLdParser.g4
+++ b/GnuLdParser.g4
@@ -122,13 +122,13 @@ casesymlist:
/* Parsed as expressions so that commas separate entries */
extern_name_list:
- extern_name_list_body
+ extern_name_list_body
;
extern_name_list_body:
- NAME
- | extern_name_list_body NAME
- | extern_name_list_body COMMA NAME
+ NAME
+ | extern_name_list_body NAME
+ | extern_name_list_body COMMA NAME
;
script_file:
@@ -151,7 +151,7 @@ ifile_p1:
| floating_point_support
| statement_anywhere
| version
- | SEMICOLON
+ | SEMICOLON
| TARGET_K LPAREN NAME RPAREN
| SEARCH_DIR LPAREN filename RPAREN
| OUTPUT LPAREN filename RPAREN
@@ -202,7 +202,7 @@ sec_or_group_p1:
statement_anywhere:
ENTRY LPAREN NAME RPAREN
| assignment end
- | ASSERT_K LPAREN exp COMMA string RPAREN
+ | ASSERT LPAREN exp COMMA string RPAREN
;
/* The '*' and '?' cases are there because the lexer returns them as
@@ -275,7 +275,7 @@ statement:
| input_section_spec
| length LPAREN mustbe_exp RPAREN
| FILL LPAREN fill_exp RPAREN
- | ASSERT_K LPAREN exp COMMA NAME RPAREN end
+ | ASSERT LPAREN exp COMMA string RPAREN end
| INCLUDE filename statement_list_opt END
;
@@ -435,9 +435,9 @@ exp:
DASH exp # expNegate // TODO: %prec UNARY
| LPAREN exp RPAREN # expParen
| NEXT LPAREN exp RPAREN # expNextParen // TODO: %prec UNARY
- | EXLAMATION exp # expInvert // TODO: %prec UNARY
+ | EXLAMATION exp # expNot // TODO: %prec UNARY
| PLUS exp # expPlus // TODO: %prec UNARY
- | TILDE exp # expMinus // TODO: %prec UNARY
+ | TILDE exp # expInvert // TODO: %prec UNARY
| exp STAR exp # expMul
| exp SLASH exp # expDiv
| exp MOD exp # expMod
@@ -454,7 +454,7 @@ exp:
| exp AMPERSAND exp # expAnd
| exp HAT exp # expXor
| exp BAR exp # expOr
- | exp QUESTION exp COLON exp # expTrinary
+ | exp QUESTION exp COLON exp # expTernary
| exp ANDAND exp # expAndand
| exp OROR exp # expOror
| DEFINED LPAREN NAME RPAREN # expDefined
@@ -476,7 +476,7 @@ exp:
| NAME # expName
| MAX_K LPAREN exp COMMA exp RPAREN # expMax
| MIN_K LPAREN exp COMMA exp RPAREN # expMin
- | ASSERT_K LPAREN exp COMMA NAME RPAREN # expAssert
+ | ASSERT LPAREN exp COMMA string RPAREN # expAssert
| ORIGIN LPAREN NAME RPAREN # expOrigin
| LENGTH LPAREN NAME RPAREN # expLengthExp
| LOG2CEIL LPAREN exp RPAREN # expLog2ceil
@@ -678,10 +678,8 @@ vers_defns:
| NAME
| vers_defns SEMICOLON VERS_IDENTIFIER
| vers_defns SEMICOLON NAME
- | vers_defns SEMICOLON EXTERN NAME LBRACE
- vers_defns opt_semicolon RBRACE
- | EXTERN NAME LBRACE
- vers_defns opt_semicolon RBRACE
+ | vers_defns SEMICOLON EXTERN NAME LBRACE vers_defns opt_semicolon RBRACE
+ | EXTERN NAME LBRACE vers_defns opt_semicolon RBRACE
| GLOBAL
| vers_defns SEMICOLON GLOBAL
| LOCAL
diff --git a/Ld.cpp b/Ld.cpp
index 8bacaea..16c40fa 100644
--- a/Ld.cpp
+++ b/Ld.cpp
@@ -7,10 +7,9 @@ namespace trygvis {
namespace elfinfo {
using antlr4::ANTLRFileStream;
+using antlr4::tree::ParseTree;
using namespace std;
-using ParseTree = antlr4::tree::ParseTree;
-
static MemoryAttribute valueOf(char c) {
switch (c) {
case 'r':
@@ -31,7 +30,7 @@ static bool endsWith(const string &a, const string &b) {
return b.length() <= a.length() && a.compare(a.length() - b.length(), b.length(), b) == 0;
}
-template<typename V>
+template<typename V, bool debug = false>
class ParseTreeProperty {
public:
virtual V get(Ref<ParseTree> node) {
@@ -39,21 +38,27 @@ public:
}
virtual V get(ParseTree *const node) {
- return _annotations.at(node);
- }
+ if (!debug) {
+ return _annotations.at(node);
+ }
-// virtual V get(ParseTree *const node) {
-// try {
-// cout << "node = " << node->getText() << endl;
-// return _annotations.at(node);
-// } catch (std::out_of_range &e) {
-// cout << "out of range: " << node->getText() << endl;
-// throw e;
-// }
-// }
+ try {
+// cout << "node = " << node->getText() << endl;
+ return _annotations.at(node);
+ } catch (std::out_of_range &e) {
+ cout << "get(" << node << "), text=" << node->getText() << endl;
+ stringstream buf;
+ buf << "out of range: " << node << ", text=" << node->getText();
+ auto msg = buf.str();
+ cout << msg << endl;
+ throw LdInternalErrorException(msg);
+ }
+ }
virtual void put(ParseTree *const node, V value) {
- // cout << "put(" << node << ", " << value << ")" << endl;
+ if (debug) {
+ cout << "put(" << node << ", " << value << "), text: " << node->getText() << endl;
+ }
_annotations[node] = value;
}
@@ -131,6 +136,10 @@ public:
expr.put(ctx, 0);
}
+ virtual void exitExpDefined(GnuLdParser::ExpDefinedContext *ctx) override {
+ expr.put(ctx, 0);
+ }
+
void exitExpInt(GnuLdParser::ExpIntContext *ctx) override {
uint64_t i = parseInt(ctx->INT()->getText());
expr.put(ctx, i);
@@ -150,6 +159,20 @@ public:
expr.put(ctx, x);
}
+ void exitExpMul(GnuLdParser::ExpMulContext *ctx) override {
+ uint64_t a = expr.get(ctx->exp(0));
+ uint64_t b = expr.get(ctx->exp(1));
+ uint64_t x = a * b;
+ expr.put(ctx, x);
+ }
+
+ void exitExpDiv(GnuLdParser::ExpDivContext *ctx) override {
+ uint64_t a = expr.get(ctx->exp(0));
+ uint64_t b = expr.get(ctx->exp(1));
+ uint64_t x = b > 0 ? a / b : 0;
+ expr.put(ctx, x);
+ }
+
void exitExpName(GnuLdParser::ExpNameContext *ctx) override {
expr.put(ctx, 0);
}
@@ -176,11 +199,23 @@ public:
expr.put(ctx, expr.get(ctx->exp()));
}
+ void exitExpParen(GnuLdParser::ExpParenContext *ctx) override {
+ expr.put(ctx, expr.get(ctx->exp()));
+ }
+
void exitExpLoadaddr(GnuLdParser::ExpLoadaddrContext *ctx) override {
auto &section = getSection(ctx->NAME()->getText());
expr.put(ctx, 0);
}
+ void exitExpTernary(GnuLdParser::ExpTernaryContext *ctx) override {
+ uint64_t a = expr.get(ctx->exp(0));
+ uint64_t b = expr.get(ctx->exp(1));
+ uint64_t c = expr.get(ctx->exp(2));
+ uint64_t x = a ? b : c;
+ expr.put(ctx, x);
+ }
+
void exitExpConstant(GnuLdParser::ExpConstantContext *ctx) override {
expr.put(ctx, expr.get(ctx->NAME()));
}
@@ -229,7 +264,7 @@ public:
void syntaxError(IRecognizer *recognizer, Token *offendingSymbol, size_t line, int charPositionInLine,
const std::string &msg, std::exception_ptr e) override {
- messages.push_back(msg);
+ messages.push_back("line " + to_string(line) + ":" + to_string(charPositionInLine) + ": " + msg);
}
};
@@ -266,6 +301,7 @@ LdScript LdScriptLoader::load(std::string path) {
parser.addParseListener(&listener);
LdErrorListener ldErrorListener;
+ parser.removeErrorListeners();
parser.addErrorListener(&ldErrorListener);
auto file = parser.file();
diff --git a/includes/trygvis/elfinfo/Ld.h b/includes/trygvis/elfinfo/Ld.h
index 09dcaec..03559a2 100644
--- a/includes/trygvis/elfinfo/Ld.h
+++ b/includes/trygvis/elfinfo/Ld.h
@@ -10,11 +10,17 @@ namespace elfinfo {
class LdParseException : public std::runtime_error {
public:
- explicit LdParseException(const std::vector<std::string> messages) : runtime_error("Parse error"), messages(messages) {}
+ explicit LdParseException(const std::vector<std::string> &messages) :
+ runtime_error("Parse error"), messages(messages) {}
const std::vector<std::string> messages;
};
+class LdInternalErrorException : public std::runtime_error {
+public:
+ explicit LdInternalErrorException(const std::string &what) : runtime_error(what) {}
+};
+
enum class MemoryAttribute {
R, W, X
};
@@ -69,6 +75,7 @@ public:
LdScript load(std::string path);
void setDebug(bool debug);
+
private:
bool debug_;
};