# Event codes produced by the S-expression parser.  ``parse`` only ever
# yields LPAREN, RPAREN and TEXT events; START and END are kept so existing
# callers that reference them (or pass them to ``state_to_str``) keep working.
EVENT_START = 1
EVENT_END = 2
EVENT_LPAREN = 3
EVENT_RPAREN = 4
EVENT_TEXT = 5

# (code, name) pairs used by ``state_to_str``.  A sequence of pairs compared
# with ``==`` is used instead of a dict so that any value, hashable or not,
# can be passed in.
_EVENT_NAMES = (
    (EVENT_START, "EVENT_START"),
    (EVENT_END, "EVENT_END"),
    (EVENT_LPAREN, "EVENT_LPAREN"),
    (EVENT_RPAREN, "EVENT_RPAREN"),
    (EVENT_TEXT, "EVENT_TEXT"),
)


def state_to_str(s):
    """Return the symbolic name of event code *s*.

    Returns ``"UNKNOWN STATE"`` when *s* is not one of the ``EVENT_*`` codes.
    """
    for code, name in _EVENT_NAMES:
        if s == code:
            return name
    return "UNKNOWN STATE"


def logging_parser(parser):
    """Wrap an event iterator, printing each event before passing it on.

    *parser* is any iterable of ``(event, token)`` pairs, such as the
    generator returned by ``parse``.  Every pair is yielded unchanged.  The
    token is included in the printed line only when it is truthy, so an empty
    text token is printed like a token-less event.
    """
    for event, token in parser:
        if token:
            print("SEXPR: event={}, token={}".format(state_to_str(event), token))
        else:
            print("SEXPR: event={}".format(state_to_str(event)))
        yield (event, token)


def _tokenize(chars):
    """Turn an iterable of single characters into ``(event, token)`` pairs.

    Rules:

    * ``(`` and ``)`` outside quotes produce ``EVENT_LPAREN`` / ``EVENT_RPAREN``
      with a ``None`` token.
    * A space ends the bare token in progress; leading spaces are skipped.
    * A double quote starts a quoted run that extends to the next double quote
      (or to the end of input if it is never closed).  The run is appended to
      any bare characters collected so far and the token ends at the closing
      quote.  There is no escape syntax.
    * Everything else is accumulated into the current bare token.
    """
    chars = iter(chars)  # the quoted-run loop below must share this iterator
    parts = []  # characters of the token being accumulated
    for c in chars:
        if c == "\"":
            for c in chars:
                if c == "\"":
                    break
                parts.append(c)
            # Emitted as TEXT even when empty or when the content is a
            # parenthesis: quoted text is never structural.
            yield (EVENT_TEXT, "".join(parts))
            parts = []
        elif c == " ":
            if parts:
                yield (EVENT_TEXT, "".join(parts))
                parts = []
        elif c == "(" or c == ")":
            if parts:
                # NOTE(review): trailing whitespace (e.g. a tab) is stripped
                # only when a token is terminated by a parenthesis, not by a
                # space or end of input.  Kept as-is for compatibility.
                yield (EVENT_TEXT, "".join(parts).rstrip())
                parts = []
            yield (EVENT_LPAREN if c == "(" else EVENT_RPAREN, None)
        else:
            parts.append(c)
    if parts:
        yield (EVENT_TEXT, "".join(parts))


def parse(path):
    """Lazily parse the S-expression file at *path* into events.

    Yields ``(event, token)`` tuples where *event* is ``EVENT_LPAREN``,
    ``EVENT_RPAREN`` or ``EVENT_TEXT``; *token* is the text for
    ``EVENT_TEXT`` and ``None`` otherwise.  Iteration simply stops at end of
    file (no ``EVENT_END`` tuple is yielded).

    The file is opened when iteration starts and is closed when the generator
    is exhausted, closed early, or interrupted by an exception.

    Raises whatever ``open`` raises if *path* cannot be opened; the error
    surfaces on the first iteration step, not at call time.
    """
    # TODO: count lines and characters
    with open(path, "r") as f:
        # NOTE(review): newline characters are dropped rather than treated as
        # separators, so two tokens divided only by a line break are joined
        # and line breaks inside quoted text are lost.  Kept for
        # compatibility with existing inputs -- confirm this is intended.
        chars = (c for line in f for c in line if c != "\n")
        for item in _tokenize(chars):
            yield item