don't abuse exceptions for indicating EOF

author Matthias Braun <matze@braunis.de>

Sun, 18 May 2008 12:18:12 +0000 (12:18 +0000)

committer Matthias Braun <matze@braunis.de>

Sun, 18 May 2008 12:18:12 +0000 (12:18 +0000)
author Matthias Braun <matze@braunis.de>
Sun, 18 May 2008 12:18:12 +0000 (12:18 +0000)
committer Matthias Braun <matze@braunis.de>
Sun, 18 May 2008 12:18:12 +0000 (12:18 +0000)
diff --git a/src/lisp/lexer.cpp b/src/lisp/lexer.cpp

index 05b52bc..e1a4220 100644 (file)
--- a/src/lisp/lexer.cpp
+++ b/src/lisp/lexer.cpp
@@ -28,20 +28,13 @@
  namespace lisp
  {
  
-class EOFException
-{
-};
-
  Lexer::Lexer(std::istream& newstream)
      : stream(newstream), eof(false), linenumber(0)
  {
-  try {
-    // trigger a refill of the buffer
-    c = 0;
-    bufend = 0;
-    nextChar();
-  } catch(EOFException& ) {
-  }
+  // trigger a refill of the buffer
+  bufpos = NULL;
+  bufend = NULL;
+  nextChar();
  }
  
  Lexer::~Lexer()
@@ -51,14 +44,15 @@ Lexer::~Lexer()
  void
  Lexer::nextChar()
  {
-  ++c;
-  if(c >= bufend) {
-    if(eof)
-      throw EOFException();
+  if(bufpos >= bufend) {
+    if(eof) {
+      c = EOF;
+      return;
+    }
      stream.read(buffer, BUFFER_SIZE);
      size_t bytes_read = stream.gcount();
  
-    c = buffer;
+    bufpos = buffer;
      bufend = buffer + bytes_read;
  
      // the following is a hack that appends an additional ' ' at the end of
@@ -70,6 +64,7 @@ Lexer::nextChar()
        ++bufend;
      }
    }
+  c = *bufpos++;
  }
  
  Lexer::TokenType
@@ -77,142 +72,138 @@ Lexer::getNextToken()
  {
    static const char* delims = "\"();";
  
-  try {
-    while(isspace(*c)) {
-      if(*c == '\n')
-        ++linenumber;
-      nextChar();
-    };
+  while(isspace(c)) {
+    if(c == '\n')
+      ++linenumber;
+    nextChar();
+  };
  
-    token_length = 0;
+  token_length = 0;
  
-    switch(*c) {
-      case ';': // comment
-        while(true) {
-          nextChar();
-          if(*c == '\n') {
-            ++linenumber;
-            break;
-          }
-        }
-        return getNextToken(); // and again
-      case '(':
+  switch(c) {
+    case ';': // comment
+      while(true) {
          nextChar();
-        return TOKEN_OPEN_PAREN;
-      case ')':
+        if(c == '\n') {
+          ++linenumber;
+          break;
+        }
+      }
+      return getNextToken(); // and again
+    case '(':
+      nextChar();
+      return TOKEN_OPEN_PAREN;
+    case ')':
+      nextChar();
+      return TOKEN_CLOSE_PAREN;
+    case '"': {  // string
+      int startline = linenumber;
+      while(1) {
          nextChar();
-        return TOKEN_CLOSE_PAREN;
-      case '"': {  // string
-        int startline = linenumber;
-        try {
-          while(1) {
-            nextChar();
-            if(*c == '"')
-              break;
-            else if (*c == '\r') // XXX this breaks with pure \r EOL
-              continue;
-            else if(*c == '\n')
-              linenumber++;
-            else if(*c == '\\') {
-              nextChar();
-              switch(*c) {
-                case 'n':
-                  *c = '\n';
-                  break;
-                case 't':
-                  *c = '\t';
-                  break;
-              }
-            }
-            if(token_length < MAX_TOKEN_LENGTH)
-              token_string[token_length++] = *c;
+        switch(c) {
+        case '"':
+          nextChar();
+          goto string_finished;
+        case '\r':
+          continue;
+        case '\n':
+          linenumber++;
+          break;
+        case '\\':
+          nextChar();
+          switch(c) {
+          case 'n':
+            c = '\n';
+            break;
+          case 't':
+            c = '\t';
+            break;
            }
-          token_string[token_length] = 0;
-        } catch(EOFException& ) {
+          break;
+        case EOF: {
            std::stringstream msg;
            msg << "Parse error in line " << startline << ": "
                << "EOF while parsing string.";
            throw std::runtime_error(msg.str());
          }
+        default:
+          break;
+        }
+        if(token_length < MAX_TOKEN_LENGTH)
+          token_string[token_length++] = c;
+      }
+string_finished:
+      token_string[token_length] = 0;
+      return TOKEN_STRING;
+    }
+    case '#': // constant
+      nextChar();
+
+      while(isalnum(c) || c == '_') {
+        if(token_length < MAX_TOKEN_LENGTH)
+          token_string[token_length++] = c;
          nextChar();
-        return TOKEN_STRING;
        }
-      case '#': // constant
-        try {
-          nextChar();
+      token_string[token_length] = 0;
  
-          while(isalnum(*c) || *c == '_') {
-            if(token_length < MAX_TOKEN_LENGTH)
-              token_string[token_length++] = *c;
-            nextChar();
-          }
-          token_string[token_length] = 0;
-        } catch(EOFException& ) {
-          std::stringstream msg;
-          msg << "Parse Error in line " << linenumber << ": "
-            << "EOF while parsing constant.";
-          throw std::runtime_error(msg.str());
-        }
+      if(strcmp(token_string, "t") == 0)
+        return TOKEN_TRUE;
+      if(strcmp(token_string, "f") == 0)
+        return TOKEN_FALSE;
  
-        if(strcmp(token_string, "t") == 0)
-          return TOKEN_TRUE;
-        if(strcmp(token_string, "f") == 0)
-          return TOKEN_FALSE;
+      // we only handle #t and #f constants at the moment...
+      {
+        std::stringstream msg;
+        msg << "Parse Error in line " << linenumber << ": "
+            << "Unknown constant '" << token_string << "'.";
+        throw std::runtime_error(msg.str());
+      }
  
-        // we only handle #t and #f constants at the moment...
+    case EOF:
+      return TOKEN_EOF;
  
-        {
-          std::stringstream msg;
-          msg << "Parse Error in line " << linenumber << ": "
-            << "Unknown constant '" << token_string << "'.";
-          throw std::runtime_error(msg.str());
-        }
+    default:
+      if(isdigit(c) || c == '-') {
+        bool have_nondigits = false;
+        bool have_digits = false;
+        int have_floating_point = 0;
  
-      default:
-        if(isdigit(*c) || *c == '-') {
-          bool have_nondigits = false;
-          bool have_digits = false;
-          int have_floating_point = 0;
-
-          do {
-            if(isdigit(*c))
-              have_digits = true;
-            else if(*c == '.')
-              ++have_floating_point;
-            else if(isalnum(*c) || *c == '_')
-              have_nondigits = true;
-
-            if(token_length < MAX_TOKEN_LENGTH)
-              token_string[token_length++] = *c;
-
-            nextChar();
-          } while(!isspace(*c) && !strchr(delims, *c));
-
-          token_string[token_length] = 0;
-
-          // no nextChar
-
-          if(have_nondigits || !have_digits || have_floating_point > 1)
-            return TOKEN_SYMBOL;
-          else if(have_floating_point == 1)
-            return TOKEN_REAL;
-          else
-            return TOKEN_INTEGER;
-        } else {
-          do {
-            if(token_length < MAX_TOKEN_LENGTH)
-              token_string[token_length++] = *c;
-            nextChar();
-          } while(!isspace(*c) && !strchr(delims, *c));
-          token_string[token_length] = 0;
-
-          // no nextChar
+        do {
+          if(isdigit(c))
+            have_digits = true;
+          else if(c == '.')
+            ++have_floating_point;
+          else if(isalnum(c) || c == '_')
+            have_nondigits = true;
  
+          if(token_length < MAX_TOKEN_LENGTH)
+            token_string[token_length++] = c;
+
+          nextChar();
+        } while(!isspace(c) && !strchr(delims, c));
+
+        token_string[token_length] = 0;
+
+        // no nextChar
+
+        if(have_nondigits || !have_digits || have_floating_point > 1)
            return TOKEN_SYMBOL;
-        }
-    }
-  } catch(EOFException& ) {
-    return TOKEN_EOF;
+        else if(have_floating_point == 1)
+          return TOKEN_REAL;
+        else
+          return TOKEN_INTEGER;
+      } else {
+        do {
+          if(token_length < MAX_TOKEN_LENGTH)
+            token_string[token_length++] = c;
+          nextChar();
+        } while(!isspace(c) && !strchr(delims, c));
+        token_string[token_length] = 0;
+
+        // no nextChar
+
+        return TOKEN_SYMBOL;
+      }
    }
  }
  
diff --git a/src/lisp/lexer.hpp b/src/lisp/lexer.hpp

index 1cd062b..5c5f2d5 100644 (file)
--- a/src/lisp/lexer.hpp
+++ b/src/lisp/lexer.hpp
@@ -59,7 +59,8 @@ private:
    int linenumber;
    char buffer[BUFFER_SIZE+1];
    char* bufend;
-  char* c;
+  char* bufpos;
+  int  c;
    char token_string[MAX_TOKEN_LENGTH + 1];
    int token_length;
  };
author	Matthias Braun <matze@braunis.de>
	Sun, 18 May 2008 12:18:12 +0000 (12:18 +0000)
committer	Matthias Braun <matze@braunis.de>
	Sun, 18 May 2008 12:18:12 +0000 (12:18 +0000)
src/lisp/lexer.cpp		patch \| blob \| history
src/lisp/lexer.hpp		patch \| blob \| history