src/lisp/lexer.cpp

   1 //  SuperTux
   2 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   3 //
   4 //  This program is free software: you can redistribute it and/or modify
   5 //  it under the terms of the GNU General Public License as published by
   6 //  the Free Software Foundation, either version 3 of the License, or
   7 //  (at your option) any later version.
   8 //
   9 //  This program is distributed in the hope that it will be useful,
  10 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 //  GNU General Public License for more details.
  13 //
  14 //  You should have received a copy of the GNU General Public License
  15 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 #include "lisp/lexer.hpp"
  18
  19 #include <string.h>
  20 #include <sstream>
  21 #include <stdexcept>
  22 #include <stdio.h>
  23
  24 namespace lisp {
  25
  26 Lexer::Lexer(std::istream& newstream) :
  27   stream(newstream),
  28   eof(false),
  29   linenumber(0),
  30   bufend(),
  31   bufpos(),
  32   c(),
  33   token_string(),
  34   token_length()
  35 {
  36   // trigger a refill of the buffer
  37   bufpos = NULL;
  38   bufend = NULL;
  39   nextChar();
  40 }
  41
  42 Lexer::~Lexer()
  43 {
  44 }
  45
  46 void
  47 Lexer::nextChar()
  48 {
  49   if(bufpos >= bufend || (bufpos == NULL && bufend == NULL) /* Initial refill trigger */) {
  50     if(eof) {
  51       c = EOF;
  52       return;
  53     }
  54     stream.read(buffer, BUFFER_SIZE);
  55     size_t bytes_read = stream.gcount();
  56
  57     bufpos = buffer;
  58     bufend = buffer + bytes_read;
  59
  60     // the following is a hack that appends an additional ' ' at the end of
  61     // the file to avoid problems when parsing symbols/elements and a sudden
  62     // EOF. This is faster than relying on unget and IMO also nicer.
  63     if(bytes_read == 0 || stream.eof()) {
  64       eof = true;
  65       *bufend = ' ';
  66       ++bufend;
  67     }
  68   }
  69
  70   if(bufpos == NULL)
  71     return;
  72
  73   c = *bufpos++;
  74   if(c == '\n')
  75     ++linenumber;
  76 }
  77
  78 void
  79 Lexer::addChar()
  80 {
  81   if(token_length < MAX_TOKEN_LENGTH)
  82     token_string[token_length++] = c;
  83   nextChar();
  84 }
  85
  86 Lexer::TokenType
  87 Lexer::getNextToken()
  88 {
  89   static const char* delims = "\"();";
  90
  91   while(isspace(c)) {
  92     nextChar();
  93   }
  94
  95   token_length = 0;
  96
  97   switch(c) {
  98     case ';': // comment
  99       while(c != '\n') {
 100         nextChar();
 101       }
 102       return getNextToken(); // and again
 103     case '(':
 104       nextChar();
 105       return TOKEN_OPEN_PAREN;
 106     case ')':
 107       nextChar();
 108       return TOKEN_CLOSE_PAREN;
 109     case '"': {  // string
 110       int startline = linenumber;
 111       while(1) {
 112         nextChar();
 113         switch(c) {
 114           case '"':
 115             nextChar();
 116             goto string_finished;
 117           case '\r':
 118             continue;
 119           case '\n':
 120             break;
 121           case '\\':
 122             nextChar();
 123             switch(c) {
 124               case 'n':
 125                 c = '\n';
 126                 break;
 127               case 't':
 128                 c = '\t';
 129                 break;
 130             }
 131             break;
 132           case EOF: {
 133             std::stringstream msg;
 134             msg << "Parse error in line " << startline << ": "
 135                 << "EOF while parsing string.";
 136             throw std::runtime_error(msg.str());
 137           }
 138           default:
 139             break;
 140         }
 141         if(token_length < MAX_TOKEN_LENGTH)
 142           token_string[token_length++] = c;
 143       }
 144       string_finished:
 145       token_string[token_length] = 0;
 146       return TOKEN_STRING;
 147     }
 148     case '#': // constant
 149       nextChar();
 150
 151       while(isalnum(c) || c == '_') {
 152         addChar();
 153       }
 154       token_string[token_length] = 0;
 155
 156       if(strcmp(token_string, "t") == 0)
 157         return TOKEN_TRUE;
 158       if(strcmp(token_string, "f") == 0)
 159         return TOKEN_FALSE;
 160
 161       // we only handle #t and #f constants at the moment...
 162       {
 163         std::stringstream msg;
 164         msg << "Parse Error in line " << linenumber << ": "
 165             << "Unknown constant '" << token_string << "'.";
 166         throw std::runtime_error(msg.str());
 167       }
 168
 169     case EOF:
 170       return TOKEN_EOF;
 171
 172     default:
 173       if(isdigit(c) || c == '-') {
 174         bool have_nondigits = false;
 175         bool have_digits = false;
 176         int have_floating_point = 0;
 177
 178         do {
 179           if(isdigit(c))
 180             have_digits = true;
 181           else if(c == '.')
 182             ++have_floating_point;
 183           else if(isalnum(c) || c == '_')
 184             have_nondigits = true;
 185
 186           addChar();
 187         } while(!isspace(c) && !strchr(delims, c));
 188
 189         token_string[token_length] = 0;
 190
 191         // no nextChar
 192
 193         if(have_nondigits || !have_digits || have_floating_point > 1)
 194           return TOKEN_SYMBOL;
 195         else if(have_floating_point == 1)
 196           return TOKEN_REAL;
 197         else
 198           return TOKEN_INTEGER;
 199       } else {
 200         do {
 201           addChar();
 202         } while(!isspace(c) && !strchr(delims, c));
 203         token_string[token_length] = 0;
 204
 205         // no nextChar
 206
 207         return TOKEN_SYMBOL;
 208       }
 209   }
 210 }
 211
 212 } // end of namespace lisp
 213
 214 /* EOF */