src/lisp/lexer.cpp

   1 //  SuperTux
   2 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   3 //
   4 //  This program is free software: you can redistribute it and/or modify
   5 //  it under the terms of the GNU General Public License as published by
   6 //  the Free Software Foundation, either version 3 of the License, or
   7 //  (at your option) any later version.
   8 //
   9 //  This program is distributed in the hope that it will be useful,
  10 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 //  GNU General Public License for more details.
  13 //
  14 //  You should have received a copy of the GNU General Public License
  15 //  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 #include "lisp/lexer.hpp"
  18
  19 #include <string.h>
  20 #include <sstream>
  21 #include <stdexcept>
  22 #include <stdio.h>
  23
  24 namespace lisp {
  25
  26 Lexer::Lexer(std::istream& newstream) :
  27   stream(newstream),
  28   eof(false),
  29   linenumber(0),
  30   bufend(),
  31   bufpos(),
  32   c(),
  33   token_length()
  34 {
  35   // trigger a refill of the buffer
  36   bufpos = NULL;
  37   bufend = NULL;
  38   nextChar();
  39 }
  40
  41 Lexer::~Lexer()
  42 {
  43 }
  44
  45 void
  46 Lexer::nextChar()
  47 {
  48   if(bufpos >= bufend) {
  49     if(eof) {
  50       c = EOF;
  51       return;
  52     }
  53     stream.read(buffer, BUFFER_SIZE);
  54     size_t bytes_read = stream.gcount();
  55
  56     bufpos = buffer;
  57     bufend = buffer + bytes_read;
  58
  59     // the following is a hack that appends an additional ' ' at the end of
  60     // the file to avoid problems when parsing symbols/elements and a sudden
  61     // EOF. This is faster than relying on unget and IMO also nicer.
  62     if(bytes_read == 0 || stream.eof()) {
  63       eof = true;
  64       *bufend = ' ';
  65       ++bufend;
  66     }
  67   }
  68   c = *bufpos++;
  69   if(c == '\n')
  70     ++linenumber;
  71 }
  72
  73 void
  74 Lexer::addChar()
  75 {
  76   if(token_length < MAX_TOKEN_LENGTH)
  77     token_string[token_length++] = c;
  78   nextChar();
  79 }
  80
  81 Lexer::TokenType
  82 Lexer::getNextToken()
  83 {
  84   static const char* delims = "\"();";
  85
  86   while(isspace(c)) {
  87     nextChar();
  88   }
  89
  90   token_length = 0;
  91
  92   switch(c) {
  93     case ';': // comment
  94       while(c != '\n') {
  95         nextChar();
  96       }
  97       return getNextToken(); // and again
  98     case '(':
  99       nextChar();
 100       return TOKEN_OPEN_PAREN;
 101     case ')':
 102       nextChar();
 103       return TOKEN_CLOSE_PAREN;
 104     case '"': {  // string
 105       int startline = linenumber;
 106       while(1) {
 107         nextChar();
 108         switch(c) {
 109           case '"':
 110             nextChar();
 111             goto string_finished;
 112           case '\r':
 113             continue;
 114           case '\n':
 115             break;
 116           case '\\':
 117             nextChar();
 118             switch(c) {
 119               case 'n':
 120                 c = '\n';
 121                 break;
 122               case 't':
 123                 c = '\t';
 124                 break;
 125             }
 126             break;
 127           case EOF: {
 128             std::stringstream msg;
 129             msg << "Parse error in line " << startline << ": "
 130                 << "EOF while parsing string.";
 131             throw std::runtime_error(msg.str());
 132           }
 133           default:
 134             break;
 135         }
 136         if(token_length < MAX_TOKEN_LENGTH)
 137           token_string[token_length++] = c;
 138       }
 139       string_finished:
 140       token_string[token_length] = 0;
 141       return TOKEN_STRING;
 142     }
 143     case '#': // constant
 144       nextChar();
 145
 146       while(isalnum(c) || c == '_') {
 147         addChar();
 148       }
 149       token_string[token_length] = 0;
 150
 151       if(strcmp(token_string, "t") == 0)
 152         return TOKEN_TRUE;
 153       if(strcmp(token_string, "f") == 0)
 154         return TOKEN_FALSE;
 155
 156       // we only handle #t and #f constants at the moment...
 157       {
 158         std::stringstream msg;
 159         msg << "Parse Error in line " << linenumber << ": "
 160             << "Unknown constant '" << token_string << "'.";
 161         throw std::runtime_error(msg.str());
 162       }
 163
 164     case EOF:
 165       return TOKEN_EOF;
 166
 167     default:
 168       if(isdigit(c) || c == '-') {
 169         bool have_nondigits = false;
 170         bool have_digits = false;
 171         int have_floating_point = 0;
 172
 173         do {
 174           if(isdigit(c))
 175             have_digits = true;
 176           else if(c == '.')
 177             ++have_floating_point;
 178           else if(isalnum(c) || c == '_')
 179             have_nondigits = true;
 180
 181           addChar();
 182         } while(!isspace(c) && !strchr(delims, c));
 183
 184         token_string[token_length] = 0;
 185
 186         // no nextChar
 187
 188         if(have_nondigits || !have_digits || have_floating_point > 1)
 189           return TOKEN_SYMBOL;
 190         else if(have_floating_point == 1)
 191           return TOKEN_REAL;
 192         else
 193           return TOKEN_INTEGER;
 194       } else {
 195         do {
 196           addChar();
 197         } while(!isspace(c) && !strchr(delims, c));
 198         token_string[token_length] = 0;
 199
 200         // no nextChar
 201
 202         return TOKEN_SYMBOL;
 203       }
 204   }
 205 }
 206
 207 } // end of namespace lisp
 208
 209 /* EOF */