src/lisp/lexer.cpp

   1 //  $Id$
   2 //
   3 //  SuperTux
   4 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   5 //
   6 //  This program is free software; you can redistribute it and/or
   7 //  modify it under the terms of the GNU General Public License
   8 //  as published by the Free Software Foundation; either version 2
   9 //  of the License, or (at your option) any later version.
  10 //
  11 //  This program is distributed in the hope that it will be useful,
  12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 //  GNU General Public License for more details.
  15 //
  16 //  You should have received a copy of the GNU General Public License
  17 //  along with this program; if not, write to the Free Software
  18 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19 #include <config.h>
  20
  21 #include <sstream>
  22 #include <cstring>
  23 #include <stdexcept>
  24 #include <iostream>
  25
  26 #include "lexer.hpp"
  27
  28 namespace lisp
  29 {
  30
  31 Lexer::Lexer(std::istream& newstream)
  32     : stream(newstream), eof(false), linenumber(0)
  33 {
  34   // trigger a refill of the buffer
  35   bufpos = NULL;
  36   bufend = NULL;
  37   nextChar();
  38 }
  39
  40 Lexer::~Lexer()
  41 {
  42 }
  43
  44 void
  45 Lexer::nextChar()
  46 {
  47   if(bufpos >= bufend) {
  48     if(eof) {
  49       c = EOF;
  50       return;
  51     }
  52     stream.read(buffer, BUFFER_SIZE);
  53     size_t bytes_read = stream.gcount();
  54
  55     bufpos = buffer;
  56     bufend = buffer + bytes_read;
  57
  58     // the following is a hack that appends an additional ' ' at the end of
  59     // the file to avoid problems when parsing symbols/elements and a sudden
  60     // EOF. This is faster than relying on unget and IMO also nicer.
  61     if(bytes_read == 0 || stream.eof()) {
  62       eof = true;
  63       *bufend = ' ';
  64       ++bufend;
  65     }
  66   }
  67   c = *bufpos++;
  68 }
  69
  70 Lexer::TokenType
  71 Lexer::getNextToken()
  72 {
  73   static const char* delims = "\"();";
  74
  75   while(isspace(c)) {
  76     if(c == '\n')
  77       ++linenumber;
  78     nextChar();
  79   };
  80
  81   token_length = 0;
  82
  83   switch(c) {
  84     case ';': // comment
  85       while(true) {
  86         nextChar();
  87         if(c == '\n') {
  88           ++linenumber;
  89           break;
  90         }
  91       }
  92       return getNextToken(); // and again
  93     case '(':
  94       nextChar();
  95       return TOKEN_OPEN_PAREN;
  96     case ')':
  97       nextChar();
  98       return TOKEN_CLOSE_PAREN;
  99     case '"': {  // string
 100       int startline = linenumber;
 101       while(1) {
 102         nextChar();
 103         switch(c) {
 104         case '"':
 105           nextChar();
 106           goto string_finished;
 107         case '\r':
 108           continue;
 109         case '\n':
 110           linenumber++;
 111           break;
 112         case '\\':
 113           nextChar();
 114           switch(c) {
 115           case 'n':
 116             c = '\n';
 117             break;
 118           case 't':
 119             c = '\t';
 120             break;
 121           }
 122           break;
 123         case EOF: {
 124           std::stringstream msg;
 125           msg << "Parse error in line " << startline << ": "
 126               << "EOF while parsing string.";
 127           throw std::runtime_error(msg.str());
 128         }
 129         default:
 130           break;
 131         }
 132         if(token_length < MAX_TOKEN_LENGTH)
 133           token_string[token_length++] = c;
 134       }
 135 string_finished:
 136       token_string[token_length] = 0;
 137       return TOKEN_STRING;
 138     }
 139     case '#': // constant
 140       nextChar();
 141
 142       while(isalnum(c) || c == '_') {
 143         if(token_length < MAX_TOKEN_LENGTH)
 144           token_string[token_length++] = c;
 145         nextChar();
 146       }
 147       token_string[token_length] = 0;
 148
 149       if(strcmp(token_string, "t") == 0)
 150         return TOKEN_TRUE;
 151       if(strcmp(token_string, "f") == 0)
 152         return TOKEN_FALSE;
 153
 154       // we only handle #t and #f constants at the moment...
 155       {
 156         std::stringstream msg;
 157         msg << "Parse Error in line " << linenumber << ": "
 158             << "Unknown constant '" << token_string << "'.";
 159         throw std::runtime_error(msg.str());
 160       }
 161
 162     case EOF:
 163       return TOKEN_EOF;
 164
 165     default:
 166       if(isdigit(c) || c == '-') {
 167         bool have_nondigits = false;
 168         bool have_digits = false;
 169         int have_floating_point = 0;
 170
 171         do {
 172           if(isdigit(c))
 173             have_digits = true;
 174           else if(c == '.')
 175             ++have_floating_point;
 176           else if(isalnum(c) || c == '_')
 177             have_nondigits = true;
 178
 179           if(token_length < MAX_TOKEN_LENGTH)
 180             token_string[token_length++] = c;
 181
 182           nextChar();
 183         } while(!isspace(c) && !strchr(delims, c));
 184
 185         token_string[token_length] = 0;
 186
 187         // no nextChar
 188
 189         if(have_nondigits || !have_digits || have_floating_point > 1)
 190           return TOKEN_SYMBOL;
 191         else if(have_floating_point == 1)
 192           return TOKEN_REAL;
 193         else
 194           return TOKEN_INTEGER;
 195       } else {
 196         do {
 197           if(token_length < MAX_TOKEN_LENGTH)
 198             token_string[token_length++] = c;
 199           nextChar();
 200         } while(!isspace(c) && !strchr(delims, c));
 201         token_string[token_length] = 0;
 202
 203         // no nextChar
 204
 205         return TOKEN_SYMBOL;
 206       }
 207   }
 208 }
 209
 210 } // end of namespace lisp