src/lisp/lexer.cpp

   1 //  $Id$
   2 //
   3 //  SuperTux
   4 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   5 //
   6 //  This program is free software; you can redistribute it and/or
   7 //  modify it under the terms of the GNU General Public License
   8 //  as published by the Free Software Foundation; either version 2
   9 //  of the License, or (at your option) any later version.
  10 //
  11 //  This program is distributed in the hope that it will be useful,
  12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 //  GNU General Public License for more details.
  15 //
  16 //  You should have received a copy of the GNU General Public License
  17 //  along with this program; if not, write to the Free Software
  18 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19 #include <config.h>
  20
  21 #include <sstream>
  22 #include <cstring>
  23 #include <stdexcept>
  24 #include <iostream>
  25
  26 #include "lexer.hpp"
  27
  28 namespace lisp
  29 {
  30
  31 class EOFException
  32 {
  33 };
  34
  35 Lexer::Lexer(std::istream& newstream)
  36     : stream(newstream), eof(false), linenumber(0)
  37 {
  38   try {
  39     // trigger a refill of the buffer
  40     c = 0;
  41     bufend = 0;
  42     nextChar();
  43   } catch(EOFException& ) {
  44   }
  45 }
  46
  47 Lexer::~Lexer()
  48 {
  49 }
  50
  51 void
  52 Lexer::nextChar()
  53 {
  54   ++c;
  55   if(c >= bufend) {
  56     if(eof)
  57       throw EOFException();
  58     stream.read(buffer, BUFFER_SIZE);
  59     size_t bytes_read = stream.gcount();
  60
  61     c = buffer;
  62     bufend = buffer + bytes_read;
  63
  64     // the following is a hack that appends an additional ' ' at the end of
  65     // the file to avoid problems when parsing symbols/elements and a sudden
  66     // EOF. This is faster than relying on unget and IMO also nicer.
  67     if(bytes_read == 0 || stream.eof()) {
  68       eof = true;
  69       *bufend = ' ';
  70       ++bufend;
  71     }
  72   }
  73 }
  74
  75 Lexer::TokenType
  76 Lexer::getNextToken()
  77 {
  78   static const char* delims = "\"();";
  79
  80   try {
  81     while(isspace(*c)) {
  82       if(*c == '\n')
  83         ++linenumber;
  84       nextChar();
  85     };
  86
  87     token_length = 0;
  88
  89     switch(*c) {
  90       case ';': // comment
  91         while(true) {
  92           nextChar();
  93           if(*c == '\n') {
  94             ++linenumber;
  95             break;
  96           }
  97         }
  98         return getNextToken(); // and again
  99       case '(':
 100         nextChar();
 101         return TOKEN_OPEN_PAREN;
 102       case ')':
 103         nextChar();
 104         return TOKEN_CLOSE_PAREN;
 105       case '"': {  // string
 106         int startline = linenumber;
 107         try {
 108           while(1) {
 109             nextChar();
 110             if(*c == '"')
 111               break;
 112             else if (*c == '\r') // XXX this breaks with pure \r EOL
 113               continue;
 114             else if(*c == '\n')
 115               linenumber++;
 116             else if(*c == '\\') {
 117               nextChar();
 118               switch(*c) {
 119                 case 'n':
 120                   *c = '\n';
 121                   break;
 122                 case 't':
 123                   *c = '\t';
 124                   break;
 125               }
 126             }
 127             if(token_length < MAX_TOKEN_LENGTH)
 128               token_string[token_length++] = *c;
 129           }
 130           token_string[token_length] = 0;
 131         } catch(EOFException& ) {
 132           std::stringstream msg;
 133           msg << "Parse error in line " << startline << ": "
 134               << "EOF while parsing string.";
 135           throw std::runtime_error(msg.str());
 136         }
 137         nextChar();
 138         return TOKEN_STRING;
 139       }
 140       case '#': // constant
 141         try {
 142           nextChar();
 143
 144           while(isalnum(*c) || *c == '_') {
 145             if(token_length < MAX_TOKEN_LENGTH)
 146               token_string[token_length++] = *c;
 147             nextChar();
 148           }
 149           token_string[token_length] = 0;
 150         } catch(EOFException& ) {
 151           std::stringstream msg;
 152           msg << "Parse Error in line " << linenumber << ": "
 153             << "EOF while parsing constant.";
 154           throw std::runtime_error(msg.str());
 155         }
 156
 157         if(strcmp(token_string, "t") == 0)
 158           return TOKEN_TRUE;
 159         if(strcmp(token_string, "f") == 0)
 160           return TOKEN_FALSE;
 161
 162         // we only handle #t and #f constants at the moment...
 163
 164         {
 165           std::stringstream msg;
 166           msg << "Parse Error in line " << linenumber << ": "
 167             << "Unknown constant '" << token_string << "'.";
 168           throw std::runtime_error(msg.str());
 169         }
 170
 171       default:
 172         if(isdigit(*c) || *c == '-') {
 173           bool have_nondigits = false;
 174           bool have_digits = false;
 175           int have_floating_point = 0;
 176
 177           do {
 178             if(isdigit(*c))
 179               have_digits = true;
 180             else if(*c == '.')
 181               ++have_floating_point;
 182             else if(isalnum(*c) || *c == '_')
 183               have_nondigits = true;
 184
 185             if(token_length < MAX_TOKEN_LENGTH)
 186               token_string[token_length++] = *c;
 187
 188             nextChar();
 189           } while(!isspace(*c) && !strchr(delims, *c));
 190
 191           token_string[token_length] = 0;
 192
 193           // no nextChar
 194
 195           if(have_nondigits || !have_digits || have_floating_point > 1)
 196             return TOKEN_SYMBOL;
 197           else if(have_floating_point == 1)
 198             return TOKEN_REAL;
 199           else
 200             return TOKEN_INTEGER;
 201         } else {
 202           do {
 203             if(token_length < MAX_TOKEN_LENGTH)
 204               token_string[token_length++] = *c;
 205             nextChar();
 206           } while(!isspace(*c) && !strchr(delims, *c));
 207           token_string[token_length] = 0;
 208
 209           // no nextChar
 210
 211           return TOKEN_SYMBOL;
 212         }
 213     }
 214   } catch(EOFException& ) {
 215     return TOKEN_EOF;
 216   }
 217 }
 218
 219 } // end of namespace lisp