src/lisp/lexer.cpp

   1 //  $Id$
   2 //
   3 //  SuperTux
   4 //  Copyright (C) 2006 Matthias Braun <matze@braunis.de>
   5 //
   6 //  This program is free software; you can redistribute it and/or
   7 //  modify it under the terms of the GNU General Public License
   8 //  as published by the Free Software Foundation; either version 2
   9 //  of the License, or (at your option) any later version.
  10 //
  11 //  This program is distributed in the hope that it will be useful,
  12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 //  GNU General Public License for more details.
  15 //
  16 //  You should have received a copy of the GNU General Public License
  17 //  along with this program; if not, write to the Free Software
  18 //  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19 #include <config.h>
  20
  21 #include <sstream>
  22 #include <cstring>
  23 #include <stdexcept>
  24 #include <iostream>
  25 #include <stdio.h>
  26
  27 #include "lexer.hpp"
  28
  29 namespace lisp
  30 {
  31
  32 Lexer::Lexer(std::istream& newstream)
  33     : stream(newstream), eof(false), linenumber(0)
  34 {
  35   // trigger a refill of the buffer
  36   bufpos = NULL;
  37   bufend = NULL;
  38   nextChar();
  39 }
  40
  41 Lexer::~Lexer()
  42 {
  43 }
  44
  45 void
  46 Lexer::nextChar()
  47 {
  48   if(bufpos >= bufend) {
  49     if(eof) {
  50       c = EOF;
  51       return;
  52     }
  53     stream.read(buffer, BUFFER_SIZE);
  54     size_t bytes_read = stream.gcount();
  55
  56     bufpos = buffer;
  57     bufend = buffer + bytes_read;
  58
  59     // the following is a hack that appends an additional ' ' at the end of
  60     // the file to avoid problems when parsing symbols/elements and a sudden
  61     // EOF. This is faster than relying on unget and IMO also nicer.
  62     if(bytes_read == 0 || stream.eof()) {
  63       eof = true;
  64       *bufend = ' ';
  65       ++bufend;
  66     }
  67   }
  68   c = *bufpos++;
  69   if(c == '\n')
  70     ++linenumber;
  71 }
  72
  73 void
  74 Lexer::addChar()
  75 {
  76   if(token_length < MAX_TOKEN_LENGTH)
  77     token_string[token_length++] = c;
  78   nextChar();
  79 }
  80
  81 Lexer::TokenType
  82 Lexer::getNextToken()
  83 {
  84   static const char* delims = "\"();";
  85
  86   while(isspace(c)) {
  87     nextChar();
  88   }
  89
  90   token_length = 0;
  91
  92   switch(c) {
  93     case ';': // comment
  94       while(c != '\n') {
  95         nextChar();
  96       }
  97       return getNextToken(); // and again
  98     case '(':
  99       nextChar();
 100       return TOKEN_OPEN_PAREN;
 101     case ')':
 102       nextChar();
 103       return TOKEN_CLOSE_PAREN;
 104     case '"': {  // string
 105       int startline = linenumber;
 106       while(1) {
 107         nextChar();
 108         switch(c) {
 109         case '"':
 110           nextChar();
 111           goto string_finished;
 112         case '\r':
 113           continue;
 114         case '\n':
 115           break;
 116         case '\\':
 117           nextChar();
 118           switch(c) {
 119           case 'n':
 120             c = '\n';
 121             break;
 122           case 't':
 123             c = '\t';
 124             break;
 125           }
 126           break;
 127         case EOF: {
 128           std::stringstream msg;
 129           msg << "Parse error in line " << startline << ": "
 130               << "EOF while parsing string.";
 131           throw std::runtime_error(msg.str());
 132         }
 133         default:
 134           break;
 135         }
 136         if(token_length < MAX_TOKEN_LENGTH)
 137           token_string[token_length++] = c;
 138       }
 139 string_finished:
 140       token_string[token_length] = 0;
 141       return TOKEN_STRING;
 142     }
 143     case '#': // constant
 144       nextChar();
 145
 146       while(isalnum(c) || c == '_') {
 147         addChar();
 148       }
 149       token_string[token_length] = 0;
 150
 151       if(strcmp(token_string, "t") == 0)
 152         return TOKEN_TRUE;
 153       if(strcmp(token_string, "f") == 0)
 154         return TOKEN_FALSE;
 155
 156       // we only handle #t and #f constants at the moment...
 157       {
 158         std::stringstream msg;
 159         msg << "Parse Error in line " << linenumber << ": "
 160             << "Unknown constant '" << token_string << "'.";
 161         throw std::runtime_error(msg.str());
 162       }
 163
 164     case EOF:
 165       return TOKEN_EOF;
 166
 167     default:
 168       if(isdigit(c) || c == '-') {
 169         bool have_nondigits = false;
 170         bool have_digits = false;
 171         int have_floating_point = 0;
 172
 173         do {
 174           if(isdigit(c))
 175             have_digits = true;
 176           else if(c == '.')
 177             ++have_floating_point;
 178           else if(isalnum(c) || c == '_')
 179             have_nondigits = true;
 180
 181           addChar();
 182         } while(!isspace(c) && !strchr(delims, c));
 183
 184         token_string[token_length] = 0;
 185
 186         // no nextChar
 187
 188         if(have_nondigits || !have_digits || have_floating_point > 1)
 189           return TOKEN_SYMBOL;
 190         else if(have_floating_point == 1)
 191           return TOKEN_REAL;
 192         else
 193           return TOKEN_INTEGER;
 194       } else {
 195         do {
 196           addChar();
 197         } while(!isspace(c) && !strchr(delims, c));
 198         token_string[token_length] = 0;
 199
 200         // no nextChar
 201
 202         return TOKEN_SYMBOL;
 203       }
 204   }
 205 }
 206
 207 } // end of namespace lisp