4 // Copyright (C) 2006 Matthias Braun <matze@braunis.de>
6 // This program is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU General Public License
8 // as published by the Free Software Foundation; either version 2
9 // of the License, or (at your option) any later version.
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// Constructs a lexer that takes its input from `newstream`.
//
// The initializer list sets up the `stream` member from `newstream`, clears
// the `eof` flag and starts the line counter at 0.
//
// NOTE(review): the constructor body is not visible in this excerpt. Going by
// the comment below, it presumably positions the buffer pointers so that the
// first character request forces a refill -- confirm against the full file.
31 Lexer::Lexer(std::istream& newstream)
32 : stream(newstream), eof(false), linenumber(0)
34 // trigger a refill of the buffer
// Buffer refill step (the enclosing function header is not visible in this
// excerpt). Runs only once the read position has reached or passed the end
// of the currently buffered data.
47 if(bufpos >= bufend) {
// Unformatted read of at most BUFFER_SIZE characters into `buffer`.
52 stream.read(buffer, BUFFER_SIZE);
// gcount() reports how many characters the read() above actually extracted;
// this can be fewer than BUFFER_SIZE (or zero) near the end of the input.
53 size_t bytes_read = stream.gcount();
// `bufend` now points one past the last valid character in `buffer`.
56 bufend = buffer + bytes_read;
58 // the following is a hack that appends an additional ' ' at the end of
59 // the file to avoid problems when parsing symbols/elements and a sudden
60 // EOF. This is faster than relying on unget and IMO also nicer.
// Taken when nothing was read or the stream has flagged end-of-file. A short
// read() sets eofbit, so this also fires on the final partial chunk.
// NOTE(review): the body of this branch is not visible here; per the comment
// above it appends a single ' ' after the data -- confirm that `buffer` has
// room for that extra character beyond BUFFER_SIZE.
61 if(bytes_read == 0 || stream.eof()) {
// Token scanner fragment (the function header, the switch skeleton and several
// branches are not visible in this excerpt; comments describe only what is
// shown and hedge the rest).
//
// Characters that end a number/symbol token in addition to whitespace:
// double quote, '(', ')' and ';'. Used by the two do/while loops further down.
73 static const char* delims = "\"();";
// Recursive restart of the scan.
// NOTE(review): the branch this belongs to is not visible -- presumably taken
// after skipping something that produces no token (e.g. a comment); confirm.
92 return getNextToken(); // and again
95 return TOKEN_OPEN_PAREN;
98 return TOKEN_CLOSE_PAREN;
// String literal handling: remember the line the string started on so that
// the EOF error below reports the opening line, not the line where input ended.
100 int startline = linenumber;
// Leaves the string-reading loop by jumping to the `string_finished` label
// (label and loop are not visible in this excerpt).
106 goto string_finished;
// Input ended inside a string literal: report it as a std::runtime_error.
124 std::stringstream msg;
125 msg << "Parse error in line " << startline << ": "
126 << "EOF while parsing string.";
127 throw std::runtime_error(msg.str());
// Store the character only while there is room; this statement does not
// store characters past MAX_TOKEN_LENGTH.
132 if(token_length < MAX_TOKEN_LENGTH)
133 token_string[token_length++] = c;
// NUL-terminate the collected text. token_length can equal MAX_TOKEN_LENGTH
// here, so this assumes token_string holds at least MAX_TOKEN_LENGTH + 1
// characters -- TODO confirm against the declaration.
136 token_string[token_length] = 0;
139 case '#': // constant
// Collect the constant's name: letters, digits and underscores.
// NOTE(review): the declaration of `c` is not visible; if it is a plain
// (possibly signed) char, passing a negative value to isalnum() is undefined
// behaviour -- confirm the type or cast to unsigned char.
142 while(isalnum(c) || c == '_') {
143 if(token_length < MAX_TOKEN_LENGTH)
144 token_string[token_length++] = c;
147 token_string[token_length] = 0;
// Only the names "t" and "f" are recognised. The statements executed on a
// match are not visible here (presumably they return boolean tokens).
149 if(strcmp(token_string, "t") == 0)
151 if(strcmp(token_string, "f") == 0)
154 // we only handle #t and #f constants at the moment...
// Any other name after '#' is rejected with a std::runtime_error.
// NOTE(review): this message is capitalised "Parse Error" while the string
// EOF message above uses "Parse error"; callers matching on the text see two
// spellings.
156 std::stringstream msg;
157 msg << "Parse Error in line " << linenumber << ": "
158 << "Unknown constant '" << token_string << "'.";
159 throw std::runtime_error(msg.str());
// A token starting with a digit or '-' may be an integer, a real number, or
// an ordinary symbol; the flags below record what was seen while scanning.
166 if(isdigit(c) || c == '-') {
167 bool have_nondigits = false;
168 bool have_digits = false;
// Counter rather than a flag so that more than one occurrence can be detected.
169 int have_floating_point = 0;
// NOTE(review): the condition guarding this increment is not visible --
// presumably it tests for '.'; confirm.
175 ++have_floating_point;
176 else if(isalnum(c) || c == '_')
177 have_nondigits = true;
179 if(token_length < MAX_TOKEN_LENGTH)
180 token_string[token_length++] = c;
// Keep scanning until whitespace or one of `delims`. strchr() also matches
// the string's terminating NUL, so a '\0' character ends the token as well.
183 } while(!isspace(c) && !strchr(delims, c));
185 token_string[token_length] = 0;
// Classification: any non-digit character, no digits at all, or more than one
// floating point marker selects the first outcome; exactly one marker selects
// the second; otherwise the token is an integer. The statements for the first
// two outcomes are not visible here (presumably symbol and real tokens).
189 if(have_nondigits || !have_digits || have_floating_point > 1)
191 else if(have_floating_point == 1)
194 return TOKEN_INTEGER;
// Fallback branch: collect every character up to the next whitespace or
// delimiter, with the same length cap as above, then NUL-terminate. The
// token type returned afterwards is not visible in this excerpt.
197 if(token_length < MAX_TOKEN_LENGTH)
198 token_string[token_length++] = c;
200 } while(!isspace(c) && !strchr(delims, c));
201 token_string[token_length] = 0;
210 } // end of namespace lisp