// Derived from org.mozilla.javascript.TokenStream [NPL]
/**
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation.
*
* Contributor(s): Roger Lawrence, Mike McCabe
*/
package org.ibex.js;
import java.io.*;
/** Lexes a stream of characters into a stream of Tokens */
class Lexer implements Tokens {
/**
 * For debugging: lexes standard input (reported as source "stdin", starting at line 0)
 * and prints codeToString[tok] for every token until getToken() returns -1.
 */
public static void main(String[] s) throws IOException {
    Lexer lexer = new Lexer(new InputStreamReader(System.in), "stdin", 0);
    for (int tok = lexer.getToken(); tok != -1; tok = lexer.getToken()) {
        System.out.println(codeToString[tok]);
    }
}
/** the code of the token most recently returned by getToken(), whether freshly lexed or replayed from the pushback stack */
protected int op;
/**
 * set by getToken() each time it lexes a fresh token from the input: holds the value op had
 * immediately before that read; left untouched when a pushed-back token is replayed
 */
protected int mostRecentlyReadToken;
/**
 * if the token just parsed was a NUMBER, this is the numeric value (an Integer, Long or Double);
 * reset to null at the start of every getToken()
 */
protected Number number = null;
/**
 * if the token just parsed was a NAME or STRING, this is the (interned) string value;
 * reset to null at the start of every getToken()
 */
protected String string = null;
/** the current line number: starts at the value given to the constructor and is advanced by SmartReader.read() on each newline */
protected int line = 0;
/**
 * assigned the same value as line whenever SmartReader.read() advances line.
 * NOTE(review): nothing in this class changes it independently; presumably the parser adjusts it -- confirm
 */
protected int parserLine = 0;
/** the column counter maintained by SmartReader (reset to 0 at each newline); reported in LexerException messages */
protected int col = 0;
/** the name of the source code file being lexed; used as the prefix of LexerException messages */
protected String sourceName;
/** the character source from which every token handler reads */
private SmartReader in;
/**
 * Creates a lexer over the given character stream.
 * @param r          the characters to lex
 * @param sourceName the name reported in LexerException messages
 * @param line       the initial value for both line and parserLine
 */
public Lexer(Reader r, String sourceName, int line) throws IOException {
    this.in = new SmartReader(r);
    this.sourceName = sourceName;
    this.parserLine = this.line = line;
}
// Predicates ///////////////////////////////////////////////////////////////////////
/** @return true iff c is an ASCII letter (a-z or A-Z) */
private static boolean isAlpha(int c) {
    boolean lower = 'a' <= c && c <= 'z';
    boolean upper = 'A' <= c && c <= 'Z';
    return lower || upper;
}
/** @return true iff c is an ASCII decimal digit (0-9) */
private static boolean isDigit(int c) {
    return '0' <= c && c <= '9';
}
/** @return the value (0-15) of the ASCII hexadecimal digit c, or -1 if c is not a hexadecimal digit */
private static int xDigitToInt(int c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}
// Token Subtype Handlers /////////////////////////////////////////////////////////
/**
 * Maps an identifier to its keyword token.
 *
 * NOTE: the region between the switch and end markers below is not plain Java; it is
 * presumably expanded by a source preprocessor before compilation, so no comments are
 * placed inside it. The duplicate "class" entry that used to appear in the five-letter
 * group has been removed; the remaining entry yields the same result.
 *
 * @param name the identifier text
 * @return the token code of the keyword, RESERVED for a word that scripts may not use,
 *         or -1 if name is not a keyword at all
 */
private int getKeyword(String name) throws IOException {
    //#switch(name)
    case "if": return IF;
    case "lt": return LT;
    case "gt": return GT;
    case "in": return IN;
    case "do": return DO;
    case "and": return AND;
    case "or": return OR;
    case "for": return FOR;
    case "int": return RESERVED;
    case "new": return RESERVED;
    case "try": return TRY;
    case "var": return VAR;
    case "byte": return RESERVED;
    case "case": return CASE;
    case "char": return RESERVED;
    case "else": return ELSE;
    case "enum": return RESERVED;
    case "goto": return RESERVED;
    case "long": return RESERVED;
    case "null": return NULL;
    case "true": return TRUE;
    case "with": return RESERVED;
    case "void": return RESERVED;
    case "class": return RESERVED;
    case "break": return BREAK;
    case "while": return WHILE;
    case "false": return FALSE;
    case "const": return RESERVED;
    case "final": return RESERVED;
    case "super": return RESERVED;
    case "throw": return THROW;
    case "catch": return CATCH;
    case "delete": return RESERVED;
    case "return": return RETURN;
    case "throws": return RESERVED;
    case "double": return RESERVED;
    case "assert": return ASSERT;
    case "public": return RESERVED;
    case "switch": return SWITCH;
    case "typeof": return TYPEOF;
    case "package": return RESERVED;
    case "default": return DEFAULT;
    case "finally": return FINALLY;
    case "boolean": return RESERVED;
    case "private": return RESERVED;
    case "extends": return RESERVED;
    case "abstract": return RESERVED;
    case "continue": return CONTINUE;
    case "debugger": return RESERVED;
    case "function": return FUNCTION;
    case "volatile": return RESERVED;
    case "interface": return RESERVED;
    case "protected": return RESERVED;
    case "transient": return RESERVED;
    case "implements": return RESERVED;
    case "instanceof": return RESERVED;
    case "synchronized": return RESERVED;
    case "cascade": return CASCADE;
    //#end
    return -1;
}
/**
 * Lexes an identifier or keyword whose first character has already been read.
 * Sets this.string (interned) when the result is NAME.
 *
 * @param c the first character of the identifier (already consumed from the input)
 * @return the keyword's token code, or NAME for an ordinary identifier
 * @throws LexerException if the word is one of the RESERVED words
 */
private int getIdentifier(int c) throws IOException {
    in.startString();
    // consume identifier characters; the first character that does not belong is pushed back
    do {
        c = in.read();
    } while (Character.isJavaIdentifierPart((char)c));
    in.unread();

    final String word = in.getString();
    final int keyword = getKeyword(word);
    if (keyword == RESERVED) {
        throw new LexerException("The reserved word \"" + word + "\" is not permitted in Ibex scripts");
    }
    if (keyword == -1) {
        this.string = word.intern();
        return NAME;
    }
    return keyword;
}
/**
 * Lexes a numeric literal whose first character has already been read and stores its
 * value in this.number: an Integer when the value fits in an int, otherwise a Long; a
 * Double for any literal containing a fraction or an exponent.
 *
 * Supported forms: decimal integers, decimal floats (optional fraction and exponent),
 * octal integers (leading 0 followed by a digit) and hexadecimal integers (0x / 0X prefix).
 *
 * @param c the first character of the literal (a digit, or '.' followed by a digit)
 * @return NUMBER
 * @throws LexerException if the exponent has no digits or the literal cannot be parsed
 *         in its base (for example "08", "0x" with no digits, or a value too large for a long)
 */
private int getNumber(int c) throws IOException {
    int base = 10;
    in.startString();
    boolean isInteger = true;

    // figure out what base we're using
    if (c == '0') {
        c = in.read();
        if (c == 'x' || c == 'X') {
            base = 16;
            // Step onto the first hex digit and restart the accumulator there, so that the
            // "0x" prefix is not part of the text handed to Long.parseLong(). Without this
            // read the digit loop below would stop immediately on the 'x' itself.
            c = in.read();
            in.startString();
        } else if (isDigit(c)) {
            base = 8;
        }
    }

    // consume the integer digits; in bases below 16 a letter terminates the run
    while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read();

    if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
        isInteger = false;
        if (c == '.') do { c = in.read(); } while (isDigit(c));
        if (c == 'e' || c == 'E') {
            c = in.read();
            if (c == '+' || c == '-') c = in.read();
            if (!isDigit(c)) throw new LexerException("float literal did not have an exponent value");
            do { c = in.read(); } while (isDigit(c));
        }
    }

    // the character that ended the literal belongs to the next token
    in.unread();
    String numString = in.getString();

    if (!isInteger) {
        // only decimal literals can be floats (isInteger is cleared only when base == 10)
        double dval;
        try { dval = (Double.valueOf(numString)).doubleValue(); }
        catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
        this.number = new Double(dval);
        return NUMBER;
    }

    long longval;
    try { longval = Long.parseLong(numString, base); }
    catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); }
    if (longval >= Integer.MIN_VALUE && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval);
    else this.number = new Long(longval);
    return NUMBER;
}
/**
 * Lexes a string literal whose opening quote has already been read and stores the
 * (interned) contents, without the quotes, in this.string.
 *
 * Recognised escapes are backslash followed by b, f, n, r, t, v, a backslash, or u plus
 * exactly four hexadecimal digits; any other escaped character stands for itself.
 * As long as no escape is seen the text is taken straight from the reader's accumulator;
 * from the first escape onward it is assembled in a StringBuffer.
 *
 * @param c the opening quote character (single or double quote)
 * @return STRING
 * @throws LexerException if a newline or end of input occurs before the closing quote,
 *         or a unicode escape contains a non-hexadecimal character
 */
private int getString(int c) throws IOException {
    StringBuffer stringBuf = null;
    int quoteChar = c;
    c = in.read();
    in.startString(); // start after the first "
    while (c != quoteChar) {
        if (c == '\n' || c == -1) throw new LexerException("unterminated string literal");
        if (c == '\\') {
            if (stringBuf == null) {
                in.unread(); // Don't include the backslash
                stringBuf = new StringBuffer(in.getString());
                in.read();
            }
            switch (c = in.read()) {
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                case 'v': c = '\u000B'; break;
                case '\\': c = '\\'; break;
                case 'u': {
                    int v = 0;
                    for (int i = 0; i < 4; i++) {
                        int ci = in.read();
                        boolean isHex = (ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F');
                        if (!isHex)
                            throw new LexerException("illegal character '" + ((char)ci) + "' in \\u unicode escape sequence");
                        // each hex digit contributes four bits to the code unit
                        v = (v << 4) | Character.digit((char)ci, 16);
                    }
                    c = (char)v;
                    break;
                }
                default:
                    // just use the character that was escaped
                    break;
            }
        }
        if (stringBuf != null) stringBuf.append((char) c);
        c = in.read();
    }
    if (stringBuf != null) this.string = stringBuf.toString().intern();
    else {
        in.unread(); // miss the trailing "
        this.string = in.getString().intern();
        in.read();
    }
    return STRING;
}
/**
 * Lexes the next token directly from the input, ignoring the pushback stack.
 *
 * Skips whitespace and line terminators (including carriage returns, so that input with
 * CR LF line endings is accepted), then dispatches on the first significant character:
 * identifiers and keywords, numeric literals, string literals, and operators/punctuation.
 * Comments are skipped by recursing through getToken().
 *
 * @return the token code, or -1 at end of input
 * @throws LexerException on an illegal character, the invalid token "::", an
 *         unterminated comment or a nested comment
 */
private int _getToken() throws IOException {
    int c;
    do { c = in.read(); }
    while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' || c == '\r');
    if (c == -1) return -1;
    if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c);
    if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c);
    if (c == '"' || c == '\'') return getString(c);
    switch (c) {
        case ';': return SEMI;
        case '[': return LB;
        case ']': return RB;
        case '{': return LC;
        case '}': return RC;
        case '(': return LP;
        case ')': return RP;
        case ',': return COMMA;
        case '?': return HOOK;
        case ':': return !in.match(':') ? COLON : in.match('=') ? GRAMMAR : le(":: is not a valid token");
        case '.': return DOT;
        case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR);
        case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR;
        case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND;
        case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ;
        case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE;
        case '%': return in.match('=') ? ASSIGN_MOD : MOD;
        case '~': return BITNOT;
        case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? (in.match('=') ? ADD_TRAP : INC) : ADD;
        case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? (in.match('=') ? DEL_TRAP : DEC) : SUB;
        case '*': return in.match('=') ? ASSIGN_MUL : MUL;
        case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH;
        case '>': return !in.match('>') ? (in.match('=') ? GE : GT) :
            in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH);
        case '/':
            if (in.match('=')) return ASSIGN_DIV;
            // line comment: discard up to (not including) the newline, then lex the next token
            if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); }
            if (!in.match('*')) return DIV;
            // block comment: scan for the closing star-slash, rejecting a nested opener
            while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) {
                if (c == '\n' || c != '/' || !in.match('*')) continue;
                // a slash-star immediately followed by a slash closes the comment
                if (in.match('/')) return getToken();
                throw new LexerException("nested comments are not permitted");
            }
            if (c == -1) throw new LexerException("unterminated comment");
            return getToken();  // `goto retry'
        default: throw new LexerException("illegal character: \'" + ((char)c) + "\'");
    }
}
/**
 * Always throws a LexerException carrying the given message. It is declared to return
 * int only so that a call can appear as an operand of a conditional expression.
 */
private int le(String s) throws LexerException {
    throw new LexerException(s);
}
// SmartReader ////////////////////////////////////////////////////////////////
/**
 * A Reader wrapper that tracks line and column numbers (in the enclosing Lexer's line,
 * parserLine and col fields), supports one character of pushback, and can accumulate
 * the characters read since startString() so a token's text can be retrieved.
 */
private class SmartReader {
    PushbackReader reader = null;
    /** the value returned by the most recent read(): a character, or -1 at end of input */
    int lastread = -1;
    // FEATURE: could be much more efficient
    /** characters collected since startString(); null when not accumulating */
    StringBuffer accumulator = null;

    public SmartReader(Reader r) { reader = new PushbackReader(r); }

    /** push back the character returned by the most recent read() */
    public void unread() throws IOException {
        if (lastread == -1) {
            // End of input is not a character: pushing (char)-1 back would make the next
            // read() return 0xFFFF instead of -1. Only undo the bookkeeping read() did.
            col--;
            if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
            return;
        }
        unread((char)lastread);
    }

    /** push back the character c, undoing read()'s column and accumulator bookkeeping */
    public void unread(char c) throws IOException {
        reader.unread(c);
        // col == -1 tells read() that the coming newline has already been counted
        if (c == '\n') col = -1;
        // read() does not advance col for a carriage return, so there is nothing to undo
        else if (c != '\r') col--;
        if (accumulator != null) accumulator.setLength(accumulator.length() - 1);
    }

    /**
     * Consumes the next character if, and only if, it equals c.
     * The consumed character is not added to the accumulator and does not update lastread.
     * @return true if the character was consumed
     */
    public boolean match(char c) throws IOException {
        if (peek() != c) return false;
        reader.read();
        col++; // keep the column in step with the consumed character
        return true;
    }

    /** @return the next character without consuming it, or -1 at end of input */
    public int peek() throws IOException {
        int peeked = reader.read();
        if (peeked != -1) reader.unread((char)peeked);
        return peeked;
    }

    /** @return the next character, or -1 at end of input; updates line, parserLine and col */
    public int read() throws IOException {
        lastread = reader.read();
        if (accumulator != null) accumulator.append((char)lastread);
        if (lastread != '\n' && lastread != '\r') col++;
        if (lastread == '\n') {
            // col is -1 if we just unread a newline, this is sort of ugly
            if (col != -1) parserLine = ++line;
            col = 0;
        }
        return lastread;
    }

    /** start accumulating, beginning with the character returned by the most recent read() */
    public void startString() {
        accumulator = new StringBuffer();
        accumulator.append((char)lastread);
    }

    /** @return the accumulated text (interned); accumulation stops */
    public String getString() throws IOException {
        String ret = accumulator.toString().intern();
        accumulator = null;
        return ret;
    }
}
// Token PushBack code ////////////////////////////////////////////////////////////
/** number of tokens currently on the pushback stack */
private int pushBackDepth = 0;
/** token codes of the pushed-back tokens; slot pushBackDepth-1 is the top of the stack */
private int[] pushBackInts = new int[10];
/** the Number or String value (or null) that accompanies each pushed-back token */
private Object[] pushBackObjects = new Object[10];

/**
 * Push back a token so that the next getToken() returns it again.
 * @param op  the token code
 * @param obj the token's Number or String value, or null if it has none
 */
public final void pushBackToken(int op, Object obj) {
    final int capacity = pushBackInts.length;
    if (pushBackDepth >= capacity - 1) {
        // double both stacks, keeping their existing contents
        int[] grownInts = new int[capacity * 2];
        System.arraycopy(pushBackInts, 0, grownInts, 0, capacity);
        pushBackInts = grownInts;

        final int objectCapacity = pushBackObjects.length;
        Object[] grownObjects = new Object[objectCapacity * 2];
        System.arraycopy(pushBackObjects, 0, grownObjects, 0, objectCapacity);
        pushBackObjects = grownObjects;
    }
    final int slot = pushBackDepth;
    pushBackInts[slot] = op;
    pushBackObjects[slot] = obj;
    pushBackDepth = slot + 1;
}
/** push back the current token (op) together with its number value if it has one, otherwise its string value */
public final void pushBackToken() {
    Object value = string;
    if (number != null) value = number;
    pushBackToken(op, value);
}
/**
 * Read a token but leave it in the stream: the token is fetched with getToken() and
 * immediately pushed back, so the following getToken() returns it again.
 * @return the token code of the next token
 */
public final int peekToken() throws IOException {
    final int next = getToken();
    pushBackToken();
    return next;
}
/**
 * Read a token. A pushed-back token, if any, is replayed first (restoring its number or
 * string value); otherwise a fresh token is lexed from the input, after the previous
 * value of op has been saved in mostRecentlyReadToken.
 * @return the token code, which is also stored in op
 */
public final int getToken() throws IOException {
    number = null;
    string = null;

    if (pushBackDepth > 0) {
        final int slot = --pushBackDepth;
        final Object value = pushBackObjects[slot];
        op = pushBackInts[slot];
        if (value instanceof Number) number = (Number)value;
        else if (value instanceof String) string = (String)value;
        return op;
    }

    mostRecentlyReadToken = op;
    op = _getToken();
    return op;
}
/** an IOException whose message is prefixed with the lexer's source name, current line and column */
class LexerException extends IOException {
    public LexerException(String s) {
        super(sourceName + ":" + line + "," + col + ": " + s);
    }
}
}