// Derived from org.mozilla.javascript.TokenStream [NPL] /** * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. * * Contributor(s): Roger Lawrence, Mike McCabe */ package org.ibex.js; import java.io.*; /** Lexes a stream of characters into a stream of Tokens */ class Lexer implements Tokens { /** for debugging */ public static void main(String[] s) throws IOException { Lexer l = new Lexer(new InputStreamReader(System.in), "stdin", 0); int tok = 0; while((tok = l.getToken()) != -1) System.out.println(codeToString[tok]); } /** the token that was just parsed */ protected int op; /** the most recently parsed token, regardless of pushbacks */ protected int mostRecentlyReadToken; /** if the token just parsed was a NUMBER, this is the numeric value */ protected Number number = null; /** if the token just parsed was a NAME or STRING, this is the string value */ protected String string = null; /** the line number of the most recently lexed token */ protected int line = 0; /** the line number of the most recently parsed token */ protected int parserLine = 0; /** the column number of the current token */ protected int col = 0; /** the name of the source code file being lexed */ protected String sourceName; private SmartReader in; public Lexer(Reader r, String sourceName, int line) throws IOException { this.sourceName = sourceName; this.line = line; this.parserLine = line; in = new SmartReader(r); } // Predicates /////////////////////////////////////////////////////////////////////// private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } private static boolean isDigit(int c) { return (c >= '0' && c <= '9'); } private static int xDigitToInt(int c) { if ('0' <= c && c <= '9') return c - '0'; else if ('a' <= c && c <= 'f') return c - ('a' - 10); else if ('A' <= c && c <= 'F') return c - ('A' - 10); else return -1; } // Token Subtype Handlers ///////////////////////////////////////////////////////// private int getKeyword(String name) throws IOException { //#switch(name) case "if": return IF; case "lt": return LT; case "gt": return GT; case "in": return IN; case "do": return DO; case "and": return AND; case "or": return OR; case "for": return FOR; case "int": return RESERVED; case "new": return RESERVED; case "try": return TRY; case "var": return VAR; case "byte": return RESERVED; case "case": return CASE; case "char": return RESERVED; case "else": return ELSE; case "enum": return RESERVED; case "goto": return RESERVED; case "long": return RESERVED; case "null": return NULL; case "true": return TRUE; case "with": return RESERVED; case "void": return RESERVED; case "class": return RESERVED; case "break": return BREAK; case "while": return WHILE; case "false": return FALSE; case "const": return RESERVED; case "final": return RESERVED; case "super": return RESERVED; case "throw": return THROW; case "catch": return CATCH; case "class": return RESERVED; case "delete": return RESERVED; case "return": return RETURN; case "throws": return RESERVED; case "double": return RESERVED; case "assert": return ASSERT; case "public": return RESERVED; case "switch": return SWITCH; case "typeof": return TYPEOF; case "package": return RESERVED; case "default": return DEFAULT; case "finally": return FINALLY; case "boolean": return RESERVED; case "private": return RESERVED; case "extends": return RESERVED; case "abstract": return RESERVED; case "continue": return CONTINUE; case "debugger": return RESERVED; case "function": return FUNCTION; case "volatile": return RESERVED; case "interface": return RESERVED; case "protected": return RESERVED; case "transient": return RESERVED; case "implements": return RESERVED; case "instanceof": return RESERVED; case "synchronized": return RESERVED; case "cascade": return CASCADE; //#end return -1; } private int getIdentifier(int c) throws IOException { in.startString(); while (Character.isJavaIdentifierPart((char)(c = in.read()))); in.unread(); String str = in.getString(); int result = getKeyword(str); if (result == RESERVED) throw new LexerException("The reserved word \"" + str + "\" is not permitted in Ibex scripts"); if (result != -1) return result; this.string = str.intern(); return NAME; } private int getNumber(int c) throws IOException { int base = 10; in.startString(); double dval = Double.NaN; long longval = 0; boolean isInteger = true; // figure out what base we're using if (c == '0') { if (Character.toLowerCase((char)(c = in.read())) == 'x') { base = 16; in.startString(); } else if (isDigit(c)) base = 8; } while (0 <= xDigitToInt(c) && !(base < 16 && isAlpha(c))) c = in.read(); if (base == 10 && (c == '.' || c == 'e' || c == 'E')) { isInteger = false; if (c == '.') do { c = in.read(); } while (isDigit(c)); if (c == 'e' || c == 'E') { c = in.read(); if (c == '+' || c == '-') c = in.read(); if (!isDigit(c)) throw new LexerException("float listeral did not have an exponent value"); do { c = in.read(); } while (isDigit(c)); } } in.unread(); String numString = in.getString(); if (base == 10 && !isInteger) { try { dval = (Double.valueOf(numString)).doubleValue(); } catch (NumberFormatException ex) { throw new LexerException("invalid numeric literal: \"" + numString + "\""); } } else { if (isInteger) { longval = Long.parseLong(numString, base); dval = (double)longval; } else { dval = Double.parseDouble(numString); longval = (long) dval; if (longval == dval) isInteger = true; } } if (!isInteger) this.number = new Double(dval); else if(longval >= Integer.MIN_VALUE && longval <= Integer.MAX_VALUE) this.number = new Integer((int)longval); else this.number = new Long(longval); return NUMBER; } private int getString(int c) throws IOException { StringBuffer stringBuf = null; int quoteChar = c; c = in.read(); in.startString(); // start after the first " while(c != quoteChar) { if (c == '\n' || c == -1) throw new LexerException("unterminated string literal"); if (c == '\\') { if (stringBuf == null) { in.unread(); // Don't include the backslash stringBuf = new StringBuffer(in.getString()); in.read(); } switch (c = in.read()) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\u000B'; break; case '\\': c = '\\'; break; case 'u': { int v = 0; for(int i=0; i<4; i++) { int ci = in.read(); if (!((ci >= '0' && ci <= '9') || (ci >= 'a' && ci <= 'f') || (ci >= 'A' && ci <= 'F'))) throw new LexerException("illegal character '" + ((char)c) + "' in \\u unicode escape sequence"); v = (v << 8) | Integer.parseInt(ci + "", 16); } c = (char)v; break; } default: // just use the character that was escaped break; } } if (stringBuf != null) stringBuf.append((char) c); c = in.read(); } if (stringBuf != null) this.string = stringBuf.toString().intern(); else { in.unread(); // miss the trailing " this.string = in.getString().intern(); in.read(); } return STRING; } private int _getToken() throws IOException { int c; do { c = in.read(); } while (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\n' ); if (c == -1) return -1; if (c == '\\' || Character.isJavaIdentifierStart((char)c)) return getIdentifier(c); if (isDigit(c) || (c == '.' && isDigit(in.peek()))) return getNumber(c); if (c == '"' || c == '\'') return getString(c); switch (c) { case ';': return SEMI; case '[': return LB; case ']': return RB; case '{': return LC; case '}': return RC; case '(': return LP; case ')': return RP; case ',': return COMMA; case '?': return HOOK; case ':': return !in.match(':') ? COLON : in.match('=') ? GRAMMAR : le(":: is not a valid token"); case '.': return DOT; case '|': return in.match('|') ? OR : (in.match('=') ? ASSIGN_BITOR : BITOR); case '^': return in.match('=') ? ASSIGN_BITXOR : BITXOR; case '&': return in.match('&') ? AND : in.match('=') ? ASSIGN_BITAND : BITAND; case '=': return !in.match('=') ? ASSIGN : in.match('=') ? SHEQ : EQ; case '!': return !in.match('=') ? BANG : in.match('=') ? SHNE : NE; case '%': return in.match('=') ? ASSIGN_MOD : MOD; case '~': return BITNOT; case '+': return in.match('=') ? ASSIGN_ADD : in.match('+') ? (in.match('=') ? ADD_TRAP : INC) : ADD; case '-': return in.match('=') ? ASSIGN_SUB: in.match('-') ? (in.match('=') ? DEL_TRAP : DEC) : SUB; case '*': return in.match('=') ? ASSIGN_MUL : MUL; case '<': return !in.match('<') ? (in.match('=') ? LE : LT) : in.match('=') ? ASSIGN_LSH : LSH; case '>': return !in.match('>') ? (in.match('=') ? GE : GT) : in.match('>') ? (in.match('=') ? ASSIGN_URSH : URSH) : (in.match('=') ? ASSIGN_RSH : RSH); case '/': if (in.match('=')) return ASSIGN_DIV; if (in.match('/')) { while ((c = in.read()) != -1 && c != '\n'); in.unread(); return getToken(); } if (!in.match('*')) return DIV; while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) { if (c == '\n' || c != '/' || !in.match('*')) continue; if (in.match('/')) return getToken(); throw new LexerException("nested comments are not permitted"); } if (c == -1) throw new LexerException("unterminated comment"); return getToken(); // `goto retry' default: throw new LexerException("illegal character: \'" + ((char)c) + "\'"); } } private int le(String s) throws LexerException { if (true) throw new LexerException(s); return 0; } // SmartReader //////////////////////////////////////////////////////////////// /** a Reader that tracks line numbers and can push back tokens */ private class SmartReader { PushbackReader reader = null; int lastread = -1; public SmartReader(Reader r) { reader = new PushbackReader(r); } public void unread() throws IOException { unread((char)lastread); } public void unread(char c) throws IOException { reader.unread(c); if(c == '\n') col = -1; else col--; if (accumulator != null) accumulator.setLength(accumulator.length() - 1); } public boolean match(char c) throws IOException { if (peek() == c) { reader.read(); return true; } else return false; } public int peek() throws IOException { int peeked = reader.read(); if (peeked != -1) reader.unread((char)peeked); return peeked; } public int read() throws IOException { lastread = reader.read(); if (accumulator != null) accumulator.append((char)lastread); if (lastread != '\n' && lastread != '\r') col++; if (lastread == '\n') { // col is -1 if we just unread a newline, this is sort of ugly if (col != -1) parserLine = ++line; col = 0; } return lastread; } // FEATURE: could be much more efficient StringBuffer accumulator = null; public void startString() { accumulator = new StringBuffer(); accumulator.append((char)lastread); } public String getString() throws IOException { String ret = accumulator.toString().intern(); accumulator = null; return ret; } } // Token PushBack code //////////////////////////////////////////////////////////// private int pushBackDepth = 0; private int[] pushBackInts = new int[10]; private Object[] pushBackObjects = new Object[10]; /** push back a token */ public final void pushBackToken(int op, Object obj) { if (pushBackDepth >= pushBackInts.length - 1) { int[] newInts = new int[pushBackInts.length * 2]; System.arraycopy(pushBackInts, 0, newInts, 0, pushBackInts.length); pushBackInts = newInts; Object[] newObjects = new Object[pushBackObjects.length * 2]; System.arraycopy(pushBackObjects, 0, newObjects, 0, pushBackObjects.length); pushBackObjects = newObjects; } pushBackInts[pushBackDepth] = op; pushBackObjects[pushBackDepth] = obj; pushBackDepth++; } /** push back the most recently read token */ public final void pushBackToken() { pushBackToken(op, number != null ? (Object)number : (Object)string); } /** read a token but leave it in the stream */ public final int peekToken() throws IOException { int ret = getToken(); pushBackToken(); return ret; } /** read a token */ public final int getToken() throws IOException { number = null; string = null; if (pushBackDepth == 0) { mostRecentlyReadToken = op; return op = _getToken(); } pushBackDepth--; op = pushBackInts[pushBackDepth]; if (pushBackObjects[pushBackDepth] != null) { number = pushBackObjects[pushBackDepth] instanceof Number ? (Number)pushBackObjects[pushBackDepth] : null; string = pushBackObjects[pushBackDepth] instanceof String ? (String)pushBackObjects[pushBackDepth] : null; } return op; } class LexerException extends IOException { public LexerException(String s) { super(sourceName + ":" + line + "," + col + ": " + s); } } }