001 package sexp;
002
import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
006
007 /**
008 * SExpParser is a parser for s-expressions. Its sole public method takes a string
009 * and produces the corresponding SExp.
010 */
011 public class SExpParser {
012
013 private static final boolean debug = false;
014
015 /**
016 * Parse a string into a SExp.
017 * @requires s!= null && s is a well-formed s-expression
018 * @return s-expression corresponding to s
019 */
020 public SExp parse(String s) throws SExpParseException {
021 if (s == null)
022 throw new IllegalArgumentException("String cannot be null.");
023 StreamTokenizer st = new StreamTokenizer(new StringReader(s));
024 st.resetSyntax();
025 st.wordChars(0, Character.MAX_VALUE);
026 st.whitespaceChars(' ', ' ');
027 st.whitespaceChars('\r', '\r');
028 st.whitespaceChars('\n', '\n');
029 st.whitespaceChars('\t', '\t');
030 st.ordinaryChar('(');
031 st.ordinaryChar(')');
032 st.quoteChar('"');
033 st.eolIsSignificant(false);
034 st.lowerCaseMode(true);
035 try {
036 SExp sexp = parseExpression(st);
037 ensureEndOfExpression(st);
038 return sexp;
039 } catch (IOException e) {
040 throw new SExpParseException(e);
041 }
042 }
043
044 private SExp parseExpression(StreamTokenizer st) throws IOException, SExpParseException {
045 int ttype = st.nextToken();
046
047 switch (ttype) {
048 case '(':
049 if (debug) System.err.println("OPENPAREN");
050 return parseList(st);
051 case StreamTokenizer.TT_WORD:
052 if (debug) System.err.println("WORD: " + st.sval);
053 return new SSymbol(st.sval);
054 case '"':
055 if (debug) System.err.println("STRING: " + st.sval);
056 return new SString(st.sval);
057 case ')':
058 if (debug) System.err.println("CLOSEPAREN");
059 throw new SExpParseException("too many right parentheses");
060 case StreamTokenizer.TT_EOF:
061 throw new SExpParseException("missing right parenthesis");
062 default:
063 throw new SExpParseException("unexpected character: " + (char)ttype);
064 }
065 }
066
067 private SList parseList(StreamTokenizer st) throws IOException, SExpParseException {
068 if (st.nextToken() == ')') {
069 // end of list
070 return new SEmpty();
071 } else {
072 st.pushBack(); // put the token back on the stream
073 SExp first = parseExpression(st);
074 SList rest = parseList(st);
075 return new SNonEmpty(first, rest);
076 }
077 }
078
079 private void ensureEndOfExpression(StreamTokenizer st) throws IOException, SExpParseException {
080 int ttype = st.nextToken();
081
082 switch (ttype) {
083 case StreamTokenizer.TT_EOF:
084 return; // OK, this is what we want
085 case StreamTokenizer.TT_WORD:
086 throw new SExpParseException("extra token after expression: " + st.sval);
087 default:
088 throw new SExpParseException("extra character after expression: " + (char)ttype);
089 }
090 }
091 }