package au.com.cellmaster.gwt.server;
import java.util.ArrayList;
import java.util.Stack;
/**
 * Tokenizer for Excel-style formula strings.
 *
 * <p>An instance wraps one formula string. {@link #getTokens()} strips leading
 * spaces and one leading {@code =}, scans the remainder character by character
 * into a flat {@link TokenList}, and then post-processes that list (white-space
 * handling, prefix/infix disambiguation, operand and operator sub-typing).
 *
 * <p>The scan offset is an instance field that is only ever advanced, so an
 * instance is effectively single-use: a second call to {@link #getTokens()}
 * finds the offset already at the end of the formula.
 */
public class ParseFormula {

    // ---- token types ------------------------------------------------------
    public static final String TOK_TYPE_NOOP = "noop";
    public static final String TOK_TYPE_OPERAND = "operand";
    public static final String TOK_TYPE_FUNCTION = "function";
    public static final String TOK_TYPE_SUBEXPR = "subexpression";
    public static final String TOK_TYPE_ARGUMENT = "argument";
    public static final String TOK_TYPE_OP_PRE = "operator-prefix";
    public static final String TOK_TYPE_OP_IN = "operator-infix";
    public static final String TOK_TYPE_OP_POST = "operator-postfix";
    public static final String TOK_TYPE_WSPACE = "white-space";
    public static final String TOK_TYPE_UNKNOWN = "unknown";

    // ---- token subtypes: function / subexpression boundaries ---------------
    public static final String TOK_SUBTYPE_START = "start";
    public static final String TOK_SUBTYPE_STOP = "stop";

    // ---- token subtypes: operands ------------------------------------------
    public static final String TOK_SUBTYPE_TEXT = "text";
    public static final String TOK_SUBTYPE_NUMBER = "number";
    public static final String TOK_SUBTYPE_LOGICAL = "logical";
    public static final String TOK_SUBTYPE_ERROR = "error";
    public static final String TOK_SUBTYPE_RANGE = "range";

    // ---- token subtypes: infix operators -----------------------------------
    public static final String TOK_SUBTYPE_MATH = "math";
    public static final String TOK_SUBTYPE_CONCAT = "concatenate";
    public static final String TOK_SUBTYPE_INTERSECT = "intersect";
    public static final String TOK_SUBTYPE_UNION = "union";

    /** Comma-delimited list of the error literals that terminate an error token. */
    private static final String ERROR_LITERALS = ",#NULL!,#DIV/0!,#VALUE!,#REF!,#NAME?,#NUM!,#N/A,";

    /** Matches an accumulated mantissa ending in {@code E}, i.e. a number awaiting its exponent sign. */
    private static final String SCIENTIFIC_PREFIX_REGEX = "^[1-9]{1}(\\.[0-9]+)?E{1}$";

    /** The formula text being scanned; leading spaces and one {@code =} are removed by {@link #getTokens()}. */
    private String formula;

    /** Index into {@link #formula} of the character currently being examined. */
    private int offset = 0;

    /**
     * Creates a tokenizer for the given formula.
     *
     * @param formula the formula text, with or without a leading {@code =};
     *                must not be {@code null} ({@link #getTokens()} dereferences it)
     */
    public ParseFormula(String formula) {
        this.formula = formula;
    }

    /** A single token: its text plus a type and subtype drawn from the {@code TOK_*} constants. */
    public class FormulaToken {
        public String value;
        public String type;
        public String subtype;

        /**
         * @param value   the token text
         * @param type    one of the {@code TOK_TYPE_*} constants
         * @param subtype one of the {@code TOK_SUBTYPE_*} constants, or an empty string
         */
        public FormulaToken(String value, String type, String subtype) {
            this.value = value;
            this.type = type;
            this.subtype = subtype;
        }
    }

    /** An ordered list of tokens with a movable cursor ({@link #index}); the cursor starts at -1. */
    public class TokenList {
        public int index;
        public ArrayList<FormulaToken> items;

        public TokenList() {
            this.items = new ArrayList<FormulaToken>();
            this.index = -1;
        }

        /**
         * Appends a new token with an empty subtype.
         *
         * @return the token that was created
         */
        public FormulaToken add(String value, String type) {
            return add(value, type, null);
        }

        /**
         * Appends a new token; a {@code null} subtype is stored as an empty string.
         *
         * @return the token that was created
         */
        public FormulaToken add(String value, String type, String subtype) {
            if (null == subtype) {
                subtype = "";
            }
            FormulaToken token = new FormulaToken(value, type, subtype);
            this.addRef(token);
            return token;
        }

        /** Appends an existing token object without copying it. */
        public void addRef(FormulaToken token) {
            this.items.add(token);
        }

        /** Moves the cursor back to its initial position, before the first token. */
        public void reset() {
            this.index = -1;
        }

        /** @return {@code true} when the cursor is before or on the first token */
        public boolean BOF() {
            return (this.index <= 0);
        }

        /** @return {@code true} when the cursor is on or past the last token (always true for an empty list) */
        public boolean EOF() {
            return (this.index >= (this.items.size() - 1));
        }

        /**
         * Advances the cursor by one token.
         *
         * @return {@code false} if the cursor was already at the end and did not move
         */
        public boolean moveNext() {
            if (this.EOF()) {
                return false;
            }
            this.index++;
            return true;
        }

        /** @return the token under the cursor, or {@code null} if the cursor has not been advanced yet */
        public FormulaToken current() {
            if (this.index == -1) {
                return null;
            }
            return this.items.get(this.index);
        }

        /** @return the token after the cursor, or {@code null} at the end of the list */
        public FormulaToken next() {
            if (this.EOF()) {
                return null;
            }
            return this.items.get(this.index + 1);
        }

        /** @return the token before the cursor, or {@code null} if there is none */
        public FormulaToken previous() {
            if (this.index < 1) {
                return null;
            }
            return this.items.get(this.index - 1);
        }
    }

    /** Stack of the currently open function / subexpression / array start tokens. */
    public class TokenStack {
        public Stack<FormulaToken> items;

        public TokenStack() {
            this.items = new Stack<FormulaToken>();
        }

        public void push(FormulaToken token) {
            this.items.push(token);
        }

        /**
         * Removes the innermost open token and builds its closing counterpart.
         *
         * @return a new token with an empty value, the popped token's type and subtype {@code stop}
         * @throws java.util.EmptyStackException if nothing is open (raised by {@link Stack#pop()})
         */
        public FormulaToken pop() {
            FormulaToken token = this.items.pop();
            return new FormulaToken("", token.type, TOK_SUBTYPE_STOP);
        }

        /** @return the innermost open token, or {@code null} if the stack is empty */
        public FormulaToken token() {
            return (this.items.size() > 0) ? this.items.peek() : null;
        }

        /** @return the value of the innermost open token, or an empty string if the stack is empty */
        public String value() {
            FormulaToken top = this.token();
            return (top != null) ? top.value : "";
        }

        /** @return the type of the innermost open token, or an empty string if the stack is empty */
        public String type() {
            FormulaToken top = this.token();
            return (top != null) ? top.type : "";
        }

        /** @return the subtype of the innermost open token, or an empty string if the stack is empty */
        public String subtype() {
            FormulaToken top = this.token();
            return (top != null) ? top.subtype : "";
        }
    }

    /** @return the character at the scan offset; callers must check {@link #EOF()} first */
    private String currentChar() {
        return formula.substring(offset, offset + 1);
    }

    /** @return the two characters starting at the scan offset, or just the last character when only one remains */
    private String doubleChar() {
        if (offset + 1 == formula.length()) {
            return currentChar();
        }
        return formula.substring(offset, offset + 2);
    }

    /** @return the character following the scan offset, or an empty string when there is none */
    private String nextChar() {
        int next = offset + 1;
        // The end index must be next + 1; an empty range would always yield ""
        // and doubled quotes inside strings/paths would never be detected.
        return (next < formula.length()) ? formula.substring(next, next + 1) : "";
    }

    /** @return {@code true} once the scan offset has reached the end of the formula */
    private boolean EOF() {
        return (offset >= formula.length());
    }

    /**
     * Tokenizes the formula.
     *
     * <p>Steps: strip leading spaces and one leading {@code =}; scan characters into
     * raw tokens; turn white-space between two values into an intersect operator and
     * drop all other white-space; classify operators and operands; remove no-ops.
     *
     * @return the resulting tokens, with the list cursor reset to before the first token
     * @throws java.util.EmptyStackException if a {@code )}, {@code &#125;} or {@code ;}
     *         is met while no function, subexpression or array is open
     */
    public TokenList getTokens() {
        stripFormulaPrefix();
        TokenList scanned = scan();
        TokenList classified = resolveWhitespace(scanned);
        classify(classified);
        classified.reset();
        TokenList result = dropNoops(classified);
        result.reset();
        return result;
    }

    /** Removes leading spaces and then at most one leading {@code =} from the formula field. */
    private void stripFormulaPrefix() {
        while (formula.length() > 0) {
            if (formula.startsWith(" ")) {
                formula = formula.substring(1);
            } else {
                if (formula.startsWith("=")) {
                    formula = formula.substring(1);
                }
                break;
            }
        }
    }

    /** Flushes the accumulated text, if any, as a token of the given type with an empty subtype. */
    private static void flushPending(TokenList tokens, StringBuilder pending, String type) {
        if (pending.length() > 0) {
            tokens.add(pending.toString(), type);
            pending.setLength(0);
        }
    }

    /** Scans the formula from the current offset to its end and returns the raw token list. */
    private TokenList scan() {
        TokenList tokens = new TokenList();
        TokenStack tokenStack = new TokenStack();
        StringBuilder pending = new StringBuilder();

        boolean inString = false;
        boolean inPath = false;
        boolean inRange = false;
        boolean inError = false;

        while (!EOF()) {
            String ch = currentChar();

            // ---- state-dependent character evaluation (order is important) ----

            // double-quoted strings: embedded quotes are doubled, the closing quote ends the token
            if (inString) {
                if (ch.equals("\"")) {
                    if (nextChar().equals("\"")) {
                        pending.append("\"");
                        offset += 1;
                    } else {
                        inString = false;
                        // added unconditionally so that an empty string literal still yields a token
                        tokens.add(pending.toString(), TOK_TYPE_OPERAND, TOK_SUBTYPE_TEXT);
                        pending.setLength(0);
                    }
                } else {
                    pending.append(ch);
                }
                offset += 1;
                continue;
            }

            // single-quoted strings (links): embedded quotes are doubled, the end does not mark a token
            if (inPath) {
                if (ch.equals("'")) {
                    if (nextChar().equals("'")) {
                        pending.append("'");
                        offset += 1;
                    } else {
                        inPath = false;
                    }
                } else {
                    pending.append(ch);
                }
                offset += 1;
                continue;
            }

            // bracketed strings (range offset or linked workbook name): no embeds, the end does not mark a token
            if (inRange) {
                if (ch.equals("]")) {
                    inRange = false;
                }
                pending.append(ch);
                offset += 1;
                continue;
            }

            // error values: the token ends as soon as it equals one of the known error literals
            if (inError) {
                pending.append(ch);
                offset += 1;
                if (ERROR_LITERALS.indexOf("," + pending + ",") != -1) {
                    inError = false;
                    tokens.add(pending.toString(), TOK_TYPE_OPERAND, TOK_SUBTYPE_ERROR);
                    pending.setLength(0);
                }
                continue;
            }

            // scientific notation: a sign directly after "<mantissa>E" belongs to the number
            if (("+-").indexOf(ch) != -1
                    && pending.length() > 1
                    && pending.toString().matches(SCIENTIFIC_PREFIX_REGEX)) {
                pending.append(ch);
                offset += 1;
                continue;
            }

            // ---- independent character evaluation (order not important) ----

            // establish state-dependent character evaluations
            if (ch.equals("\"")) {
                flushPending(tokens, pending, TOK_TYPE_UNKNOWN); // text before a quote is not expected
                inString = true;
                offset += 1;
                continue;
            }

            if (ch.equals("'")) {
                flushPending(tokens, pending, TOK_TYPE_UNKNOWN); // not expected
                inPath = true;
                offset += 1;
                continue;
            }

            if (ch.equals("[")) {
                inRange = true;
                pending.append(ch);
                offset += 1;
                continue;
            }

            if (ch.equals("#")) {
                flushPending(tokens, pending, TOK_TYPE_UNKNOWN); // not expected
                inError = true;
                pending.append(ch);
                offset += 1;
                continue;
            }

            // mark start and end of arrays and array rows
            if (ch.equals("{")) {
                flushPending(tokens, pending, TOK_TYPE_UNKNOWN); // not expected
                tokenStack.push(tokens.add("ARRAY", TOK_TYPE_FUNCTION, TOK_SUBTYPE_START));
                tokenStack.push(tokens.add("ARRAYROW", TOK_TYPE_FUNCTION, TOK_SUBTYPE_START));
                offset += 1;
                continue;
            }

            if (ch.equals(";")) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                // close the current row, separate it from the next one, open the next row
                tokens.addRef(tokenStack.pop());
                tokens.add(",", TOK_TYPE_ARGUMENT);
                tokenStack.push(tokens.add("ARRAYROW", TOK_TYPE_FUNCTION, TOK_SUBTYPE_START));
                offset += 1;
                continue;
            }

            if (ch.equals("}")) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                tokens.addRef(tokenStack.pop()); // closes ARRAYROW
                tokens.addRef(tokenStack.pop()); // closes ARRAY
                offset += 1;
                continue;
            }

            // collapse a run of spaces into a single white-space token
            if (ch.equals(" ")) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                tokens.add("", TOK_TYPE_WSPACE);
                offset += 1;
                // EOF must be tested first: currentChar() is out of range at the end of the formula
                while (!EOF() && currentChar().equals(" ")) {
                    offset += 1;
                }
                continue;
            }

            // multi-character comparators
            String pair = doubleChar();
            if ((",>=,<=,<>,").indexOf("," + pair + ",") != -1) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                tokens.add(pair, TOK_TYPE_OP_IN, TOK_SUBTYPE_LOGICAL);
                offset += 2;
                continue;
            }

            // standard infix operators
            if (("+-*/^&=><").indexOf(ch) != -1) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                tokens.add(ch, TOK_TYPE_OP_IN);
                offset += 1;
                continue;
            }

            // standard postfix operators
            if (("%").indexOf(ch) != -1) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                tokens.add(ch, TOK_TYPE_OP_POST);
                offset += 1;
                continue;
            }

            // start subexpression or function: preceding text is the function name
            if (ch.equals("(")) {
                if (pending.length() > 0) {
                    tokenStack.push(tokens.add(pending.toString(), TOK_TYPE_FUNCTION, TOK_SUBTYPE_START));
                    pending.setLength(0);
                } else {
                    tokenStack.push(tokens.add("", TOK_TYPE_SUBEXPR, TOK_SUBTYPE_START));
                }
                offset += 1;
                continue;
            }

            // function, subexpression, array parameters: a comma is an argument
            // separator inside a function and a union operator everywhere else
            if (ch.equals(",")) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                if (TOK_TYPE_FUNCTION.equals(tokenStack.type())) {
                    tokens.add(ch, TOK_TYPE_ARGUMENT);
                } else {
                    tokens.add(ch, TOK_TYPE_OP_IN, TOK_SUBTYPE_UNION);
                }
                offset += 1;
                continue;
            }

            // stop subexpression or function
            if (ch.equals(")")) {
                flushPending(tokens, pending, TOK_TYPE_OPERAND);
                tokens.addRef(tokenStack.pop());
                offset += 1;
                continue;
            }

            // token accumulation
            pending.append(ch);
            offset += 1;
        }

        // dump remaining accumulation
        flushPending(tokens, pending, TOK_TYPE_OPERAND);
        return tokens;
    }

    /** @return {@code true} if the token has the given type */
    private static boolean isType(FormulaToken token, String type) {
        return type.equals(token.type);
    }

    /** @return {@code true} if the token has the given type and subtype */
    private static boolean isType(FormulaToken token, String type, String subtype) {
        return type.equals(token.type) && subtype.equals(token.subtype);
    }

    /** @return {@code true} if the token ends a value: a closing function/subexpression or an operand */
    private static boolean closesValue(FormulaToken token) {
        return isType(token, TOK_TYPE_FUNCTION, TOK_SUBTYPE_STOP)
                || isType(token, TOK_TYPE_SUBEXPR, TOK_SUBTYPE_STOP)
                || isType(token, TOK_TYPE_OPERAND);
    }

    /** @return {@code true} if the token begins a value: an opening function/subexpression or an operand */
    private static boolean opensValue(FormulaToken token) {
        return isType(token, TOK_TYPE_FUNCTION, TOK_SUBTYPE_START)
                || isType(token, TOK_TYPE_SUBEXPR, TOK_SUBTYPE_START)
                || isType(token, TOK_TYPE_OPERAND);
    }

    /**
     * Copies the tokens to a new list. A white-space token that sits between the end of
     * one value and the start of another becomes an intersect operator; every other
     * white-space token is dropped.
     */
    private TokenList resolveWhitespace(TokenList tokens) {
        TokenList result = new TokenList();
        while (tokens.moveNext()) {
            FormulaToken tok = tokens.current();
            if (!isType(tok, TOK_TYPE_WSPACE)) {
                result.addRef(tok);
                continue;
            }
            boolean betweenValues = !tokens.BOF()
                    && !tokens.EOF()
                    && closesValue(tokens.previous())
                    && opensValue(tokens.next());
            if (betweenValues) {
                result.add(tok.value, TOK_TYPE_OP_IN, TOK_SUBTYPE_INTERSECT);
            }
        }
        return result;
    }

    /**
     * @return {@code true} if the token under the cursor has a value directly before it,
     *         i.e. a sign at this position is a binary operator rather than a unary one
     */
    private static boolean followsValue(TokenList tokens) {
        if (tokens.BOF()) {
            return false;
        }
        FormulaToken before = tokens.previous();
        return closesValue(before) || isType(before, TOK_TYPE_OP_POST);
    }

    /** @return {@code true} if {@link Float#parseFloat(String)} accepts the text */
    private static boolean isNumeric(String text) {
        try {
            Float.parseFloat(text);
            return true;
        } catch (NumberFormatException notANumber) {
            return false;
        }
    }

    /**
     * Refines the tokens in place: switches infix "-" to prefix and infix "+" to no-op
     * when no value precedes them, assigns infix-operator and operand subtypes, and
     * removes a leading "@" from function names.
     */
    private void classify(TokenList tokens) {
        while (tokens.moveNext()) {
            FormulaToken tok = tokens.current();
            boolean infix = isType(tok, TOK_TYPE_OP_IN);

            if (infix && tok.value.equals("-")) {
                if (followsValue(tokens)) {
                    tok.subtype = TOK_SUBTYPE_MATH;
                } else {
                    tok.type = TOK_TYPE_OP_PRE;
                }
                continue;
            }

            if (infix && tok.value.equals("+")) {
                if (followsValue(tokens)) {
                    tok.subtype = TOK_SUBTYPE_MATH;
                } else {
                    tok.type = TOK_TYPE_NOOP; // a unary plus carries no meaning
                }
                continue;
            }

            if (infix && tok.subtype.length() == 0) {
                if (("<>=").indexOf(tok.value.substring(0, 1)) != -1) {
                    tok.subtype = TOK_SUBTYPE_LOGICAL;
                } else if (tok.value.equals("&")) {
                    tok.subtype = TOK_SUBTYPE_CONCAT;
                } else {
                    tok.subtype = TOK_SUBTYPE_MATH;
                }
                continue;
            }

            if (isType(tok, TOK_TYPE_OPERAND) && tok.subtype.length() == 0) {
                if (isNumeric(tok.value)) {
                    tok.subtype = TOK_SUBTYPE_NUMBER;
                } else if (tok.value.equalsIgnoreCase("TRUE") || tok.value.equalsIgnoreCase("FALSE")) {
                    tok.subtype = TOK_SUBTYPE_LOGICAL;
                } else {
                    tok.subtype = TOK_SUBTYPE_RANGE;
                }
                continue;
            }

            if (isType(tok, TOK_TYPE_FUNCTION) && tok.value.startsWith("@")) {
                tok.value = tok.value.substring(1);
            }
        }
    }

    /** Copies every token that is not a no-op to a new list. */
    private TokenList dropNoops(TokenList tokens) {
        TokenList result = new TokenList();
        while (tokens.moveNext()) {
            FormulaToken tok = tokens.current();
            if (!isType(tok, TOK_TYPE_NOOP)) {
                result.addRef(tok);
            }
        }
        return result;
    }
}