import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A small regular-expression driven lexer.
 *
 * <p>All token kinds are combined into one alternation in which every alternative is
 * exactly one capturing group; the index of the group that matched is the token kind
 * (see the integer constants below). Whitespace is consumed silently, and a run of
 * consecutive unrecognised characters is reported as a single {@link #ERROR} token
 * carrying only the first offending character.
 */
public class Lexer {

    /** A lexical token: a numeric kind plus the text that was matched. */
    public static class Token {
        /** Token kind; one of the {@code Lexer} kind constants (e.g. {@link Lexer#KEYWORD}). */
        public int tokenNumber;
        /** The matched text, or {@code "<EOF>"} for the end-of-input token. */
        public String tokenValue;

        /**
         * @param tokenNumber token kind (one of the {@code Lexer} kind constants)
         * @param tokenValue  text matched for this token
         */
        public Token(int tokenNumber, String tokenValue) {
            this.tokenNumber = tokenNumber;
            this.tokenValue = tokenValue;
        }
    }

    // Token kinds. Values 1..9 double as capturing-group indices in TOKEN_PATTERN,
    // so their order must match the order of the alternatives there.
    public static final int WHITESPACE = 1; // group 1
    public static final int PUNCTUATION = 2; // group 2 etc.
    public static final int LPAREN = 3;
    public static final int RPAREN = 4;
    public static final int KEYWORD = 5;
    public static final int IDENTIFIER = 6;
    public static final int NUMBER = 7;
    public static final int SEMICOLON = 8;
    public static final int ERROR = 9;
    /** Synthetic kind returned once the input is exhausted; has no capturing group. */
    public static final int EOF = 10;

    private static final String WHITESPACE_RE = "(\\s+)";
    private static final String PUNCTUATION_RE = "((?:[+*/-]|:=))";
    private static final String LPAREN_RE = "(\\()";
    private static final String RPAREN_RE = "(\\))";
    // The negative lookahead stops a keyword from matching a prefix of a longer
    // identifier (e.g. "done" must be one IDENTIFIER, not KEYWORD "do" + "ne").
    private static final String KEYWORD_RE =
            "((?:if|then|else|endif|while|do|endwhile|skip)(?![0-9a-zA-Z]))";
    private static final String IDENTIFIER_RE = "([a-zA-Z][0-9a-zA-Z]*)";
    // Digits only: a ')' inside the class would swallow a closing parenthesis.
    private static final String NUMBER_RE = "([0-9]+)";
    private static final String SEMICOLON_RE = "(;)";
    private static final String ERROR_RE = "(.)"; // must be last and able to capture one character

    /** Combined pattern, compiled once and shared by all lexer instances. */
    private static final Pattern TOKEN_PATTERN = Pattern.compile(String.join("|",
            WHITESPACE_RE, PUNCTUATION_RE, LPAREN_RE, RPAREN_RE, KEYWORD_RE,
            IDENTIFIER_RE, NUMBER_RE, SEMICOLON_RE, ERROR_RE));

    /** Matcher positioned over {@link #text}; each {@code find()} advances one raw match. */
    final Matcher m;
    /** The input being tokenised. */
    final String text;
    /** True while inside a run of error characters whose first character was already reported. */
    boolean skipError;

    /** Demo driver: tokenises a sample string and prints every token up to and including EOF. */
    public static void main(String[] args) {
        // With some error characters "!" thrown in the middle and at the end
        Lexer lexer = new Lexer("tcu else i34 !!!! 2983 ( + +eqdQ!!!!");
        for (;;) {
            Token token = lexer.next();
            System.out.println(token.tokenNumber + ": " + token.tokenValue);
            if (token.tokenNumber == EOF) {
                break;
            }
        }
    }

    /**
     * Creates a lexer over the given input.
     *
     * @param text the text to tokenise; must not be {@code null}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public Lexer(String text) {
        if (text == null) {
            throw new NullPointerException("text");
        }
        this.text = text;
        this.m = TOKEN_PATTERN.matcher(this.text);
        this.skipError = false;
    }

    /**
     * Returns the next significant token.
     *
     * <p>Whitespace is skipped. Of several consecutive error characters only the first
     * is returned; the following ones are dropped until whitespace or a valid token
     * is seen.
     *
     * @return the next token, or a token of kind {@link #EOF} with value
     *         {@code "<EOF>"} when no further match exists
     */
    public Token next() {
        while (m.find()) {
            Token token = currentMatch();
            if (token.tokenNumber == ERROR) {
                if (!skipError) {
                    skipError = true; // we don't want successive errors
                    return token;
                }
            } else {
                skipError = false;
                if (token.tokenNumber != WHITESPACE) {
                    return token;
                }
            }
        }
        return new Token(EOF, "<EOF>");
    }

    /** Builds a token from the capturing group that took part in the matcher's current match. */
    private Token currentMatch() {
        for (int kind = WHITESPACE; kind <= ERROR; kind++) {
            String value = m.group(kind);
            if (value != null) {
                return new Token(kind, value);
            }
        }
        // Unreachable: every alternative of TOKEN_PATTERN is a capturing group.
        throw new IllegalStateException("match without a capturing group at index " + m.start());
    }
}
aW1wb3J0IGphdmEudXRpbC5yZWdleC5NYXRjaGVyOwppbXBvcnQgamF2YS51dGlsLnJlZ2V4LlBhdHRlcm47CgpwdWJsaWMgY2xhc3MgTGV4ZXIgewoKCXB1YmxpYyBzdGF0aWMgY2xhc3MgVG9rZW4KCXsKCQlwdWJsaWMgaW50IHRva2VuTnVtYmVyOwoJCXB1YmxpYyBTdHJpbmcgdG9rZW5WYWx1ZTsKCgkJcHVibGljIFRva2VuKGludCB0b2tlbk51bWJlciwgU3RyaW5nIHRva2VuVmFsdWUpCgkJewoJCQl0aGlzLnRva2VuTnVtYmVyID0gdG9rZW5OdW1iZXI7CgkJCXRoaXMudG9rZW5WYWx1ZSA9IHRva2VuVmFsdWU7CgkJfQoJfQoKICAgIHB1YmxpYyBzdGF0aWMgaW50IFdISVRFU1BBQ0UgPSAxOyAvLyBncm91cCAxCiAgICBwdWJsaWMgc3RhdGljIGludCBQVU5DVFVBVElPTiA9IDI7IC8vIGdyb3VwIDIgZXRjLgogICAgcHVibGljIHN0YXRpYyBpbnQgTFBBUkVOID0gMzsKICAgIHB1YmxpYyBzdGF0aWMgaW50IFJQQVJFTiA9IDQ7CiAgICBwdWJsaWMgc3RhdGljIGludCBLRVlXT1JEID0gNTsKICAgIHB1YmxpYyBzdGF0aWMgaW50IElERU5USUZJRVIgPSA2OwogICAgcHVibGljIHN0YXRpYyBpbnQgTlVNQkVSID0gNzsKICAgIHB1YmxpYyBzdGF0aWMgaW50IFNFTUlDT0xPTiA9IDg7CiAgICBwdWJsaWMgc3RhdGljIGludCBFUlJPUiA9IDk7CiAgICBwdWJsaWMgc3RhdGljIGludCBFT0YgPSAxMDsKCiAgICBNYXRjaGVyIG07CiAgICBTdHJpbmcgdGV4dDsKICAgIGJvb2xlYW4gc2tpcEVycm9yOwoKCiAgICBwdWJsaWMgc3RhdGljIHZvaWQgbWFpbihTdHJpbmdbXSBhcmdzKSB7CiAgICAJTGV4ZXIgbGV4ZXIgPSBuZXcgTGV4ZXIoInRjdSBlbHNlIGkzNCAhISEhIDI5ODMgKCArICtlcWRRISEhISIpOyAvLyBXaXRoIHNvbWUgZXJyb3IgY2hhcmFjdGVycyAiISIgdGhyb3duIGluIHRoZSBtaWRkbGUgYW5kIGF0IHRoZSBlbmQKICAgIAlmb3IoOzspIHsKICAgIAkJVG9rZW4gdG9rZW4gPSBsZXhlci5uZXh0KCk7CiAgICAJCVN5c3RlbS5vdXQucHJpbnRsbih0b2tlbi50b2tlbk51bWJlciArICI6ICIgKyB0b2tlbi50b2tlblZhbHVlKTsKICAgIAkJaWYgKHRva2VuLnRva2VuTnVtYmVyID09IEVPRikKICAgIAkJCWJyZWFrOwogICAgCX0KICAgIH0KCiAgICBwdWJsaWMgTGV4ZXIoU3RyaW5nIHRleHQpCiAgICB7CgogICAgICAgIFN0cmluZyBfV0hJVEVTUEFDRSA9ICIoXFxzKykiOwogICAgICAgIFN0cmluZyBfUFVOQ1RVQVRJT04gPSAiKCg/OlsrKi8tXXw6PSkpIjsKICAgICAgICBTdHJpbmcgX0xQQVJFTiA9ICIoXFwoKSI7CiAgICAgICAgU3RyaW5nIF9SUEFSRU4gPSAiKFxcKSkiOwogICAgICAgIFN0cmluZyBfS0VZV09SRCA9ICIoaWZ8dGhlbnxlbHNlfGVuZGlmfHdoaWxlfGRvfGVuZHdoaWxlfHNraXApIjsKICAgICAgICBTdHJpbmcgX0lERU5USUZJRVIgPSAiKFthLXpBLVpdWzAtOWEtekEtWl0qKSI7CiAgICAgICAgU3RyaW5nIF9OVU1CRVIgPSAiKFswLTkpXSspIjsKICAgICAgICBTdHJpbmcgX1NFTUlDT0xPTiA9ICIo
OykiOwoJCVN0cmluZyBfRVJST1IgPSAiKC4pIjsgLy8gbXVzdCBiZSBsYXN0IGFuZCBhYmxlIHRvIGNhcHR1cmUgb25lIGNoYXJhY3RlcgoKICAgICAgICBTdHJpbmcgcmVnZXggPSBTdHJpbmcuam9pbigifCIsIF9XSElURVNQQUNFLCBfUFVOQ1RVQVRJT04sIF9MUEFSRU4sIF9SUEFSRU4sIF9LRVlXT1JELCBfSURFTlRJRklFUiwgX05VTUJFUiwgX1NFTUlDT0xPTiwgX0VSUk9SKTsKCiAgICAgICAgUGF0dGVybiBwID0gUGF0dGVybi5jb21waWxlKHJlZ2V4KTsKCQl0aGlzLnRleHQgPSB0ZXh0OwogICAgICAgIG0gPSBwLm1hdGNoZXIodGhpcy50ZXh0KTsKCQlza2lwRXJyb3IgPSBmYWxzZTsKICAgIH0KCiAgICBwdWJsaWMgVG9rZW4gbmV4dCgpCiAgICB7CiAgICAgICAgVG9rZW4gdG9rZW4gPSBudWxsOwogICAgCWZvcig7OykgewogICAgICAgICAgICBpZiAoIW0uZmluZCgpKQogICAgICAgICAgICAgICAgcmV0dXJuIG5ldyBUb2tlbihFT0YsICI8RU9GPiIpOwoJCQlmb3IgKGludCB0b2tlbk51bWJlciA9IDE7IHRva2VuTnVtYmVyIDw9IDk7IHRva2VuTnVtYmVyKyspIHsKCQkJCVN0cmluZyB0b2tlblZhbHVlID0gbS5ncm91cCh0b2tlbk51bWJlcik7CgkJCQlpZiAodG9rZW5WYWx1ZSAhPSBudWxsKSB7CgkJCQkJdG9rZW4gPSBuZXcgVG9rZW4odG9rZW5OdW1iZXIsIHRva2VuVmFsdWUpOwoJCQkJCWJyZWFrOwoJCQkJfQoJCQl9CgkJCWlmICh0b2tlbi50b2tlbk51bWJlciA9PSBFUlJPUikgewoJCQkJaWYgKCFza2lwRXJyb3IpIHsKCQkJCQlza2lwRXJyb3IgPSB0cnVlOyAvLyB3ZSBkb24ndCB3YW50IHN1Y2Nlc3NpdmUgZXJyb3JzCgkJCQkJcmV0dXJuIHRva2VuOwoJCQkJfQoJCQl9CgkJCWVsc2UgewoJCQkJc2tpcEVycm9yID0gZmFsc2U7CgkJCQlpZiAodG9rZW4udG9rZW5OdW1iZXIgIT0gV0hJVEVTUEFDRSkKCQkJCQlyZXR1cm4gdG9rZW47CgkJCX0KICAgICAgICB9CiAgICB9Cgp9