fork download
  1. import java.util.regex.Matcher;
  2. import java.util.regex.Pattern;
  3.  
  4. public class Lexer {
  5.  
  6. public static class Token
  7. {
  8. public int tokenNumber;
  9. public String tokenValue;
  10.  
  11. public Token(int tokenNumber, String tokenValue)
  12. {
  13. this.tokenNumber = tokenNumber;
  14. this.tokenValue = tokenValue;
  15. }
  16. }
  17.  
  18. public static int WHITESPACE = 1; // group 1
  19. public static int PUNCTUATION = 2; // group 2 etc.
  20. public static int LPAREN = 3;
  21. public static int RPAREN = 4;
  22. public static int KEYWORD = 5;
  23. public static int IDENTIFIER = 6;
  24. public static int NUMBER = 7;
  25. public static int SEMICOLON = 8;
  26. public static int ERROR = 9;
  27. public static int EOF = 10;
  28.  
  29. Matcher m;
  30. String text;
  31. boolean skipError;
  32.  
  33.  
  34. public static void main(String[] args) {
  35. Lexer lexer = new Lexer("tcu else i34 !!!! 2983 ( + +eqdQ!!!!"); // With some error characters "!" thrown in the middle and at the end
  36. for(;;) {
  37. Token token = lexer.next();
  38. System.out.println(token.tokenNumber + ": " + token.tokenValue);
  39. if (token.tokenNumber == EOF)
  40. break;
  41. }
  42. }
  43.  
  44. public Lexer(String text)
  45. {
  46.  
  47. String _WHITESPACE = "(\\s+)";
  48. String _PUNCTUATION = "((?:[+*/-]|:=))";
  49. String _LPAREN = "(\\()";
  50. String _RPAREN = "(\\))";
  51. String _KEYWORD = "(if|then|else|endif|while|do|endwhile|skip)";
  52. String _IDENTIFIER = "([a-zA-Z][0-9a-zA-Z]*)";
  53. String _NUMBER = "([0-9)]+)";
  54. String _SEMICOLON = "(;)";
  55. String _ERROR = "(.)"; // must be last and able to capture one character
  56.  
  57. String regex = String.join("|", _WHITESPACE, _PUNCTUATION, _LPAREN, _RPAREN, _KEYWORD, _IDENTIFIER, _NUMBER, _SEMICOLON, _ERROR);
  58.  
  59. Pattern p = Pattern.compile(regex);
  60. this.text = text;
  61. m = p.matcher(this.text);
  62. skipError = false;
  63. }
  64.  
  65. public Token next()
  66. {
  67. Token token = null;
  68. for(;;) {
  69. if (!m.find())
  70. return new Token(EOF, "<EOF>");
  71. for (int tokenNumber = 1; tokenNumber <= 9; tokenNumber++) {
  72. String tokenValue = m.group(tokenNumber);
  73. if (tokenValue != null) {
  74. token = new Token(tokenNumber, tokenValue);
  75. break;
  76. }
  77. }
  78. if (token.tokenNumber == ERROR) {
  79. if (!skipError) {
  80. skipError = true; // we don't want successive errors
  81. return token;
  82. }
  83. }
  84. else {
  85. skipError = false;
  86. if (token.tokenNumber != WHITESPACE)
  87. return token;
  88. }
  89. }
  90. }
  91.  
  92. }
Success #stdin #stdout 0.16s 36680KB
stdin
Standard input is empty
stdout
6: tcu
5: else
6: i34
9: !
7: 2983
3: (
2: +
2: +
6: eqdQ
9: !
10: <EOF>