fork download
  1. import re
  2. file = """
  3. int main() {
  4. return 2;
  5. }"""
  6.  
  7. tokens = ['{','}',r'\(',r'\)',';',"int","return",r'[a-zA-Z]\w*','[0-9]+']
  8. p = re.compile(fr"\s*({'|'.join(tokens)})")
  9.  
  10. def tokenize(w, pattern):
  11. index = 0
  12. m = pattern.match(w, index)
  13. o = []
  14. # Although index != m.end() check zero-length match, it's more of
  15. # a guard against accidental infinite loop.
  16. # Don't expect a regex which can match empty string to work.
  17. # See Caveat section.
  18. while m and index != m.end():
  19. o.append(m.group(1))
  20. index = m.end()
  21. m = pattern.match(w, index)
  22. return o
  23.  
  24. print(tokenize(file, p))
Success #stdin #stdout 0.03s 9160KB
stdin
Standard input is empty
stdout
['int', 'main', '(', ')', '{', 'return', '2', ';', '}']