import re
# Sample C program used as the demo input for the tokenizer.
file = """
int main() {
return 2;
}"""

# Regex alternatives for a single token.  They are tried left to right and
# the first alternative that matches at the current position wins.
tokens = [
    '{',
    '}',
    r'\(',
    r'\)',
    ';',
    # Keywords must end at a word boundary; otherwise an identifier that
    # merely starts with a keyword (e.g. "integer", "returned") would be
    # split into the keyword plus a bogus trailing identifier.
    r'int\b',
    r'return\b',
    r'[a-zA-Z]\w*',
    '[0-9]+',
]

# Leading whitespace is skipped; capture group 1 holds the token text.
p = re.compile(fr"\s*({'|'.join(tokens)})")
def tokenize(w, pattern):
    """Split ``w`` into lexemes by applying ``pattern`` repeatedly.

    Every match is attempted at the position where the previous match
    ended, and capture group 1 of each match is collected in order.

    Scanning stops at the first position where ``pattern`` does not match,
    so any remaining input is silently left untokenized.

    Args:
        w: The text to tokenize.
        pattern: A compiled regular expression with at least one capture
            group; group 1 is taken as the token text.

    Returns:
        A list with the group-1 text of each successive match.
    """
    lexemes = []
    cursor = 0
    while True:
        hit = pattern.match(w, cursor)
        if not hit:
            break
        # A match that consumes nothing would never advance the cursor, so
        # bail out instead of looping forever.  Patterns that can match the
        # empty string are not expected to tokenize correctly.
        if hit.end() == cursor:
            break
        lexemes.append(hit.group(1))
        cursor = hit.end()
    return lexemes
# Tokenize the embedded sample program and print the resulting lexemes.
print(tokenize(w=file, pattern=p))
aW1wb3J0IHJlCmZpbGUgPSAiIiIKaW50IG1haW4oKSB7CiAgICByZXR1cm4gMjsKfSIiIgoKdG9rZW5zID0gWyd7JywnfScscidcKCcscidcKScsJzsnLCJpbnQiLCJyZXR1cm4iLHInW2EtekEtWl1cdyonLCdbMC05XSsnXQpwID0gcmUuY29tcGlsZShmciJccyooeyd8Jy5qb2luKHRva2Vucyl9KSIpCgpkZWYgdG9rZW5pemUodywgcGF0dGVybik6CiAgICBpbmRleCA9IDAKICAgIG0gPSBwYXR0ZXJuLm1hdGNoKHcsIGluZGV4KQogICAgbyA9IFtdCiAgICAjIEFsdGhvdWdoIGluZGV4ICE9IG0uZW5kKCkgY2hlY2sgemVyby1sZW5ndGggbWF0Y2gsIGl0J3MgbW9yZSBvZgogICAgIyBhIGd1YXJkIGFnYWluc3QgYWNjaWRlbnRhbCBpbmZpbml0ZSBsb29wLgogICAgIyBEb24ndCBleHBlY3QgYSByZWdleCB3aGljaCBjYW4gbWF0Y2ggZW1wdHkgc3RyaW5nIHRvIHdvcmsuCiAgICAjIFNlZSBDYXZlYXQgc2VjdGlvbi4KICAgIHdoaWxlIG0gYW5kIGluZGV4ICE9IG0uZW5kKCk6CiAgICAgICAgby5hcHBlbmQobS5ncm91cCgxKSkKICAgICAgICBpbmRleCA9IG0uZW5kKCkKICAgICAgICBtID0gcGF0dGVybi5tYXRjaCh3LCBpbmRleCkKICAgIHJldHVybiBvCgpwcmludCh0b2tlbml6ZShmaWxlLCBwKSk=