import re
# Sample C program used as demo input for the tokenizer.
file = """
int main() {
    return 2;
}"""

# Token patterns. The regex alternation tries them left to right, so the
# keywords have to come before the generic identifier pattern. The trailing
# \b stops a keyword from matching the prefix of a longer identifier
# (e.g. "integer" must be one identifier, not "int" followed by "eger").
tokens = [
    '{', '}', r'\(', r'\)', ';',
    r'int\b', r'return\b',
    r'[a-zA-Z]\w*',
    '[0-9]+',
]
# One token per match: optional leading whitespace is skipped and the token
# text itself is captured in group 1.
p = re.compile(fr"\s*({'|'.join(tokens)})")

def tokenize(w, pattern):
    """Split *w* into tokens by repeatedly applying *pattern*.

    *pattern* is a compiled regex whose group 1 captures the token text.
    It is matched at the current position, the captured text is recorded,
    and scanning resumes where the match ended.

    Scanning stops silently at the first position where the pattern does
    not match, or where it matches without consuming any characters; the
    latter check prevents an infinite loop on patterns that can match the
    empty string. Any remaining input is ignored.

    Returns the list of captured token strings, in order.
    """
    found = []
    pos = 0
    while True:
        hit = pattern.match(w, pos)
        # No match, or a zero-width match that would never advance.
        if not hit or hit.end() == pos:
            break
        found.append(hit.group(1))
        pos = hit.end()
    return found

# Tokenize the embedded sample program and print the resulting token list.
print(tokenize(w=file, pattern=p))