import re
import nltk
# Reserved words reported as "Keyword", listed in their original order.
# NOTE(review): "struc" looks like a typo of "struct", and "string", "class"
# and "include" are not C keywords; they are kept so that those exact tokens
# are still classified as before -- confirm whether they are intended.
_KEYWORDS = (
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while", "string", "class", "struc", "include",
)
_KEYWORD_SET = frozenset(_KEYWORDS)

# Pattern strings are kept under their original module-level names.  They are
# raw strings so the backslashes reach the regex engine untouched instead of
# relying on Python tolerating invalid string escapes.
RE_Keywords = "|".join(_KEYWORDS)
RE_Operators = r"(\++)|(-)|(=)|(\*)|(/)|(%)|(--)|(<=)|(>=)"
RE_Numerals = r"^(\d+)$"
RE_Special_Characters = r"[\[@&~!#$\^\|{}\]:;<>?,\.']|\(\)|\(|\)|{}|\[\]|\""
RE_Identifiers = r"^[a-zA-Z_]+[a-zA-Z0-9_]*"
RE_Headers = r"([a-zA-Z]+\.[h])"  # defined but not used by the classifier below

# (compiled pattern, label) pairs, tried in this order after the keyword
# check.  Order matters: e.g. "<=" must be reported as an operator before the
# special-character pattern gets a chance to match its "<".
_CATEGORY_PATTERNS = (
    (re.compile(RE_Operators), "Operator"),
    (re.compile(RE_Numerals), "Numeral"),
    (re.compile(RE_Special_Characters), "Special Character/Symbol"),
    (re.compile(RE_Identifiers), "Identifiers"),
)


def classify_token(token: str) -> str:
    """Return the category label for a single token.

    The checks run in a fixed order: keyword, operator, numeral, special
    character, identifier.  The first one that applies wins; if none does,
    "Unknown Value" is returned.

    A token is a keyword only when it equals a reserved word exactly, so an
    identifier that merely contains one (such as "printf", which contains
    "int") is not reported as a keyword.
    """
    if token in _KEYWORD_SET:
        return "Keyword"
    for pattern, label in _CATEGORY_PATTERNS:
        # search() (not fullmatch) keeps the original "pattern occurs
        # somewhere in the token" semantics for the non-keyword categories.
        if pattern.search(token):
            return label
    return "Unknown Value"


if __name__ == "__main__":
    # Guarded so that importing this module does not prompt for input.
    input_program = input("Enter Your Code: ")
    input_program_tokens = nltk.wordpunct_tokenize(input_program)
    print(input_program_tokens)

    # To categorize the tokens
    for token in input_program_tokens:
        print(token, "------->", classify_token(token))
aW1wb3J0IHJlCgppbXBvcnQgbmx0awoKaW5wdXRfcHJvZ3JhbSA9IGlucHV0KCJFbnRlciBZb3VyIENvZGU6ICIpOwppbnB1dF9wcm9ncmFtX3Rva2VucyA9IG5sdGsud29yZHB1bmN0X3Rva2VuaXplKGlucHV0X3Byb2dyYW0pOwoKcHJpbnQoaW5wdXRfcHJvZ3JhbV90b2tlbnMpOwoKClJFX0tleXdvcmRzID0gImF1dG98YnJlYWt8Y2FzZXxjaGFyfGNvbnN0fGNvbnRpbnVlfGRlZmF1bHR8ZG98ZG91YmxlfGVsc2V8ZW51bXxleHRlcm58ZmxvYXR8Zm9yfGdvdG98aWZ8aW50fGxvbmd8cmVnaXN0ZXJ8cmV0dXJufHNob3J0fHNpZ25lZHxzaXplb2Z8c3RhdGljfHN0cnVjdHxzd2l0Y2h8dHlwZWRlZnx1bmlvbnx1bnNpZ25lZHx2b2lkfHZvbGF0aWxlfHdoaWxlfHN0cmluZ3xjbGFzc3xzdHJ1Y3xpbmNsdWRlIgpSRV9PcGVyYXRvcnMgPSAiKFwrKyl8KC0pfCg9KXwoXCopfCgvKXwoJSl8KC0tKXwoPD0pfCg+PSkiClJFX051bWVyYWxzID0gIl4oXGQrKSQiClJFX1NwZWNpYWxfQ2hhcmFjdGVycyA9ICJbXFtAJn4hIyRcXlx8e31cXTo7PD4/LFwuJ118XChcKXxcKHxcKXx7fXxcW1xdfFwiIgpSRV9JZGVudGlmaWVycyA9ICJeW2EtekEtWl9dK1thLXpBLVowLTlfXSoiClJFX0hlYWRlcnMgPSAiKFthLXpBLVpdK1wuW2hdKSIKCgojVG8gQ2F0ZWdvcml6ZSBUaGUgVG9rZW5zCgpmb3IgdG9rZW4gaW4gaW5wdXRfcHJvZ3JhbV90b2tlbnM6CiAgICBpZihyZS5maW5kYWxsKFJFX0tleXdvcmRzLHRva2VuKSk6CiAgICAgICAgcHJpbnQodG9rZW4gLCAiLS0tLS0tLT4gS2V5d29yZCIpCiAgICBlbGlmKHJlLmZpbmRhbGwoUkVfT3BlcmF0b3JzLHRva2VuKSk6CiAgICAgICAgcHJpbnQodG9rZW4sICItLS0tLS0tPiBPcGVyYXRvciIpCiAgICBlbGlmKHJlLmZpbmRhbGwoUkVfTnVtZXJhbHMsdG9rZW4pKToKICAgICAgICBwcmludCh0b2tlbiwgIi0tLS0tLS0+IE51bWVyYWwiKQogICAgZWxpZihyZS5maW5kYWxsKFJFX1NwZWNpYWxfQ2hhcmFjdGVycyx0b2tlbikpOgogICAgICAgIHByaW50KHRva2VuLCAiLS0tLS0tLT4gU3BlY2lhbCBDaGFyYWN0ZXIvU3ltYm9sIikKICAgIGVsaWYocmUuZmluZGFsbChSRV9JZGVudGlmaWVycyx0b2tlbikpOgogICAgICAgIHByaW50KHRva2VuLCAiLS0tLS0tLT4gSWRlbnRpZmllcnMiKQogICAgZWxzZToKICAgICAgICBwcmludCgiVW5rbm93biBWYWx1ZSIpCg==