import re, string
# Sample input: a nested list of word tokens, one inner list per sentence of
# the sample text.
tokenized_doc1 = [['The', 'intelligent', 'directory', 'enquiry', 'assistant', '(', 'YPA', ')', 'project', 'is', 'an', 'example', '(', 'going', 'back', 'quite', 'a', 'few', 'years', 'now', '...', ')', 'where', 'the', 'extraction', 'of', 'information', 'from', 'partially', 'structured', 'data', 'together', 'with', 'engineering', 'issues', 'played', 'major', 'roles', 'in', 'making', 'the', 'YPA', 'a', 'usable', 'online', 'system', '.'], ['I', 'am', 'developing', 'techniques', 'that', 'allow', 'the', 'extraction', 'of', 'conceptual', 'information', 'from', 'document', 'collections', 'and', 'the', 'utilization', 'of', 'such', 'knowledge', 'in', 'retrieval', 'tasks', '.'], ['The', 'type', 'of', 'documents', 'can', 'range', 'from', 'Web', 'pages', 'to', 'newspaper', 'articles', 'or', 'other', 'forms', 'of', 'vaguely/partially', 'structured', 'data', '.']]

# Character class matching every ASCII punctuation character except "." and
# "-"; those two are removed from the class so they survive in the tokens.
punct_rx = re.compile('[%s]' % re.escape(string.punctuation.replace(".", "").replace("-", "")))

# Strip the matched characters from every token while keeping the nested
# (per-sentence) structure.  A token made up only of stripped characters,
# such as "(" or ")", becomes an empty string instead of being dropped, so
# each inner list keeps the same length as its input.
token_without_punctuation = [
    [punct_rx.sub("", token) for token in sentence]
    for sentence in tokenized_doc1
]

print(token_without_punctuation)
aW1wb3J0IHJlLCBzdHJpbmcKCnRva2VuaXplZF9kb2MxID0gW1snVGhlJywgJ2ludGVsbGlnZW50JywgJ2RpcmVjdG9yeScsICdlbnF1aXJ5JywgJ2Fzc2lzdGFudCcsICcoJywgJ1lQQScsICcpJywgJ3Byb2plY3QnLCAnaXMnLCAnYW4nLCAnZXhhbXBsZScsICcoJywgJ2dvaW5nJywgJ2JhY2snLCAncXVpdGUnLCAnYScsICdmZXcnLCAneWVhcnMnLCAnbm93JywgJy4uLicsICcpJywgJ3doZXJlJywgJ3RoZScsICdleHRyYWN0aW9uJywgJ29mJywgJ2luZm9ybWF0aW9uJywgJ2Zyb20nLCAncGFydGlhbGx5JywgJ3N0cnVjdHVyZWQnLCAnZGF0YScsICd0b2dldGhlcicsICd3aXRoJywgJ2VuZ2luZWVyaW5nJywgJ2lzc3VlcycsICdwbGF5ZWQnLCAnbWFqb3InLCAncm9sZXMnLCAnaW4nLCAnbWFraW5nJywgJ3RoZScsICdZUEEnLCAnYScsICd1c2FibGUnLCAnb25saW5lJywgJ3N5c3RlbScsICcuJ10sIFsnSScsICdhbScsICdkZXZlbG9waW5nJywgJ3RlY2huaXF1ZXMnLCAndGhhdCcsICdhbGxvdycsICd0aGUnLCAnZXh0cmFjdGlvbicsICdvZicsICdjb25jZXB0dWFsJywgJ2luZm9ybWF0aW9uJywgJ2Zyb20nLCAnZG9jdW1lbnQnLCAnY29sbGVjdGlvbnMnLCAnYW5kJywgJ3RoZScsICd1dGlsaXphdGlvbicsICdvZicsICdzdWNoJywgJ2tub3dsZWRnZScsICdpbicsICdyZXRyaWV2YWwnLCAndGFza3MnLCAnLiddLCBbJ1RoZScsICd0eXBlJywgJ29mJywgJ2RvY3VtZW50cycsICdjYW4nLCAncmFuZ2UnLCAnZnJvbScsICdXZWInLCAncGFnZXMnLCAndG8nLCAnbmV3c3BhcGVyJywgJ2FydGljbGVzJywgJ29yJywgJ290aGVyJywgJ2Zvcm1zJywgJ29mJywgJ3ZhZ3VlbHkvcGFydGlhbGx5JywgJ3N0cnVjdHVyZWQnLCAnZGF0YScsICcuJ11dCnB1bmN0X3J4ID0gcmUuY29tcGlsZSgnWyVzXScgJSByZS5lc2NhcGUoc3RyaW5nLnB1bmN0dWF0aW9uLnJlcGxhY2UoIi4iLCAiIikucmVwbGFjZSgiLSIsICIiKSkpCnRva2VuX3dpdGhvdXRfcHVuY3R1YXRpb24gPSBbXQoKZm9yIHggaW4gdG9rZW5pemVkX2RvYzE6CiAgICB5ID0gW10KICAgIGZvciB0b2tlbiBpbiB4OgogICAgICAgIHRva2VucyA9IHB1bmN0X3J4LnN1YigiIiwgdG9rZW4pCiAgICAgICAgeS5hcHBlbmQodG9rZW5zKQogICAgdG9rZW5fd2l0aG91dF9wdW5jdHVhdGlvbi5hcHBlbmQoeSkKCnByaW50KHRva2VuX3dpdGhvdXRfcHVuY3R1YXRpb24p
[['The', 'intelligent', 'directory', 'enquiry', 'assistant', '', 'YPA', '', 'project', 'is', 'an', 'example', '', 'going', 'back', 'quite', 'a', 'few', 'years', 'now', '...', '', 'where', 'the', 'extraction', 'of', 'information', 'from', 'partially', 'structured', 'data', 'together', 'with', 'engineering', 'issues', 'played', 'major', 'roles', 'in', 'making', 'the', 'YPA', 'a', 'usable', 'online', 'system', '.'], ['I', 'am', 'developing', 'techniques', 'that', 'allow', 'the', 'extraction', 'of', 'conceptual', 'information', 'from', 'document', 'collections', 'and', 'the', 'utilization', 'of', 'such', 'knowledge', 'in', 'retrieval', 'tasks', '.'], ['The', 'type', 'of', 'documents', 'can', 'range', 'from', 'Web', 'pages', 'to', 'newspaper', 'articles', 'or', 'other', 'forms', 'of', 'vaguelypartially', 'structured', 'data', '.']]