import re, sys, unicodedata
# Sample input: "test1" contains a decimal digit and "o₁" ends with a
# subscript digit (Unicode category No), so neither is printed below.
s = "test1 this is a sample subscript o₁"

# Runs of two or more word characters, excluding decimal digits (\d) and the
# underscore, delimited by word boundaries.  \d only covers category Nd, so
# characters such as "₁" or "²" (category No) still match this pattern and
# have to be rejected in a second step.
_WORD_RE = re.compile(r'\b[^\W\d_]{2,}\b')


def _has_other_number(word):
    """Return True if *word* contains a character of Unicode category 'No'."""
    # Asking unicodedata about each character of the word replaces the
    # earlier approach of enumerating every code point up front and
    # substring-searching each word against that whole table.
    return any(unicodedata.category(ch) == 'No' for ch in word)


def find_letter_words(text):
    """Return the words in *text* that contain no digit-like characters.

    A word is a run of at least two word characters, delimited by word
    boundaries, that contains neither a decimal digit nor an underscore.
    Words holding any character of Unicode category 'No' (superscripts,
    subscripts, fractions, ...) are dropped as well.

    Args:
        text: The string to scan.

    Returns:
        A list of the matching words, in order of appearance.
    """
    return [word for word in _WORD_RE.findall(text)
            if not _has_other_number(word)]


print(find_letter_words(s))
aW1wb3J0IHJlLCBzeXMsIHVuaWNvZGVkYXRhCnMgPSAidGVzdDEgdGhpcyBpcyBhIHNhbXBsZSBzdWJzY3JpcHQgb+KCgSIKTm8gPSBbY2hyKGkpIGZvciBpIGluIHJhbmdlKHN5cy5tYXh1bmljb2RlKSBpZiB1bmljb2RlZGF0YS5jYXRlZ29yeShjaHIoaSkpID09ICdObyddCnByaW50KFt4IGZvciB4IGluIHJlLmZpbmRhbGwocidcYlteXFdcZF9dezIsfVxiJywgcykgaWYgbm90IGFueSh5IGluIHggZm9yIHkgaW4gTm8pXSk=