# coding=utf8
# the above tag defines encoding for this document and is for Python 2.x compatibility
import re
# Pattern for two adjacent capitalised words: an uppercase ASCII letter
# followed by one or more lowercase letters, a run of whitespace, then a
# second word of the same shape. Because \s also matches "\n", a pair may
# span a line break.
regex = r"[A-Z][a-z]+\s+[A-Z][a-z]+"

# Sample HTML document that is searched. The trailing '"""' is part of the
# sample text and is kept verbatim; it contains nothing the pattern matches.
test_str = ("<html>\n"
            "<body>\n"
            "<table>\n"
            "<tr class=tb1><td>Lorem Ipsum dolor Sit amet</td></tr>\n"
            "<tr class=tb1><td>Consectetuer adipiscing elit</td></tr>\n"
            "<tr><td>Aliquam Tincidunt mauris eu Risus</td></tr>\n"
            "<tr><td>Vestibulum Auctor Dapibus neque</td></tr>\n"
            "</table>\n"
            "</body>\n"
            "</html>\n"
            "\"\"\"")

# Lazy iterator over the non-overlapping matches, scanned left to right.
# No flags are passed: re.MULTILINE only changes how ^ and $ behave, and the
# pattern uses neither, so the flag had no effect on the result.
matches = re.finditer(regex, test_str)

# Print every matched word pair on its own line. The loop consumes `matches`,
# so the iterator is empty afterwards.
for match in matches:
    print(match.group())
# Note: for Python 2.7 compatibility, use ur"" to prefix the regex and u"" to prefix the test string.
IyBjb2Rpbmc9dXRmOAojIHRoZSBhYm92ZSB0YWcgZGVmaW5lcyBlbmNvZGluZyBmb3IgdGhpcyBkb2N1bWVudCBhbmQgaXMgZm9yIFB5dGhvbiAyLnggY29tcGF0aWJpbGl0eQoKaW1wb3J0IHJlCgpyZWdleCA9IHIiW0EtWl1bYS16XStccytbQS1aXVthLXpdKyIKCnRlc3Rfc3RyID0gKCI8aHRtbD5cbiIKCSI8Ym9keT5cbiIKCSI8dGFibGU+XG4iCgkiPHRyIGNsYXNzPXRiMT48dGQ+TG9yZW0gSXBzdW0gZG9sb3IgU2l0IGFtZXQ8L3RkPjwvdHI+XG4iCgkiPHRyIGNsYXNzPXRiMT48dGQ+Q29uc2VjdGV0dWVyIGFkaXBpc2NpbmcgZWxpdDwvdGQ+PC90cj5cbiIKCSI8dHI+PHRkPkFsaXF1YW0gVGluY2lkdW50IG1hdXJpcyBldSBSaXN1czwvdGQ+PC90cj5cbiIKCSI8dHI+PHRkPlZlc3RpYnVsdW0gQXVjdG9yIERhcGlidXMgbmVxdWU8L3RkPjwvdHI+XG4iCgkiPC90YWJsZT5cbiIKCSI8L2JvZHk+XG4iCgkiPC9odG1sPlxuIgoJIlwiXCJcIiIpCgptYXRjaGVzID0gcmUuZmluZGl0ZXIocmVnZXgsIHRlc3Rfc3RyLCByZS5NVUxUSUxJTkUpCgpmb3IgbWF0Y2hOdW0sIG1hdGNoIGluIGVudW1lcmF0ZShtYXRjaGVzKToKICAgIG1hdGNoTnVtID0gbWF0Y2hOdW0gKyAxCiAgICAKICAgIHByaW50IChtYXRjaC5ncm91cCgpKQogICAgICAgIAojIE5vdGU6IGZvciBQeXRob24gMi43IGNvbXBhdGliaWxpdHksIHVzZSB1ciIiIHRvIHByZWZpeCB0aGUgcmVnZXggYW5kIHUiIiB0byBwcmVmaXggdGhlIHRlc3Qgc3RyaW5nIGFuZCBzdWJzdGl0dXRpb24u