import re
# Pattern that reports <img> tags lacking a quoted alt attribute while
# skipping Markdown code.
#
# Alternatives are tried left to right at each position:
#   1. ```...```  fenced code block  -> consumed, group 1 stays unset
#   2. `...`      inline code span   -> consumed, group 1 stays unset
#   3. <img ...>  with no alt="..."/alt='...' inside the tag
#                 -> group 1 = the tag up to (not including) ">",
#                    group 3 = the closing ">"
#
# The negative lookahead uses [^>]*? (not .*?) so it only inspects the
# current tag.  With re.DOTALL, ".*?" would scan to the end of the text and
# an alt attribute on any *later* tag would wrongly suppress this one.
regex = r"```.*?```|`.*?`|(<img(?![^>]*?alt=(['\"]).*?\2)[^>]*)(>)"

# Sample Markdown: one fenced block, one bare <img>, one inline-code <img>,
# and a second (indented) fenced block.  Only the bare <img> should be found.
test_str = ("```html\n"
    "<img src=\"fil.png\">\n"
    "```\n\n"
    "- [ ] Here is another image <img src=\"fil.png\"> and another `<img src=\"fil.png\">`\n\n"
    " ```html\n"
    " <a href=\"scratch/index.html\" id=\"scratch\" data-original-title=\"\" title=\"\" aria-describedby=\"popover162945\">\n"
    " <div class=\"logo-wrapper\">\n"
    " </div>\n"
    " <div class=\"name\">\n"
    " <span>Scratch</span>\n"
    " </div>\n"
    " <img src=\"fil.png\">\n"
    " </a>\n"
    " ```")


def find_images_without_alt(text, pattern=regex):
    """Yield a match object for each <img> tag in *text* that has no alt.

    Matches that merely consumed a code block or inline code span (group 1
    unset) are filtered out, so every yielded match has the tag text in
    group 1.  re.DOTALL lets fenced code blocks span multiple lines.
    """
    for found in re.finditer(pattern, text, re.DOTALL):
        if found.group(1):
            yield found


matches = find_images_without_alt(test_str)
for match in matches:
    print("Found at {start}-{end}: {group}".format(
        start=match.start(1), end=match.end(1), group=match.group(1)))
aW1wb3J0IHJlCnJlZ2V4ID0gciJgYGAuKj9gYGB8YC4qP2B8KDxpbWcoPyEuKj9hbHQ9KFsnXCJdKS4qP1wyKVtePl0qKSg+KSIKdGVzdF9zdHIgPSAoImBgYGh0bWxcbiIKCSI8aW1nIHNyYz1cImZpbC5wbmdcIj5cbiIKCSJgYGBcblxuIgoJIi0gWyBdIEhlcmUgaXMgYW5vdGhlciBpbWFnZSA8aW1nIHNyYz1cImZpbC5wbmdcIj4gYW5kIGFub3RoZXIgYDxpbWcgc3JjPVwiZmlsLnBuZ1wiPmBcblxuIgoJIiAgYGBgaHRtbFxuIgoJIiAgPGEgaHJlZj1cInNjcmF0Y2gvaW5kZXguaHRtbFwiIGlkPVwic2NyYXRjaFwiIGRhdGEtb3JpZ2luYWwtdGl0bGU9XCJcIiB0aXRsZT1cIlwiIGFyaWEtZGVzY3JpYmVkYnk9XCJwb3BvdmVyMTYyOTQ1XCI+XG4iCgkiICAgIDxkaXYgY2xhc3M9XCJsb2dvLXdyYXBwZXJcIj5cbiIKCSIgICAgPC9kaXY+XG4iCgkiICAgIDxkaXYgY2xhc3M9XCJuYW1lXCI+XG4iCgkiICAgICAgPHNwYW4+U2NyYXRjaDwvc3Bhbj5cbiIKCSIgICAgPC9kaXY+XG4iCgkiICAgIDxpbWcgc3JjPVwiZmlsLnBuZ1wiPlxuIgoJIiAgPC9hPlxuIgoJIiAgYGBgIikKCm1hdGNoZXMgPSByZS5maW5kaXRlcihyZWdleCwgdGVzdF9zdHIsIHJlLkRPVEFMTCkKZm9yIG1hdGNoIGluIG1hdGNoZXM6CiAgICBpZiBtYXRjaC5ncm91cCgxKToKICAgICAgICBwcmludCAoIkZvdW5kIGF0IHtzdGFydH0te2VuZH06IHtncm91cH0iLmZvcm1hdChzdGFydCA9IG1hdGNoLnN0YXJ0KDEpLCBlbmQgPSBtYXRjaC5lbmQoMSksIGdyb3VwID0gbWF0Y2guZ3JvdXAoMSkpKQ==