import re
# Demo script: split a WebVTT/SRT-style text into cues with one verbose
# regex and print the parts of every cue that was found.

# Number of cues matched so far; stays 0 when nothing matches.
subtitle_match_count = 0

# Pattern for a single cue.  It must be compiled with re.VERBOSE (layout and
# comments inside the pattern) and re.MULTILINE (so ^ matches at every line
# start, not only at the start of the text).
rx = r"""
    (^[0-9]{2}:[0-9]{2}:[0-9]{2}[.,][0-9]{3})   # match TC-IN in group 1
    [ ]-->[ ]                                   # VTT/SRT style TC-IN --> TC-OUT separator
    ([0-9]{2}:[0-9]{2}:[0-9]{2}[.,][0-9]{3})    # match TC-OUT in group 2
    (.*)(?:\n|\Z)                               # additional VTT info (like alignment) in group 3;
                                                # the timing line may also be the very last line
    ([\s\S]*?)\s*(?:\n\n|\Z)                    # subtitle content in group 4, trailing whitespace
                                                # dropped, ended by a blank line or end of input
    """

# Sample input.  Every line starts at column 0 because the pattern anchors
# the TC-IN timecode directly to the beginning of a line.  The final cue has
# no text and no trailing newline on purpose.
s = (
    "WEBVTT\n"
    "\n"
    "00:00:00.440 --> 00:00:02.320 align:middle line:-1\n"
    "Hi.\n"
    "\n"
    "00:00:03.440 --> 00:00:07.520 align:middle line:-1\n"
    "This subtitle has one line.\n"
    "\n"
    "00:00:09.240 --> 00:00:11.080 align:middle line:-2\n"
    "This subtitle has\n"
    "two lines.\n"
    "\n"
    "00:00:15.240 --> 00:00:23.960 align:middle line:-4\n"
    "Now...\n"
    "Let's try\n"
    "four...\n"
    "lines...\n"
    "\n"
    "00:00:24.080 --> 00:00:27.080 align:middle"
)

matches = re.finditer(rx, s, re.VERBOSE | re.MULTILINE)
for subtitle_match_count, match in enumerate(matches, 1):
    # The timecode groups can only hold digits, ':' and '.'/',' so they need
    # no stripping; the extra-info group keeps its leading space until printed.
    tc_in, tc_out, vtt_extra_info, subtitle_content = match.groups()

    # Each print() gets exactly one pre-formatted string, which produces the
    # same output under Python 2 and Python 3.
    print("*** subtitle match count: %d ***" % subtitle_match_count)
    print("%s %s" % ("TIMECODE IN".ljust(20), tc_in))
    print("%s %s" % ("TIMECODE OUT".ljust(20), tc_out))
    print("%s %s" % ("ALIGN".ljust(20), vtt_extra_info.strip()))
    print("%s %s" % ("SUBTITLE CONTENT".ljust(20), subtitle_content))
    print("")
aW1wb3J0IHJlCgpzdWJ0aXRsZV9tYXRjaF9jb3VudCA9IDAKcnggPSByIiIiCgkoXlswLTldezJ9OlswLTldezJ9OlswLTldezJ9Wy4sXVswLTldezN9KSAgICMgbWF0Y2ggVEMtSU4gaW4gZ3JvdXAxCglbIF0tLT5bIF0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjIFZUVC9TUlQgc3R5bGUgVEMtSU4tLVRDLU9VVCBzZXBhcmF0b3IKCShbMC05XXsyfTpbMC05XXsyfTpbMC05XXsyfVsuLF1bMC05XXszfSkgICAgIyBtYXRjaCBUQy1PVVQgbiBncm91cDIKCSguKilcbiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMgYWRkaXRpb25hbCBWVFQgaW5mbyAobGlrZSkgYWxpZ25tZW50CgkoW1xzXFNdKj8pXHMqKD86XG5cbnxcWikgICAgICAgICAgICAgICAgIyBzdWJ0aXRsZV9jb250ZW50CgkiIiIKCnMgPSAiV0VCVlRUXG5cbjAwOjAwOjAwLjQ0MCAtLT4gMDA6MDA6MDIuMzIwIGFsaWduOm1pZGRsZSBsaW5lOi0xXG5IaS5cblxuMDA6MDA6MDMuNDQwIC0tPiAwMDowMDowNy41MjAgYWxpZ246bWlkZGxlIGxpbmU6LTFcblRoaXMgc3VidGl0bGUgaGFzIG9uZSBsaW5lLlxuXG4wMDowMDowOS4yNDAgLS0+IDAwOjAwOjExLjA4MCBhbGlnbjptaWRkbGUgbGluZTotMlxuVGhpcyBzdWJ0aXRsZSBoYXNcbnR3byBsaW5lcy5cblxuMDA6MDA6MTUuMjQwIC0tPiAwMDowMDoyMy45NjAgYWxpZ246bWlkZGxlIGxpbmU6LTRcbk5vdy4uLlxuTGV0J3MgdHJ5XG5mb3VyLi4uXG5saW5lcy4uLlxuXG4wMDowMDoyNC4wODAgLS0+IDAwOjAwOjI3LjA4MCBhbGlnbjptaWRkbGUiCgptYXRjaGVzID0gcmUuZmluZGl0ZXIocngsIHMsIHJlLlZFUkJPU0UgfCByZS5NVUxUSUxJTkUpCmZvciBtYXRjaCBpbiBtYXRjaGVzOgogICAgc3VidGl0bGVfbWF0Y2hfY291bnQgKz0gMQogICAgZ3JvdXAxLCBncm91cDIsIGdyb3VwMywgZ3JvdXA0ID0gbWF0Y2guZ3JvdXBzKCkKICAgIHRjX2luID0gZ3JvdXAxLnN0cmlwKCkKICAgIHRjX291dCA9IGdyb3VwMi5zdHJpcCgpCiAgICB2dHRfZXh0cmFfaW5mbyA9IGdyb3VwMwogICAgc3VidGl0bGVfY29udGVudCA9IGdyb3VwNAogICAgcHJpbnQgIioqKiBzdWJ0aXRsZSBtYXRjaCBjb3VudDogJWQgKioqIiAlIHN1YnRpdGxlX21hdGNoX2NvdW50CiAgICBwcmludCAiVElNRUNPREUgSU4iLmxqdXN0KDIwKSwgdGNfaW4KICAgIHByaW50ICJUSU1FQ09ERSBPVVQiLmxqdXN0KDIwKSwgdGNfb3V0CiAgICBwcmludCAiQUxJR04iLmxqdXN0KDIwKSwgdnR0X2V4dHJhX2luZm8uc3RyaXAoKQogICAgcHJpbnQgIlNVQlRJVExFIENPTlRFTlQiLmxqdXN0KDIwKSwgc3VidGl0bGVfY29udGVudAogICAgcHJpbnQ=