import re
def extract_blocks(lines, start_delimiters, end_delimiter):
    """Collect the delimited blocks found in *lines*.

    The lines are scanned once per entry in *start_delimiters*, so the
    result is grouped by delimiter (in the order given) and, within one
    delimiter, by position in the input.

    Args:
        lines: Iterable of text lines without trailing newlines. It is
            iterated once per start delimiter, so it must be re-iterable
            (e.g. a list).
        start_delimiters: Prefixes that open a block. A line opens a block
            when its stripped form starts with the prefix.
        end_delimiter: Prefix that closes the currently open block. The
            closing line is included in the block.

    Returns:
        A list of strings, one per block. Each string is the block's lines
        joined with a newline. The opening line is stored stripped; every
        following line is stored exactly as it appeared in the input. A
        block that is still open when the input ends is returned as well.
    """
    blocks = []
    for start in start_delimiters:
        # Fresh state for every delimiter so that an unterminated block
        # cannot swallow lines during the next delimiter's scan.
        current = []
        for line in lines:
            stripped = line.strip()
            if current:
                # Inside a block: keep the line verbatim, then check
                # whether it is the closing delimiter.
                current.append(line)
                if stripped.startswith(end_delimiter):
                    blocks.append("\n".join(current))
                    current = []
            elif stripped.startswith(start):
                current.append(stripped)
        if current:
            # Input ended before the closing delimiter was seen; emit the
            # partial block as a string, like every other block.
            blocks.append("\n".join(current))
    return blocks


# Sample input: three delimited blocks surrounded by unrelated text.
file = """*some random text*
...
...
...
tag/delimiter 1
text 1 #extract
text 2 #extract
... #extract
... #extract
text n #extract
tag/ending_delimiter
*some random text*
...
...
...
tag/delimiter 2
text 1 #extract
text 2 #extract
... #extract
... #extract
text n #extract
tag/ending_delimiter
*some random text*
...
...
...
tag/delimiter n
text 1 #extract
text 2 #extract
... #extract
... #extract
text n #extract
tag/ending_delimiter
*some random text until the file ends*"""

data_file = file.splitlines()
list_of_starting_delimiters = ['tag/delimiter']
attribute_end = 'tag/ending_delimiter'

block = extract_blocks(data_file, list_of_starting_delimiters, attribute_end)

for b in block:
    print(b)
    print("---Next block---")
aW1wb3J0IHJlCmZpbGU9IiIiKnNvbWUgcmFuZG9tIHRleHQqCi4uLgouLi4KLi4uCnRhZy9kZWxpbWl0ZXIgMQp0ZXh0IDEgICAjZXh0cmFjdAp0ZXh0IDIgICAjZXh0cmFjdAouLi4gICAgICAjZXh0cmFjdAouLi4gICAgICAjZXh0cmFjdAp0ZXh0IG4gICAjZXh0cmFjdAp0YWcvZW5kaW5nX2RlbGltaXRlcgoqc29tZSByYW5kb20gdGV4dCoKLi4uCi4uLgouLi4KdGFnL2RlbGltaXRlciAyCnRleHQgMSAgICNleHRyYWN0CnRleHQgMiAgICNleHRyYWN0Ci4uLiAgICAgICNleHRyYWN0Ci4uLiAgICAgICNleHRyYWN0CnRleHQgbiAgICNleHRyYWN0CnRhZy9lbmRpbmdfZGVsaW1pdGVyCipzb21lIHJhbmRvbSB0ZXh0KgouLi4KLi4uCi4uLgp0YWcvZGVsaW1pdGVyIG4KdGV4dCAxICAgI2V4dHJhY3QKdGV4dCAyICAgI2V4dHJhY3QKLi4uICAgICAgI2V4dHJhY3QKLi4uICAgICAgI2V4dHJhY3QKdGV4dCBuICAgI2V4dHJhY3QKdGFnL2VuZGluZ19kZWxpbWl0ZXIKKnNvbWUgcmFuZG9tIHRleHQgdW50aWwgdGhlIGZpbGUgZW5kcyoiIiIKZGF0YV9maWxlID0gZmlsZS5zcGxpdGxpbmVzKCkKYmxvY2sgPSBbXQpmb3VuZCA9IEZhbHNlCmxpc3Rfb2Zfc3RhcnRpbmdfZGVsaW1pdGVycyA9IFsndGFnL2RlbGltaXRlciddCmF0dHJpYnV0ZV9lbmQgPSAndGFnL2VuZGluZ19kZWxpbWl0ZXInCmN1cnIgPSBbXQoKZm9yIGVsZW0gaW4gbGlzdF9vZl9zdGFydGluZ19kZWxpbWl0ZXJzOgogICAgZm9yIGxpbmUgaW4gZGF0YV9maWxlOgogICAgICAgIGlmIGZvdW5kOgogICAgICAgICAgICBjdXJyLmFwcGVuZChsaW5lKQogICAgICAgICAgICBpZiBsaW5lLnN0cmlwKCkuc3RhcnRzd2l0aChhdHRyaWJ1dGVfZW5kKToKICAgICAgICAgICAgICAgIGZvdW5kID0gRmFsc2UKICAgICAgICAgICAgICAgIGJsb2NrLmFwcGVuZCgiXG4iLmpvaW4oY3VycikpCiAgICAgICAgICAgICAgICBjdXJyID0gW10KICAgICAgICBlbHNlOgogICAgICAgICAgICBpZiBsaW5lLnN0cmlwKCkuc3RhcnRzd2l0aChlbGVtKToKICAgICAgICAgICAgICAgIGZvdW5kID0gVHJ1ZQogICAgICAgICAgICAgICAgY3Vyci5hcHBlbmQobGluZS5zdHJpcCgpKSAjYmxvY2sgPSBlbGVtCgppZiBsZW4oY3VycikgPiAwOgoJYmxvY2suYXBwZW5kKGN1cnIpCgkKZm9yIGIgaW4gYmxvY2s6CglwcmludChiKQoJcHJpbnQoIi0tLU5leHQgYmxvY2stLS0iKQoK