"""Demo: split an arbitrarily fragmented text stream into typed segments.

The stream may contain ``[reference]`` markers and ``<tag>...</tag>``
regions, and any of them may be broken across fragment boundaries.  Each
completed segment is printed as a one-key JSON object as soon as the state
change that ends it is seen.
"""
import json

textstream = [
    "This ",
    "is ",
    "a ",
    "reference ",
    "[",  # Notice that anything can be broken across different messages
    "r1] ",
    "and ",
    "here",
    "'s ",
    "an ",
    "image ",
    "of ",
    "a ",
    "cat <",  # A more extreme example, breaking messages completely arbitrarily
    "ima",
    "ge",
    ">a ",
    "black ",
    "ca",
    "t</i",
    "mage>.",
]

# Delimiter character -> state entered when that character is read.
_TRANSITIONS = {
    "[": "reference",
    "]": "text",
    "<": "tag",
    ">": "text",
}


def trace_fragments(fragments):
    """Yield every character of every fragment, echoing each fragment first.

    The echo (``Input: "<fragment>"``) is printed lazily, at the moment the
    consumer starts reading that fragment, so it interleaves with whatever
    the consumer prints.
    """
    for fragment in fragments:
        print(f'Input: "{fragment}"')
        yield from fragment


def parse_stream(characters):
    """Yield ``(kind, content)`` pairs parsed from an iterable of characters.

    ``kind`` is ``"text"`` or ``"reference"`` for plain segments, or the name
    of the currently open tag for text found between ``<name>`` and the next
    tag.  Segments are yielded as soon as the delimiter that ends them is
    read, so the function works on a live stream.

    Notes on behaviour kept from the original script:

    * Whatever is still collected when the stream ends is yielded under the
      current state name (which may be ``"reference"`` or ``"tag"`` if the
      input was cut off mid-marker).
    * The content of the tag that follows an open tag is not validated; it
      simply closes the open tag.
    * A delimiter that does not change the state (e.g. ``>`` in plain text)
      is dropped and does not end the current segment.
    """
    state = "text"  # one of "text", "reference", "tag"
    collector = ""  # characters gathered since the last emitted segment
    tag = ""  # name of the currently open tag, "" when none is open

    for character in characters:
        new_state = _TRANSITIONS.get(character)
        if new_state is None:
            # Ordinary character: just accumulate it.
            collector += character
            continue

        oldstate, state = state, new_state
        if oldstate == state or not collector:
            # Nothing finished: either no real transition or nothing gathered.
            continue

        if oldstate == "tag":
            # Leaving "<...>": the first one opens a tag, the next closes it.
            tag = "" if tag else collector
        elif tag:
            yield tag, collector
        else:
            yield oldstate, collector
        collector = ""

    if collector:
        # Could be treated as an error if state is not "text".
        yield state, collector


def format_event(kind, content):
    """Return ``{"<kind>":"<content>"}`` as compact, correctly escaped JSON."""
    return json.dumps({kind: content}, ensure_ascii=False, separators=(",", ":"))


# Quiet alternative without the "Input:" echo:
# charstream = (char for fragment in textstream for char in fragment)
charstream = trace_fragments(textstream)

for kind, content in parse_stream(charstream):
    print(format_event(kind, content))
dGV4dHN0cmVhbT1bCiAgIlRoaXMgIiwKICAiaXMgIiwKICAiYSAiLAogICJyZWZlcmVuY2UgIiwKICAiWyIsICAjIE5vdGljZSB0aGF0IGFueXRoaW5nIGNhbiBiZSBicm9rZW4gYWNyb3NzIGRpZmZlcmVudCBtZXNzYWdlcwogICJyMV0gIiwKICAiYW5kICIsCiAgImhlcmUiLAogICIncyAiLAogICJhbiAiLAogICJpbWFnZSAiLAogICJvZiAiLAogICJhICIsCiAgImNhdCA8IiwgICMgQSBtb3JlIGV4dHJlbWUgZXhhbXBsZSwgYnJlYWtpbmcgbWVzc2FnZXMgY29tcGxldGVseSBhcmJpdHJhcnkKICAiaW1hIiwKICAiZ2UiLAogICI+YSAiLAogICJibGFjayAiLAogICJjYSIsCiAgInQ8L2kiLAogICJtYWdlPi4iLApdCiNjaGFyc3RyZWFtPShjaGFyIGZvciBmcmFnbWVudCBpbiB0ZXh0c3RyZWFtIGZvciBjaGFyIGluIGZyYWdtZW50KQpjaGFyc3RyZWFtPShjaGFyIGZvciBmcmFnbWVudCBpbiB0ZXh0c3RyZWFtIGZvciBjaGFyIGluIChwcmludChmJ0lucHV0OiAie2ZyYWdtZW50fSInKSxmcmFnbWVudClbMV0pCgpzdGF0ZT0idGV4dCIgICMgc3RhdGUsIGNhbiBzd2l0Y2ggdG8gInJlZmVyZW5jZSIgYW5kICJ0YWciCmNvbGxlY3Rvcj0iIiAgIyB0ZW1wb3Jhcnkgc3RvcmFnZSBiZXR3ZWVuIHN0YXRlIGNoYW5nZXMKdGFnPSIiCgpmb3IgY2hhcmFjdGVyIGluIGNoYXJzdHJlYW06CiAgb2xkc3RhdGU9c3RhdGUgICAgICAgICAgICAgIyAyLiAoc28gd2UgcmVjb2duaXplIHN0YXRlIGNoYW5nZXMpCiAgaWYgY2hhcmFjdGVyPT0iWyI6ICAgICAgICAgIyAxLgogICAgc3RhdGU9InJlZmVyZW5jZSIKICBlbGlmIGNoYXJhY3Rlcj09Il0iOgogICAgc3RhdGU9InRleHQiCiAgZWxpZiBjaGFyYWN0ZXI9PSI8IjoKICAgIHN0YXRlPSJ0YWciCiAgZWxpZiBjaGFyYWN0ZXI9PSI+IjoKICAgIHN0YXRlPSJ0ZXh0IgogIGVsc2U6CiAgICBjb2xsZWN0b3IrPWNoYXJhY3RlcgogIGlmIG9sZHN0YXRlIT1zdGF0ZSBhbmQgY29sbGVjdG9yOiAgICAgICAgICAgIzIuCiAgICBpZiBvbGRzdGF0ZT09InRhZyI6CiAgICAgIGlmIG5vdCB0YWc6ICAjIGVudGVyaW5nIHRhZwogICAgICAgIHRhZz1jb2xsZWN0b3IKICAgICAgZWxzZTogICAgICAgICMgZXhpdGluZyB0YWcsIHRoaXMgY291bGQgYmUgdmFsaWRhdGVkCiAgICAgICAgdGFnPSIiCiAgICBlbGlmIHRhZzoKICAgICAgcHJpbnQoZid7eyJ7dGFnfSI6Intjb2xsZWN0b3J9In19JykKICAgIGVsc2U6CiAgICAgIHByaW50KGYne3sie29sZHN0YXRlfSI6Intjb2xsZWN0b3J9In19JykKICAgIGNvbGxlY3Rvcj0iIgoKaWYgY29sbGVjdG9yOiAgICAgICAgICAgICAgICAjIDMuLWlzaAogIHByaW50KGYne3sie3N0YXRlfSI6Intjb2xsZWN0b3J9In19JykgIyBjb3VsZCBiZSBhbiBlcnJvciBpZiBub3Qgc3RhdGUgaXMgbm90ICJ0ZXh0Igo=