fork download
  1. textstream=[
  2. "This ",
  3. "is ",
  4. "a ",
  5. "reference ",
  6. "[", # Notice that anything can be broken across different messages
  7. "r1] ",
  8. "and ",
  9. "here",
  10. "'s ",
  11. "an ",
  12. "image ",
  13. "of ",
  14. "a ",
  15. "cat <", # A more extreme example, breaking messages completely arbitrary
  16. "ima",
  17. "ge",
  18. ">a ",
  19. "black ",
  20. "ca",
  21. "t</i",
  22. "mage>.",
  23. ]
  24. #charstream=(char for fragment in textstream for char in fragment)
  25. charstream=(char for fragment in textstream for char in (print(f'Input: "{fragment}"'),fragment)[1])
  26.  
  27. state="text" # state, can switch to "reference" and "tag"
  28. collector="" # temporary storage between state changes
  29. tag=""
  30.  
  31. for character in charstream:
  32. oldstate=state # 2. (so we recognize state changes)
  33. if character=="[": # 1.
  34. state="reference"
  35. elif character=="]":
  36. state="text"
  37. elif character=="<":
  38. state="tag"
  39. elif character==">":
  40. state="text"
  41. else:
  42. collector+=character
  43. if oldstate!=state and collector: #2.
  44. if oldstate=="tag":
  45. if not tag: # entering tag
  46. tag=collector
  47. else: # exiting tag, this could be validated
  48. tag=""
  49. elif tag:
  50. print(f'{{"{tag}":"{collector}"}}')
  51. else:
  52. print(f'{{"{oldstate}":"{collector}"}}')
  53. collector=""
  54.  
  55. if collector: # 3.-ish
  56. print(f'{{"{state}":"{collector}"}}') # could be an error if not state is not "text"
  57.  
Success #stdin #stdout 0.04s 9624KB
stdin
Standard input is empty
stdout
Input: "This "
Input: "is "
Input: "a "
Input: "reference "
Input: "["
{"text":"This is a reference "}
Input: "r1] "
{"reference":"r1"}
Input: "and "
Input: "here"
Input: "'s "
Input: "an "
Input: "image "
Input: "of "
Input: "a "
Input: "cat <"
{"text":" and here's an image of a cat "}
Input: "ima"
Input: "ge"
Input: ">a "
Input: "black "
Input: "ca"
Input: "t</i"
{"image":"a black cat"}
Input: "mage>."
{"text":"."}