fork(1) download
  1. textstream=[
  2. "This ",
  3. "is ",
  4. "a ",
  5. "reference ",
  6. "[", # Notice that anything can be broken across different messages
  7. "r1] ",
  8. "and ",
  9. "here",
  10. "'s ",
  11. "an ",
  12. "image ",
  13. "of ",
  14. "a ",
  15. "cat <", # A more extreme example, breaking messages completely arbitrary
  16. "ima",
  17. "ge",
  18. ">a ",
  19. "black ",
  20. "ca",
  21. "t</i",
  22. "mage>.",
  23. ]
  24. charstream=(char for fragment in textstream for char in fragment)
  25.  
  26. state="text" # state, can switch to "reference" and "tag"
  27. collector="" # temporary storage between state changes
  28. parsed=[] # list of parsed result
  29.  
  30. for character in charstream:
  31. oldstate=state # 2. (so we recognize state changes)
  32. if character=="[": # 1.
  33. state="reference"
  34. elif character=="]":
  35. state="text"
  36. elif character=="<":
  37. state="tag"
  38. elif character==">":
  39. state="text"
  40. else:
  41. collector+=character
  42. if oldstate!=state and collector: # 2.
  43. parsed.append({oldstate:collector})
  44. collector=""
  45.  
  46. if collector: # 3.-ish
  47. parsed.append({state:collector}) # could be an error if not state is not "text"
  48.  
  49. import json
  50. print(json.dumps(parsed,indent=2))
  51.  
  52. print("-----------------------------------")
  53.  
  54. result=[]
  55. tag=""
  56. for element in parsed:
  57. if "tag" in element:
  58. if tag=="": # entering a tag
  59. tag=element["tag"]
  60. else:
  61. tag="" # it's an exit, could be validated
  62. else:
  63. if tag=="": # we are at top level
  64. result.append(element)
  65. else: # we use the tag, and also expect "text"
  66. result.append({tag:element["text"]})
  67.  
  68. print(json.dumps(result,indent=2))
Success #stdin #stdout 0.03s 9724KB
stdin
Standard input is empty
stdout
[
  {
    "text": "This is a reference "
  },
  {
    "reference": "r1"
  },
  {
    "text": " and here's an image of a cat "
  },
  {
    "tag": "image"
  },
  {
    "text": "a black cat"
  },
  {
    "tag": "/image"
  },
  {
    "text": "."
  }
]
-----------------------------------
[
  {
    "text": "This is a reference "
  },
  {
    "reference": "r1"
  },
  {
    "text": " and here's an image of a cat "
  },
  {
    "image": "a black cat"
  },
  {
    "text": "."
  }
]