fork download
  1. import re
  2.  
  3. html = """<li class="info-wrap">
  4. <div class="hide info-json">
  5. <p>Content That I Want - JSON Data </p>
  6. </div>
  7. </li>
  8.  
  9. <li class="info-wrap hide">
  10. <div class="hide info-json">
  11. <p>Content That I Don't Want </p>
  12. </div>
  13. </li>
  14.  
  15. <li class="info-wrap no-meta">
  16. <div class="hide info-json">
  17. <p>Content That I Want - JSON Data </p>
  18. </div>
  19. </li>"""
  20.  
  21. l = re.findall(r"""<li\s+class="info-wrap(\s+no-meta)?"\s*>\s*
  22. <div\s+class="hide\s+info-json"\s*>
  23. \s*(.*?)\s*
  24. </div>\s*
  25. </li>
  26. """,html, flags=re.VERBOSE|re.IGNORECASE|re.DOTALL)
  27. l = [item[1] for item in l]
  28. print(l)
  29.  
Success #stdin #stdout 0.02s 9656KB
stdin
Standard input is empty
stdout
['<p>Content That I Want - JSON Data </p>', '<p>Content That I Want - JSON Data </p>']