fork download
  1. from html.parser import HTMLParser
  2.  
  3. class MyHTMLParser(HTMLParser):
  4. def __init__(self):
  5. HTMLParser.__init__(self)
  6. self.dlist = []
  7.  
  8. def handle_data(self, data):
  9. self.dlist.append(data)
  10. print("Encountered some data :", data)
  11.  
  12. parser = MyHTMLParser()
  13.  
  14. parser.feed('''google-ai-clips/
  15. <img src="https://content-available-to-author-only.aolcdn....." /> Googleが新製品を''')
  16. print('抽出:',parser.dlist)
  17.  
Success #stdin #stdout 0.02s 10008KB
stdin
Standard input is empty
stdout
Encountered some data  : google-ai-clips/ 

Encountered some data  :  Googleが新製品を
抽出: ['google-ai-clips/ \n', ' Googleが新製品を']