fork download
  1. import re
  2.  
  3. child_keywords = ["土木一式工事", "産業用機器", "事務用品・機器"]
  4. body_lower = '''01 事務用品・機器
  5.  
  6. 大阪府警察大正警察署:指サック等の購入   :大阪市大正区
  7.  
  8. https://w...content-available-to-author-only...a.jp/CALS/Publish/EbController?Shori=SmallKokokuInfo&open_kokoku=01202350042214
  9.  
  10. 01 事務用品・機器
  11.  
  12. 府立学校大阪わかば高等学校:校内衛生用品7件 ★ :大阪市生野区
  13.  
  14. https://w...content-available-to-author-only...a.jp/CALS/Publish/EbController?Shori=SmallKokokuInfo&open_kokoku=01202350041978
  15.  
  16. 01 事務用品・機器
  17.  
  18. 府立学校工芸高等学校:イレパネ 他 購入   :大阪市阿倍野区
  19.  
  20. https://w...content-available-to-author-only...a.jp/CALS/Publish/EbController?Shori=SmallKokokuInfo&open_kokoku=01202350042117'''
  21.  
  22. for paragraph_text, child_keyword, url in re.findall(rf'([^\n]*({"|".join(map(re.escape, child_keywords))})[^\n]*).*?\b(https?://\S+)', body_lower, re.S):
  23. print(f'{paragraph_text=}', f'{child_keyword=}', f'{url=}', sep='\n')
Success #stdin #stdout 0.03s 9652KB
stdin
Standard input is empty
stdout
paragraph_text='01 事務用品・機器'
child_keyword='事務用品・機器'
url='https://w...content-available-to-author-only...a.jp/CALS/Publish/EbController?Shori=SmallKokokuInfo&open_kokoku=01202350042214'
paragraph_text='01 事務用品・機器'
child_keyword='事務用品・機器'
url='https://w...content-available-to-author-only...a.jp/CALS/Publish/EbController?Shori=SmallKokokuInfo&open_kokoku=01202350041978'
paragraph_text='01 事務用品・機器'
child_keyword='事務用品・機器'
url='https://w...content-available-to-author-only...a.jp/CALS/Publish/EbController?Shori=SmallKokokuInfo&open_kokoku=01202350042117'