'''
datのルール
<>はスペース1つ分
<br>は\n 相当
&gt;&gt;は>>
'''
import urllib .request
import os
# Request headers passed to every urllib request built in this file;
# they present the script with a desktop Firefox User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) "
        "Gecko/20100101 Firefox/47.0"
    ),
}
def getURLs():
    """Fetch the board's subject.txt and build the thread URL and title lists.

    Each usable line of subject.txt is split at its first ``<>``: the text
    before it is taken as the dat file name and the text after it as the
    thread title. The results are stored in the module-level globals ``url``
    (one dat URL per thread) and ``title`` (the thread titles, same order),
    which is how ``getALL`` reads them.

    Returns:
        The ``(url, title)`` pair of lists that was stored in the globals.

    Raises:
        urllib.error.URLError: if subject.txt cannot be fetched.
    """
    global url, title

    # Real board host. The pasted copy of this script carried a
    # "content-available-to-author-only" placeholder here, which cannot
    # resolve as a hostname.
    board = "http://next2ch.net/news4vip/"
    request = urllib.request.Request(url=board + "subject.txt", headers=headers)

    # The with-block closes the connection, so no explicit close() is needed.
    with urllib.request.urlopen(request) as nep:
        # subject.txt is cp932; replace undecodable bytes instead of letting
        # a single malformed character abort the whole run.
        text = nep.read().decode('cp932', errors='replace')

    url = []
    title = []
    for line in text.splitlines():
        dat_name, separator, thread_title = line.partition('<>')
        dat_name = dat_name.strip()
        # Skip blank or malformed lines; they would yield a bogus dat URL.
        if not separator or not dat_name:
            continue
        # Plain concatenation rather than an f-string, so the file also
        # compiles on interpreters older than Python 3.6.
        url.append(board + "dat/" + dat_name)
        title.append(thread_title.strip())

    return url, title
def getALL():
    """Scan every thread on the board for lines containing the global ``ID``.

    Calls ``getURLs`` to fill the global ``title`` and ``url`` lists, then
    downloads each dat file. For every line of a dat file that contains
    ``ID``, the thread title, the dat URL (directly after the title, followed
    by a newline) and the matching line are appended to ``./test.txt``, and
    the line is printed to stdout.

    Threads that cannot be downloaded are reported on stderr and skipped so
    that one dead thread does not abort the whole scan.
    """
    import sys
    from urllib.error import URLError

    getURLs()

    for thread_title, dat in zip(title, url):
        request = urllib.request.Request(url=dat, headers=headers)
        try:
            # The with-block closes the connection for us.
            with urllib.request.urlopen(request) as nep:
                # dat files are cp932; replace undecodable bytes rather than
                # crash on a single malformed character.
                text = nep.read().decode('cp932', errors='replace')
        except URLError as err:
            # HTTPError is a subclass of URLError, so HTTP failures land here too.
            print('skipped ' + dat + ': ' + str(err), file=sys.stderr)
            continue

        matches = [line for line in text.splitlines() if ID in line]
        if not matches:
            # Nothing to record for this thread; do not touch the output file.
            continue

        # Explicit UTF-8 so writing Japanese text does not depend on the
        # platform's default encoding.
        with open('./test.txt', 'a', encoding='utf-8') as out:
            for line in matches:
                out.write(thread_title)
                out.write(dat + '\n')
                out.write(line)
                print(line)
                # Bare newline: the previous '\n ' left a stray space at the
                # start of every following record.
                out.write('\n')
# Prompt for the poster ID to search for (the prompt asks for the form
# "ID:OOOOOOO"). getALL reads this value through the module-level name ID,
# so the assignment has to happen before the call below.
ID = input ( '記入方法は【ID:OOOOOOO】>>' )
# Run the scan over every thread listed in subject.txt.
getALL( )
'''
終わったこと
subject.txtからdatを取得してURL化したurl配列が完成
ついでにスレタイもゲット
input関数でIDを取得する。
やること
IDを見つけたらその行を取得する。
<br>を¥nに変換
&gt;&gt;を>>に変換
最後にテキストデータとして書き出したい
'''
JycnCmRhdOOBruODq+ODvOODqwo8PuOBr+OCueODmuODvOOCuTHjgaTliIYKPGJyPuOBr1xu55u45b2TCiZndDsmZ3Q744GvPj4KJycnCgppbXBvcnQgdXJsbGliLnJlcXVlc3QKaW1wb3J0IG9zCmhlYWRlcnMgPSB7CiAgICAgICAgIlVzZXItQWdlbnQiOiAiTW96aWxsYS81LjAgKFgxMTsgVWJ1bnR1OyBMaW51eCB4ODZfNjQ7IHJ2OjQ3LjApIEdlY2tvLzIwMTAwMTAxIEZpcmVmb3gvNDcuMCIKICAgICAgICB9CgpkZWYgZ2V0VVJMcygpOgogICAgI1VSTOOBqOOCueODrOOCv+OCpOOCkuWPluW+l+OBmeOCi+ODl+ODreOCsOODqeODoAogICAgc3ViamVjdCA9ICJodHRwOi8vbi4uLmNvbnRlbnQtYXZhaWxhYmxlLXRvLWF1dGhvci1vbmx5Li4uaC5uZXQvbmV3czR2aXAvc3ViamVjdC50eHQiCiAgICByZXF1ZXN0ID0gdXJsbGliLnJlcXVlc3QuUmVxdWVzdCh1cmw9c3ViamVjdCwgaGVhZGVycz1oZWFkZXJzKSMKCiAgICB3aXRoIHVybGxpYi5yZXF1ZXN0LnVybG9wZW4ocmVxdWVzdCkgYXMgbmVwOiN3aXRo44KS5L2/44GG44GT44Go44GnY2xvc2XjgpLnnIHnlaUs5o6l57aaCiAgICAgICAgdGV4dCA9IG5lcC5yZWFkKCkuZGVjb2RlKCdjcDkzMicpI3N1YmplY3QudHh044GvY3A5MzLjgarjga7jgacKCiAgICAgICAgZ2xvYmFsIHVybCx0aXRsZQogICAgICAgIHVybCA9IFtmImh0dHA6Ly9uLi4uY29udGVudC1hdmFpbGFibGUtdG8tYXV0aG9yLW9ubHkuLi5oLm5ldC9uZXdzNHZpcC9kYXQve2xpbmUuc3BsaXQoJzwnLCAxKVswXS5zdHJpcCgpfSIgZm9yIGxpbmUgaW4gdGV4dC5zcGxpdGxpbmVzKCldI3N1YmplY3QudHh044GL44KJ5LiA6KGM44Ga44Gk44Gr5YiG44GR44Gm44CBPOS7peWJjeOBq+OBguOCi+OCguOBruOCkuWPluW+l+OBl+OBpuOBjeOBpuOBhOOCi+OAggogICAgICAgIHRpdGxlID0gW2Yie2xpbmUuc3BsaXQoJz4nLCAxKVstMV0uc3RyaXAoKX0iIGZvciBsaW5lIGluIHRleHQuc3BsaXRsaW5lcygpXSN0ZXh044GL44KJ5LiA6KGM44Ga44Gk44Gr5YiG44GR44Gm44CBPuS7pemZjeOBq+OBguOCi+OCguOBruOCkuWPluW+l+OBl+OBpuOBjeOBpuOBhOOCi+OAggoKICAgICNwcmludCh1cmwpCiAgICAjcHJpbnQodGl0bGUpCgpkZWYgZ2V0QUxMKCk6CiAgICBnZXRVUkxzKCkKICAgIAogICAgZm9yIHRocmVhZF90aXRsZSwgZGF0IGluIHppcCh0aXRsZSwgdXJsKToKICAgICAgICAjcHJpbnQodGhyZWFkX3RpdGxlLGRhdCkj44K544Os44K/44Kk44GoVVJMCgogICAgICAgIHN1YmplY3QgPSBkYXQKICAgICAgICByZXF1ZXN0ID0gdXJsbGliLnJlcXVlc3QuUmVxdWVzdCh1cmw9c3ViamVjdCwgaGVhZGVycz1oZWFkZXJzKSNkYXTjgahoZWFkZXLjga7jg4fjg7zjgr/jgpLlhaXjgozjgosKCiAgICAgICAgd2l0aCB1cmxsaWIucmVxdWVzdC51cmxvcGVuKHJlcXVlc3QpIGFzIG5lcDojd2l0aOOCkuS9v+OBhuOBk+OBqOOBp2Nsb3Nl44KS55yB55WlLOaOpee2mgogICAgICAgICAgICB0ZXh0ID0gbmVwLnJlYWQoKS5kZWNv
ZGUoJ2NwOTMyJykjZGF044OV44Kh44Kk44Or6Kqt44G/6L6844G/44CCY3A5MzI/CiAgICAgICAgICAgIGxpbmVzID0gdGV4dC5zcGxpdGxpbmVzKCkKCiAgICAgICAgICAgICPjg5XjgqHjgqTjg6vjga7mm7jjgY3ovrzjgb8KICAgICAgICAgICAgI3dpdGggb3BlbignLi9oaXNzaGkudHh0JywndycpIGFzIGZpbGU6CiAgICAgICAgICAgIHdpdGggb3BlbignLi90ZXN0LnR4dCcsJ2EnKSBhcyBmaWxlOgogICAgICAgICAgICAj44GT44GT44Gr5paH5a2X5YiX5qSc57Si44KS5YWl44KM44KL44CC44KC44GX44CBSUTjgYzopovjgaTjgYvjgaPjgZ/jgonjgIHjgrnjg6zjgr/jgqTjgahVUkzjgpLoqJjovInjgZnjgovjgIJ0ZXh044GL44KJSUTjgYzopovjgaTjgYvjgaPjgZ/jgonjgIHoqJjlhaXjgIHmlLnooYzjgZnjgovjgIIKICAgICAgICAgICAgICAgIGZvciBsaW5lIGluIGxpbmVzOgogICAgICAgICAgICAgICAgICAgIGlmIGxpbmUuZmluZChJRCkgIT0gLTE6CiAgICAgICAgICAgICAgICAgICAgICAgIGZpbGUud3JpdGUodGhyZWFkX3RpdGxlKQogICAgICAgICAgICAgICAgICAgICAgICBmaWxlLndyaXRlKGRhdCtzdHIoJ1xuJykpCiAgICAgICAgICAgICAgICAgICAgICAgIGZpbGUud3JpdGUobGluZSkKICAgICAgICAgICAgICAgICAgICAgICAgcHJpbnQobGluZSkKICAgICAgICAgICAgICAgICAgICAgICAgZmlsZS53cml0ZShzdHIoJ1xuJykpCiAgICAgICAgICAgICNjb250aW51ZQogICAgICAgICNjb250aW51ZQoKCgoKSUQgPSBpbnB1dCgn6KiY5YWl5pa55rOV44Gv44CQSUQ6T09PT09PT+OAkT4+JykKCmdldEFMTCgpCgonJycK57WC44KP44Gj44Gf44GT44GoCnN1YmplY3QudHh044GL44KJZGF044KS5Y+W5b6X44GX44GmVVJM5YyW44GX44GfdXJs6YWN5YiX44GM5a6M5oiQCuOBpOOBhOOBp+OBq+OCueODrOOCv+OCpOOCguOCsuODg+ODiAppbnB1dOmWouaVsOOBp0lE44KS5Y+W5b6X44GZ44KL44CCCgrjgoTjgovjgZPjgagKSUTjgpLopovjgaTjgZHjgZ/jgonjgZ3jga7ooYzjgpLlj5blvpfjgZnjgovjgIIKPGJyPuOCksKlbuOBq+WkieaPmwomZ3Q7Jmd0O+OCkj4+44Gr5aSJ5o+bCuacgOW+jOOBq+ODhuOCreOCueODiOODh+ODvOOCv+OBqOOBl+OBpuabuOOBjeWHuuOBl+OBn+OBhAoKJycnCg==
compilation info
Traceback (most recent call last):
File "/usr/lib/python3.5/py_compile.py", line 125, in compile
_optimize=optimize)
File "<frozen importlib._bootstrap_external>", line 735, in source_to_code
File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
File "./prog.py", line 23
url = [f"http://next2ch.net/news4vip/dat/{line.split('<', 1)[0].strip()}" for line in text.splitlines()]#subject.txtから一行ずつに分けて、<以前にあるものを取得してきている。
^
SyntaxError: invalid syntax
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/lib/python3.5/py_compile.py", line 129, in compile
raise py_exc
py_compile.PyCompileError: File "./prog.py", line 23
url = [f"http://next2ch.net/news4vip/dat/{line.split('<', 1)[0].strip()}" for line in text.splitlines()]#subject.txtから一行ずつに分けて、<以前にあるものを取得してきている。
^
SyntaxError: invalid syntax
stdout