import os
from multiprocessing import Process, cpu_count
from threading import Thread
from queue import Queue
# -------------------------------
# CONFIG
# -------------------------------
SEARCH_DIR = "data"
KEYWORD = "error"
THREADS_PER_PROCESS = 4
# -------------------------------
# THREAD WORKER
# -------------------------------
def thread_search(queue, keyword, process_id, thread_id):
    """Consume file paths from *queue* and print each line containing *keyword*.

    The loop runs until a ``None`` sentinel is taken from the queue. Every
    match is printed as
    ``[P<process_id>-T<thread_id>] <path>:<line_no>: <stripped line>``.
    A failure while handling one file is reported and the worker moves on
    to the next item.

    Args:
        queue: Object providing ``get()`` and ``task_done()``; yields file
            paths and finally ``None`` to stop this worker.
        keyword: Substring looked up with a case-sensitive ``in`` test.
        process_id: Label of the owning process, used only in the output.
        thread_id: Label of this worker, used only in the output.
    """
    while True:
        file_path = queue.get()
        if file_path is None:
            # Sentinel: stop this worker. It is not acknowledged with
            # task_done(), matching how the owner enqueues it after join().
            break

        try:
            # errors="ignore" drops undecodable bytes so a file that is not
            # valid UTF-8 cannot abort the scan.
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                for line_no, line in enumerate(f, start=1):
                    if keyword in line:
                        print(
                            f"[P{process_id}-T{thread_id}] "
                            f"{file_path}:{line_no}: {line.strip()}"
                        )
        except Exception as e:
            # Best effort: report the problem and continue with the next file.
            print(f"[P{process_id}-T{thread_id}] Error: {e}")
        finally:
            # Acknowledge the item even if the handler above raised (for
            # example because stdout is broken); otherwise queue.join() in
            # the owner would wait forever for this item.
            queue.task_done()
# -------------------------------
# PROCESS WORKER
# -------------------------------
def process_search(files, keyword, process_id):
    """Search *files* for *keyword* using a pool of worker threads.

    Starts ``THREADS_PER_PROCESS`` threads running ``thread_search`` on a
    shared queue, feeds every path in *files* to that queue, waits until all
    of them have been acknowledged, then sends one ``None`` sentinel per
    worker and joins the threads. Prints a completion line at the end.

    Args:
        files: Iterable of file paths to hand to the workers.
        keyword: Substring passed through to ``thread_search``.
        process_id: Label passed to the workers and used in the final message.
    """
    work = Queue()
    workers = [
        Thread(target=thread_search, args=(work, keyword, process_id, idx))
        for idx in range(THREADS_PER_PROCESS)
    ]
    for worker in workers:
        worker.start()

    for path in files:
        work.put(path)

    # Block until every queued path has been marked done by a worker.
    work.join()

    # One sentinel per worker so each of them leaves its loop.
    for _ in workers:
        work.put(None)
    for worker in workers:
        worker.join()

    print(f"Process {process_id} finished")
# -------------------------------
# SPLIT LIST
# -------------------------------
def split_list(lst, n):
    """Split *lst* into *n* contiguous chunks of near-equal size.

    Chunk sizes differ by at most one; the first ``len(lst) % n`` chunks
    receive the extra element. When *lst* has fewer than *n* items the
    trailing chunks are empty.

    Args:
        lst: Sliceable sequence to partition.
        n: Number of chunks to produce; must be at least 1.

    Returns:
        A list of *n* slices of *lst*, in original order.

    Raises:
        ValueError: If *n* is smaller than 1.
    """
    if n < 1:
        # A negative n used to return [] (silently dropping every element)
        # and n == 0 failed with an unhelpful ZeroDivisionError.
        raise ValueError(f"n must be >= 1, got {n}")

    base, extra = divmod(len(lst), n)
    chunks = []
    start = 0
    for i in range(n):
        size = base + 1 if i < extra else base
        chunks.append(lst[start:start + size])
        start += size
    return chunks
# -------------------------------
# MAIN
# -------------------------------
if __name__ == "__main__":
    # Collect all .txt files below SEARCH_DIR, descending into subdirectories.
    all_files = []
    for root, _, names in os.walk(SEARCH_DIR):
        all_files.extend(
            os.path.join(root, name) for name in names if name.endswith(".txt")
        )

    if not all_files:
        print("No files found")
        # exit() is a helper installed by the site module for interactive
        # sessions and is missing under `python -S` or in frozen builds.
        # Raising SystemExit always works and keeps the exit status at 0.
        raise SystemExit(0)

    # Never start more processes than there are files to search.
    num_processes = min(cpu_count(), len(all_files))
    print(f"Using {num_processes} processes")
    print(f"{THREADS_PER_PROCESS} threads per process")
    print(f"Searching for keyword: '{KEYWORD}'\n")

    # One chunk of files per process; the chunk index doubles as process id.
    chunks = split_list(all_files, num_processes)
    processes = []
    for i, chunk in enumerate(chunks):
        p = Process(
            target=process_search,
            args=(chunk, KEYWORD, i)
        )
        p.start()
        processes.append(p)

    # Wait for every worker process before reporting completion.
    for p in processes:
        p.join()

    print("\nSearch completed ✔")
aW1wb3J0IG9zCmZyb20gbXVsdGlwcm9jZXNzaW5nIGltcG9ydCBQcm9jZXNzLCBjcHVfY291bnQKZnJvbSB0aHJlYWRpbmcgaW1wb3J0IFRocmVhZApmcm9tIHF1ZXVlIGltcG9ydCBRdWV1ZQoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgQ09ORklHCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQpTRUFSQ0hfRElSID0gImRhdGEiCktFWVdPUkQgPSAiZXJyb3IiClRIUkVBRFNfUEVSX1BST0NFU1MgPSA0CgoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgVEhSRUFEIFdPUktFUgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KZGVmIHRocmVhZF9zZWFyY2gocXVldWUsIGtleXdvcmQsIHByb2Nlc3NfaWQsIHRocmVhZF9pZCk6CiAgICB3aGlsZSBUcnVlOgogICAgICAgIGZpbGVfcGF0aCA9IHF1ZXVlLmdldCgpCiAgICAgICAgaWYgZmlsZV9wYXRoIGlzIE5vbmU6CiAgICAgICAgICAgIGJyZWFrCgogICAgICAgIHRyeToKICAgICAgICAgICAgd2l0aCBvcGVuKGZpbGVfcGF0aCwgInIiLCBlbmNvZGluZz0idXRmLTgiLCBlcnJvcnM9Imlnbm9yZSIpIGFzIGY6CiAgICAgICAgICAgICAgICBmb3IgbGluZV9ubywgbGluZSBpbiBlbnVtZXJhdGUoZiwgc3RhcnQ9MSk6CiAgICAgICAgICAgICAgICAgICAgaWYga2V5d29yZCBpbiBsaW5lOgogICAgICAgICAgICAgICAgICAgICAgICBwcmludCgKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGYiW1B7cHJvY2Vzc19pZH0tVHt0aHJlYWRfaWR9XSAiCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBmIntmaWxlX3BhdGh9OntsaW5lX25vfToge2xpbmUuc3RyaXAoKX0iCiAgICAgICAgICAgICAgICAgICAgICAgICkKICAgICAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGU6CiAgICAgICAgICAgIHByaW50KGYiW1B7cHJvY2Vzc19pZH0tVHt0aHJlYWRfaWR9XSBFcnJvcjoge2V9IikKCiAgICAgICAgcXVldWUudGFza19kb25lKCkKCgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyBQUk9DRVNTIFdPUktFUgojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KZGVmIHByb2Nlc3Nfc2VhcmNoKGZpbGVzLCBrZXl3b3JkLCBwcm9jZXNzX2lkKToKICAgIHF1ZXVlID0gUXVldWUoKQogICAgdGhyZWFkcyA9IFtdCgogICAgZm9yIGkgaW4gcmFuZ2UoVEhSRUFEU19QRVJfUFJPQ0VTUyk6CiAgICAgICAgdCA9IFRocmVhZCgKICAgICAgICAgICAgdGFyZ2V0PXRocmVhZF9zZWFyY2gsCiAgICAgICAgICAgIGFyZ3M9KHF1ZXVlLCBrZXl3b3JkLCBwcm9jZXNzX2lkLCBpKQogICAgICAgICkKICAgICAgICB0LnN0YXJ0KCkKICAgICAgICB0aHJlYWRzLmFwcGVuZCh0KQoKICAgIGZvciBmIGluIGZpbGVzOgogICAgICAgIHF1ZXVlLnB1dChmKQoKICAgIHF1ZXVlLmpvaW4oKQoKICAgIGZvciBfIGluIHRocmVhZHM6CiAgICAgICAgcXVldWUucHV0KE5vbmUpCgogICAgZm9yIHQgaW4gdGhyZWFkczoKICAgICAgICB0
LmpvaW4oKQoKICAgIHByaW50KGYiUHJvY2VzcyB7cHJvY2Vzc19pZH0gZmluaXNoZWQiKQoKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIFNQTElUIExJU1QKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCmRlZiBzcGxpdF9saXN0KGxzdCwgbik6CiAgICBrID0gbGVuKGxzdCkgLy8gbgogICAgciA9IGxlbihsc3QpICUgbgogICAgY2h1bmtzID0gW10KICAgIHN0YXJ0ID0gMAoKICAgIGZvciBpIGluIHJhbmdlKG4pOgogICAgICAgIGVuZCA9IHN0YXJ0ICsgayArICgxIGlmIGkgPCByIGVsc2UgMCkKICAgICAgICBjaHVua3MuYXBwZW5kKGxzdFtzdGFydDplbmRdKQogICAgICAgIHN0YXJ0ID0gZW5kCgogICAgcmV0dXJuIGNodW5rcwoKCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIE1BSU4KIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCmlmIF9fbmFtZV9fID09ICJfX21haW5fXyI6CiAgICAjIENvbGxlY3QgYWxsIHRleHQgZmlsZXMKICAgIGFsbF9maWxlcyA9IFtdCiAgICBmb3Igcm9vdCwgXywgZmlsZXMgaW4gb3Mud2FsayhTRUFSQ0hfRElSKToKICAgICAgICBmb3IgZiBpbiBmaWxlczoKICAgICAgICAgICAgaWYgZi5lbmRzd2l0aCgiLnR4dCIpOgogICAgICAgICAgICAgICAgYWxsX2ZpbGVzLmFwcGVuZChvcy5wYXRoLmpvaW4ocm9vdCwgZikpCgogICAgaWYgbm90IGFsbF9maWxlczoKICAgICAgICBwcmludCgiTm8gZmlsZXMgZm91bmQiKQogICAgICAgIGV4aXQoKQoKICAgIG51bV9wcm9jZXNzZXMgPSBtaW4oY3B1X2NvdW50KCksIGxlbihhbGxfZmlsZXMpKQogICAgcHJpbnQoZiJVc2luZyB7bnVtX3Byb2Nlc3Nlc30gcHJvY2Vzc2VzIikKICAgIHByaW50KGYie1RIUkVBRFNfUEVSX1BST0NFU1N9IHRocmVhZHMgcGVyIHByb2Nlc3MiKQogICAgcHJpbnQoZiJTZWFyY2hpbmcgZm9yIGtleXdvcmQ6ICd7S0VZV09SRH0nXG4iKQoKICAgIGNodW5rcyA9IHNwbGl0X2xpc3QoYWxsX2ZpbGVzLCBudW1fcHJvY2Vzc2VzKQogICAgcHJvY2Vzc2VzID0gW10KCiAgICBmb3IgaSBpbiByYW5nZShudW1fcHJvY2Vzc2VzKToKICAgICAgICBwID0gUHJvY2VzcygKICAgICAgICAgICAgdGFyZ2V0PXByb2Nlc3Nfc2VhcmNoLAogICAgICAgICAgICBhcmdzPShjaHVua3NbaV0sIEtFWVdPUkQsIGkpCiAgICAgICAgKQogICAgICAgIHAuc3RhcnQoKQogICAgICAgIHByb2Nlc3Nlcy5hcHBlbmQocCkKCiAgICBmb3IgcCBpbiBwcm9jZXNzZXM6CiAgICAgICAgcC5qb2luKCkKCiAgICBwcmludCgiXG5TZWFyY2ggY29tcGxldGVkIOKclCIp