from time import process_time as timer
# Benchmark fixtures.
# File names are modelled as plain numeric strings: 1,000 MOV files ...
mov_files = list(map(str, range(1000)))
# ... and 800 PNG files. Let's assume most MOV files have a corresponding
# PNG file: the names "100" through "899" appear in both lists.
png_files = list(map(str, range(100, 900)))
# The simulated directory listing holds every MOV name and every PNG name.
# Best case for the sort-based method: the listing is sorted once here,
# outside the timed code, as if the OS had handed it over pre-sorted.
directory = sorted(mov_files + png_files)
# The set-based algorithm is largely indifferent to input order; move the
# statements out of the string below to try it on shuffled lists:
"""
from random import shuffle
shuffle(mov_files)
shuffle(png_files)
"""
N = 10_000  # Number of timed repetitions per algorithm
def do_sorter(entries=None, trials=None):
    """Time the sort-based duplicate count and return the count.

    Scans a sorted listing for adjacent equal entries; each such pair is
    taken to be a MOV file together with its corresponding PNG file.

    Args:
        entries: Sorted sequence of file names in which a name shared by a
            MOV and a PNG file appears as two adjacent equal items.
            Defaults to the module-level ``directory``.
        trials: How many times to repeat the scan for timing purposes.
            Defaults to the module-level ``N``.

    Returns:
        The number of adjacent equal pairs found in the last trial, or 0
        when ``trials`` is 0.

    Side effects:
        Prints the elapsed time (as measured by ``timer``) for all trials.
    """
    if entries is None:
        entries = directory
    if trials is None:
        trials = N
    # Bound up front so the return below is valid even with zero trials.
    mov_files_to_delete_count = 0
    t = timer()
    for _ in range(trials):
        mov_files_to_delete_count = 0
        i = 1
        entry_count = len(entries)
        while i < entry_count:
            if entries[i] == entries[i - 1]:
                # MOV file has corresponding PNG file:
                mov_files_to_delete_count += 1
                # Both members of the pair are consumed, so jump past them
                # instead of re-comparing the second one. Important for speed!
                i += 2
            else:
                i += 1
    print('do_sorter time:', timer() - t)
    return mov_files_to_delete_count
def do_setter(movs=None, pngs=None, trials=None):
    """Time the set-based duplicate count and return the count.

    Builds a set from the PNG names and counts how many MOV names are
    members of it.

    Args:
        movs: Iterable of MOV file names. Defaults to the module-level
            ``mov_files``.
        pngs: Iterable of PNG file names. Defaults to the module-level
            ``png_files``.
        trials: How many times to repeat the count for timing purposes.
            Defaults to the module-level ``N``.

    Returns:
        The number of MOV names also present among the PNG names, as
        computed in the last trial, or 0 when ``trials`` is 0.

    Side effects:
        Prints the elapsed time (as measured by ``timer``) for all trials.
    """
    if movs is None:
        movs = mov_files
    if pngs is None:
        pngs = png_files
    if trials is None:
        trials = N
    # Bound up front so the return below is valid even with zero trials.
    mov_files_to_delete_count = 0
    t = timer()
    for _ in range(trials):
        mov_files_to_delete_count = 0
        # The set is rebuilt on every trial on purpose: constructing it is
        # part of the work being timed.
        png_names = set(pngs)
        for mov in movs:
            if mov in png_names:
                mov_files_to_delete_count += 1
    print('do_setter time:', timer() - t)
    return mov_files_to_delete_count
# Run both benchmarks. Each call prints its own timing line and returns the
# number of MOV files that have a corresponding PNG file.
print('do_sorter deletion count:', do_sorter())
print('do_setter deletion count:', do_setter())