fork download
  1. from time import process_time as timer
  2.  
  3. # 1,000 MOV files:
  4. mov_files = [str(i) for i in range(1000)]
  5.  
  6. # Lets assume that most MOV files have
  7. # corresponding PNG files. 800 PNG files:
  8. png_files = [str(i) for i in range(100, 900)]
  9.  
  10. # The directory has both MOV files and PNG files
  11. directory = mov_files.copy()
  12. directory.extend(png_files)
  13. # Best case scenario for the sorting method:
  14. # We assume the directory listing
  15. # comes pre-sorted so we do not have to sort it ourselves:
  16. directory.sort()
  17.  
  18. # It really does not matter too much when using the
  19. # set-based algorithm whether these lists are sorted
  20. # or shuffled:
  21. """
  22. from random import shuffle
  23. shuffle(mov_files)
  24. shuffle(png_files)
  25. """
  26.  
  27. N = 10_000 # Number of trials
  28.  
  29. def do_sorter():
  30. t = timer()
  31. for _ in range(N):
  32. mov_files_to_delete_count = 0
  33. i = 1
  34. l = len(directory)
  35. while i < l:
  36. if directory[i] == directory[i-1]:
  37. # MOV file has corresponding PNG file:
  38. mov_files_to_delete_count += 1
  39. i += 2 # Important for speed!
  40. else:
  41. i += 1
  42. print('do_sorter time:', timer() - t)
  43. return mov_files_to_delete_count
  44.  
  45. def do_setter():
  46. t = timer()
  47. for _ in range(N):
  48. mov_files_to_delete_count = 0
  49. s = set(png_files)
  50. for mov in mov_files:
  51. if mov in s:
  52. mov_files_to_delete_count += 1
  53. print('do_setter time:', timer() - t)
  54. return mov_files_to_delete_count
  55.  
  56. print('do_sorted deletion count:', do_sorter())
  57. print('do_setter deletion count:', do_setter())
  58.  
Success #stdin #stdout 3.29s 9832KB
stdin
Standard input is empty
stdout
do_sorter time: 2.37762035
do_sorted deletion count: 800
do_setter time: 0.877240789
do_setter deletion count: 800