fork download
# coding=utf-8  # the whole file is UTF-8 encoded
  2. import sys
  3. D = [['A','B','C','D'],['B','C','E'],['A','B','C','E'],['B','D','E'],['A','B','C','D']]
  4. F = apriori(D, 0.7)
  5. def apriori(D, minSup):
  6.  
  7. '''频繁项集用keys表示,
  8. key表示项集中的某一项,
  9. cutKeys表示经过剪枝步的某k项集。
  10. C表示某k项集的每一项在事务数据库D中的支持计数
  11. '''
  12.  
  13. C1 = {}
  14. for T in D:
  15. for I in T:
  16. if I in C1:
  17. C1[I] += 1
  18. else:
  19. C1[I] = 1
  20.  
  21. print C1
  22. _keys1 = C1.keys()
  23.  
  24. keys1 = []
  25. for i in _keys1:
  26. keys1.append([i])
  27.  
  28. n = len(D)
  29. cutKeys1 = []
  30. for k in keys1[:]:
  31. if C1[k[0]]*1.0/n >= minSup:
  32. cutKeys1.append(k)
  33.  
  34. cutKeys1.sort()
  35.  
  36.  
  37. keys = cutKeys1
  38. all_keys = []
  39. while keys != []:
  40. C = getC(D, keys)
  41. cutKeys = getCutKeys(keys, C, minSup, len(D))
  42. for key in cutKeys:
  43. all_keys.append(key)
  44. keys = aproiri_gen(cutKeys)
  45.  
  46. return all_keys
  47.  
  48. def getC(D, keys):
  49. '''对keys中的每一个key进行计数'''
  50. C = []
  51. for key in keys:
  52. c = 0
  53. for T in D:
  54. have = True
  55. for k in key:
  56. if k not in T:
  57. have = False
  58. if have:
  59. c += 1
  60. C.append(c)
  61. return C
  62.  
  63. def getCutKeys(keys, C, minSup, length):
  64. '''剪枝步'''
  65. for i, key in enumerate(keys):
  66. if float(C[i]) / length < minSup:
  67. keys.remove(key)
  68. return keys
  69.  
  70.  
  71.  
  72. def keyInT(key, T):
  73. '''判断项key是否在数据库中某一元组T中'''
  74. for k in key:
  75. if k not in T:
  76. return False
  77. return True
  78.  
  79.  
  80. def aproiri_gen(keys1):
  81. '''连接步'''
  82. keys2 = []
  83. for k1 in keys1:
  84. for k2 in keys1:
  85. if k1 != k2:
  86. key = []
  87. for k in k1:
  88. if k not in key:
  89. key.append(k)
  90. for k in k2:
  91. if k not in key:
  92. key.append(k)
  93. key.sort()
  94. if key not in keys2:
  95. keys2.append(key)
  96.  
  97. return keys2
  98.  
  99.  
  100.  
  101.  
  102.  
  103. print '\nfrequent itemset:\n', F
  104.  
Runtime error #stdin #stdout #stderr 0.02s 6812KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Traceback (most recent call last):
  File "prog.py", line 4, in <module>
NameError: name 'apriori' is not defined