from operator import itemgetter
from pprint import pprint
def find_maximum_set(sets):
"""
Tries to find the largest combined set, formed from the given sets, such
that all used sets are distinct.
Returns (size, indexes, union) where indexes are the index of all the sets
used.
Note: This is an approximation, and will not always return the absolute
maximum, but should be very close in most cases.
"""
results = []
for si,s in enumerate(sets):
sn = len(s)
new_set = set(s) # if nothing else works, add the set by itself
new_len = sn
new_is = [si]
# try to combine it with all the previous results, picking the one that
# would produce the largest union
for rn,ris,r in results:
if r.isdisjoint(s):
rs = r.union(s)
if rn+sn > new_len:
new_set = rs
new_len = rn+sn
new_is = ris + [si]
# add the new set to the result collection
results.append((new_len,new_is,new_set))
# return the largest result
return max(results, key=itemgetter(0))
def find_all_maximum_sets(sets):
"""
Merges sets such that the size of the largest sets are maximized.
>>> find_all_maximum_sets([[1,5,7], [2,6], [3,4,5], [1,2] , [4], [1]])
[set([1, 2, 4, 5, 6, 7]), set([1, 2, 3, 4, 5]), set([1])]
"""
sets = list(sets)
result = []
while len(sets) > 0:
_, indexes, largest = find_maximum_set(sets)
result.append(largest)
sets = [s for i,s in enumerate(sets) if i not in indexes]
return result
pprint(find_all_maximum_sets([[1,5,7], [2,6], [3,4,5], [1,2] , [4], [1]]))
ZnJvbSBvcGVyYXRvciBpbXBvcnQgaXRlbWdldHRlcgpmcm9tIHBwcmludCBpbXBvcnQgcHByaW50CgpkZWYgZmluZF9tYXhpbXVtX3NldChzZXRzKToKICAgICIiIgogICAgVHJpZXMgdG8gZmluZCB0aGUgbGFyZ2VzdCBjb21iaW5lZCBzZXQsIGZvcm1lZCBmcm9tIHRoZSBnaXZlbiBzZXRzLCBzdWNoCiAgICB0aGF0IGFsbCB1c2VkIHNldHMgYXJlIGRpc3RpbmN0LgogICAgCiAgICBSZXR1cm5zIChzaXplLCBpbmRleGVzLCB1bmlvbikgd2hlcmUgaW5kZXhlcyBhcmUgdGhlIGluZGV4IG9mIGFsbCB0aGUgc2V0cwogICAgdXNlZC4KICAgIAogICAgTm90ZTogVGhpcyBpcyBhbiBhcHByb3hpbWF0aW9uLCBhbmQgd2lsbCBub3QgYWx3YXlzIHJldHVybiB0aGUgYWJzb2x1dGUKICAgIG1heGltdW0sIGJ1dCBzaG91bGQgYmUgdmVyeSBjbG9zZSBpbiBtb3N0IGNhc2VzLgogICAgIiIiCiAgICByZXN1bHRzID0gW10KICAgIGZvciBzaSxzIGluIGVudW1lcmF0ZShzZXRzKToKICAgICAgICBzbiA9IGxlbihzKQogICAgICAgIG5ld19zZXQgPSBzZXQocykgIyBpZiBub3RoaW5nIGVsc2Ugd29ya3MsIGFkZCB0aGUgc2V0IGJ5IGl0c2VsZgogICAgICAgIG5ld19sZW4gPSBzbgogICAgICAgIG5ld19pcyA9IFtzaV0KICAgICAgICAKICAgICAgICAjIHRyeSB0byBjb21iaW5lIGl0IHdpdGggYWxsIHRoZSBwcmV2aW91cyByZXN1bHRzLCBwaWNraW5nIHRoZSBvbmUgdGhhdAogICAgICAgICMgd291bGQgcHJvZHVjZSB0aGUgbGFyZ2VzdCB1bmlvbgogICAgICAgIGZvciBybixyaXMsciBpbiByZXN1bHRzOgogICAgICAgICAgICBpZiByLmlzZGlzam9pbnQocyk6CiAgICAgICAgICAgICAgICBycyA9IHIudW5pb24ocykKICAgICAgICAgICAgICAgIGlmIHJuK3NuID4gbmV3X2xlbjoKICAgICAgICAgICAgICAgICAgICBuZXdfc2V0ID0gcnMKICAgICAgICAgICAgICAgICAgICBuZXdfbGVuID0gcm4rc24KICAgICAgICAgICAgICAgICAgICBuZXdfaXMgPSByaXMgKyBbc2ldCiAgICAgICAgCiAgICAgICAgIyBhZGQgdGhlIG5ldyBzZXQgdG8gdGhlIHJlc3VsdCBjb2xsZWN0aW9uCiAgICAgICAgcmVzdWx0cy5hcHBlbmQoKG5ld19sZW4sbmV3X2lzLG5ld19zZXQpKQogICAgICAgIAogICAgIyByZXR1cm4gdGhlIGxhcmdlc3QgcmVzdWx0CiAgICByZXR1cm4gbWF4KHJlc3VsdHMsIGtleT1pdGVtZ2V0dGVyKDApKQoKZGVmIGZpbmRfYWxsX21heGltdW1fc2V0cyhzZXRzKToKICAgICIiIgogICAgTWVyZ2VzIHNldHMgc3VjaCB0aGF0IHRoZSBzaXplIG9mIHRoZSBsYXJnZXN0IHNldHMgYXJlIG1heGltaXplZC4KICAgIAogICAgPj4+IGZpbmRfYWxsX21heGltdW1fc2V0cyhbWzEsNSw3XSwgWzIsNl0sIFszLDQsNV0sIFsxLDJdICwgWzRdLCBbMV1dKQogICAgW3NldChbMSwgMiwgNCwgNSwgNiwgN10pLCBzZXQoWzEsIDIsIDMsIDQsIDVdKSwgc2V0KFsxXSldCiAgICAiIiIKICAgIHNldHMgPSBsaXN0KHNldHMpCiAgICByZXN1bHQgPSBbXQogICAgd2hpbGUgbGVuKHNldHMpID4gMDoKICAgICAgICBfLCBpbmRleGVzLCBsYXJnZXN0ID0gZmluZF9tYXhpbXVtX3NldChzZXRzKQogICAgICAgIHJlc3VsdC5hcHBlbmQobGFyZ2VzdCkKICAgICAgICBzZXRzID0gW3MgZm9yIGkscyBpbiBlbnVtZXJhdGUoc2V0cykgaWYgaSBub3QgaW4gaW5kZXhlc10KICAgIHJldHVybiByZXN1bHQKCnBwcmludChmaW5kX2FsbF9tYXhpbXVtX3NldHMoW1sxLDUsN10sIFsyLDZdLCBbMyw0LDVdLCBbMSwyXSAsIFs0XSwgWzFdXSkp