from collections import defaultdict
class UnionFind:
"""
Detailed info on union find data structure can be found at:
http://a...content-available-to-author-only...n.edu/15uf/
Complexity of merge is O(log*n), not O(logn). Detailed info can be found at:
https://e...content-available-to-author-only...a.org/wiki/Iterated_logarithm
"""
def __init__(self, size):
self.parent = [i for i in range(size)]
self.size = [1] * size
def _root(self, a):
while self.parent[a] != a:
self.parent[a] = self.parent[self.parent[a]]
a = self.parent[a]
return a
def merge(self, a, b):
a, b = self._root(a), self._root(b)
if self.size[a] < self.size[b]:
a, b = b, a
self.parent[b] = a
self.size[a] += self.size[b]
def __repr__(self):
ret = defaultdict(list)
for i in range(len(self.parent)):
ret[self._root(i)].append(i)
return "\n".join(str(v) for k, v in ret.items())
def find_same_contacts(contacts):
"""
For each contact in contacts
For each element in contact
If element exists at seen_keys, merge contacts
Otherwise add element to seen_keys with contact id as value
Complexity is O(nlog*n)
"""
union_find = UnionFind(len(contacts))
seen_keys = {}
for i, contact in enumerate(contacts):
for elem in contact:
if elem in seen_keys:
union_find.merge(seen_keys[elem], i)
else:
seen_keys[elem] = i
print(union_find)
the_contacts = [["Gaurav", "gaurav@gmail.com", "gaurav@gfgQA.com"],
["Lucky", "lucky@gmail.com", "+1234567"],
["gaurav123", "+5412312", "gaurav123@skype.com"],
["gaurav1993", "+5412312", "gaurav@gfgQA.com"],
["raja", "+2231210", "raja@gfg.com"],
["bahubali", "+878312", "raja"]]
find_same_contacts(the_contacts)