"""Split a few sample URLs into their components with one regular expression."""

import re

# Sample inputs: a plain web URL, a URL with a numeric host and an explicit
# port, and a URL carrying both a query string and a fragment.
string_array = [
    "http://www.cplusplus.com/reference/regex/regex_match/",
    "tcp://192.168.2.1:1234/hello/how/are/you",
    "https://mail.google.com/mail/u/0/?tab=wm#inbox/15178022db56df29?projector=1",
]

# Seven capture groups, in order: scheme, slashes, host, port, path, query,
# fragment.  Written as adjacent raw strings so each backslash appears once;
# the pieces concatenate into a single pattern.
e = re.compile(
    r"^(?:([A-Za-z]+):)?"  # 1: scheme, without the trailing ':'
    r"(\/{0,3})"           # 2: zero to three slashes after the scheme
    r"([0-9.A-Za-z-]+)"    # 3: host (letters, digits, '.', '-')
    r"(?::(\d+))?"         # 4: port digits, without the leading ':'
    r"(?:\/([^?#]*))?"     # 5: path, without its leading '/'
    r"(?:\?([^#]*))?"      # 6: query, without the leading '?'
    r"(?:#(.*))?$"         # 7: fragment, without the leading '#'
)


def url_components(url):
    """Return the seven captured components of *url* as a tuple.

    Optional components that are absent come back as ``None``.  Returns
    ``None`` instead of a tuple when *url* does not match the pattern at all.
    """
    m = e.match(url)
    return None if m is None else m.groups()


for url in string_array:
    parts = url_components(url)
    if parts is None:
        # Report unparseable input instead of failing on a missing match.
        print(f"no match: {url}")
    else:
        print(parts)
Standard input is empty
('http', '//', 'www.cplusplus.com', None, 'reference/regex/regex_match/', None, None)
('tcp', '//', '192.168.2.1', '1234', 'hello/how/are/you', None, None)
('https', '//', 'mail.google.com', None, 'mail/u/0/', 'tab=wm', 'inbox/15178022db56df29?projector=1')