import re
import fileinput

# Reads lines of the form "(start, end, token)" from the files named on the
# command line (or stdin) and groups them: an empty line closes the current
# group.  Lines that are neither a triple nor empty are ignored.

# Accumulated results, one entry per group of consecutive triple lines.
words = []      # each entry: tuple of the tokens in one group
positions = []  # each entry: list of (start, end, token) triples in one group
# Contents of the group currently being read; flushed at every empty line.
w_buffer = []
p_buffer = []

# Matches either an empty line (all capture groups unset) or exactly one
# "(start, end, token)" triple with optional spaces/tabs around each field.
# A plain raw string is used instead of the Python-2-only ur'' prefix, which
# is a SyntaxError on Python 3; the pattern is pure ASCII so matching is the
# same on both.
pattern = re.compile(r'^(?:[ \t]*[(][ \t]*(\d+)[ \t]*,[ \t]*(\d+)[ \t]*,[ \t]*(\S+)[ \t]*[)][ \t]*)?$', re.UNICODE)

for line in fileinput.input():
    # The pattern is anchored at both ends, so a line yields at most one match.
    match = pattern.match(line)
    if match is None:
        # Neither a triple nor an empty line (e.g. whitespace-only): skip it.
        continue
    start, end, token = match.groups()
    if start:
        w_buffer.append(token)
        p_buffer.append((int(start), int(end), token))
    else:
        # Empty line: close the current group (possibly empty) and start anew.
        words.append(tuple(w_buffer))
        w_buffer = []
        positions.append(p_buffer)
        p_buffer = []

# Flush a trailing group that was not terminated by an empty line.  Testing
# the buffer rather than the loop variable avoids a NameError when the input
# is empty or contains no matching line at all.
if w_buffer:
    words.append(tuple(w_buffer))
    positions.append(p_buffer)

# An optional prettified output
import pprint as pp
pp.pprint(words)
pp.pprint(positions)