fork download
  1. from collections import defaultdict
  2. from itertools import groupby
  3.  
  4. import sys
  5. file = sys.stdin
  6.  
  7. """
  8. I need to calculate timestamp difference of first line in group and the first line of the next, for all groups corresponding to same (column2, column3).
  9. """
  10. rows = (line.split() for line in file if line.strip()) # columns are space-separated
  11.  
  12. # find timestamps
  13. timestamps = defaultdict(list) # (source ip, destination ip) -> timestamps of 1st lines
  14. for ((source, dest), _), group in groupby(enumerate(rows),
  15. key=lambda (i, row): (row[1:3], i - int(row[3]))):
  16. ts = float(next(group)[1][0]) # a timestamp from the 1st line in a group
  17. timestamps[source, dest].append(ts)
  18.  
  19. # find differences
  20. for source_ip, dest_ip in sorted(timestamps, key=lambda (x,y): x):
  21. t = [0] + timestamps[source_ip, dest_ip] # prepend zero timestamp to support single groups
  22. diffs = [b - a for a, b in zip(t, t[1:])] # pairwise differences
  23. info = ", ".join(map(str, diffs))
  24. print("{source_ip} {dest_ip}: {info}".format(**vars()))
  25.  
Success #stdin #stdout 0.09s 8840KB
stdin
0.0 aa:bb:cc dd:ee:ff 100  000 ---------->line1
0.2 aa:bb:cc dd:ee:ff 101  011 ---------->line2
0.5 dd:ee:ff aa:bb:cc 230  001 ---------->line3
0.9 dd:ee:ff aa:bb:cc 231  110 ---------->line4
1.2 dd:ee:ff aa:bb:cc 232  101 ---------->line5
1.4 aa:bb:cc dd:ee:ff 102  1111 ---------->line6
1.6 aa:bb:cc dd:ee:ff 103  1101 ---------->line7
1.7 aa:bb:cc dd:ee:ff 108  1001 ---------->line8
2.4 dd:ee:ff aa:bb:cc 233  1000 ---------->line9  
2.8 gg:hh:ii jj:kk:ll 450  1110 ---------->line10
3.2 jj:kk:ll gg:hh:ii 600  010 ---------->line11  
stdout
aa:bb:cc dd:ee:ff: 0.0, 1.4, 0.3
dd:ee:ff aa:bb:cc: 0.5, 1.9
gg:hh:ii jj:kk:ll: 2.8
jj:kk:ll gg:hh:ii: 3.2