fork(1) download
  1. text = r'''"column1a","column2a","column
  2. 3a,",
  3. "column\"this is, a test\"4a"
  4. "column1a2","column2a2","column3a2","column4a2"
  5. "column1b","colu
  6. mn2b,","column3b",
  7. "column\"this is, a test\"4b"
  8. "column1c,","column2c","column3c",
  9. "column\"this is, a test\"4c"'''
  10.  
  11. import re
  12.  
  13. columns = 4
  14. buffer = ""
  15.  
  16. check = re.compile(r'"(?:[^"\\]*|\\.)+"')
  17.  
  18. for line in text.split("\n"):
  19. if buffer == "":
  20. matches = check.findall(line)
  21. if len(matches) == columns:
  22. print matches
  23. else:
  24. buffer = line.strip()
  25. else:
  26. buffer = buffer + line.strip()
  27. matches = check.findall(buffer)
  28. if len(matches) == columns:
  29. print matches
  30. buffer = ""
  31. elif len(check.findall(buffer)) > columns:
  32. print "Error: cannot parse line:\n" + buffer
  33. buffer = ""
  34.  
  35.  
  36.  
  37.  
Success #stdin #stdout 0.01s 7736KB
stdin
Standard input is empty
stdout
['"column1a"', '"column2a"', '"column3a,"', '"column\\"this is, a test\\"4a"']
['"column1a2"', '"column2a2"', '"column3a2"', '"column4a2"']
['"column1b"', '"column2b,"', '"column3b"', '"column\\"this is, a test\\"4b"']
['"column1c,"', '"column2c"', '"column3c"', '"column\\"this is, a test\\"4c"']