#!/bin/bash
# Demo: pair every parsed CSV row with the raw bytes it was parsed from.
#
# Writes a sample CSV (some.csv) and a small Python 3 program (prog.py) into
# a scratch directory, runs the program, and removes the directory on exit.
# Requires: bash, mktemp, python3.

# ideone boilerplate: run in temp dir
t=$(mktemp -d -t ideone.XXXXXXXX) || exit
# Clean up on EXIT only. An ERR trap would delete the directory after the
# first failing command while the script keeps running inside it.
trap 'rm -rf -- "$t"' EXIT
# Never write the files below into an unexpected directory.
cd "$t" || exit

# Sample input: a header row, one record whose three quoted fields span
# several physical lines (including doubled quotes), and a plain record.
# The delimiter is quoted so the shell expands nothing in the body.
cat <<'EOF' >some.csv
header,second,third
"quoted
value","over
multiple
lines","with
""quoted""
value
embedded"
back,to,normal
EOF

# The Python program; its indentation is significant and must be preserved.
cat <<'EOF' >prog.py
import csv


def rawcsv(filename):
    """Yield (raw, row) pairs for every record in the CSV file `filename`.

    `row` is the list of fields produced by csv.reader; `raw` is a bytes
    object holding the physical lines the reader consumed for that record.

    The file is opened twice: once in text mode for the csv module and once
    in binary mode for the raw bytes.  The two handles are kept in step by
    counting lines, so this assumes both modes split the file into the same
    lines (true for "\n" and "\r\n" endings; a file using lone "\r" line
    endings would not line up).
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires for files passed to csv.reader.
    with open(filename, "r", newline="") as csvdata, open(
            filename, "rb") as rawdata:
        reader = csv.reader(csvdata)
        prev = 0
        for row in reader:
            # line_num counts the physical lines read so far, which can be
            # more than one per record when quoted fields contain newlines.
            pos = reader.line_num
            # Read the same number of physical lines from the binary handle.
            raw = b"".join([rawdata.readline() for _ in range(pos - prev)])
            prev = pos
            yield raw, row


for raw, row in rawcsv("some.csv"):
    print(f"Raw: {raw}")
    print(f"Row: {row}")
EOF

python3 prog.py
IyEvYmluL2Jhc2gKIyBpZGVvbmUgYm9pbGVycGxhdGU6IHJ1biBpbiB0ZW1wIGRpcgp0PSQobWt0ZW1wIC1kIC10IGlkZW9uZS5YWFhYWFhYWCkgfHwgZXhpdAp0cmFwICdybSAtcmYgIiR0IicgRVJSIEVYSVQKY2QgIiR0IgoKY2F0IDw8XDogPnNvbWUuY3N2CmhlYWRlcixzZWNvbmQsdGhpcmQKInF1b3RlZAp2YWx1ZSIsIm92ZXIKbXVsdGlwbGUKbGluZXMiLCJ3aXRoCiIicXVvdGVkIiIKdmFsdWUKZW1iZWRkZWQiCmJhY2ssdG8sbm9ybWFsCjoKCmNhdCA8PFw6ID5wcm9nLnB5CmltcG9ydCBjc3YKCmRlZiByYXdjc3YoZmlsZW5hbWUpOgogICAgd2l0aCBvcGVuKGZpbGVuYW1lLCAiciIpIGFzIGNzdmRhdGEsIG9wZW4oCiAgICAgICAgICAgIGZpbGVuYW1lLCAicmIiKSBhcyByYXdkYXRhOgogICAgICAgIHJlYWRlciA9IGNzdi5yZWFkZXIoY3N2ZGF0YSkKICAgICAgICBwcmV2ID0gMAogICAgICAgIGZvciByb3cgaW4gcmVhZGVyOgogICAgICAgICAgICAjIFdoZXJlIGlzIHRoZSBmaWxlIHBvaW50ZXIgbm93PwogICAgICAgICAgICBwb3MgPSByZWFkZXIubGluZV9udW0KICAgICAgICAgICAgIyBSZWFkIHRoZSBzYW1lIGFtb3VudCBvZiByYXdkYXRhCiAgICAgICAgICAgIHJhdyA9IGIiIi5qb2luKFtyYXdkYXRhLnJlYWRsaW5lKCkgZm9yIF8gaW4gcmFuZ2UocG9zIC0gcHJldildKQogICAgICAgICAgICBwcmV2ID0gcG9zCiAgICAgICAgICAgIHlpZWxkIHJhdywgcm93Cgpmb3IgcmF3LCByb3cgaW4gcmF3Y3N2KCJzb21lLmNzdiIpOgogICAgcHJpbnQoZiJSYXc6IHtyYXd9IikKICAgIHByaW50KGYiUm93OiB7cm93fSIpCjoKCnB5dGhvbjMgcHJvZy5weQo=
Raw: b'header,second,third\n'
Row: ['header', 'second', 'third']
Raw: b'"quoted\nvalue","over\nmultiple\nlines","with\n""quoted""\nvalue\nembedded"\n'
Row: ['quoted\nvalue', 'over\nmultiple\nlines', 'with\n"quoted"\nvalue\nembedded']
Raw: b'back,to,normal\n'
Row: ['back', 'to', 'normal']