#ifndef CSV_C
#define CSV_C
#endif
/**********************************************************
* CSV.C
* A simple csv-parser.
*
* It was written due to a post at a popular Greek forum:
* http://w...content-available-to-author-only...a.gr/topic/530662-coding-challenge/
*
* For features, limitations, etc., please see the file: csv.h
**********************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "csv.h"
#include "f_size/f_size.h" // see: http://i...content-available-to-author-only...e.com/eCXzVn
/* Non-zero when typid is one of the two storage-model ids handled by this
 * file (CSVFLAT or CSVLINED). NOTE: typid may be expanded twice. */
#define CSV_VALIDTYPE( typid ) \
( (typid) == CSVFLAT || (typid) == CSVLINED )
// One parsed line of the LINED model.
struct _CsvLine {
// size_t nfields; // count of currently used fields (not supported)
CsvString *fields; // dynamic array; the LINED constructors allocate lined->nfields zeroed entries per line
};
// LINED model storage: an array of lines, each holding the same number of field slots.
struct _CsvLined {
size_t nlines; // total count of parsed lines (number of '\n' found in the buffer)
int nfields; // count of fields per parsed line (taken from the first line: its commas + 1)
struct _CsvLine *data; // dynamic array of nlines entries
};
// FLAT model storage: all fields of the file in one consecutive array.
struct _CsvFlat {
size_t nfields; // total count of parsed fields (number of ',' and '\n' found in the buffer)
CsvString *data; // dynamic array of nfields entries
};
// A parsed csv file. The constructors in this file allocate only the model
// descriptor matching typid; the other pointer is left zeroed by calloc().
struct Csv {
char *buf; // whole file contents as read by _make_csvbuf(); parsed fields point into it (no copies)
int typid; // model id (CSVFLAT or CSVLINED)
struct _CsvFlat *flat; // FLAT model data, set by the FLAT constructors
struct _CsvLined *lined; // LINED model data, set by the LINED constructors
};
/*********************************************************//**
* Two private macros to be inlined inside the FLAT model
* constructors (they will save me typing)
*************************************************************
*/
/* ---------------------------------------------------- */
/* _PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f )
 *
 * Builds an empty FLAT-model Csv in (csv):
 *   1. allocates the zeroed Csv and its _CsvFlat descriptor,
 *   2. loads the file named (fname) into (csv)->buf via _make_csvbuf(),
 *   3. counts every ',' and '\n' of the buffer into (f),
 *   4. allocates (f) zeroed CsvString slots in (csv)->flat->data.
 *
 * On any failure everything allocated so far is freed and the macro executes
 * `return NULL;` in the ENCLOSING function, so it can only be expanded inside
 * functions returning a pointer.
 * (csv) must be a Csv* lvalue and (f) an integer lvalue; both are expanded
 * several times, so pass plain variables only.
 */
#define _PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f )\
do {\
    (csv) = calloc( 1, sizeof( *(csv) ) );\
    if ( !(csv) ) {\
        return NULL;\
    }\
    (csv)->flat = calloc( 1, sizeof( *((csv)->flat) ) );\
    if ( !(csv)->flat ) {\
        free( (csv) );\
        return NULL;\
    }\
    (csv)->buf = _make_csvbuf( (fname) );\
    if ( !(csv)->buf ) {\
        free( (csv)->flat );\
        free( (csv) );\
        return NULL;\
    }\
    \
    (csv)->typid = CSVFLAT;\
    \
    /* count fields present in buf: one per delimiter */\
    (f) = 0;\
    for (char *cp = (csv)->buf; *cp; cp++) {\
        if ( ',' == *cp || '\n' == *cp ) {\
            (f)++;\
        }\
    }\
    \
    /* allocate mem for nfields */\
    (csv)->flat->nfields = (f);\
    (csv)->flat->data = calloc( (f), sizeof( *((csv)->flat->data) ) );\
    /* calloc(0, ...) is allowed to return NULL: that is not an error, */\
    /* only a failed request for at least one slot is                  */\
    if ( !(csv)->flat->data && (f) > 0 ) {\
        free( (csv)->flat );\
        free( (csv)->buf );\
        free( (csv) );\
        return NULL;\
    }\
}while(0)
/* ---------------------------------------------------- */
/* _POPULATE_FLAT_MODEL( csv, f, callback, userdata )
 *
 * Splits (csv)->buf IN PLACE: every ',' and '\n' is overwritten with '\0' and
 * the text preceding it becomes the next entry of (csv)->flat->data (pointer
 * into buf plus length, nothing is copied). When (callback) is non-NULL it is
 * called with each freshly stored field and (userdata); its result is ignored.
 * Text after the last delimiter is not stored.
 * On exit (f) holds the number of fields stored.
 */
#define _POPULATE_FLAT_MODEL(csv, f, callback, userdata)\
do {\
    (f) = 0;\
    char *start_ = (csv)->buf;  /* first char of the field being scanned */\
    for (char *cur_ = start_; *cur_ != '\0'; cur_++) {\
        if ( *cur_ != ',' && *cur_ != '\n' ) {\
            continue;           /* ordinary character: keep scanning */\
        }\
        *cur_ = '\0';           /* terminate the field inside the buffer */\
        (csv)->flat->data[(f)].cstr = start_;\
        (csv)->flat->data[(f)].len = cur_ - start_;\
        if ( (callback) ) {\
            (*callback)(&(csv)->flat->data[(f)], (userdata));\
        }\
        start_ = cur_ + 1;\
        (f)++;\
    }\
}while(0)
/*********************************************************//**
* Two private macros to be inlined inside the LINED model
* constructors (they will save me typing)
*************************************************************
*/
/* ---------------------------------------------------- */
/* _PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f )
 *
 * Builds an empty LINED-model Csv in (csv):
 *   1. allocates the zeroed Csv and its _CsvLined descriptor,
 *   2. loads the file named (fname) into (csv)->buf via _make_csvbuf(),
 *   3. counts the '\n' characters of the buffer into (ln),
 *   4. counts the fields of the first line (its commas + 1) into (f),
 *   5. allocates (ln) lines, each with (f) zeroed CsvString slots.
 *
 * On any failure everything allocated so far is freed and the macro executes
 * `return NULL;` in the ENCLOSING function, so it can only be expanded inside
 * functions returning a pointer.
 * (csv) must be a Csv* lvalue, (ln) and (f) integer lvalues; all of them are
 * expanded several times, so pass plain variables only.
 */
#define _PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f )\
do {\
    (csv) = calloc( 1, sizeof( *(csv) ) );\
    if ( !(csv) ) {\
        return NULL;\
    }\
    (csv)->lined = calloc( 1, sizeof( *((csv)->lined) ) );\
    if ( !(csv)->lined ) {\
        free( (csv) );\
        return NULL;\
    }\
    (csv)->buf = _make_csvbuf( (fname) );\
    if ( !(csv)->buf ) {\
        free( (csv)->lined );\
        free( (csv) );\
        return NULL;\
    }\
    \
    (csv)->typid = CSVLINED;\
    \
    /* count total lines */\
    (ln) = 0;\
    char *cp = (csv)->buf;\
    for (; *cp; cp++) {\
        if ( '\n' == *cp ) {\
            (ln)++;\
        }\
    }\
    /* count fields of 1st line (we assume all lines have same # of fields) */\
    (f) = 0;\
    for (cp = (csv)->buf; *cp && '\n' != *cp; cp++) {\
        if ( ',' == *cp ) {\
            (f)++;\
        }\
    }\
    (f)++;\
    \
    /* allocate mem for ln lines, each one having f fields */\
    (csv)->lined->data = calloc( (ln), sizeof( *((csv)->lined->data) ) );\
    /* calloc(0, ...) is allowed to return NULL: only a failed request */\
    /* for at least one line is an error                               */\
    if ( !(csv)->lined->data && (ln) > 0 ) {\
        free( (csv)->lined );\
        free( (csv)->buf );\
        free( (csv) );\
        return NULL;  /* must bail out: csv has just been freed */\
    }\
    for (size_t i=0; i < (ln); i++) {\
        (csv)->lined->data[i].fields = calloc( (f), sizeof( *((csv)->lined->data[i].fields) ) );\
        if ( !(csv)->lined->data[i].fields ) {\
            /* undo the lines allocated before the failing one */\
            for (size_t j=0; j < i; j++) {\
                free( (csv)->lined->data[j].fields );\
            }\
            free( (csv)->lined->data );\
            free( (csv)->lined );\
            free( (csv)->buf );\
            free( (csv) );\
            return NULL;\
        }\
    }\
    (csv)->lined->nlines = (ln);  /* total lines */\
    (csv)->lined->nfields = (f);  /* fields per line */\
}while(0)
/* ---------------------------------------------------- */
/* _POPULATE_LINED_MODEL( csv, ln, f, callback, userdata )
 *
 * Splits (csv)->buf IN PLACE: every ',' and '\n' is overwritten with '\0' and
 * the text preceding it is stored (pointer into buf plus length, no copying)
 * as field (f) of line (ln). A '\n' additionally completes the line: when
 * (callback) is non-NULL it is called with the finished line and (userdata)
 * (its result is ignored), then (f) restarts at 0 and (ln) advances.
 *
 * Only slots that were actually allocated are written: fields beyond
 * (csv)->lined->nfields on a line, and anything following the last '\n'
 * (where (ln) == nlines), are skipped instead of overflowing the arrays.
 * Lines shorter than nfields keep their remaining slots zeroed.
 * On exit (ln) holds the number of '\n'-terminated lines processed.
 */
#define _POPULATE_LINED_MODEL(csv, ln, f, callback, userdata)\
do {\
    (ln) = (f) = 0;\
    for (char *cp = (csv)->buf, *pre = cp; *cp; cp++)\
    {\
        if ( ',' != *cp && '\n' != *cp ) {\
            continue;\
        }\
        const int eol_ = ( '\n' == *cp );\
        *cp = '\0';\
        /* store the field only if its slot exists */\
        if ( (ln) < (csv)->lined->nlines && (f) < (csv)->lined->nfields ) {\
            CsvString *s = &(csv)->lined->data[(ln)].fields[(f)];\
            s->cstr = pre;  /* no mem copying */\
            s->len = cp - pre;\
        }\
        pre = cp + 1;\
        if ( !eol_ ) {\
            (f)++;\
            continue;\
        }\
        /* end of line: hand the completed line to the callback */\
        if ( (callback) && (ln) < (csv)->lined->nlines ) {\
            (*callback)(&(csv)->lined->data[(ln)], (userdata));\
        }\
        (f) = 0;\
        (ln)++;\
    }\
}while(0)
/*********************************************************//**
 * (Private Helper) The csv file is first read as a whole in csv->buf,
 * via fread(). This buffer will be LATER ON parsed & modified by the
 * constructors of the supported parsing-models (currently FLAT and LINED).
 *
 * fname: path of the file to load.
 * Returns a heap buffer holding the whole file followed by a '\0'
 * (the caller owns it and must free() it), or NULL when f_size() reports
 * a size below 1, the size does not fit in memory, the file cannot be
 * opened, allocation fails, or fewer bytes than expected could be read.
 *************************************************************
 */
static inline char *_make_csvbuf( const char *fname )
{
    /* NOTE(review): f_size() comes from f_size/f_size.h; presumably it
     * returns the file size in bytes and a value < 1 on error or for an
     * empty file -- confirm against that header */
    int64_t fsize = f_size( fname );
    if ( fsize < 1 ) {
        return NULL;
    }

    /* refuse sizes that cannot be represented (with room for the '\0') */
    size_t nbytes = (size_t)fsize;
    if ( (int64_t)nbytes != fsize || 0 == nbytes + 1 ) {
        return NULL;
    }

    FILE *fp = fopen( fname, "rb" );
    if ( !fp ) {
        return NULL;
    }

    /* calloc zeroes the extra byte, which becomes the string terminator */
    char *buf = calloc( 1, nbytes + 1 );
    if ( !buf ) {
        fclose( fp );
        return NULL;
    }

    size_t n = fread( buf, 1, nbytes, fp );
    fclose( fp );
    if ( n != nbytes ) {
        /* short read: a partially filled buffer would be parsed as truncated data */
        free( buf );
        return NULL;
    }
    return buf;
}
/*********************************************************//**
* (Private Helper) Constructor of the FLAT model
*************************************************************
*/
static inline Csv *_make_flat_csv( const char *fname )
{
Csv *csv = NULL;
size_t f = 0; // fields counter
int (*dummy)(CsvString *, void *) = NULL;
_PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f );
_POPULATE_FLAT_MODEL( csv, f, dummy, NULL );
return csv;
}
/*********************************************************//**
* (Private Helper) Constructor of the LINED model
*************************************************************
*/
static inline Csv *_make_lined_csv( const char *fname )
{
Csv *csv = NULL;
size_t ln = 0; // lines counter
int f = 0; // fields counter
int (*dummy)(CsvLine *, void *userdata) = NULL;
_PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f );
_POPULATE_LINED_MODEL( csv, ln, f, dummy, NULL );
return csv;
}
/*********************************************************//**
 * Normal Constructor:
 * Parsed csv-files are stored in memory either as consecutive
 * fields (FLAT model) or as lines of fields (LINED model).
 * The typid argument dictates which model will be used.
 *
 * fname: path of the csv file to parse.
 * typid: CSVFLAT or CSVLINED.
 * Returns the parsed Csv, or NULL when fname is NULL, typid is
 * neither of the two model ids, or the selected constructor fails.
 *************************************************************
 */
Csv *make_csv( const char *fname, int typid )
{
    if ( NULL == fname ) {
        return NULL;
    }
    if ( CSVFLAT == typid ) {
        return _make_flat_csv( fname );
    }
    if ( CSVLINED == typid ) {
        return _make_lined_csv( fname );
    }
    /* unsupported model id */
    return NULL;
}
/*********************************************************//**
 * Specialized Constructor of the FLAT model with foreach callback
 *
 * fname:    path of the csv file to parse.
 * foreach:  when non-NULL, called once for every field right after it
 *           has been stored, in file order, with a pointer to the field
 *           and userdata. Its return value is ignored.
 * userdata: passed through untouched to foreach.
 * Returns the parsed Csv, or NULL when fname is NULL or the
 * preparation step fails.
 *************************************************************
 */
Csv *make_flat_csv_foreach(
    const char *fname,
    int (*foreach)(CsvString *field, void *userdata),
    void *userdata
    )
{
    /* same guard as make_csv(): never hand a NULL path to the loader */
    if ( !fname ) {
        return NULL;
    }
    Csv *csv = NULL;
    size_t f = 0;  // fields counter
    _PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f );
    _POPULATE_FLAT_MODEL( csv, f, foreach, userdata );
    return csv;
}
/*********************************************************//**
 * Specialized Constructor of the LINED model with foreach callback
 *
 * fname:    path of the csv file to parse.
 * foreach:  when non-NULL, called once for every '\n'-terminated line,
 *           right after its last field has been stored, with a pointer
 *           to the line and userdata. Its return value is ignored.
 * userdata: passed through untouched to foreach.
 * Returns the parsed Csv, or NULL when fname is NULL or the
 * preparation step fails.
 *************************************************************
 */
Csv *make_lined_csv_foreach(
    const char *fname,
    int (*foreach)(CsvLine *csvline, void *userdata),
    void *userdata
    )
{
    /* same guard as make_csv(): never hand a NULL path to the loader */
    if ( !fname ) {
        return NULL;
    }
    Csv *csv = NULL;
    size_t ln = 0;  // lines counter
    int f = 0;      // fields counter
    _PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f );
    _POPULATE_LINED_MODEL( csv, ln, f, foreach, userdata );
    return csv;
}
/*********************************************************//**
 * Destructor
 *
 * Releases everything owned by csv: the model storage (FLAT field
 * array, or LINED per-line field arrays and line array), the model
 * descriptor, the file buffer and the Csv itself. Every CsvString
 * obtained from csv points into that buffer and becomes invalid.
 * A NULL csv is accepted and ignored.
 * Always returns NULL, so callers can write: csv = csv_free( csv );
 *************************************************************
 */
Csv *csv_free( Csv *csv )
{
    if ( !csv ) {
        return NULL;
    }

    /* the constructors allocate only one of the two models and leave the
     * other pointer NULL, so both can be released unconditionally */
    if ( csv->flat ) {
        free( csv->flat->data );
        free( csv->flat );
    }
    if ( csv->lined ) {
        if ( csv->lined->data ) {
            size_t nlines = csv->lined->nlines;
            for (size_t ln=0; ln < nlines; ln++) {
                free( csv->lined->data[ln].fields );
            }
            free( csv->lined->data );
        }
        free( csv->lined );
    }

    /* the fields only pointed into buf, so it goes last together with csv */
    free( csv->buf );
    free( csv );
    return NULL;
}
/*********************************************************//**
*
*************************************************************
*/
void csv_print( const Csv *csv )
{
if ( !csv ) {
return;
}
int typid = csv->typid;
if ( CSVFLAT == typid && csv->flat && csv->flat->data )
{
size_t nfields = csv->flat->nfields;
printf( "[ %zd fields ]\n", nfields
);
for (size_t i=0; i < nfields; i++) {
"%s [len: %zd]\n",
csv->flat->data[i].cstr,
csv->flat->data[i].len
);
}
}
else if ( CSVLINED == typid && csv->lined && csv->lined->data )
{
size_t nlines = csv->lined->nlines;
int nfields = csv->lined->nfields; // lines have same # of fields
printf( "[ %zd lines ]\n", nlines
);
for (size_t ln=0; ln < nlines; ln++)
{
printf( "--- line %zd (%d fields) ----\n", 1+ln
, nfields
); for (int f=0; f < nfields; f++) {
"%s [len: %zd]\n",
csv->lined->data[ln].fields[f].cstr,
csv->lined->data[ln].fields[f].len
);
}
}
}
else {
puts( "*** error: invalid typid OR no data" ); }
}
/*********************************************************//**
 * (FLAT model) Return the total count of parsed fields.
 * Returns 0 when csv is NULL or holds no FLAT model data.
 *************************************************************
 */
size_t csv_flat_nfields( Csv *csv )
{
    if ( !csv || !csv->flat ) {
        return 0;
    }
    return csv->flat->nfields;
}
/*********************************************************//**
 * (FLAT model) Return a pointer to the field specified by idx.
 * Returns NULL when csv is NULL, holds no FLAT model data, or idx
 * is not below the count of parsed fields.
 *************************************************************
 */
CsvString *csv_flat_field( Csv *csv, size_t idx )
{
    if ( !csv || !csv->flat || !csv->flat->data ) {
        return NULL;
    }
    if ( idx >= csv->flat->nfields ) {
        return NULL;  /* out of range */
    }
    return &csv->flat->data[idx];
}
/*********************************************************//**
 * (LINED model) Return the total count of parsed lines.
 * Returns 0 when csv is NULL or holds no LINED model data.
 *************************************************************
 */
size_t csv_lined_nlines( Csv *csv )
{
    if ( !csv || !csv->lined ) {
        return 0;
    }
    return csv->lined->nlines;
}
/*********************************************************//**
 * (LINED model) Return the count of fields per parsed line.
 * NOTE: the LINED model assumes fixed number of fields per line!
 * Returns 0 when csv is NULL or holds no LINED model data.
 *************************************************************
 */
int csv_lined_nfields( Csv *csv )
{
    if ( !csv || !csv->lined ) {
        return 0;
    }
    return csv->lined->nfields;
}
/*********************************************************//**
 * (LINED model) Given a Csv, return a pointer to the field
 * specified by idx in the line specified by ln.
 * Returns NULL when csv is NULL, holds no LINED model data, ln is
 * not below the count of parsed lines, or idx is outside
 * [0, fields per line).
 *************************************************************
 */
CsvString *csv_lined_field( Csv *csv, size_t ln, int idx )
{
    if ( !csv || !csv->lined || !csv->lined->data ) {
        return NULL;
    }
    if ( ln >= csv->lined->nlines ) {
        return NULL;  /* no such line */
    }
    if ( idx < 0 || idx >= csv->lined->nfields ) {
        return NULL;  /* no such field */
    }
    return &csv->lined->data[ln].fields[idx];
}
/*********************************************************//**
 * (LINED model) Given a CsvLine, return a pointer to the
 * field specified by idx.
 * NOTE: This function is useful inside callback functions to
 *       be passed to the _foreach() constructor (see csv.h
 *       for details and sample code).
 * Returns NULL when csvline is NULL, has no field array, or idx is
 * negative. A CsvLine does not record its own field count, so the
 * upper bound of idx CANNOT be checked here: the caller must keep
 * idx below the count of fields per line.
 *************************************************************
 */
CsvString *csvline_field( CsvLine *csvline, int idx )
{
    if ( !csvline || !csvline->fields || idx < 0 ) {
        return NULL;
    }
    return &csvline->fields[idx];
}
/*********************************************************//**
 * Compare equality of a plain cstring against a csv field (CsvString).
 * NOTE: See csv.h for a brief description of this function.
 *
 * cstrlen: length of cstring, supplied by the caller.
 * cstring: the text to compare.
 * field:   the csv field to compare against.
 * Returns 1 when cstrlen equals the field's length and the first
 * cstrlen bytes of both are identical, 0 otherwise.
 *************************************************************
 */
int csv_eql_cstr_field( size_t cstrlen, char *cstring, CsvString *field )
{
    /* different lengths can never match: skip the byte comparison */
    if ( cstrlen != field->len ) {
        return 0;
    }
    return 0 == memcmp( cstring, field->cstr, cstrlen );
}