/**********************************************************
 * CSV.C
 * A simple csv-parser.
 *
 * It was written due to a post at a popular Greek forum:
 * http://w...content-available-to-author-only...a.gr/topic/530662-coding-challenge/
 *
 * The parser supports two parsing-models:
 * a) FLAT : Tokens are stored in memory as consecutive fields
 * b) LINED: Tokens are stored in memory as lines of fields
 *
 * Limitations:
 *
 * a) Only single-byte strings (i.e. no Unicode support)
 * b) Only commas and newlines are recognised as delimiters
 * c) No handling of quoted tokens (they are stored unchanged)
 * d) No handling of blanks among tokens (they are embedded in tokens)
 * e) In LINED model, all lines MUST have the same number of tokens
 * f) CSV-files larger than the available memory are not handled.
 *
 * Implementation Notes:
 *
 * 1. Most (if not all) utility functions perform NO sanity-checks
 *    on their arguments. Constructors, however, return NULL on error.
 *
 * 2. CSV files are read all at once into a memory buffer, via fread(),
 *    and then the buffer is parsed in memory. This is usually faster
 *    than reading & parsing the file block by block, unless the cache
 *    of the OS gets full. Also, files larger than the available
 *    memory will not be loaded.
 *
 * 3. The Csv constructors build their parsing-model mostly by setting
 *    pointers on the loaded buffer. Thus, the parsing does not involve
 *    memory copying. It only involves minimal memory allocation for
 *    the internal structure of the parsing-model, along with at least
 *    one traversing of the buffered data.
 *
 * 4. The LINED parsing-model represents more accurately the logical
 *    structure of the csv-file, but the FLAT parsing-model takes
 *    less time to get initialized and populated.
 *
 * 5. For both models, a specialized _foreach() constructor is provided
 *    accepting a callback function and userdata as the last parameters.
 *    During the construction of the parsing-model, this function is
 *    called upon each parsing-unit (CsvString units for the FLAT model,
 *    CsvLine units for the LINED model).
 *
 *    For complex calculations on all the data, this MAY decrease the
 *    overall time needed. However, for simple calculations it MAY
 *    increase the overall time (due to the overhead of the function
 *    calls and the extra pointer dereferences).
 *
 *    So, DO NOT use the _foreach() constructors blindly!
 *
 *    For the LINED model, you can use the function csvline_field()
 *    inside your callback function, in order to get a pointer to
 *    any field of the current line. For example, to print the cstring
 *    and the length of the 4th field of EACH line during construction,
 *    you can use a callback function like the following:
 *
 *    int callback_lined_foreach_print_4th_token(CsvLine *csvline, void *dummy)
 *    {
 *          CsvString *field = csvline_field(csvline, 3); // 0-based index
 *          printf( "%s (len: %zu)\n", field->cstr, field->len );
 *          return 1;
 *    }
 *
 *    Now, your main() function may be something like the following:
 *
 *    int main( void )
 *    {
 *        Csv *csv = make_lined_csv_foreach(
 *                       "test.csv",
 *                       callback_lined_foreach_print_4th_token,
 *                       NULL
 *                       );
 *        if ( !csv ) {
 *            return 1;
 *        }
 *        csv_free( csv );
 *        return 0;
 *    }
 **********************************************************
 */

#ifndef CSV_H
#define CSV_H

#include <stddef.h>

/* ---------------------------
 * Constants & Macros
 * ---------------------------
 */

/*
 * Parsing models supported by the parser.
 * The models themselves are described in the banner comment of this file.
 */
enum {
	CSVNONE  = 0,	/* no parsing model */
	CSVFLAT  = 1,	/* FLAT:  tokens are stored as consecutive fields */
	CSVLINED = 2	/* LINED: tokens are stored as lines of fields */
};

/* ---------------------------
 * Data Types
 * ---------------------------
 */

/*
 * Csv is the dominant data-type of the parser. Only a forward declaration
 * is given here, so the type is hidden from clients: a Csv can only be
 * manipulated through pointers handed to the functions declared below.
 */
typedef struct Csv Csv;

/*
 * CsvString: one parsed field (token).
 *
 * Unlike Csv, the layout is public. Clients may read the len and cstr
 * members straight from the pointers returned by csv_flat_field() and
 * csv_lined_field(), and may hand CsvString pointers to
 * csv_eql_cstr_field().
 */
typedef struct CsvString {
	size_t len;	/* length of the token (presumably excluding any
			 * terminating NUL -- confirm in the implementation) */
	char *cstr;	/* the token's characters */
} CsvString;

/*
 * CsvLine is an opaque handle (only forward-declared here) for one line of
 * fields in the LINED model. It is the unit passed to the callback of
 * make_lined_csv_foreach(); use csvline_field() to reach its fields.
 *
 * NOTE(review): the struct tag _CsvLine starts with an underscore followed
 * by an uppercase letter, which C reserves for the implementation. Renaming
 * it requires a matching change wherever the struct is defined.
 */
typedef struct _CsvLine CsvLine;

/* ---------------------------
 * Function Prototypes
 * ---------------------------
 */

#ifndef CSV_C
/*
 * Constructors & destructor.
 *
 * As stated in the implementation notes at the top of this file,
 * constructors return NULL on error.
 */

/* Create a Csv from the file named fname. typid selects the parsing model
 * (presumably CSVFLAT or CSVLINED -- confirm in the implementation).
 */
Csv *make_csv(const char *fname, int typid);

/* FLAT-model constructor: while the model is being built, foreach is called
 * upon each CsvString unit, receiving userdata as its second argument.
 * NOTE(review): the meaning of the callback's int result is not visible
 * in this header.
 */
Csv *make_flat_csv_foreach(const char *fname,
                           int (*foreach)(CsvString *field, void *userdata),
                           void *userdata);

/* LINED-model constructor: while the model is being built, foreach is called
 * upon each CsvLine unit, receiving userdata as its second argument. Inside
 * the callback, csvline_field() gives access to the fields of the line.
 * NOTE(review): the meaning of the callback's int result is not visible
 * in this header.
 */
Csv *make_lined_csv_foreach(const char *fname,
                            int (*foreach)(CsvLine *csvline, void *userdata),
                            void *userdata);

/* Destroy a Csv obtained from one of the constructors.
 * NOTE(review): the value of the returned pointer is not visible in this
 * header (presumably NULL, allowing csv = csv_free(csv)) -- confirm.
 */
Csv *csv_free(Csv *csv);

/*
 * Accessors for data parsed in the FLAT model.
 * Per the implementation notes, utility functions generally perform no
 * sanity-checks on their arguments.
 */

/* Number of fields held by a FLAT-model csv. */
size_t csv_flat_nfields(Csv *csv);

/* Pointer to the field at position idx of a FLAT-model csv
 * (presumably 0-based -- confirm in the implementation).
 */
CsvString *csv_flat_field(Csv *csv, size_t idx);

/*
 * Accessors for data parsed in the LINED model.
 * Per the implementation notes, utility functions generally perform no
 * sanity-checks on their arguments.
 */

/* Number of lines held by a LINED-model csv. */
size_t csv_lined_nlines(Csv *csv);

/* Number of fields of a LINED-model csv (in this model all lines must
 * have the same number of tokens).
 */
int csv_lined_nfields(Csv *csv);

/* Pointer to field idx of line ln of a LINED-model csv
 * (indices presumably 0-based -- confirm in the implementation).
 */
CsvString *csv_lined_field(Csv *csv, size_t ln, int idx);

/* Pointer to field idx (0-based, as in the usage example at the top of
 * this file) of a single line; meant for use inside the callback given
 * to make_lined_csv_foreach().
 */
CsvString *csvline_field(CsvLine *csvline, int idx);

/*
 * Miscellaneous functions.
 */

/* Print the given csv.
 * NOTE(review): the output format and destination are not visible in
 * this header.
 */
void csv_print(const Csv *csv);

/**
 @brief Test a plain cstring against the text of a parsed field for
        byte equality.
 @param cstrlen: Length of cstring, not counting the trailing NUL byte.
 @param cstring: The plain cstring to compare.
 @param field:   Pointer to an already csv-parsed field; its .cstr member
                 is the other side of the comparison.
 @note	Depending on the nature of the parsed data, this may be faster
        than a bare strcmp/memcmp, because the two lengths can be compared
        before any bytes are.
        NOTE(review): the earlier wording said memcmp is called only if
        cstrlen "differs from" field->len; for an equality test that looks
        inverted (memcmp is only needed when the lengths match) -- confirm
        against the implementation.
        The meaning of the int result is not shown in this header
        (presumably non-zero when equal).
 */
int csv_eql_cstr_field(size_t cstrlen, char *cstring, CsvString *field);
#endif

#endif
