#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
#include "f_size/f_size.h" // x-platform routine for getting a filesize
#define FNAME "test1.csv" // csv file to be processed
#define FIELDS_AHEAD 16 // alloc-ahead number of line-fields
#define STEPPED_TIMING 1 // 0 to disable individual timing for passes
// during mem parsing
/*********************************************************//**
 * One parsed csv line: a growable array of heap-allocated
 * field strings plus the number of slots currently in use.
 *************************************************************
 */
typedef struct {
    size_t  nfields;    /* how many entries of fields[] are in use */
    char  **fields;     /* array of c-strings, one per csv field   */
} Line;
/*********************************************************//**
 * State shared between main() and the timed callbacks. A
 * pointer to it travels through the callbacks' void* argument.
 *************************************************************
 */
typedef struct
{
    /* --- loading the csv file into memory --- */
    char    *buf;       /* file contents as one NUL-terminated buffer */
    char    *fname;     /* name of the csv file                       */
    int64_t  fsize;     /* file size as reported by f_size()          */
    int64_t  n;         /* count of bytes fread() actually delivered  */

    /* --- parsing the loaded data --- */
    Line    *lines;     /* array of parsed lines                      */
    int64_t  nlines;    /* number of lines counted in buf             */
} CallbackData;
/*********************************************************//**
 * Return the size of a file in bytes, or -1 on error (it only
 * works for files with size <= LONG_MAX).
 *
 * The file is opened in binary mode, the stream is positioned
 * at its end and the resulting offset is reported. The stream
 * is always closed before returning. A NULL fname is treated
 * as an error.
 *
 * N O T E : Use this function if the more sophisticated f_size()
 * does not work with your compiler (see: #include "f_size.h").
 *************************************************************
 */
long int file_size( const char *fname )
{
    if ( !fname ) {
        return -1;
    }

    FILE *fp = fopen( fname, "rb" );    /* binary mode */
    if ( !fp ) {
        return -1;
    }

    long int size = -1;
    if ( 0 == fseek(fp, 0, SEEK_END) ) {
        size = ftell( fp );             /* ftell() itself yields -1L on failure */
    }

    fclose( fp );
    return size;
}
/*********************************************************//**
 * Measure, in seconds of wall-clock time, how long a call to
 * callback(userdata) takes. A callback that returns 0 is
 * treated as failed, in which case -1.0 is returned instead.
 *************************************************************
 */
#include <sys/time.h>
double time_it( void *userdata, int (*callback)(void *userdata) )
{
    struct timeval before;
    struct timeval after;

    gettimeofday( &before, NULL );
    const double began = before.tv_sec + before.tv_usec / 1000000.0;

    /* the work being timed */
    const int ok = callback( userdata );
    if ( !ok ) {
        return -1.0;
    }

    gettimeofday( &after, NULL );
    const double ended = after.tv_sec + after.tv_usec / 1000000.0;

    return ended - began;
}
/*********************************************************//**
 * Measure, in seconds, the clock() time consumed by a call to
 * callback(userdata). A callback that returns 0 is treated as
 * failed, in which case -1.0 is returned instead.
 * N O T E : Depending on the implementation, on Windows it MOST
 * PROBABLY returns the wall-clock time instead of the cpu-time.
 *************************************************************
 */
#include <time.h>
double clock_it( void *userdata, int (*callback)(void *userdata) )
{
    const clock_t began = clock();

    /* the work being timed */
    const int ok = callback( userdata );
    if ( !ok ) {
        return -1.0;
    }

    const clock_t ticks = clock() - began;
    return ((double) ticks) / CLOCKS_PER_SEC;
}
/*********************************************************//**
 * Portable stand-in for Windows' system( "pause" ): consume
 * characters from stdin until a newline or end-of-file shows up.
 *************************************************************
 */
void press_enter( void )
{
    for (;;) {
        const int ch = getchar();
        if ( '\n' == ch || EOF == ch ) {
            break;
        }
    }
}
/*********************************************************//**
 * Duplicate a c-string into freshly malloc'ed memory.
 *
 * Returns a pointer to the copy (terminating NUL included),
 * which the caller must release with free(). Returns NULL if
 * src is NULL or if the allocation fails.
 *************************************************************
 */
char *s_strdup( const char *src )
{
    /* sanity check */
    if ( !src ) {
        return NULL;
    }

    /* +1 so the terminating NUL gets copied along with the text */
    const size_t size = strlen( src ) + 1;

    char *s = malloc( size );
    if ( !s ) {
        return NULL;
    }

    return memcpy( s, src, size );
}
/*********************************************************//**
 * Timing callback function for reading a file into a buffer.
 * All required variables are passed and/or get modified via
 * the data pointer (a CallbackData *):
 *
 *   in : fname  name of the file to load
 *   out: fsize  size reported by f_size()
 *        buf    malloc'ed, NUL-terminated copy of the contents
 *        n      count of bytes fread() delivered; the file is
 *               opened in text mode, so n may be less than fsize
 *
 * Returns 1 on success. Returns 0 if the size is < 1 or does
 * not fit a size_t, or if allocating, opening or reading fails;
 * in those cases any buffer allocated here has been freed and
 * buf is NULL. On success the caller owns buf and must free() it.
 *************************************************************
 */
int cb_read_file_to_buf( void *data )
{
    CallbackData *d = (CallbackData *) data;
    if ( !d || !d->fname ) {
        return 0;
    }

    d->fsize = f_size( d->fname );
    if ( d->fsize < 1 ) {
        return 0;
    }
    /* the size (+1 for the terminator) must be expressible as a size_t */
    if ( (uint64_t) d->fsize >= SIZE_MAX ) {
        return 0;
    }
    const size_t nbytes = (size_t) d->fsize;

    d->buf = malloc( nbytes + 1 );
    if ( !d->buf ) {
        return 0;
    }

    FILE *fp = fopen( d->fname, "r" );
    if ( !fp ) {
        goto fail;
    }

    const size_t nread = fread( d->buf, 1, nbytes, fp );
    const int failed = ferror( fp );    /* query before the stream is closed */
    fclose( fp );
    if ( failed ) {
        goto fail;
    }

    d->n = (int64_t) nread;
    d->buf[ nread ] = '\0';             /* terminate right after the bytes read */
    return 1;

fail:
    free( d->buf );
    d->buf = NULL;
    return 0;
}
/*********************************************************//**
 * Append a copy of the cstring data as field number idx of the
 * given line.
 *
 * idx must be the 1st empty slot in the fields-buffer of the
 * line. Whenever idx is a multiple of FIELDS_AHEAD (0 included)
 * the buffer is grown with realloc() so that it offers
 * FIELDS_AHEAD further slots; line->fields must therefore be
 * NULL (or a previously grown buffer) on entry.
 *
 * Returns 1 on success. line->nfields is NOT updated on success;
 * that is left to the caller. Returns 0 if growing the buffer or
 * duplicating data fails; line->nfields is then set to idx, the
 * count of fields stored so far, so they can still be freed.
 *
 * N O T E : No sanity check on arguments (for improved efficiency).
 *************************************************************
 */
static inline int line_append_field( Line *line, size_t idx, char *data )
{
    if ( idx % FIELDS_AHEAD == 0 ) {
        /* keep the old buffer reachable in case realloc() fails */
        char **grown = realloc(
                line->fields,
                (idx + FIELDS_AHEAD) * sizeof(char *)
                );
        if ( !grown ) {
            line->nfields = idx;
            return 0;
        }
        line->fields = grown;
    }

    line->fields[idx] = s_strdup( data );
    if ( !line->fields[idx] ) {
        line->nfields = idx;
        return 0;
    }

    return 1;
}
/*********************************************************//**
 * Timing callback function for counting lines in the csv buf.
 * All required variables are passed and/or get modified via
 * the data pointer (a CallbackData *): buf is read, nlines is
 * written.
 *
 * Every '\n' terminates one line. Text left after the last '\n'
 * (a final line lacking its newline) counts as one more line, so
 * that an array sized from nlines has a slot for it.
 *
 * Returns 1 on success, 0 if data or its buf is NULL.
 * N O T E : This function is used when STEPPED_TIMING is enabled.
 *************************************************************
 */
static inline int cb_parse_csvbuf_pass1( void *data )
{
    CallbackData *d = (CallbackData *) data;
    if ( !d || !d->buf ) {
        return 0;
    }

    int64_t count = 0;
    const char *cp = d->buf;
    for (; *cp; cp++) {
        if ( '\n' == *cp ) {
            count++;
        }
    }

    /* non-empty buffer whose last character is not a newline */
    if ( cp != d->buf && '\n' != cp[-1] ) {
        count++;
    }

    d->nlines = count;
    return 1;
}
/*********************************************************//**
 * Store text as field f of line l, first making sure that
 * d->lines really has a slot l. If l is beyond the current
 * d->nlines, the array is grown by one zeroed Line and
 * d->nlines is bumped accordingly. On failure the line's
 * nfields is set to f (the fields stored so far) and 0 is
 * returned.
 *************************************************************
 */
static inline int csvbuf_store_field(
        CallbackData *d, int64_t l, size_t f, char *text )
{
    if ( l >= d->nlines ) {
        Line *grown = realloc( d->lines, ((size_t) l + 1) * sizeof *grown );
        if ( !grown ) {
            return 0;
        }
        grown[l].nfields = 0;
        grown[l].fields  = NULL;
        d->lines  = grown;
        d->nlines = l + 1;
    }

    if ( !line_append_field(&d->lines[l], f, text) ) {
        d->lines[l].nfields = f;
        return 0;
    }
    return 1;
}

/*********************************************************//**
 * Timing callback function for converting csv buf into Line structs.
 * All required variables are passed and/or get modified via
 * the data pointer (a CallbackData *):
 *
 *   in    : buf, nlines (number of lines to allocate up-front)
 *   out   : lines (newly allocated array), each with its fields
 *           and nfields filled in
 *   in/out: buf is modified in place: every ',' and '\n' is
 *           overwritten with '\0'
 *
 * A ',' ends a field, a '\n' ends a field and its line. A final
 * line lacking its newline is stored too. Should the buffer hold
 * more lines than nlines announced, lines is grown and nlines is
 * raised instead of writing past the end of the array.
 *
 * Returns 1 on success, 0 on bad arguments or memory shortage.
 * After a failure lines may be partially filled; every line's
 * nfields tells how many of its fields are valid.
 * N O T E : This function is used when STEPPED_TIMING is enabled.
 *************************************************************
 */
static inline int cb_parse_csvbuf_pass2( void *data )
{
    CallbackData *d = (CallbackData *) data;
    if ( !d || !d->buf || d->nlines < 0 ) {
        return 0;
    }

    /* never request 0 elements: calloc(0, ..) may legitimately return NULL */
    d->lines = calloc( d->nlines > 0 ? (size_t) d->nlines : 1, sizeof(Line) );
    if ( !d->lines ) {
        return 0;
    }

    int64_t l   = 0;        /* lines counter                      */
    size_t  f   = 0;        /* fields counter for current line    */
    char   *pre = d->buf;   /* start of the field being scanned   */
    char   *cp  = d->buf;

    for (; *cp; cp++) {
        const char c = *cp;
        if ( ',' != c && '\n' != c ) {
            continue;
        }

        *cp = '\0';         /* terminate the field in place */
        if ( !csvbuf_store_field(d, l, f, pre) ) {
            return 0;
        }
        pre = cp + 1;

        if ( ',' == c ) {
            f++;
        }
        else {              /* '\n': the line is complete */
            d->lines[l].nfields = 1 + f;
            f = 0;
            l++;
        }
    }

    /* text after the last newline: a final, unterminated line */
    if ( f > 0 || pre != cp ) {
        if ( !csvbuf_store_field(d, l, f, pre) ) {
            return 0;
        }
        d->lines[l].nfields = 1 + f;
    }

    return 1;
}
/*********************************************************//**
 * Timing callback function that runs both parsing passes back
 * to back: first the lines in the csv buf are counted, then the
 * buf is converted into an array of Line structs. The second
 * pass is skipped when the first one fails.
 * All required variables are passed and/or get modified via
 * the data pointer. Returns 1 if both passes succeed, else 0.
 * N O T E : This function is used when STEPPED_TIMING is disabled.
 *************************************************************
 */
int cb_parse_csvbuf( void *data )
{
    if ( !cb_parse_csvbuf_pass1(data) ) {
        return 0;
    }
    if ( !cb_parse_csvbuf_pass2(data) ) {
        return 0;
    }
    return 1;
}
/*********************************************************//**
 * Free the memory owned by each of the nlines Line structs in
 * the given array: every field string counted by nfields, and
 * the fields-buffer itself.
 *
 * Each line is reset to { 0, NULL } afterwards, so calling the
 * function again on the same array is harmless. The lines array
 * itself is NOT freed here; that remains up to the caller.
 * A NULL lines pointer is accepted and ignored.
 *************************************************************
 */
void lines_destroy( Line *lines, int64_t nlines )
{
    if ( !lines ) {
        return;
    }

    for (int64_t l=0; l < nlines; l++) {
        Line *line = &lines[l];

        for (size_t f=0; f < line->nfields; f++) {
            free( line->fields[f] );    /* free(NULL) is a no-op */
        }

        /* the buffer may exist even when nfields is still 0 */
        free( line->fields );
        line->fields  = NULL;
        line->nfields = 0;
    }
}
/*********************************************************//**
 * Print the given array of Line structs to stdout.
 *
 * For every line a header showing its 1-based number and its
 * fields count is written, followed by each non-NULL field on
 * a line of its own. A NULL lines pointer is accepted and
 * ignored.
 *************************************************************
 */
void lines_print( const Line *lines, int64_t nlines )
{
    if ( !lines ) {
        return;
    }

    for (int64_t l=0; l < nlines; l++)
    {
        printf(
            "---- line: %" PRId64 " (%zu fields) ----\n",
            l+1,
            lines[l].nfields
            );

        for (size_t f=0; f < lines[l].nfields; f++)
        {
            const char *s = lines[l].fields[f];
            if ( s ) {
                puts( s );
            }
        }
    }
}
/*********************************************************//**
 * Program entry point: load the csv file FNAME into memory,
 * parse it into lines & fields, and report the wall-clock time
 * each step took.
 *
 * With STEPPED_TIMING enabled the two parsing passes are timed
 * and reported separately, otherwise they are timed as one.
 *
 * All paths end up in the same cleanup code, which releases the
 * parsed lines and the file buffer and waits for ENTER. Returns
 * EXIT_SUCCESS, or EXIT_FAILURE (after printing an error notice
 * to stderr) if any timed step failed.
 *************************************************************
 */
int main( void )
{
    int status = EXIT_FAILURE;      /* flipped only once every step worked */

    CallbackData cd = {
        .buf    = NULL,
        .fname  = FNAME,
        .fsize  = 0,
        .n      = 0,
        .lines  = NULL,
        .nlines = 0,
    };
    double elapsed0 = 0.0;          /* loading                          */
    double elapsed1 = 0.0;          /* parsing: pass 1, or both passes  */
    double elapsed2 = 0.0;          /* parsing: pass 2 (stepped only)   */

    /* Read csv file into memory buffer. */
    printf( "*** %s ****\n\n", cd.fname );
    elapsed0 = time_it( (void *)&cd, cb_read_file_to_buf );
    if ( -1.0 == elapsed0 ) {
        goto cleanup;
    }
    printf(
        "%.5f secs\n(%" PRId64 " text-bytes | %" PRId64 " binary-bytes)\n\n",
        elapsed0,
        cd.n,
        cd.fsize
        );

    /* Parse buf into lines & fields. */
#if STEPPED_TIMING
    elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf_pass1 );
    if ( -1.0 == elapsed1 ) {
        goto cleanup;
    }
    printf( "%-.5f secs\n", elapsed1 );

    elapsed2 = time_it( (void *)&cd, cb_parse_csvbuf_pass2 );
    if ( -1.0 == elapsed2 ) {
        goto cleanup;
    }
    printf( "%-.5f secs\n", elapsed2 );

    printf(
        "Parsed in: %-.5f secs (%" PRId64 " lines)\n\n",
        elapsed1 + elapsed2,
        cd.nlines
        );
#else // #if !STEPPED_TIMING
    elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf );
    if ( -1.0 == elapsed1 ) {
        goto cleanup;
    }
    printf( "%-.5f secs (%" PRId64 " lines)\n\n", elapsed1, cd.nlines );
#endif

    printf(
        "Total: %-.5f secs (loading + parsing)\n",
        elapsed0 + elapsed1 + elapsed2
        );
    //lines_print( cd.lines, cd.nlines );

    status = EXIT_SUCCESS;

    /* Cleanup and exit (shared by the success and the failure path). */
cleanup:
    if ( EXIT_SUCCESS != status ) {
        fputs( "*** error ***\n", stderr );
    }
    printf( "\ncleaning up... " );
    fflush( stdout );               /* no newline was printed: show it before blocking */

    lines_destroy( cd.lines, cd.nlines );   /* NULL-safe */
    free( cd.lines );
    cd.lines = NULL;
    free( cd.buf );
    cd.buf = NULL;

    press_enter();
    return status;
}