/* (online-compiler page residue, not part of the program: "fork download") */
  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #include "f_size/f_size.h" // x-platform routine for getting a filesize
  7.  
  #define FNAME          "test1.csv" // csv file to be processed
  #define FIELDS_AHEAD   16          // alloc-ahead number of line-fields
  #define STEPPED_TIMING 1           // 0 to disable individual timing for passes
                                     // during mem parsing
  12.  
  /*********************************************************//**
   * One parsed csv line.
   * fields is a heap-allocated array of heap-allocated cstrings; its
   * first nfields slots are in use.
   *************************************************************
   */
  typedef struct {
      size_t nfields;   // count of currently used fields
      char **fields;    // fields buffer (array of cstrings)
  } Line;
  17.  
  18. typedef struct
  19. {
  20. // mostly used for loading a csv file in mem
  21. char *buf;
  22. char *fname;
  23. int64_t fsize;
  24. int64_t n;
  25.  
  26. // mostly used for parsing loaded data
  27. Line *lines;
  28. int64_t nlines;
  29. } CallbackData;
  30.  
  /*********************************************************//**
   * Return the size of a file in bytes, or -1 on error (it only
   * works for files with size <= LONG_MAX). A NULL fname is an error.
   * N O T E : Use this function if the more sophisticated f_size()
   * does not work with your compiler (see: #include "f_size.h").
   *************************************************************
   */
  long int file_size( const char *fname )
  {
      if ( !fname ) {
          return -1;
      }

      FILE *fp = fopen( fname, "rb" );    /* binary mode */
      if ( !fp ) {
          return -1;
      }

      /* ftell() itself yields -1 on failure, so a single exit suffices. */
      long int size = -1;
      if ( 0 == fseek(fp, 0, SEEK_END) ) {
          size = ftell( fp );
      }
      fclose( fp );

      return size;
  }
  56.  
  /*********************************************************//**
   * Return the wall-clock time spent for executing the callback
   * function, in secs. On error, return -1.0
   *
   * Errors are: a NULL callback, a failing gettimeofday(), or the
   * callback itself returning 0. userdata is handed to the callback
   * untouched.
   *************************************************************
   */
  #include <sys/time.h>
  double time_it( void *userdata, int (*callback)(void *userdata) )
  {
      struct timeval t0, t1;

      if ( !callback ) {
          return -1.0;
      }
      if ( 0 != gettimeofday(&t0, NULL) ) {
          return -1.0;
      }

      /* Code we want timed here */
      if ( 0 == callback(userdata) ) {
          return -1.0;
      }

      if ( 0 != gettimeofday(&t1, NULL) ) {
          return -1.0;
      }

      /* Subtract per component before converting, to keep precision. */
      double elapsed = (double) (t1.tv_sec - t0.tv_sec)
                     + (double) (t1.tv_usec - t0.tv_usec) / 1000000.0;

      /* A wall clock stepped backwards must not look like the -1.0 error. */
      return elapsed < 0.0 ? 0.0 : elapsed;
  }
  81.  
  /*********************************************************//**
   * Return the cpu-time spent for executing the callback function,
   * in secs. On error, return -1.0
   *
   * Errors are: a NULL callback, clock() reporting (clock_t)-1, or the
   * callback itself returning 0.
   * N O T E : Depending on the implementation, on Windows it MOST
   * PROBABLY returns the wall-clock time instead of the cpu-time.
   *************************************************************
   */
  #include <time.h>
  double clock_it( void *userdata, int (*callback)(void *userdata) )
  {
      if ( !callback ) {
          return -1.0;
      }

      const clock_t t0 = clock();
      if ( (clock_t) -1 == t0 ) {
          return -1.0;
      }

      /* Code we want timed here */
      if ( 0 == callback(userdata) ) {
          return -1.0;
      }

      const clock_t t1 = clock();
      if ( (clock_t) -1 == t1 ) {
          return -1.0;
      }

      return ((double) (t1 - t0)) / CLOCKS_PER_SEC;
  }
  101.  
  /*********************************************************//**
   * x-platform alternative to Windows system( "pause" ).
   * Prints a prompt, then consumes stdin up to and including the next
   * newline (or until EOF).
   *************************************************************
   */
  void press_enter( void )
  {
      int c;

      printf( "Press ENTER..." );
      fflush( stdout );

      do {
          c = getchar();
      } while ( '\n' != c && EOF != c );
  }
  114.  
  /*********************************************************//**
   * Duplicate a c-string. Return NULL on error (NULL src, or out of
   * memory). The caller owns the returned copy and must free() it.
   *************************************************************
   */
  char *s_strdup( const char *src )
  {
      /* sanity check */
      if ( !src ) {
          return NULL;
      }

      const size_t size = strlen( src ) + 1;   /* +1 for the terminator */
      char *copy = malloc( size );
      if ( !copy ) {
          return NULL;
      }

      memcpy( copy, src, size );
      return copy;
  }
  136.  
  137. /*********************************************************//**
  138.  * Timing callback function for reading a file into a buffer.
  139.  * All required variables are passed and/or get modified via
  140.  * the data pointer.
  141.  *************************************************************
  142.  */
  143. int cb_read_file_to_buf( void *data )
  144. {
  145. CallbackData *d = (CallbackData *) data;
  146.  
  147. d->fsize = f_size( d->fname );
  148. if ( d->fsize < 1 ) {
  149. return 0;
  150. }
  151.  
  152. d->buf = malloc( 1 + d->fsize );
  153. if ( ! d->buf ) {
  154. return 0;
  155. }
  156. d->buf[ d->fsize ] = '\0';
  157.  
  158. FILE *fp = fopen( d->fname, "r" );
  159. if ( !fp ) {
  160. free( d->buf );
  161. d->buf = NULL;
  162. return 0;
  163. }
  164.  
  165. d->n = fread( d->buf, 1, d->fsize, fp );
  166. if ( ferror(fp) ) {
  167. free( d->buf );
  168. d->buf = NULL;
  169. fclose( fp );
  170. return 0;
  171. }
  172. fclose( fp );
  173. d->buf[ d->n ] = '\0';
  174.  
  175. return 1;
  176. }
  177.  
  178. /*********************************************************//**
  179.  * Append the cstring data as a field into the given line.
  180.  * idx must correspond to the 1st empty slot in the fields-buffer
  181.  * of the line. If idx is also the last slot of the fields-buffer,
  182.  * then the buffer gets increased by FIELDS_AHEAD fields. On error,
  183.  * the function returns 0.
  184.  * N O T E : No sanity check on arguments (for improved efficiency).
  185.  *************************************************************
  186.  */
  187. static inline int line_append_field( Line *line, size_t idx, char *data )
  188. {
  189. if ( idx % FIELDS_AHEAD == 0 ) {
  190. char **try = realloc(
  191. line->fields,
  192. (idx + FIELDS_AHEAD) * sizeof(char *)
  193. );
  194. if ( !try ) {
  195. line->nfields = idx;
  196. return 0;
  197. }
  198. line->fields = try;
  199. }
  200.  
  201. line->fields[idx] = s_strdup( data );
  202. if ( !line->fields[idx] ) {
  203. return 0;
  204. }
  205.  
  206. return 1;
  207. }
  208.  
  209. /*********************************************************//**
  210.  * Timing callback function for counting lines in the csv buf.
  211.  * All required variables are passed and/or get modified via
  212.  * the data pointer.
  213.  * N O T E : This function is used when STEPPED_TIMING is enabled.
  214.  *************************************************************
  215.  */
  216. static inline int cb_parse_csvbuf_pass1( void *data )
  217. {
  218. CallbackData *d = (CallbackData *) data;
  219. // if ( !d || !d->buf ) {
  220. // return 0;
  221. // }
  222.  
  223. d->nlines = 0;
  224. for (char *cp = d->buf; *cp; cp++) {
  225. if ( '\n' == *cp )
  226. (d->nlines)++;
  227. }
  228.  
  229. return 1;
  230. }
  231.  
  232. /*********************************************************//**
  233.  * Timing callback function for converting csv buf into Line structs.
  234.  * All required variables are passed and/or get modified via
  235.  * the data pointer.
  236.  * N O T E : This function is used when STEPPED_TIMING is enabled.
  237.  *************************************************************
  238.  */
  239. static inline int cb_parse_csvbuf_pass2( void *data )
  240. {
  241. CallbackData *d = (CallbackData *) data;
  242.  
  243. // if ( !d || !d->buf || d->nlines < 1 ) {
  244. // return 0;
  245. // }
  246.  
  247. d->lines = calloc( d->nlines, sizeof(Line) );
  248. if ( !d->lines ) {
  249. return 0;
  250. }
  251.  
  252. int64_t l = 0; // lines counter
  253. size_t f = 0; // fields counter for current line
  254. char *cp = d->buf;
  255.  
  256. char *pre = d->buf;
  257. while ( *cp )
  258. {
  259. if ( ',' == *cp ) {
  260. *cp = '\0';
  261.  
  262. if ( !line_append_field(&d->lines[l], f, pre) ) {
  263. return 0;
  264. }
  265. f++;
  266. pre = cp + 1;
  267. }
  268. else if ( '\n' == *cp ) {
  269. *cp = '\0';
  270.  
  271. if ( !line_append_field(&d->lines[l], f, pre) ) {
  272. return 0;
  273. }
  274. d->lines[l].nfields = 1 + f;
  275. f = 0;
  276. l++;
  277. pre = cp + 1;
  278. }
  279. cp++;
  280. }
  281.  
  282. return 1;
  283. }
  284.  
  /*********************************************************//**
   * Timing callback function for counting lines in the csv buf
   * and then converting the csv buf into an array of Line structs.
   * All required variables are passed and/or get modified via
   * the data pointer.
   *
   * Runs pass 1, then pass 2 only if pass 1 succeeded. Returns 1 when
   * both passes succeed, else 0.
   * N O T E : This function is used when STEPPED_TIMING is disabled.
   *************************************************************
   */
  int cb_parse_csvbuf( void *data )
  {
      if ( !cb_parse_csvbuf_pass1(data) ) {
          return 0;
      }

      return cb_parse_csvbuf_pass2(data) ? 1 : 0;
  }
  297.  
  298. /*********************************************************//**
  299.  * Free memory reserved for the given array of Line structs
  300.  *************************************************************
  301.  */
  302. void lines_destroy( Line *lines, int64_t nlines )
  303. {
  304. if ( !lines ) {
  305. return;
  306. }
  307.  
  308. for (int64_t l=0; l < nlines; l++) {
  309. for (size_t f=0; f < lines[l].nfields; f++) {
  310. if ( lines[l].fields[f] ) {
  311. free( lines[l].fields[f] );
  312. }
  313. }
  314. }
  315. free( lines );
  316. }
  317.  
  318. /*********************************************************//**
  319.  * Print the given array of Line structs
  320.  *************************************************************
  321.  */
  322. void lines_print( const Line *lines, int64_t nlines )
  323. {
  324. if ( !lines ) {
  325. return;
  326. }
  327.  
  328. for (int64_t l=0; l < nlines; l++)
  329. {
  330. "---- line: %" PRId64 " (%zu fields) ----\n",
  331. l+1,
  332. lines[l].nfields
  333. );
  334. for (size_t f=0; f < lines[l].nfields; f++)
  335. {
  336. char *s = lines[l].fields[f];
  337. if ( s ) {
  338. puts( *s ? s : "\\0" );
  339. }
  340. }
  341. }
  342. }
  343.  
  344. /*********************************************************//**
  345.  *
  346.  *************************************************************
  347.  */
  348. int main( void )
  349. {
  350. CallbackData cd = {
  351. .buf = NULL,
  352. .fname = FNAME,
  353. .fsize = 0,
  354. .n = 0,
  355.  
  356. .lines = NULL,
  357. .nlines = 0,
  358. };
  359.  
  360. double elapsed0 = 0.0;
  361. double elapsed1 = 0.0;
  362. double elapsed2 = 0.0;
  363.  
  364. /* Read csv file into memory buffer. */
  365.  
  366. printf( "*** %s ****\n\n", cd.fname );
  367.  
  368. printf( "Loading... " );
  369. fflush( stdout );
  370.  
  371. elapsed0 = time_it( (void *)&cd, cb_read_file_to_buf );
  372. if ( -1.0 == elapsed0 ) {
  373. goto exit_failure;
  374. }
  375. "%.5f secs\n(%" PRId64 " text-bytes | %" PRId64 " binary-bytes)\n\n",
  376. elapsed0,
  377. cd.n,
  378. cd.fsize
  379. );
  380.  
  381. /* Parse buf into lines & fields. */
  382.  
  383. printf( "Parsing... " );
  384. fflush( stdout );
  385.  
  386. #if STEPPED_TIMING
  387. printf( "\npass-1... " );
  388. fflush( stdout );
  389. elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf_pass1 );
  390. if ( -1.0 == elapsed1 ) {
  391. goto exit_failure;
  392. }
  393. printf( "%-.5f secs\n", elapsed1 );
  394.  
  395. printf( "pass-2... " );
  396. fflush( stdout );
  397. elapsed2 = time_it( (void *)&cd, cb_parse_csvbuf_pass2 );
  398. if ( -1.0 == elapsed2 ) {
  399. goto exit_failure;
  400. }
  401. printf( "%-.5f secs\n", elapsed2 );
  402.  
  403. "Parsed in: %-.5f secs (%" PRId64 " lines)\n\n",
  404. elapsed1 + elapsed2,
  405. cd.nlines
  406. );
  407.  
  408. #else // #if !STEPPED_TIMING
  409. elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf );
  410. if ( -1.0 == elapsed1 ) {
  411. goto exit_failure;
  412. }
  413. printf( "%-.5f secs (%" PRId64 " lines)\n\n", elapsed1, cd.nlines );
  414. #endif
  415.  
  416. "Total: %-.5f secs (loading + parsing)\n",
  417. elapsed0 + elapsed1 + elapsed2
  418. );
  419.  
  420. //lines_print( cd.lines, cd.nlines );
  421.  
  422. /* Cleanup and exit. */
  423.  
  424. printf( "\ncleaning up... " );
  425. fflush( stdout );
  426.  
  427. lines_destroy( cd.lines, cd.nlines );
  428. free( cd.buf );
  429.  
  430. press_enter();
  431.  
  432. exit( EXIT_SUCCESS );
  433.  
  434. exit_failure:
  435. fputs( "*** error ***\n", stderr );
  436. printf( "\ncleaning up... " );
  437. fflush( stdout );
  438. if ( cd.lines ) {
  439. lines_destroy( cd.lines, cd.nlines );
  440. }
  441. if ( cd.buf ) {
  442. free( cd.buf);
  443. }
  444.  
  445. press_enter();
  446.  
  447. exit( EXIT_FAILURE );
  448. }
  449.  
/*
 * Online-compiler output captured together with the listing
 * (not part of the program):
 *
 * Compilation error   #stdin   compilation error   #stdout   0s 0KB
 *
 * stdin:
 *   Standard input is empty
 *
 * compilation info:
 *   prog.c:6:80: fatal error: f_size/f_size.h: No such file or directory
 *    #include "f_size/f_size.h"         // x-platform routine for getting a filesize
 *                                                                                   ^
 *   compilation terminated.
 *
 * stdout:
 *   Standard output is empty
 */