fork download
  1. #ifndef CSV_C
  2. #define CSV_C
  3. #endif
  4.  
  5. /**********************************************************
  6.  * CSV.C
  7.  * A simple csv-parser.
  8.  *
  9.  * It was written due to a post at a popular Greek forum:
  10.  * http://w...content-available-to-author-only...a.gr/topic/530662-coding-challenge/
  11.  *
  12.  * For features, limitations, etc., please see the file: csv.h
  13.  **********************************************************
  14.  */
  15.  
  16. #include <stdio.h>
  17. #include <stdlib.h>
  18. #include <string.h>
  19.  
  20. #include "csv.h"
  21. #include "f_size/f_size.h" // see: http://i...content-available-to-author-only...e.com/eCXzVn
  22.  
  /* Non-zero when typid equals one of the two model ids used in this
   * file (CSVFLAT or CSVLINED), zero otherwise. The argument may be
   * expanded twice, so pass an expression without side effects. */
  #define CSV_VALIDTYPE( typid ) \
      ( (typid) == CSVFLAT || (typid) == CSVLINED )
  25.  
  26. struct _CsvLine {
  27. // size_t nfields; // count of currently used fields (not supported)
  28. CsvString *fields; // dynamic array
  29. };
  30.  
  /* LINED model: the parsed file stored as an array of lines. */
  struct _CsvLined {
      size_t nlines;             /* total number of parsed lines */
      int nfields;               /* number of fields in each parsed line */
      struct _CsvLine *data;     /* dynamically allocated array of lines */
  };
  36.  
  37. struct _CsvFlat {
  38. size_t nfields; // total count of parsed fields
  39. CsvString *data; // dynamic array
  40. };
  41.  
  /* A parsed csv file. Constructors in this file fill in either `flat`
   * or `lined`, matching the model id stored in `typid`. */
  struct Csv {
      char *buf;                  /* file contents; parsed fields point into it */
      int typid;                  /* model id */
      struct _CsvFlat *flat;      /* FLAT model data */
      struct _CsvLined *lined;    /* LINED model data */
  };
  48.  
  49. /*********************************************************//**
  50.  * Two private macros to be inlined inside the FLAT model
  51.  * constructors (they will save me typing)
  52.  *************************************************************
  53.  */
  54. /* ---------------------------------------------------- */
  /*
   * Allocates a Csv for the FLAT model and sizes its field array.
   *
   *   csv   - lvalue of type Csv*; receives the new object
   *   fname - path handed to _make_csvbuf()
   *   f     - integer lvalue; left holding the counted number of fields
   *
   * A field is counted for every ',' and every '\n' found in the buffer.
   * On any failure everything allocated so far is released and the
   * ENCLOSING FUNCTION returns NULL.
   */
  #define _PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f )\
  do {\
      /* zero-initialised container */\
      (csv) = calloc( 1, sizeof *(csv) );\
      if ( NULL == (csv) ) {\
          return NULL;\
      }\
  \
      /* zero-initialised FLAT sub-structure */\
      (csv)->flat = calloc( 1, sizeof *(csv)->flat );\
      if ( NULL == (csv)->flat ) {\
          free( (csv) );\
          return NULL;\
      }\
  \
      /* whole file loaded into memory */\
      (csv)->buf = _make_csvbuf( (fname) );\
      if ( NULL == (csv)->buf ) {\
          free( (csv)->flat );\
          free( (csv) );\
          return NULL;\
      }\
  \
      (csv)->typid = CSVFLAT;\
  \
      /* every separator (comma or newline) closes one field */\
      (f) = 0;\
      for (const char *scan_ = (csv)->buf; '\0' != *scan_; scan_++) {\
          if ( ',' == *scan_ || '\n' == *scan_ ) {\
              (f)++;\
          }\
      }\
  \
      /* one CsvString slot per counted field */\
      (csv)->flat->nfields = (f);\
      (csv)->flat->data = calloc( (f), sizeof *(csv)->flat->data );\
      if ( NULL == (csv)->flat->data ) {\
          free( (csv)->buf );\
          free( (csv)->flat );\
          free( (csv) );\
          return NULL;\
      }\
  }while(0)
  94. /* ---------------------------------------------------- */
  /*
   * Splits csv->buf in place into the pre-allocated FLAT field array.
   *
   *   csv      - Csv* already set up by _PREPARE_FLAT_MODEL_OR_DIE
   *   f        - integer lvalue used as the running field index
   *   callback - function pointer (may be NULL); when non-NULL it is called
   *              for each field as callback(&field, userdata) and its
   *              return value is ignored
   *   userdata - passed through to the callback untouched
   *
   * Each ',' or '\n' is overwritten with '\0', so every field becomes a
   * NUL-terminated string that points into csv->buf (nothing is copied).
   */
  #define _POPULATE_FLAT_MODEL(csv, f, callback, userdata)\
  do {\
      char *cur_ = (csv)->buf;    /* scanning position */\
      char *start_ = cur_;        /* first char of the field being scanned */\
      (f) = 0;\
      while ( '\0' != *cur_ ) {\
          if ( ',' == *cur_ || '\n' == *cur_ ) {\
              *cur_ = '\0';\
              (csv)->flat->data[(f)].cstr = start_;\
              (csv)->flat->data[(f)].len = cur_ - start_;\
              if ( (callback) ) {\
                  (*(callback))( &(csv)->flat->data[(f)], (userdata) );\
              }\
              start_ = cur_ + 1;\
              (f)++;\
          }\
          cur_++;\
      }\
  }while(0)
  115.  
  116. /*********************************************************//**
  117.  * Two private macros to be inlined inside the LINED model
  118.  * constructors (they will save me typing)
  119.  *************************************************************
  120.  */
  121. /* ---------------------------------------------------- */
  /*
   * Allocates a Csv for the LINED model and sizes its line/field arrays.
   *
   *   csv   - lvalue of type Csv*; receives the new object
   *   fname - path handed to _make_csvbuf()
   *   ln    - integer lvalue; left holding the number of '\n' in the buffer
   *   f     - integer lvalue; left holding the field count of the 1st line
   *           (number of ',' before the first '\n', plus one)
   *
   * Every line is given room for exactly `f` fields. On any failure
   * everything allocated so far is released and the ENCLOSING FUNCTION
   * returns NULL.
   */
  #define _PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f )\
  do {\
      (csv) = calloc( 1, sizeof( *(csv) ) );\
      if ( !(csv) ) {\
          return NULL;\
      }\
      (csv)->lined = calloc( 1, sizeof( *((csv)->lined) ) );\
      if ( !(csv)->lined ) {\
          free( (csv) );\
          return NULL;\
      }\
      (csv)->buf = _make_csvbuf( (fname) );\
      if ( !(csv)->buf ) {\
          free( (csv)->lined );\
          free( (csv) );\
          return NULL;\
      }\
  \
      (csv)->typid = CSVLINED;\
  \
      /* count total lines */\
      (ln) = 0;\
      char *cp = (csv)->buf;\
      for (; *cp; cp++) {\
          if ( '\n' == *cp ) {\
              (ln)++;\
          }\
      }\
      /* count fields of 1st line (we assume all lines have same # of fields) */\
      (f) = 0;\
      for (cp = (csv)->buf; *cp && '\n' != *cp; cp++) {\
          if ( ',' == *cp ) {\
              (f)++;\
          }\
      }\
      (f)++;\
  \
      /* allocate mem for ln lines, each one having f fields */\
      (csv)->lined->data = calloc( (ln), sizeof( *((csv)->lined->data) ) );\
      if ( !(csv)->lined->data ) {\
          free( (csv)->lined );\
          free( (csv)->buf );\
          free( (csv) );\
          /* must bail out here: csv has just been freed */\
          return NULL;\
      }\
      for (size_t i=0; i < (ln); i++) {\
          (csv)->lined->data[i].fields = calloc( (f), sizeof( *((csv)->lined->data[i].fields)) );\
          if ( !(csv)->lined->data[i].fields ) {\
              /* undo the lines allocated before the failing one */\
              for (size_t j=0; j < i; j++) {\
                  free( (csv)->lined->data[j].fields );\
              }\
              free( (csv)->lined->data );\
              free( (csv)->lined );\
              free( (csv)->buf );\
              free( (csv) );\
              return NULL;\
          }\
      }\
      (csv)->lined->nlines = (ln);  /* total lines */\
      (csv)->lined->nfields = (f);  /* fields per line */\
  }while(0)
  182. /* ---------------------------------------------------- */
  /*
   * Splits csv->buf in place into the pre-allocated LINED line/field arrays.
   *
   *   csv      - Csv* already set up by _PREPARE_LINED_MODEL_OR_DIE
   *   ln       - integer lvalue used as the running line index
   *   f        - integer lvalue used as the running field index in a line
   *   callback - function pointer (may be NULL); when non-NULL it is called
   *              once per completed line as callback(&line, userdata) and
   *              its return value is ignored
   *   userdata - passed through to the callback untouched
   *
   * Each ',' or '\n' is overwritten with '\0', so every stored field is a
   * NUL-terminated string pointing into csv->buf (nothing is copied).
   *
   * The arrays were sized from the number of '\n' and from the field count
   * of the FIRST line only. A field is therefore stored only while both
   * indexes are inside those bounds: surplus fields on a longer line, and
   * any text following the last '\n', are skipped instead of being written
   * past the end of the arrays.
   */
  #define _POPULATE_LINED_MODEL(csv, ln, f, callback, userdata)\
  do {\
      (ln) = 0;\
      (f) = 0;\
      for (char *cp=(csv)->buf, *pre=cp; *cp; cp++)\
      {\
          if ( ',' == *cp || '\n' == *cp ) {\
              const int at_eol_ = ( '\n' == *cp );\
              *cp = '\0';\
              if ( (ln) < (csv)->lined->nlines && (f) < (csv)->lined->nfields ) {\
                  CsvString *s = &(csv)->lined->data[(ln)].fields[(f)];\
                  s->cstr = pre;  /* no mem copying */\
                  s->len = cp - pre;\
              }\
              pre = cp + 1;\
              if ( at_eol_ ) {\
                  /* ln is always in range here: nlines counts the '\n's */\
                  if ( (callback) ) {\
                      (*(callback))( &(csv)->lined->data[(ln)], (userdata) );\
                  }\
                  (f) = 0;\
                  (ln)++;\
              }\
              else {\
                  (f)++;\
              }\
          }\
      }\
  }while(0)
  215.  
  /*********************************************************//**
   * (Private Helper) Read the whole csv file into a freshly allocated,
   * zero-filled buffer that is one byte larger than the size reported
   * by f_size(), so the contents are always NUL-terminated. The buffer
   * is LATER ON parsed & modified in place by the constructors of the
   * supported parsing-models (currently FLAT and LINED).
   *
   * Returns the buffer (to be released with free()), or NULL when the
   * reported size is less than 1, the file cannot be opened, memory
   * cannot be allocated, or the read reports an error.
   *************************************************************
   */
  static inline char *_make_csvbuf( const char *fname )
  {
      int64_t fsize = f_size( fname );
      if ( fsize < 1 ) {
          return NULL;
      }

      FILE *fp = fopen( fname, "rb" );
      if ( !fp ) {
          return NULL;
      }

      char *buf = calloc( 1, (size_t)fsize + 1 );
      if ( buf ) {
          size_t nread = fread( buf, 1, (size_t)fsize, fp );
          if ( ferror(fp) ) {
              free( buf );
              buf = NULL;
          }
          else {
              /* nread <= fsize, so this stays inside the allocation */
              buf[nread] = '\0';
          }
      }

      /* single exit: the stream is closed on every path, including
       * the allocation-failure one */
      fclose( fp );
      return buf;
  }
  249.  
  250. /*********************************************************//**
  251.  * (Private Helper) Constructor of the FLAT model
  252.  *************************************************************
  253.  */
  254. static inline Csv *_make_flat_csv( const char *fname )
  255. {
  256. Csv *csv = NULL;
  257. size_t f = 0; // fields counter
  258. int (*dummy)(CsvString *, void *) = NULL;
  259.  
  260. _PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f );
  261. _POPULATE_FLAT_MODEL( csv, f, dummy, NULL );
  262.  
  263. return csv;
  264. }
  265.  
  266. /*********************************************************//**
  267.  * (Private Helper) Constructor of the LINED model
  268.  *************************************************************
  269.  */
  270. static inline Csv *_make_lined_csv( const char *fname )
  271. {
  272. Csv *csv = NULL;
  273. size_t ln = 0; // lines counter
  274. int f = 0; // fields counter
  275. int (*dummy)(CsvLine *, void *userdata) = NULL;
  276.  
  277. _PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f );
  278. _POPULATE_LINED_MODEL( csv, ln, f, dummy, NULL );
  279.  
  280. return csv;
  281. }
  282.  
  283. /*********************************************************//**
  284.  * Normal Constructor:
  285.  * Parsed csv-files are stored in memory either as consecutive
  286.  * fields (FLAT model) or as lines of fields (LINED model).
  287.  * The typid argument dictates which model will be used.
  288.  *************************************************************
  289.  */
  290. Csv *make_csv( const char *fname, int typid )
  291. {
  292. if ( !fname || !CSV_VALIDTYPE(typid) ) {
  293. return NULL;
  294. }
  295.  
  296. Csv *csv = NULL;
  297. switch ( typid )
  298. {
  299. case CSVFLAT:
  300. return _make_flat_csv( fname );
  301.  
  302. case CSVLINED:
  303. return _make_lined_csv( fname );
  304. }
  305.  
  306. return csv;
  307. }
  308.  
  309. /*********************************************************//**
  310.  * Specialized Constructor of the FLAT model with foreach callback
  311.  *************************************************************
  312.  */
  313. Csv *make_flat_csv_foreach(
  314. const char *fname,
  315. int (*foreach)(CsvString *field, void *userdata),
  316. void *userdata
  317. )
  318. {
  319. Csv *csv = NULL;
  320. size_t f = 0; // fields counter
  321.  
  322. _PREPARE_FLAT_MODEL_OR_DIE( csv, fname, f );
  323. _POPULATE_FLAT_MODEL( csv, f, foreach, userdata );
  324.  
  325. return csv;
  326. }
  327.  
  328. /*********************************************************//**
  329.  * Specialized Constructor of the LINED model with foreach callback
  330.  *************************************************************
  331.  */
  332. Csv *make_lined_csv_foreach(
  333. const char *fname,
  334. int (*foreach)(CsvLine *csvline, void *userdata),
  335. void *userdata
  336. )
  337. {
  338. Csv *csv = NULL;
  339. size_t ln = 0; // lines counter
  340. int f = 0; // fields counter
  341.  
  342. _PREPARE_LINED_MODEL_OR_DIE( csv, fname, ln, f );
  343. _POPULATE_LINED_MODEL( csv, ln, f, foreach, userdata );
  344.  
  345. return csv;
  346. }
  347.  
  348. /*********************************************************//**
  349.  * Destructor
  350.  *************************************************************
  351.  */
  352. Csv *csv_free( Csv *csv )
  353. {
  354. if ( !csv ) {
  355. return NULL;
  356. }
  357.  
  358. // if ( csv->buf ) {
  359. free( csv->buf );
  360. // }
  361.  
  362. if ( CSVFLAT == csv->typid && csv->flat ) {
  363. free( csv->flat->data );
  364. free( csv->flat );
  365. //csv->flat->data = NULL;
  366. }
  367.  
  368. else if ( CSVLINED == csv->typid && csv->lined )
  369. {
  370. if ( csv->lined->data ) {
  371. size_t nlines = csv->lined->nlines;
  372. for (size_t ln=0; ln < nlines; ln++) {
  373. free( csv->lined->data[ln].fields );
  374. }
  375. free( csv->lined->data );
  376. }
  377. free( csv->lined );
  378. }
  379.  
  380. free( csv );
  381. return NULL;
  382. }
  383.  
  384. /*********************************************************//**
  385.  *
  386.  *************************************************************
  387.  */
  388. void csv_print( const Csv *csv )
  389. {
  390. if ( !csv ) {
  391. return;
  392. }
  393.  
  394. int typid = csv->typid;
  395.  
  396. if ( CSVFLAT == typid && csv->flat && csv->flat->data )
  397. {
  398. size_t nfields = csv->flat->nfields;
  399. printf( "[ %zd fields ]\n", nfields );
  400.  
  401. for (size_t i=0; i < nfields; i++) {
  402. "%s [len: %zd]\n",
  403. csv->flat->data[i].cstr,
  404. csv->flat->data[i].len
  405. );
  406. }
  407. }
  408. else if ( CSVLINED == typid && csv->lined && csv->lined->data )
  409. {
  410. size_t nlines = csv->lined->nlines;
  411. int nfields = csv->lined->nfields; // lines have same # of fields
  412. printf( "[ %zd lines ]\n", nlines );
  413.  
  414. for (size_t ln=0; ln < nlines; ln++)
  415. {
  416. printf( "--- line %zd (%d fields) ----\n", 1+ln, nfields );
  417. for (int f=0; f < nfields; f++) {
  418. "%s [len: %zd]\n",
  419. csv->lined->data[ln].fields[f].cstr,
  420. csv->lined->data[ln].fields[f].len
  421. );
  422. }
  423. }
  424. }
  425. else {
  426. puts( "*** error: invalid typid OR no data" );
  427. }
  428. }
  429.  
  430. /*********************************************************//**
  431.  * (FLAT model) Return the total count of parsed fields.
  432.  *************************************************************
  433.  */
  434. size_t csv_flat_nfields( Csv *csv )
  435. {
  436. return csv->flat->nfields;
  437. }
  438. /*********************************************************//**
  439.  * (FLAT model) Return a pointer to the field specified by idx.
  440.  *************************************************************
  441.  */
  442. CsvString *csv_flat_field( Csv *csv, size_t idx )
  443. {
  444. return &csv->flat->data[idx];
  445. }
  446.  
  447. /*********************************************************//**
  448.  * (LINED model) Return the total count of parsed lines.
  449.  *************************************************************
  450.  */
  451. size_t csv_lined_nlines( Csv *csv )
  452. {
  453. return csv->lined->nlines;
  454. }
  455.  
  456. /*********************************************************//**
  457.  * (LINED model) Return the count of fields per parsed line.
  458.  * NOTE: the LINED model assumes fixed number of fields per line!
  459.  *************************************************************
  460.  */
  461. int csv_lined_nfields( Csv *csv )
  462. {
  463. return csv->lined->nfields;
  464. }
  465.  
  466. /*********************************************************//**
  467.  * (LINED model) Given a Csv, return a pointer to the field
  468.  * specified by idx in the line specified by ln.
  469.  *************************************************************
  470.  */
  471. CsvString *csv_lined_field( Csv *csv, size_t ln, int idx )
  472. {
  473. return &csv->lined->data[ln].fields[idx];
  474. }
  475.  
  476. /*********************************************************//**
  477.  * (LINED model) Given a CsvLine, return a pointer to the
  478.  * field specified by idx.
  479.  * NOTE: This function is useful inside callback functions to
  480.  * be passed to the _foreach() constructor (see csv.h
  481.  * for details and sample code).
  482.  *************************************************************
  483.  */
  484. CsvString *csvline_field( CsvLine *csvline, int idx )
  485. {
  486. return &csvline->fields[idx];
  487. }
  488.  
  489. /*********************************************************//**
  490.  * Compare equality of a plain cstring against a csv field (CsvString).
  491.  * NOTE: See csv.h for a brief description of this function.
  492.  *************************************************************
  493.  */
  494. int csv_eql_cstr_field( size_t cstrlen, char *cstring, CsvString *field )
  495. {
  496. return cstrlen == field->len && !memcmp( cstring, field->cstr, cstrlen );
  497. }
  498.  
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
prog.c:20:17: fatal error: csv.h: No such file or directory
 #include "csv.h"
                 ^
compilation terminated.
stdout
Standard output is empty