fork download
  1. /**********************************************************
  2.  * CSV.C
  3.  * A simple csv-parser.
  4.  *
  5.  * It was written due to a post at a popular Greek forum:
  6.  * http://w...content-available-to-author-only...a.gr/topic/530662-coding-challenge/
  7.  *
  8.  * The parser supports two parsing-models:
  9.  * a) FLAT : Tokens are stored in memory as consecutive fields
  10.  * b) LINED: Tokens are stored in memory as lines of fields
  11.  *
  12.  * Limitations:
  13.  *
  14.  * a) Only single-byte strings (i.e. no Unicode support)
  15.  * b) Only commas and newlines are recognised as delimiters
  16.  * c) No handling of quoted tokens (they are stored unchanged)
  17.  * d) No handling of blanks among tokens (they are embedded in tokens)
  18.  * e) In LINED model, all lines MUST have the same number of tokens
  19.  * f) CSV-files larger than the available memory are not handled.
  20.  *
  21.  * Implementation Notes:
  22.  *
  23.  * 1. Most (if not all) utility functions perform NO sanity-checks
  24.  * on their arguments. Constructors, however, return NULL on error.
  25.  *
  26.  * 2. CSV files are read at once into a memory buffer, via fread(),
  27.  * and then the buffer is parsed in memory. This is usually faster
  28.  * than reading & parsing blocks of the file, unless the write-cache
  29.  * of the OS gets full. Also, files larger than the available
  30.  * memory will not be loaded.
  31.  *
  32.  * 3. The Csv constructors build their parsing-model mostly by setting
  33.  * pointers on the loaded buffer. Thus, the parsing does not involve
  34.  * memory copying. It only involves minimal memory allocation for
  35.  * the internal structure of the parsing-model, along with at least
  36.  * one traversal of the buffered data.
  37.  *
  38.  * 4. The LINED parsing-model represents more accurately the logical
  39.  * structure of the csv-file, but the FLAT parsing-model takes
  40.  * less time to get initialized and populated.
  41.  *
  42.  * 5. For both models, a specialized _foreach() constructor is provided
  43.  * accepting a callback function and userdata as the last parameters.
  44.  * During the construction of the parsing-model, this function is
  45.  * called upon each parsing-unit (CsvString units for the FLAT model,
  46.  * CsvLine units for the LINED model).
  47.  *
  48.  * For complex calculations on all the data, this MAY decrease the
  49.  * overall time needed. However, for simple calculations it MAY
  50.  * increase the overall time (due to the overhead involved for the
  51.  * function calls and the extra pointers dereferencing).
  52.  *
  53.  * So, DO NOT use the _foreach() constructors blindly!
  54.  *
  55.  * For the LINED model, you can use the function csvline_field()
  56.  * inside your callback function, in order to get a pointer to
  57.  * any field of the current line. For example, to print the cstring
  58.  * and the length of the 4th field of EACH line during construction,
  59.  * you can use a callback function like the following:
  60.  *
  61.  * int callback_lined_foreach_print_4th_token(CsvLine *csvline, void *dummy)
  62.  * {
  63.  * CsvString *field = csvline_field(csvline, 3); // 0-based index
  64.  * printf( "%s (len: %zu)\n", field->cstr, field->len );
  65.  * return 1;
  66.  * }
  67.  *
  68.  * Now, your main() function may be something like the following:
  69.  *
  70.  * int main( void )
  71.  * {
  72.  * Csv *csv = make_lined_csv_foreach(
  73.  * "test.csv",
  74.  * callback_lined_foreach_print_4th_token,
  75.  * NULL
  76.  * );
  77.  * if ( !csv ) {
  78.  * return 1;
  79.  * }
  80.  * csv_free( csv );
  81.  * return 0;
  82.  * }
  83.  **********************************************************
  84.  */
  85.  
  86. #ifndef CSV_H
  87. #define CSV_H
  88.  
  89. #include <stddef.h>
  90.  
  91. /* ---------------------------
  92.  * Constants & Macros
  93.  * ---------------------------
  94.  */
  95.  
  96. /*
  97.  * Supported Parsing Models
  98.  */
  99. enum {
  100. CSVNONE = 0, /* NOTE(review): presumably "no parsing model selected" -- confirm against CSV.C */
  101. CSVFLAT,     /* FLAT model: tokens are stored in memory as consecutive fields */
  102. CSVLINED     /* LINED model: tokens are stored in memory as lines of fields */
  103. };
  104.  
  105. /* ---------------------------
  106.  * Data Types
  107.  * ---------------------------
  108.  */
  109.  
  110. /*
  111.  * The dominant Csv data-type is hidden (can only be manipulated via funcs)
  112.  */
  113. typedef struct Csv Csv; /* incomplete type here: the struct body is not defined in this header */
  114.  
  115. /* CsvString is exposed so clients can access directly the len and cstr
  116.  * fields of pointers returned by csv_flat_field() & csv_lined_field().
  117.  * Clients may also pass CsvString pointers to the csv_eql_cstr_field() function.
  118.  */
  119. typedef struct CsvString CsvString;
  120. struct CsvString {
  121. size_t len; /* length of the field; NOTE(review): presumably excludes the trailing NUL byte -- confirm against CSV.C */
  122. char *cstr; /* the field's text as a C string; NOTE(review): per the file header it presumably points into the loaded file buffer, not a private copy -- confirm */
  123. };
  124.  
  125. typedef struct _CsvLine CsvLine; /* incomplete type in this header; its fields are reached via csvline_field(). NOTE(review): the tag _CsvLine (underscore + uppercase letter) is an identifier reserved for the implementation by the C standard; renaming it also requires changing its definition, which is not in this header */
  126.  
  127. /* ---------------------------
  128.  * Function Prototypes
  129.  * ---------------------------
  130.  */
  131.  
  132. #ifndef CSV_C
  133. /*
  134.  * constructors & destructors
  135.  */
  136. extern Csv *make_csv( const char *fname, int typid ); /* per the file header, constructors return NULL on error; NOTE(review): typid is presumably CSVFLAT or CSVLINED -- confirm */
  137. extern Csv *make_flat_csv_foreach( /* FLAT-model constructor taking a per-field callback */
  138. const char *fname,
  139. int (*foreach)(CsvString *field, void *userdata), /* per the file header, called upon each CsvString unit during construction; the meaning of its int result is not visible in this header */
  140. void *userdata /* NOTE(review): presumably handed to the callback as its userdata argument -- confirm */
  141. );
  142. extern Csv *make_lined_csv_foreach( /* LINED-model constructor taking a per-line callback */
  143. const char *fname,
  144. int (*foreach)(CsvLine *csvline, void *userdata), /* per the file header, called upon each CsvLine unit during construction; the meaning of its int result is not visible in this header */
  145. void *userdata /* NOTE(review): presumably handed to the callback as its userdata argument -- confirm */
  146. );
  147.  
  148. extern Csv *csv_free( Csv *csv ); /* NOTE(review): the returned Csv* is presumably NULL, so callers can reset their pointer in one statement -- confirm */
  149.  
  150. /*
  151.  * for data parsed in FLAT model
  152.  */
  153. extern size_t csv_flat_nfields( Csv *csv ); /* NOTE(review): presumably the total number of fields parsed in the FLAT model -- confirm */
  154. extern CsvString *csv_flat_field( Csv *csv, size_t idx ); /* NOTE(review): idx is presumably 0-based and, per the file header's remark on utility functions, likely not range-checked -- confirm */
  155.  
  156. /*
  157.  * for data parsed in LINED model
  158.  */
  159. extern size_t csv_lined_nlines( Csv *csv ); /* NOTE(review): presumably the number of lines parsed in the LINED model -- confirm */
  160. extern int csv_lined_nfields( Csv *csv ); /* NOTE(review): presumably the per-line field count (the file header says all lines must have the same number of tokens) -- confirm */
  161. extern CsvString *csv_lined_field( Csv *csv, size_t ln, int idx ); /* NOTE(review): ln and idx are presumably 0-based line and field indices -- confirm */
  162. extern CsvString *csvline_field( CsvLine *csvline, int idx ); /* idx is a 0-based field index (see the callback example in the file header) */
  163.  
  164. /*
  165.  * misc functions
  166.  */
  167. extern void csv_print( const Csv *csv ); /* NOTE(review): output destination and format are not visible in this header -- see CSV.C */
  168.  
  169. /**
  170.  @brief Check byte equality between a plain cstring and a CsvString->cstr.
  171.  @param cstrlen: The length of the plain cstring, w/o the trailing NUL byte.
  172.  @param cstring: The plain cstring to be compared.
  173.  @param *field: A pointer to an already csv parsed field, whose .cstr
  174.   member is to be checked against cstring.
  175.  @note Depending on the nature of the parsed data, this function may be
  176.   faster than strcmp/memcmp because it calls memcmp only if cstrlen
  177.   equals field->len (fields of a different length cannot be equal).
  178.  */
  179. extern int csv_eql_cstr_field( size_t cstrlen, char *cstring, CsvString *field); /* NOTE(review): presumably returns non-zero on equality; the note above originally said "differs", which looks inverted -- confirm both against CSV.C */
  180. #endif
  181.  
  182. #endif
  183.  
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
/usr/lib/gcc/i486-linux-gnu/4.8/../../../i386-linux-gnu/crt1.o: In function `_start':
(.text+0x18): undefined reference to `main'
collect2: error: ld returned 1 exit status
stdout
Standard output is empty