fork download
  1. #include <iostream>
  2. #include <fstream>
  3. #include <vector>
  4. #include <iomanip>
  5. #include <cstring> // memset()
  6.  
  7. extern "C"{
  8. #include "f_size/f_size.h" // x-platform routine for getting a filesize
  9. }
  10.  
  11. using namespace std;
  12.  
  13. #define PARSE_TWICE 1 // 0 to enable totally dynamic insertions
  14. #define STEPPED_TIMING 1 // 0 to disable individual timing for passes
  15. // (only available when PARSE_TWICE )
  16.  
  17. const char* FNAME = (const char*) "test1.csv";
  18.  
  19. typedef struct
  20. {
  21. // mostly used for loading a csv file in mem
  22. char *buf;
  23. const char *fname;
  24. int64_t fsize;
  25. int64_t n;
  26.  
  27. // mostly used for parsing loaded data
  28. vector< vector<string> > lines;
  29. size_t nlines;
  30. } CallbackData;
  31.  
  32. /*********************************************************//**
  33.  * Return the size of a file in bytes, or -1 on error (it only
  34.  * works for files with size <= LONG_MAX).
  35.  * N O T E : Use this function if the more sophisticated f_size()
  36.  * does not work with your compiler (see: #include "f_size.h").
  37.  *************************************************************
  38.  */
  39. std::streampos file_size( const char* fname )
  40. {
  41. std::streampos fsize = -1;
  42. std::ifstream f( fname, std::ios::binary | std::ios::ate ); // binary-mode
  43. if ( f.is_open() ) {
  44. fsize = f.tellg();
  45. f.close();
  46. }
  47.  
  48. return fsize;
  49. }
  50.  
  51. /*********************************************************//**
  52.  * Return the wall-clock time spent for executing the callback
  53.  * function, in secs. On error, return -1.0
  54.  *************************************************************
  55.  */
  56. #include <sys/time.h>
  57. double time_it( void *userdata, int (*callback)(void *userdata) )
  58. {
  59. double tstart, tend;
  60. struct timeval tv;
  61.  
  62. gettimeofday( &tv, NULL );
  63. tstart = tv.tv_sec + tv.tv_usec / 1000000.0;
  64.  
  65. /* Code we want timed here */
  66. if ( 0 == (*callback)(userdata) ) {
  67. return -1.0;
  68. }
  69.  
  70. gettimeofday( &tv, NULL );
  71. tend = tv.tv_sec + tv.tv_usec / 1000000.0;
  72.  
  73. return tend - tstart;
  74. }
  75.  
  76. /*********************************************************//**
  77.  * Return the cpu-time spent for executing the callback function,
  78.  * in secs. On error, return -1.0
  79.  * N O T E : Depending on the implementation, on Windows it MOST
  80.  * PROBABLY returns the wall-clock time instead of the cpu-time.
  81.  *************************************************************
  82.  */
  83. #include <ctime>
  84. double clock_it( void *userdata, int (*callback)(void *userdata) )
  85. {
  86. clock_t tstart = clock();
  87.  
  88. /* Code we want timed here */
  89. if ( 0 == (*callback)(userdata) ) {
  90. return -1.0;
  91. }
  92.  
  93. return ((double) (clock() - tstart)) / CLOCKS_PER_SEC;
  94. }
  95.  
  96. /*********************************************************//**
  97.  * x-platform alternative to Windows system( "pause" );
  98.  *************************************************************
  99.  */
  100. void press_enter( void )
  101. {
  102. int c;
  103.  
  104. cout << "press ENTER... ";
  105. cout.flush();
  106. cin.clear();
  107. while ( '\n' != (c=cin.get()) && EOF != c )
  108. ;
  109. }
  110.  
  111. /*********************************************************//**
  112.  * Timing callback function for reading a file into a buffer.
  113.  * All required variables are passed and/or get modified via
  114.  * the data pointer.
  115.  *************************************************************
  116.  */
  117. int cb_read_file_to_buf( void *data )
  118. {
  119. CallbackData *d = (CallbackData *) data;
  120. ifstream f;
  121.  
  122. // get filesize
  123. d->fsize = f_size( d->fname );
  124. if ( d->fsize < 1 ) {
  125. return 0;
  126. }
  127.  
  128. f.open( d->fname ); // text-mode
  129. if ( !f.is_open() ) {
  130. return 0;
  131. }
  132.  
  133. d->buf = new char[ 1 + d->fsize ];
  134. f.read( d->buf, d->fsize );
  135. d->n = f.gcount(); // actual bytes read
  136. f.close();
  137. d->buf[d->n] = d->buf[d->fsize] = '\0'; // NUL terminte buf
  138.  
  139. return 1;
  140. }
  141.  
  142. /*********************************************************//**
  143.  * Timing callback function for parsing the csv buf once, in
  144.  * order to convert it into a vector of lines, where each line
  145.  * is a vector of fields (strings).
  146.  * All required variables are passed and/or get modified via
  147.  * the data pointer.
  148.  * N O T E : This function is used when !PARSED_TWICE
  149.  *************************************************************
  150.  */
  151. int cb_parse_csvbuf_once( void *data )
  152. {
  153. CallbackData *d = (CallbackData *) data;
  154.  
  155. // if ( !d->buf ) {
  156. // return 0;
  157. // }
  158.  
  159. size_t l = 0;
  160. char* cp = d->buf;
  161. char* pre = d->buf;
  162. vector<string> field;
  163.  
  164. while ( *cp )
  165. {
  166. if ( ',' == *cp ) {
  167. *cp = '\0';
  168. field.push_back( pre );
  169. pre = cp + 1;
  170. }
  171. else if ( '\n' == *cp ) {
  172. *cp = '\0';
  173. field.push_back( pre );
  174. d->lines.push_back( field );
  175. field.clear();
  176. pre = cp + 1;
  177. l++;
  178. }
  179. cp++;
  180. }
  181.  
  182. d->nlines = l;
  183. return 1;
  184. }
  185.  
  186. /*********************************************************//**
  187.  * Timing callback function for counting lines in the csv buf.
  188.  * All required variables are passed and/or get modified via
  189.  * the data pointer.
  190.  * N O T E : This function is used when PARSED_TWICE && STEPPED_TIMING
  191.  *************************************************************
  192.  */
  193. static inline int cb_parse_csvbuf_pass1( void *data )
  194. {
  195. CallbackData *d = (CallbackData *) data;
  196. // if ( !d->buf ) {
  197. // return 0;
  198. // }
  199.  
  200. d->nlines = 0;
  201. for (char* cp = d->buf; *cp; cp++) {
  202. if ( '\n' == *cp )
  203. (d->nlines)++;
  204. }
  205.  
  206. return 1;
  207. }
  208.  
  209. /*********************************************************//**
  210.  * Timing callback function for converting the csv buf into a
  211.  * vector of lines, where each line is a vector of fields (strings).
  212.  * Contrary to the function cb_parse_csvbuf_once() which allocates
  213.  * all vectors progressively, this one assumes that the function
  214.  * cb_parse_csvbuf_pass1() has been already called (and thus has
  215.  * already set data->nlines), in order to pre-allocate the line
  216.  * vectors.
  217.  * All required variables are passed and/or get modified via
  218.  * the data pointer.
  219.  * N O T E : This function is used when PARSED_TWICE && STEPPED_TIMING
  220.  *************************************************************
  221.  */
  222. static inline int cb_parse_csvbuf_pass2( void *data )
  223. {
  224. CallbackData *d = (CallbackData *) data;
  225.  
  226. // if ( !d->buf || d->nlines < 1 ) {
  227. // return 0;
  228. // }
  229.  
  230. d->lines.resize( d->nlines ); // pre-allocate the line vectors
  231.  
  232. size_t l = 0; // lines counter
  233. char* cp = d->buf; // bytes counter
  234. char* pre = d->buf; // start of current cstring in buf
  235.  
  236. while ( *cp )
  237. {
  238. if ( ',' == *cp ) {
  239. *cp = '\0';
  240. d->lines[l].push_back( pre );
  241. pre = cp + 1;
  242. }
  243. else if ( '\n' == *cp ) {
  244. *cp = '\0';
  245. d->lines[l].push_back( pre );
  246. pre = cp + 1;
  247. l++;
  248. }
  249. cp++;
  250. }
  251.  
  252. return 1;
  253. }
  254.  
  255. /*********************************************************//**
  256.  * Timing callback function for counting lines in the csv buf
  257.  * and then converting the csv buf into a vector of lines,
  258.  * where each line is a vector of fields (strings).
  259.  * All required variables are passed and/or get modified via
  260.  * the data pointer.
  261.  * N O T E : This function is used when PARSED_TWICE && !STEPPED_TIMING
  262.  *************************************************************
  263.  */
  264. int cb_parse_csvbuf_twice( void *data )
  265. {
  266. return cb_parse_csvbuf_pass1(data) && cb_parse_csvbuf_pass2(data);
  267. }
  268.  
  269. /*********************************************************//**
  270.  * Print the given line vectors
  271.  *************************************************************
  272.  */
  273. void lines_print( const vector< vector<string> >& lines )
  274. {
  275. cout << "lines: " << lines.size() << endl;
  276. for (size_t l=0; l < lines.size(); l++) {
  277. cout << "--- line: " << l+1 << " (" << lines[l].size() << " fields) ----\n";
  278. for (size_t f=0; f < lines[l].size(); f++) {
  279. cout << lines[l][f] << endl;
  280. }
  281. //cout << endl;
  282. }
  283. /*
  284. // or in C++11
  285.  
  286. size_t ln=0;
  287. cout << "lines: " << lines.size() << endl;
  288. for ( auto& l: lines ) {
  289. cout << "--- line: " << ++ln << " (" << l.size() << " fields) ----\n";
  290. for ( auto& f: l ) {
  291. cout << f << endl;
  292. }
  293. }
  294. */
  295. }
  296.  
  297. /*********************************************************//**
  298.  *
  299.  *************************************************************
  300.  */
  301. int main()
  302. {
  303. CallbackData cd;
  304. memset( &cd, 0, sizeof(cd) );
  305. cd.fname = FNAME;
  306.  
  307. double elapsed0 = 0.0;
  308. double elapsed1 = 0.0;
  309. double elapsed2 = 0.0;
  310.  
  311. cout.precision(5);
  312. cout << fixed;
  313.  
  314. /* Read csv file into memory buffer. */
  315.  
  316. cout << "*** " << cd.fname << " ***\n\n";
  317.  
  318. cout << "Loading... ";
  319. cout.flush();
  320.  
  321. elapsed0 = time_it( (void *)&cd, cb_read_file_to_buf );
  322. if ( -1.0 == elapsed0 ) {
  323. goto exit_failure;
  324. }
  325. cout << elapsed0 << " secs\n";
  326. cout << "(" << cd.n << " text-bytes | " << cd.fsize << " binary-bytes)\n\n";
  327.  
  328. /* Parse buf into lines & fields. */
  329.  
  330. cout << "Parsing... " << endl;;
  331. {
  332. #if PARSE_TWICE
  333. #if STEPPED_TIMING
  334. cout << "pass-1... ";
  335. cout.flush();
  336. elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf_pass1 );
  337. if ( -1.0 == elapsed1 ) {
  338. goto exit_failure;
  339. }
  340. cout << elapsed1 << " secs\n";
  341.  
  342. cout << "pass-2... ";
  343. cout.flush();
  344. elapsed2 = time_it( (void *)&cd, cb_parse_csvbuf_pass2 );
  345. if ( -1.0 == elapsed2 ) {
  346. goto exit_failure;
  347. }
  348. cout << elapsed2 << " secs" << endl;
  349. cout << "Parsed in: " << elapsed1 + elapsed2 << " secs (" << cd.nlines << " lines)\n\n";
  350. #else
  351. cout << "2 passes...";
  352. cout.flush();
  353. elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf_twice );
  354. if ( -1.0 == elapsed1 ) {
  355. goto exit_failure;
  356. }
  357. cout << elapsed1 << " secs (" << cd.nlines << " lines)\n\n";
  358. #endif
  359. #else
  360. cout << "1 pass... ";
  361. elapsed1 = time_it( (void *)&cd, cb_parse_csvbuf_once );
  362. if ( -1.0 == elapsed1 ) {
  363. goto exit_failure;
  364. }
  365. cout << elapsed1 << " secs (" << cd.nlines << " lines)\n\n";
  366. #endif
  367. }
  368. cout << "Total: " << elapsed0 + elapsed1 + elapsed2 << " secs (loading + parsing)\n\n";
  369.  
  370. //lines_print( cd.lines );
  371.  
  372. /* Cleanup and exit. */
  373.  
  374. delete[] cd.buf;
  375.  
  376. press_enter();
  377. cout << "cleaning up, please wait..." << endl;
  378.  
  379. return 0;
  380.  
  381. exit_failure:
  382. cerr << "*** error ***" << endl;
  383. cout << "(cleaning up, please wait...)" << endl;
  384. if ( cd.buf ) {
  385. delete[] cd.buf;
  386. }
  387.  
  388. press_enter();
  389.  
  390. return 1;
  391. }
  392.  
  393.  
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
prog.cpp:8:72: fatal error: f_size/f_size.h: No such file or directory
 #include "f_size/f_size.h" // x-platform routine for getting a filesize
                                                                        ^
compilation terminated.
stdout
Standard output is empty