fork(14) download
  1. /* Tiny CSV Reader */
  2. /* Copyright (C) 2015, Deligiannidis Konstantinos
  3.  
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8.  
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13.  
  14. You should have received a copy of the GNU General Public License
  15. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  16.  
  17.  
  18. #include <stdio.h>
  19. #include <string.h>
  20. #include <stdlib.h>
  21.  
  22.  
  /*
   * getcols - split `line` into tokens separated by the string `delim`.
   *
   * A single heap block is allocated and stored in *out_storage. It holds,
   * back to back:
   *   [ ptr0, ptr1, ..., ptrN-1, NULL ][ private copy of `line` ]
   * Each pointer refers into the private copy, where the first character of
   * every delimiter occurrence has been overwritten with '\0'. The input
   * `line` itself is never modified. There is no fixed limit on the number
   * of tokens.
   *
   * Parameters:
   *   line        - NUL-terminated text to split (not modified).
   *   delim       - NUL-terminated, non-empty delimiter string.
   *   out_storage - address of a char** that must currently be NULL; on
   *                 success it receives the allocated block, which the caller
   *                 releases with one free() call.
   *
   * Returns the number of tokens found (always >= 1; an empty line yields a
   * single empty token), or a negative error code:
   *   -1  line, delim or out_storage is NULL
   *   -2  delim is the empty string
   *   -3  memory allocation failed (or the required size would overflow)
   *   -4  *out_storage is not NULL on entry (looks already allocated)
   *   -5  more tokens than an int return value can represent
   * On any error *out_storage is left untouched.
   */
  int getcols( const char * const line, const char * const delim, char ***out_storage )
  {
      // Largest token count the int return value can carry
      // (INT_MAX on usual platforms, computed without <limits.h>).
      const size_t max_tokens = (size_t)( (unsigned int)-1 >> 1 );

      if ( !out_storage ) return -1;            // Nowhere to store the result
      if ( *out_storage != NULL ) return -4;    // This SHOULD be NULL: not already allocated
      if ( !line || !delim ) return -1;         // NULL inputs rejected here

      const size_t delim_len = strlen( delim );
      if ( delim_len == 0 ) return -2;          // Delimiter not provided

      // Pass 1: count the tokens and locate the end of the line, so that the
      // exact amount of memory can be reserved with one allocation.
      size_t tokens = 1;
      const char *cursor = line;
      const char *hit;
      while ( ( hit = strstr( cursor, delim ) ) != NULL ) {
          if ( tokens >= max_tokens ) return -5;
          tokens++;
          cursor = hit + delim_len;             // Continue right after the delimiter
      }
      const size_t line_len = (size_t)( cursor - line ) + strlen( cursor );

      // Guard the size computation: (tokens + 1) pointers + text + '\0'.
      if ( tokens + 1 > ( (size_t)-1 - line_len - 1 ) / sizeof( char * ) ) return -3;
      const size_t ptr_bytes = ( tokens + 1 ) * sizeof( char * );

      char **out = malloc( ptr_bytes + line_len + 1 );
      if ( !out ) return -3;

      // The text region starts immediately after the pointer table.
      char *text = (char *) out + ptr_bytes;
      memcpy( text, line, line_len + 1 );       // Includes the terminating '\0'

      // Pass 2: split the private copy in place. Only bytes before the next
      // search position are changed, so strstr() finds exactly the same
      // delimiter occurrences as in pass 1.
      size_t idx = 0;
      char *token = text;
      char *sep;
      while ( ( sep = strstr( token, delim ) ) != NULL ) {
          out[ idx++ ] = token;
          *sep = '\0';                          // "a||b" becomes "a\0|b"
          token = sep + delim_len;
      }
      out[ idx++ ] = token;                     // Last token runs to the end of the line
      out[ idx ] = NULL;                        // NULL-terminate the pointer table

      *out_storage = out;                       // Publish only on success
      return (int) tokens;
  }
  87.  
  /*
   * Demo driver for getcols().
   *
   * Example 1 splits one line on the two-character delimiter ";;".
   * Example 2 splits a small in-memory CSV text into rows on "\n" and then
   * each row into columns on ";".
   *
   * Every getcols() result is checked before the returned table is used; on
   * failure a message goes to stderr, any table still held is freed, and the
   * program exits with EXIT_FAILURE. Returns 0 on success.
   */
  int main(void)
  {
      char in_line[] = "Arg1;;Th;s is not Del;m;ter;;Arg3;;;;Final";
      char delim[] = ";;";
      char **columns = NULL; // Must be NULL before the call, otherwise getcols() returns -4
      int i;

      printf("Example1:\n");

      int cols_found = getcols( in_line, delim, &columns );
      if ( cols_found < 0 ) {
          fprintf( stderr, "getcols failed with code %d\n", cols_found );
          return EXIT_FAILURE;
      }
      // (1st way) iterate using the returned count.
      for ( i = 0; i < cols_found; i++ ) {
          printf("Column[ %d ] = %s\n", i, columns[ i ] );
      }
      // (2nd way) would be to iterate until the NULL sentinel: for ( i = 0; columns[ i ]; i++ )

      free( columns );  // Pointers and text live in one block: a single free() releases both
      columns = NULL;   // Ready for reuse with another getcols() call

      printf("\n\nExample2, Nested:\n\n");

      char example_file[] = "ID;Day;Month;Year;Telephone;email;Date of registration\n"
                            "1;Sunday;january;2009;123-124-456;jitter@go.xyz;2015-05-13\n"
                            "2;Monday;March;2011;(+30)333-22-55;buffer@wl.it;2009-05-23";

      char **rows = NULL; // getcols() requires it to be NULL
      int j;

      int rows_found = getcols( example_file, "\n", &rows );
      if ( rows_found < 0 ) {
          fprintf( stderr, "getcols failed with code %d\n", rows_found );
          return EXIT_FAILURE;
      }

      for ( i = 0; rows[ i ]; i++ ) {
          printf("Line[ %d ] = %s\n", i, rows[ i ] );

          char **columnX = NULL;
          int fields_found = getcols( rows[ i ], ";", &columnX );
          if ( fields_found < 0 ) {
              fprintf( stderr, "getcols failed with code %d\n", fields_found );
              free( rows ); // Do not leak the row table on the error path
              return EXIT_FAILURE;
          }
          for ( j = 0; columnX[ j ]; j++ ) {
              printf(" Col[ %d ] = %s\n", j, columnX[ j ] );
          }
          free( columnX );
      }

      free( rows );
      rows = NULL;

      return 0;
  }
Success #stdin #stdout 0s 2184KB
stdin
Standard input is empty
stdout
Example1:
Column[ 0 ] = Arg1
Column[ 1 ] = Th;s is not Del;m;ter
Column[ 2 ] = Arg3
Column[ 3 ] = 
Column[ 4 ] = Final


Example2, Nested:

Line[ 0 ] = ID;Day;Month;Year;Telephone;email;Date of registration
  Col[ 0 ] = ID
  Col[ 1 ] = Day
  Col[ 2 ] = Month
  Col[ 3 ] = Year
  Col[ 4 ] = Telephone
  Col[ 5 ] = email
  Col[ 6 ] = Date of registration
Line[ 1 ] = 1;Sunday;january;2009;123-124-456;jitter@go.xyz;2015-05-13
  Col[ 0 ] = 1
  Col[ 1 ] = Sunday
  Col[ 2 ] = january
  Col[ 3 ] = 2009
  Col[ 4 ] = 123-124-456
  Col[ 5 ] = jitter@go.xyz
  Col[ 6 ] = 2015-05-13
Line[ 2 ] = 2;Monday;March;2011;(+30)333-22-55;buffer@wl.it;2009-05-23
  Col[ 0 ] = 2
  Col[ 1 ] = Monday
  Col[ 2 ] = March
  Col[ 3 ] = 2011
  Col[ 4 ] = (+30)333-22-55
  Col[ 5 ] = buffer@wl.it
  Col[ 6 ] = 2009-05-23