fork download
  1. #include <algorithm>
  2. #include <cstring>
  3. #include <iostream>
  4. #include <iterator>
  5. #include <memory>
  6. #include <sstream>
  7. #include <string>
  8. #include <vector>
  9.  
  10. #include <boost/algorithm/string/classification.hpp>
  11. #include <boost/algorithm/string/split.hpp>
  12. #include <boost/tokenizer.hpp>
  13.  
  14. char* FindToken(
  15. char* str, const char* delim, char** saveptr)
  16. {
  17. #if (_SVID_SOURCE || _BSD_SOURCE || _POSIX_C_SOURCE >= 1 || _XOPEN_SOURCE || _POSIX_SOURCE)
  18. return ::strtok_r(str, delim, saveptr);
  19. #elif defined(_MSC_VER) && (_MSC_VER >= 1800)
  20. return strtok_s(token, delim, saveptr);
  21. #else
  22. return std::strtok(token, delim);
  23. #endif
  24. }
  25.  
  26. wchar_t* FindToken(
  27. wchar_t* token, const wchar_t* delim, wchar_t** saveptr)
  28. {
  29. #if ( (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) \
  30.   || (defined(__cplusplus) && (__cplusplus >= 201103L)) )
  31. return std::wcstok(token, delim, saveptr);
  32. #elif defined(_MSC_VER) && (_MSC_VER >= 1800)
  33. return wcstok_s(token, delim, saveptr);
  34. #else
  35. return std::wcstok(token, delim);
  36. #endif
  37. }
  38.  
  39. char* CopyString(char* destination, const char* source)
  40. {
  41. return std::strcpy(destination, source);
  42. }
  43.  
  44. wchar_t* CopyString(wchar_t* destination, const wchar_t* source)
  45. {
  46. return std::wcscpy(destination, source);
  47. }
  48.  
  49. template <class charType>
  50. size_t splitWithFindToken(
  51. const std::basic_string<charType>& str,
  52. const std::basic_string<charType>& delim,
  53. std::vector< std::basic_string<charType> >& tokens)
  54. {
  55. std::unique_ptr<charType[]> ptr = std::make_unique<charType[]>(str.length() + 1);
  56. memset(ptr.get(), 0, (str.length() + 1) * sizeof(charType));
  57. CopyString(ptr.get(), str.c_str());
  58. charType* saveptr;
  59. charType* token = FindToken(ptr.get(), delim.c_str(), &saveptr);
  60. while (token != nullptr)
  61. {
  62. tokens.push_back(token);
  63. token = FindToken(nullptr, delim.c_str(), &saveptr);
  64. }
  65. return tokens.size();
  66. }
  67.  
  68. template <class charType>
  69. size_t splitWithStringStream(
  70. const std::basic_string<charType>& str,
  71. std::vector< std::basic_string<charType> >& tokens)
  72. {
  73. typedef std::basic_string<charType> my_string;
  74. typedef std::vector< std::basic_string<charType> > my_vector;
  75. typedef std::basic_istringstream<
  76. charType, std::char_traits<charType> >
  77. my_istringstream;
  78. typedef std::istream_iterator<
  79. std::basic_string<charType>, charType,
  80. std::char_traits<charType> >
  81. my_istream_iterator;
  82. tokens.clear();
  83. if (str.empty())
  84. {
  85. return 0;
  86. }
  87. my_istringstream iss(str);
  88. std::copy(
  89. my_istream_iterator{iss}, my_istream_iterator(),
  90. std::back_inserter<my_vector>(tokens));
  91. return tokens.size();
  92. }
  93.  
  94. template <class charType>
  95. size_t splitWithStringStream1(
  96. const std::basic_string<charType>& str,
  97. std::vector< std::basic_string<charType> >& tokens)
  98. {
  99. typedef std::basic_string<charType> my_string;
  100. typedef std::vector< std::basic_string<charType> > my_vector;
  101. typedef std::basic_istringstream<
  102. charType, std::char_traits<charType> >
  103. my_istringstream;
  104. typedef std::istream_iterator<
  105. std::basic_string<charType>, charType,
  106. std::char_traits<charType> >
  107. my_istream_iterator;
  108. tokens.clear();
  109. if (str.empty())
  110. {
  111. return 0;
  112. }
  113. my_istringstream iss(str);
  114. std::vector<my_string> results(
  115. my_istream_iterator{iss}, my_istream_iterator());
  116. tokens.swap(results);
  117. return tokens.size();
  118. }
  119.  
  120. template<typename charType>
  121. size_t splitWithGetLine(
  122. const std::basic_string<charType>& str,
  123. const charType delim,
  124. std::vector< std::basic_string<charType> >& tokens)
  125. {
  126. typedef std::basic_string<charType> my_string;
  127. typedef std::basic_istringstream<
  128. charType, std::char_traits<charType> >
  129. my_istringstream;
  130. tokens.clear();
  131. if (str.empty())
  132. {
  133. return 0;
  134. }
  135. my_istringstream iss(str);
  136. my_string token;
  137. while (std::getline(iss, token, delim))
  138. {
  139. tokens.push_back(token);
  140. }
  141. return tokens.size();
  142. }
  143.  
  144. template<typename charType>
  145. size_t splitWithBasicString(
  146. const std::basic_string<charType>& str,
  147. const charType delim,
  148. std::vector< std::basic_string<charType> > &tokens,
  149. const bool trimEmpty = false,
  150. const size_t maxTokens = (size_t)(-1))
  151. {
  152. typedef std::basic_string<charType> my_string;
  153. typedef typename my_string::size_type my_size_type;
  154. tokens.clear();
  155. if (str.empty())
  156. {
  157. return 0;
  158. }
  159. my_size_type len = str.length();
  160. // Skip delimiters at beginning.
  161. my_size_type left = str.find_first_not_of(delim, 0);
  162. size_t i = 1;
  163. if (!trimEmpty && left != 0)
  164. {
  165. tokens.push_back(my_string());
  166. ++i;
  167. }
  168. while (i < maxTokens)
  169. {
  170. my_size_type right = str.find(delim, left);
  171. if (right == my_string::npos)
  172. {
  173. break;
  174. }
  175. if (!trimEmpty || right - left > 0)
  176. {
  177. tokens.push_back(str.substr(left, right - left));
  178. ++i;
  179. }
  180. left = right + 1;
  181. }
  182. if (left < len)
  183. {
  184. tokens.push_back(str.substr(left));
  185. }
  186. return tokens.size();
  187. }
  188.  
  189. template<typename charType>
  190. size_t splitWithBasicString(
  191. const std::basic_string<charType>& str,
  192. const std::basic_string<charType>& delim,
  193. std::vector< std::basic_string<charType> >& tokens,
  194. const bool trimEmpty = false,
  195. const size_t maxTokens = (size_t)(-1))
  196. {
  197. typedef std::basic_string<charType> my_string;
  198. typedef typename my_string::size_type my_size_type;
  199. tokens.clear();
  200. if (str.empty())
  201. {
  202. return 0;
  203. }
  204. my_size_type len = str.length();
  205. // Skip delimiters at beginning.
  206. my_size_type left = str.find_first_not_of(delim, 0);
  207. size_t i = 1;
  208. if (!trimEmpty && left != 0)
  209. {
  210. tokens.push_back(my_string());
  211. ++i;
  212. }
  213. while (i < maxTokens)
  214. {
  215. my_size_type right = str.find_first_of(delim, left);
  216. if (right == my_string::npos)
  217. {
  218. break;
  219. }
  220. if (!trimEmpty || right - left > 0)
  221. {
  222. tokens.push_back(str.substr(left, right - left));
  223. ++i;
  224. }
  225. left = right + 1;
  226. }
  227. if (left < len)
  228. {
  229. tokens.push_back(str.substr(left));
  230. }
  231. return tokens.size();
  232. }
  233.  
  234. int main()
  235. {
  236. std::wstring str(L"The quick brown fox jumped over the lazy dog.");
  237. std::wstring str1(L"This is a test.||This is only a test.|This concludes this test.");
  238. std::wstring str2(L"This is a test.|,This is only a test.\tThis concludes this test.");
  239. std::vector<std::wstring> tokens;
  240. size_t s = splitWithStringStream(str, tokens);
  241. std::wcout << L"s: " << s << std::endl;
  242. if (s)
  243. {
  244. std::wcout
  245. << L"tokens.at(0): " << tokens.at(0)
  246. << std::endl;
  247. }
  248. std::vector<std::wstring> tokens1;
  249. size_t s1 = splitWithStringStream1(str, tokens1);
  250. std::wcout << L"s1: " << s1 << std::endl;
  251. if (s1)
  252. {
  253. std::wcout
  254. << L"tokens1.at(0): " << tokens1.at(0)
  255. << std::endl;
  256. }
  257. std::vector<std::wstring> tokens2;
  258. size_t s2 = splitWithGetLine(str1, L'|', tokens2);
  259. std::wcout << L"s2: " << s2 << std::endl;
  260. if (s2)
  261. {
  262. std::wcout
  263. << L"tokens2.at(0): " << tokens2.at(0)
  264. << std::endl;
  265. }
  266. std::vector<std::wstring> tokens3;
  267. size_t s3 = splitWithBasicString(str1, L'|', tokens3, true);
  268. std::wcout << L"s3: " << s3 << std::endl;
  269. if (s3)
  270. {
  271. std::wcout
  272. << L"tokens3.at(0): " << tokens3.at(0)
  273. << std::endl;
  274. }
  275. std::vector<std::wstring> tokens4;
  276. size_t s4 = splitWithBasicString(
  277. str2, std::wstring(L"|,\t"), tokens4, true);
  278. std::wcout << L"s4: " << s4 << std::endl;
  279. if (s4)
  280. {
  281. std::wcout
  282. << L"tokens4.at(0): " << tokens4.at(0)
  283. << std::endl;
  284. }
  285. std::vector<std::wstring> tokens5;
  286. boost::split(tokens5, str2, boost::is_any_of(L"|,\t"));
  287. size_t s5 = tokens5.size();
  288. std::wcout << L"s5: " << s5 << std::endl;
  289. if (s5)
  290. {
  291. std::wcout
  292. << L"tokens5.at(0): " << tokens5.at(0)
  293. << std::endl;
  294. }
  295. std::vector<std::wstring> tokens6;
  296. size_t s6 = splitWithFindToken(
  297. str2, std::wstring(L"|,\t"), tokens6);
  298. std::wcout << L"s6: " << s6 << std::endl;
  299. if (s6)
  300. {
  301. std::wcout
  302. << L"tokens6.at(0): " << tokens6.at(0)
  303. << std::endl;
  304. }
  305. typedef boost::char_separator<wchar_t> my_separator;
  306. typedef boost::tokenizer<
  307. my_separator, std::wstring::const_iterator,
  308. std::wstring > my_tokenizer;
  309. my_separator sep(L"|,\t");
  310. my_tokenizer tokens7(str2, sep);
  311. my_tokenizer::iterator itEnd = tokens7.end();
  312. for (my_tokenizer::iterator it = tokens7.begin(); it != itEnd; ++it)
  313. {
  314. std::wcout << *it << std::endl;
  315. }
  316. return 0;
  317. }
  318.  
Success #stdin #stdout 0s 15272KB
stdin
Standard input is empty
stdout
s: 9
tokens.at(0): The
s1: 9
tokens1.at(0): The
s2: 4
tokens2.at(0): This is a test.
s3: 3
tokens3.at(0): This is a test.
s4: 3
tokens4.at(0): This is a test.
s5: 4
tokens5.at(0): This is a test.
s6: 3
tokens6.at(0): This is a test.
This is a test.
This is only a test.
This concludes this test.