#include <algorithm>
#include <cstring>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/tokenizer.hpp>
// Extracts the next token from a narrow, NUL-terminated, mutable buffer.
//
// str     - buffer to tokenize on the first call; nullptr on later calls to
//           continue with the buffer from the previous call. The buffer is
//           modified: delimiters that end a token are overwritten with NUL.
// delim   - NUL-terminated set of delimiter characters.
// saveptr - caller-owned scan position used by the reentrant variants.
//
// Returns a pointer to the next token inside the buffer, or nullptr when no
// tokens remain.
char* FindToken(
    char* str, const char* delim, char** saveptr)
{
#if (_SVID_SOURCE || _BSD_SOURCE || _POSIX_C_SOURCE >= 1 || _XOPEN_SOURCE || _POSIX_SOURCE)
    return ::strtok_r(str, delim, saveptr);
#elif defined(_MSC_VER) && (_MSC_VER >= 1800)
    return strtok_s(str, delim, saveptr);
#else
    // std::strtok keeps its scan position internally, so saveptr is not
    // used on this path.
    static_cast<void>(saveptr);
    return std::strtok(str, delim);
#endif
}
// Wide-character counterpart of the narrow FindToken overload.
//
// token   - buffer to tokenize on the first call; nullptr on later calls to
//           continue with the same buffer.
// delim   - NUL-terminated set of delimiter characters.
// saveptr - caller-owned scan position; not used by the two-argument
//           fallback at the bottom.
//
// Returns the next token, or nullptr when none remain.
wchar_t* FindToken(
    wchar_t* token, const wchar_t* delim, wchar_t** saveptr)
{
#if ( (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) \
    || (defined(__cplusplus) && (__cplusplus >= 201103L)) )
    // Standard three-argument form.
    wchar_t* const next = std::wcstok(token, delim, saveptr);
#elif defined(_MSC_VER) && (_MSC_VER >= 1800)
    wchar_t* const next = wcstok_s(token, delim, saveptr);
#else
    // Legacy two-argument form.
    wchar_t* const next = std::wcstok(token, delim);
#endif
    return next;
}
// Copies the NUL-terminated narrow string `source`, terminator included,
// into `destination` and returns `destination`. The caller must provide a
// destination large enough for the whole string.
char* CopyString(char* destination, const char* source)
{
    std::strcpy(destination, source);
    return destination;
}
// Copies the NUL-terminated wide string `source`, terminator included,
// into `destination` and returns `destination`. The caller must provide a
// destination large enough for the whole string.
wchar_t* CopyString(wchar_t* destination, const wchar_t* source)
{
    std::wcscpy(destination, source);
    return destination;
}
// Splits `str` with the strtok-style FindToken helpers.
//
// str    - text to split. It is copied into a scratch buffer first because
//          FindToken modifies the buffer it scans. The copy is made with
//          CopyString, so it stops at the first NUL character in `str`.
// delim  - set of delimiter characters; each character is a delimiter.
// tokens - receives the tokens. Any previous content is discarded, like in
//          the other split functions in this file.
//
// Returns the number of tokens stored in `tokens`.
template <class charType>
size_t splitWithFindToken(
    const std::basic_string<charType>& str,
    const std::basic_string<charType>& delim,
    std::vector< std::basic_string<charType> >& tokens)
{
    tokens.clear();
    if (str.empty())
    {
        return 0;
    }

    // make_unique value-initializes the array, so the scratch buffer starts
    // out zero-filled and needs no separate memset.
    std::unique_ptr<charType[]> buffer =
        std::make_unique<charType[]>(str.length() + 1);
    CopyString(buffer.get(), str.c_str());

    charType* saveptr = nullptr;
    for (charType* token = FindToken(buffer.get(), delim.c_str(), &saveptr);
         token != nullptr;
         token = FindToken(nullptr, delim.c_str(), &saveptr))
    {
        tokens.emplace_back(token);
    }
    return tokens.size();
}
// Splits `str` on whitespace by streaming it through an input string stream
// and copying the extracted words into `tokens`.
//
// str    - text to split.
// tokens - receives the words; cleared first.
//
// Returns the number of words stored in `tokens`.
template <class charType>
size_t splitWithStringStream(
    const std::basic_string<charType>& str,
    std::vector< std::basic_string<charType> >& tokens)
{
    using stream_type =
        std::basic_istringstream<charType, std::char_traits<charType> >;
    using word_iterator =
        std::istream_iterator<
            std::basic_string<charType>, charType,
            std::char_traits<charType> >;

    tokens.clear();
    if (!str.empty())
    {
        stream_type input(str);
        word_iterator first(input);
        word_iterator last;  // default-constructed: end-of-stream iterator
        std::copy(first, last, std::back_inserter(tokens));
    }
    return tokens.size();
}
// Splits `str` on whitespace by building a vector directly from a pair of
// stream iterators and then swapping it into `tokens`.
//
// str    - text to split.
// tokens - receives the words; cleared first.
//
// Returns the number of words stored in `tokens`.
template <class charType>
size_t splitWithStringStream1(
    const std::basic_string<charType>& str,
    std::vector< std::basic_string<charType> >& tokens)
{
    using string_type = std::basic_string<charType>;
    using stream_type =
        std::basic_istringstream<charType, std::char_traits<charType> >;
    using word_iterator =
        std::istream_iterator<
            string_type, charType, std::char_traits<charType> >;

    tokens.clear();
    if (!str.empty())
    {
        stream_type input(str);
        word_iterator first(input);
        word_iterator last;  // default-constructed: end-of-stream iterator
        std::vector<string_type> words(first, last);
        words.swap(tokens);
    }
    return tokens.size();
}
// Splits `str` on a single delimiter character using std::getline.
//
// str    - text to split.
// delim  - delimiter character.
// tokens - receives the pieces; cleared first. Pieces are stored exactly as
//          getline extracts them, so adjacent delimiters give empty pieces.
//
// Returns the number of pieces stored in `tokens`.
template<typename charType>
size_t splitWithGetLine(
    const std::basic_string<charType>& str,
    const charType delim,
    std::vector< std::basic_string<charType> >& tokens)
{
    using string_type = std::basic_string<charType>;
    using stream_type =
        std::basic_istringstream<charType, std::char_traits<charType> >;

    tokens.clear();
    if (!str.empty())
    {
        stream_type input(str);
        for (string_type piece; std::getline(input, piece, delim); )
        {
            tokens.push_back(piece);
        }
    }
    return tokens.size();
}
// Splits `str` on a single delimiter character using only basic_string
// search operations.
//
// str       - text to split.
// delim     - delimiter character.
// tokens    - receives the tokens; cleared first.
// trimEmpty - when true, empty tokens (leading or adjacent delimiters) are
//             dropped. When false, every delimiter ends a token, so each
//             leading or adjacent delimiter gives one empty token. An empty
//             piece after the final delimiter is never stored.
// maxTokens - upper bound on the number of tokens. Once maxTokens - 1 tokens
//             have been stored, the unsplit remainder of `str` becomes the
//             last token. Values below 1 behave like 1.
//
// Returns the number of tokens stored in `tokens`.
template<typename charType>
size_t splitWithBasicString(
    const std::basic_string<charType>& str,
    const charType delim,
    std::vector< std::basic_string<charType> > &tokens,
    const bool trimEmpty = false,
    const size_t maxTokens = static_cast<size_t>(-1))
{
    typedef std::basic_string<charType> my_string;
    typedef typename my_string::size_type my_size_type;

    tokens.clear();
    if (str.empty())
    {
        return 0;
    }

    const my_size_type len = str.length();

    // Leading delimiters are skipped only when empty tokens are unwanted.
    // Otherwise the loop below turns each of them into an empty token, the
    // same way it treats adjacent delimiters in the middle of the string.
    my_size_type left = trimEmpty ? str.find_first_not_of(delim, 0) : 0;

    // `i` is the 1-based index of the token that would be stored next.
    size_t i = 1;
    while (i < maxTokens)
    {
        // When `left` is npos (the string is all delimiters) find() returns
        // npos as well and the loop ends immediately.
        const my_size_type right = str.find(delim, left);
        if (right == my_string::npos)
        {
            break;
        }
        if (!trimEmpty || right > left)
        {
            tokens.push_back(str.substr(left, right - left));
            ++i;
        }
        left = right + 1;
    }

    // Whatever follows the last processed delimiter is the final token.
    if (left < len)
    {
        tokens.push_back(str.substr(left));
    }
    return tokens.size();
}
// Splits `str` on any character from a delimiter set using only
// basic_string search operations.
//
// str       - text to split.
// delim     - set of delimiter characters; each character is a delimiter.
// tokens    - receives the tokens; cleared first.
// trimEmpty - when true, empty tokens (leading or adjacent delimiters) are
//             dropped. When false, every delimiter ends a token, so each
//             leading or adjacent delimiter gives one empty token. An empty
//             piece after the final delimiter is never stored.
// maxTokens - upper bound on the number of tokens. Once maxTokens - 1 tokens
//             have been stored, the unsplit remainder of `str` becomes the
//             last token. Values below 1 behave like 1.
//
// Returns the number of tokens stored in `tokens`.
template<typename charType>
size_t splitWithBasicString(
    const std::basic_string<charType>& str,
    const std::basic_string<charType>& delim,
    std::vector< std::basic_string<charType> >& tokens,
    const bool trimEmpty = false,
    const size_t maxTokens = static_cast<size_t>(-1))
{
    typedef std::basic_string<charType> my_string;
    typedef typename my_string::size_type my_size_type;

    tokens.clear();
    if (str.empty())
    {
        return 0;
    }

    const my_size_type len = str.length();

    // Leading delimiters are skipped only when empty tokens are unwanted.
    // Otherwise the loop below turns each of them into an empty token, the
    // same way it treats adjacent delimiters in the middle of the string.
    my_size_type left = trimEmpty ? str.find_first_not_of(delim, 0) : 0;

    // `i` is the 1-based index of the token that would be stored next.
    size_t i = 1;
    while (i < maxTokens)
    {
        // When `left` is npos (the string is all delimiters) find_first_of()
        // returns npos as well and the loop ends immediately.
        const my_size_type right = str.find_first_of(delim, left);
        if (right == my_string::npos)
        {
            break;
        }
        if (!trimEmpty || right > left)
        {
            tokens.push_back(str.substr(left, right - left));
            ++i;
        }
        left = right + 1;
    }

    // Whatever follows the last processed delimiter is the final token.
    if (left < len)
    {
        tokens.push_back(str.substr(left));
    }
    return tokens.size();
}
int main()
{
std::wstring str(L"The quick brown fox jumped over the lazy dog.");
std::wstring str1(L"This is a test.||This is only a test.|This concludes this test.");
std::wstring str2(L"This is a test.|,This is only a test.\tThis concludes this test.");
std::vector<std::wstring> tokens;
size_t s = splitWithStringStream(str, tokens);
std::wcout << L"s: " << s << std::endl;
if (s)
{
std::wcout
<< L"tokens.at(0): " << tokens.at(0)
<< std::endl;
}
std::vector<std::wstring> tokens1;
size_t s1 = splitWithStringStream1(str, tokens1);
std::wcout << L"s1: " << s1 << std::endl;
if (s1)
{
std::wcout
<< L"tokens1.at(0): " << tokens1.at(0)
<< std::endl;
}
std::vector<std::wstring> tokens2;
size_t s2 = splitWithGetLine(str1, L'|', tokens2);
std::wcout << L"s2: " << s2 << std::endl;
if (s2)
{
std::wcout
<< L"tokens2.at(0): " << tokens2.at(0)
<< std::endl;
}
std::vector<std::wstring> tokens3;
size_t s3 = splitWithBasicString(str1, L'|', tokens3, true);
std::wcout << L"s3: " << s3 << std::endl;
if (s3)
{
std::wcout
<< L"tokens3.at(0): " << tokens3.at(0)
<< std::endl;
}
std::vector<std::wstring> tokens4;
size_t s4 = splitWithBasicString(
str2, std::wstring(L"|,\t"), tokens4, true);
std::wcout << L"s4: " << s4 << std::endl;
if (s4)
{
std::wcout
<< L"tokens4.at(0): " << tokens4.at(0)
<< std::endl;
}
std::vector<std::wstring> tokens5;
boost::split(tokens5, str2, boost::is_any_of(L"|,\t"));
size_t s5 = tokens5.size();
std::wcout << L"s5: " << s5 << std::endl;
if (s5)
{
std::wcout
<< L"tokens5.at(0): " << tokens5.at(0)
<< std::endl;
}
std::vector<std::wstring> tokens6;
size_t s6 = splitWithFindToken(
str2, std::wstring(L"|,\t"), tokens6);
std::wcout << L"s6: " << s6 << std::endl;
if (s6)
{
std::wcout
<< L"tokens6.at(0): " << tokens6.at(0)
<< std::endl;
}
typedef boost::char_separator<wchar_t> my_separator;
typedef boost::tokenizer<
my_separator, std::wstring::const_iterator,
std::wstring > my_tokenizer;
my_separator sep(L"|,\t");
my_tokenizer tokens7(str2, sep);
my_tokenizer::iterator itEnd = tokens7.end();
for (my_tokenizer::iterator it = tokens7.begin(); it != itEnd; ++it)
{
std::wcout << *it << std::endl;
}
return 0;
}