fork download
  1. // ExternalSorting.cpp : Defines the entry point for the console application.
  2. //
  3.  
  4. //#include "stdafx.h"
  5. #include <string>
  6. #include <sstream>
  7. #include <iostream>
  8. #include <fstream>
  9. #include <list>
  10. #include <algorithm>
  11. #include <iterator>
  12. #include <vector>
  13. #include <chrono>
  14.  
  15.  
  16. using StringContainer = std::list<std::string>;
  17.  
  18. struct TBuffer
  19. {
  20. private:
  21. void _reload()
  22. {
  23. bEOF = !static_cast<bool>(std::getline(_input_stream, _curString, '\n'));
  24. }
  25. public:
  26. template <class T>
  27. TBuffer(T&& fileName)
  28. :_input_stream(std::forward<T>(fileName))
  29. {
  30. _reload();
  31. }
  32.  
  33. inline void operator++(int)
  34. {
  35. _reload();
  36. }
  37.  
  38. ~TBuffer()
  39. {
  40. _input_stream.close();
  41. }
  42.  
  43. std::ifstream _input_stream;
  44. std::string _curString;
  45.  
  46. bool bEOF = false;
  47. };
  48.  
  49. using BufferContainer = std::list<TBuffer>;
  50.  
  51. std::string __merge2Files(const std::string& filename1, const std::string& filename2)
  52. {
  53. std::string output = filename1 + ".0";
  54.  
  55. if (true)
  56. {
  57. TBuffer buf1(filename1);
  58. TBuffer buf2(filename2);
  59.  
  60. std::ofstream ofs(output);
  61.  
  62. while (!buf1.bEOF && !buf2.bEOF)
  63. {
  64. if (buf1._curString < buf2._curString)
  65. {
  66. ofs << buf1._curString << '\n';
  67. buf1++;
  68. }
  69. else
  70. {
  71. ofs << buf2._curString << '\n';
  72. buf2++;
  73. }
  74. }
  75.  
  76. // write any tail
  77. while (!buf1.bEOF)
  78. {
  79. ofs << buf1._curString;
  80. buf1++;
  81. if (!buf1.bEOF)
  82. ofs << '\n';
  83. }
  84. while (!buf2.bEOF)
  85. {
  86. ofs << buf2._curString;
  87. buf2++;
  88. if (!buf2.bEOF)
  89. ofs << '\n';
  90. }
  91.  
  92. ofs.close();
  93. }
  94.  
  95. // delete old files
  96. std::remove(filename1.c_str());
  97. std::remove(filename2.c_str());
  98.  
  99.  
  100. return output;
  101. }
  102.  
  103. std::string __mergeFiles(StringContainer filelist)
  104. {
  105. if (filelist.size() == 2)
  106. {
  107. return __merge2Files(filelist.front(), filelist.back());
  108. }
  109. if (filelist.size() == 1)
  110. {
  111. return filelist.front(); //already merged
  112. }
  113.  
  114. while (filelist.size() > 1)
  115. {
  116. std::string fileName1 = filelist.front();
  117. filelist.pop_front();
  118. std::string fileName2 = filelist.front();
  119. filelist.pop_front();
  120. filelist.push_back(__merge2Files(fileName1, fileName2));
  121. }
  122.  
  123. return filelist.front();
  124. }
  125.  
  126. std::string MergeFiles(StringContainer filelist)
  127. {
  128. return __mergeFiles(std::move(filelist));
  129. }
  130.  
  131.  
  132. template<typename Function, typename... Args>
  133. int64_t do_profiling(Function&& _func, const Args&...args)
  134. {
  135. using namespace std::chrono;
  136. auto start = high_resolution_clock::now();
  137. _func(args...);
  138. return duration_cast<milliseconds>(high_resolution_clock::now() - start).count();
  139. }
  140.  
  141. template <class T>
  142. auto GetFileSize(T&& filename)
  143. {
  144. std::fstream ifs(std::forward<T>(filename), std::fstream::ios_base::in | std::ios::binary | std::fstream::ios_base::ate);
  145. auto size = ifs.tellg();
  146. ifs.close();
  147. return size;
  148. }
  149.  
  150. int main(int argc, char* argv[])
  151. {
  152. try
  153. {
  154. // Check command line arguments.
  155. if (argc != 5)
  156. {
  157. std::cerr << "Usage: extsort.exe <input file name> <output file name> <memory size> <temp folder>\n";
  158. std::cerr << " For example, try:\n";
  159. std::cerr << " extsort.exe \"c:\\in.txt\" \"c:\\out.txt\" 1000000 \"c:\\temp\\\"\n";
  160. return 1;
  161. }
  162.  
  163. std::string inFileName = argv[1];
  164. std::string outFilename = argv[2];
  165. unsigned long memorySize = std::stoul(std::string(argv[3]), nullptr, 10);
  166. std::string tempFolder = argv[4];
  167.  
  168. auto fileSize = GetFileSize(inFileName);
  169. if (!fileSize)
  170. {
  171. std::cout << "Input file empty" << std::endl;
  172. return 0;
  173. }
  174.  
  175. std::ifstream ifs(inFileName);
  176.  
  177. StringContainer strList;
  178. StringContainer tempFileNames;
  179.  
  180. unsigned int bufferLen = 0;
  181. unsigned int tempFileCount = 0;
  182. std::string line;
  183.  
  184. std::cout << "Start reading from: " << inFileName << std::endl;
  185. int64_t filTime = do_profiling([&]()
  186. {
  187. while (true)
  188. {
  189. auto bEOF = !static_cast<bool>(std::getline(ifs, line, '\n'));
  190.  
  191. if (bEOF || (bufferLen + line.size() > memorySize))
  192. {
  193. strList.sort();
  194.  
  195. std::ofstream ofs(*tempFileNames.emplace(tempFileNames.end(), tempFolder + "temp." + std::to_string(tempFileCount++)));
  196. std::copy(strList.begin(), strList.end(), std::ostream_iterator<std::string>(ofs, "\n"));
  197. ofs.close();
  198.  
  199. strList.clear();
  200. bufferLen = 0;
  201. }
  202.  
  203. if (bEOF)
  204. break;
  205.  
  206. bufferLen += line.size();
  207.  
  208. strList.emplace_back(std::move(line));
  209. }
  210. ifs.close();
  211. });
  212. std::cout << "Completed in: " << filTime << " milliseconds" << std::endl;
  213.  
  214.  
  215. if (tempFileNames.size() == 1)
  216. {
  217. std::remove(outFilename.c_str());
  218. std::rename(tempFileNames.front().c_str(), outFilename.c_str());
  219.  
  220. return 0;
  221. }
  222.  
  223. std::cout << "Start merging and write to: " << outFilename << std::endl;
  224. filTime = do_profiling([&]()
  225. {
  226. std::string mergedfilename = MergeFiles(tempFileNames);
  227.  
  228. std::remove(outFilename.c_str());
  229. std::rename(mergedfilename.c_str(), outFilename.c_str());
  230.  
  231. });
  232. std::cout << "Completed in: " << filTime << " milliseconds" << std::endl;
  233.  
  234. }
  235. catch (std::exception& e)
  236. {
  237. std::cerr << "exception: " << e.what() << "\n";
  238. }
  239.  
  240. return 0;
  241. }
  242.  
  243.  
Runtime error #stdin #stdout #stderr 0s 3480KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Usage: extsort.exe <input file name> <output file name> <memory size> <temp folder>
  For example, try:
    extsort.exe "c:\in.txt" "c:\out.txt" 1000000 "c:\temp\"