fork download
  1. #include <cstring>
  2. #include <cassert>
  3. #include <cstdio>
  4.  
  5. #include <algorithm>
  6.  
  7. // Abstract
  8. // [!] In C++14 many of this funcs can be 'constexpr' [!]
  9. class MathUtils {
  10.  
  11. public:
  12.  
  13. // 'hashSizeInBits' AND 'FNVPrime' should NOT be zero
  14. // (for FNV primes see here: http://i...content-available-to-author-only...e.com/chongo/tech/comp/fnv/)
  15. // 'hashSizeInBits' should NOT be larger (OR even equal)
  16. // then size of 'unsigned long long int' in bits
  17. // In the general case, almost any offset_basis will serve so long as it is non-zero
  18. static unsigned long long int generateFNV1OffsetBasis(const size_t hashSizeInBits,
  19. const unsigned long long int FNVPrime) throw()
  20. {
  21. static const auto ULL_SIZE_IN_BITS = sizeof(unsigned long long int) * 8U;
  22. static const char* const OFFSET_STR =
  23. "chongo <Landon Curt Noll> /\\../\\!"; // http://w...content-available-to-author-only...e.com/chongo/odd/bat.html
  24.  
  25. if (!hashSizeInBits || hashSizeInBits >= ULL_SIZE_IN_BITS || !FNVPrime) return 0ULL;
  26.  
  27. const unsigned long long int hashMod =
  28. static_cast<decltype(hashMod)>(std::pow(2.0, ULL_SIZE_IN_BITS));
  29. auto offsetBasis = 0ULL;
  30.  
  31. for (auto currChar = OFFSET_STR; *currChar; ++currChar) {
  32. offsetBasis = (offsetBasis * FNVPrime) % hashMod;
  33. offsetBasis ^= *currChar;
  34. }
  35. return offsetBasis;
  36. }
  37.  
  38. // 'str' SHOULD be a POD C null-terminated str.
  39. // Returns zero for an empty str.
  40. // FNV-1a algorithm description: http://i...content-available-to-author-only...e.com/chongo/tech/comp/fnv/#FNV-1a
  41. static size_t getFNV1aHash(const char* str) throw() {
  42. if (!str || !*str) return size_t();
  43. static_assert(4U == sizeof(size_t) || 8U == sizeof(size_t),
  44. "Known primes & offsets for 32 OR 64 bit hashes ONLY");
  45.  
  46. //// C++11 OPTIMIZATION HINT: better use 'constexpr' instead of 'const'
  47.  
  48. // In the general case, almost any offset_basis will serve so long as it is non - zero
  49. static const unsigned long long int BASISES[] =
  50. {2166136261ULL, 14695981039346656037ULL}; // 32 bit, 64 bit
  51. static const size_t OFFSET_BASIS =
  52. static_cast<decltype(OFFSET_BASIS)>(BASISES[sizeof(size_t) / 4U - 1U]);
  53.  
  54. // Some primes do hash better than other primes for a given integer size
  55. static const unsigned long long int PRIMES[] =
  56. {16777619ULL, 1099511628211ULL}; // 32 bit, 64 bit
  57. static const size_t PRIME =
  58. static_cast<decltype(OFFSET_BASIS)>(PRIMES[sizeof(size_t) / 4U - 1U]);
  59.  
  60. auto hash = OFFSET_BASIS;
  61. do {
  62. hash ^= *str++; // xor is performed on the low order octet (8 bits) of hash
  63. hash *= PRIME;
  64. } while (*str);
  65.  
  66. return hash;
  67. }
  68.  
  69. private:
  70.  
  71. MathUtils() throw() = delete;
  72. ~MathUtils() throw() = delete;
  73. MathUtils(const MathUtils&) throw() = delete;
  74. MathUtils(MathUtils&&) throw() = delete;
  75. MathUtils& operator=(const MathUtils&) throw() = delete;
  76. MathUtils& operator=(MathUtils&&) throw() = delete;
  77. };
  78.  
  79. #include <stdlib.h>
  80. #include <ctime>
  81. #include <cmath>
  82.  
  83. #include <time.h>
  84.  
  85. #include <string>
  86. #include <fstream>
  87. #include <iostream>
  88. #include <stdexcept>
  89. #include <unordered_map>
  90.  
  91. class HashTester {
  92.  
  93. public:
  94.  
  95. typedef std::unordered_map<size_t, std::string> THashCodeToStrMap;
  96.  
  97. static const auto MIN_CHAR_ = 'a';
  98. static const auto MAX_CHAR_ = 'z'; // 'a'-'z': 26
  99. static const auto ALPHABET_LEN_ = MAX_CHAR_ - MIN_CHAR_ + 1U;
  100. static const char* const OUT_FILE_NAME_;
  101. static const char* const OUT_FILE_NAME_EXT_;
  102.  
  103. struct Stats {
  104. Stats(const unsigned long long int comboCount = 0ULL) throw() : comboCount(comboCount) {}
  105.  
  106. unsigned long long int totalCount = 0ULL, duplicateCount = 0ULL, comboCount;
  107. };
  108.  
  109. // Internal
  110. template <const size_t BufCapacity>
  111. struct Params {
  112. Params(char (&strBuf)[BufCapacity], const size_t currLen = size_t(),
  113. THashCodeToStrMap* const map = nullptr,
  114. Stats* const stats = nullptr,
  115. std::ofstream* const outFile = nullptr) throw()
  116. : currLen(currLen), strBuf(strBuf), map(map), stats(stats), outFile(outFile)
  117. {}
  118.  
  119. size_t currLen = size_t();
  120. char (&strBuf)[BufCapacity];
  121. size_t lastCompletenessPercentage = size_t();
  122.  
  123. THashCodeToStrMap* map = nullptr;
  124. Stats* stats = nullptr;
  125. std::ofstream* outFile = nullptr;
  126. };
  127.  
  128. // Char Sequence Generator Type
  129. enum class ECCharSeqGenType {
  130. CSGT_ALL_VARIANTS // a^N combinations (a - alphabet power; N - length of the seq., fixed)
  131. };
  132.  
  133. template <class THashFunctor, const size_t MaxStrLen = 4U>
  134. static double getCollisionProbability(bool& error, const bool logCollisions = true) throw() {
  135. char strBuf[MaxStrLen + 1U] = {0}; // even to 8: MaxStrLen + 8U - (MaxStrLen % 8U)
  136. strBuf[std::extent<decltype(strBuf)>::value - 1U] = '\0';
  137. static const auto COMBINATION_COUNT_ =
  138. static_cast<unsigned long long int>(std::pow(ALPHABET_LEN_, MaxStrLen));
  139.  
  140. THashCodeToStrMap map;
  141. auto toReserveBucketCount
  142. = static_cast<size_t>(COMBINATION_COUNT_ / static_cast<size_t>(std::pow(10.0, MaxStrLen / 2.0)));
  143. while (true) {
  144. try {
  145. map.reserve(toReserveBucketCount);
  146. break;
  147. } catch (...) {
  148. toReserveBucketCount /= 2U;
  149. }
  150. }
  151. std::ofstream outFile;
  152.  
  153. static const auto OUT_FILE_NAME_BUF_SIZE_ = 64U;
  154. char outFileName[OUT_FILE_NAME_BUF_SIZE_] = {0};
  155. if (logCollisions && !openFile(outFile, outFileName, sizeof(outFileName))) {
  156. error = true;
  157. return nan("");
  158. }
  159. Stats stats(COMBINATION_COUNT_);
  160. Params<std::extent<decltype(strBuf)>::value> params(strBuf, 0U, &map, &stats, &outFile);
  161.  
  162. if (params.stats->comboCount) std::cout << std::endl;
  163. error = !createAndTestCharSeq<THashFunctor>(params, logCollisions); // returns false on ANY error
  164. if (params.stats->comboCount) std::cout << std::endl;
  165.  
  166. if (!params.stats->duplicateCount) { // NO duplicates - empty file
  167. outFile.close(); // file will be auto flushed
  168. const auto fileDeleteResult = remove(outFileName); // on failure, a nonzero value is returned
  169. }
  170. { // Shrink-to-fit
  171. decltype(map) mapClearer(std::move(map)); // release mem. before flushing file
  172. }
  173. // File will be auto flushed AND closed here
  174. return static_cast<double>(stats.duplicateCount) / stats.totalCount * 100.0;
  175. }
  176.  
  177. // Generic strategy to test auto. generated char. sequences
  178. // (used to test hashes, through can be useful elsewhere)
  179. // [!] Will stop if mets zero hash. (AND return false) [!]
  180. template <class TStrFunctor, const size_t BufCapacity>
  181. static bool createAndTestCharSeq(Params<BufCapacity>& params, bool logCollisions = true) throw() {
  182. static_assert(MIN_CHAR_ < MAX_CHAR_, "Invalid char. sequence");
  183. static const auto LAST_CHAR_IDX_ = BufCapacity - 1U;
  184.  
  185. logCollisions = logCollisions && params.outFile;
  186.  
  187. const auto currIdx = params.currLen;
  188. const auto nextIdx = currIdx + 1U;
  189. const auto toReserveCount = nextIdx + 1U;
  190. size_t currHash = 0U;
  191. std::string currStr;
  192. decltype(std::make_pair(currHash, std::move(currStr))) currPair;
  193. decltype(params.map->insert(std::move(currPair))) insertResult;
  194. TStrFunctor strFunctor;
  195. auto completenessPercentage = 0U;
  196.  
  197. auto tryInsert = [&]() throw() {
  198. //// Fill pair
  199. currStr.reserve(toReserveCount);
  200. currStr = params.strBuf;
  201. currHash = strFunctor(currStr);
  202.  
  203. if (!params.map) return true; // skip other part
  204. currPair.first = currHash;
  205. if (logCollisions) currPair.second = currStr; // do NOT move, COPY
  206.  
  207. try {
  208. insertResult = params.map->insert(std::move(currPair));
  209. } catch (...) {
  210. return false;
  211. };
  212.  
  213. if (!insertResult.second) { // if NOT inserted (duplicate)
  214. if (params.stats) ++params.stats->duplicateCount; // if collect stats
  215.  
  216. if (logCollisions) {
  217. auto& dupStr = (*insertResult.first).second;
  218. (*params.outFile) << currHash << ": '" << currStr << "' == '" << dupStr << "'\n";
  219. if (!(*params.outFile)) return false;
  220. }
  221. }
  222. return true;
  223. };
  224.  
  225. static_assert(MIN_CHAR_ < MAX_CHAR_, "Invalid char codes");
  226. for (char currChar = MIN_CHAR_; currChar <= MAX_CHAR_; ++currChar) {
  227. params.strBuf[currIdx] = currChar;
  228.  
  229. if (nextIdx < LAST_CHAR_IDX_) { // prolong seq.
  230. params.currLen = nextIdx;
  231. if (!createAndTestCharSeq<TStrFunctor, BufCapacity>(params, logCollisions)) return false;
  232. } else { // seq. end
  233. if (!tryInsert()) return false; // saving OR logging failed
  234. if (!currHash) return false; // force stop (skip other) [OPTIMIZATION]
  235.  
  236. if (params.stats) { // if collect stats
  237. ++params.stats->totalCount;
  238.  
  239. if (params.stats->comboCount) { // show progress
  240. completenessPercentage
  241. = static_cast<decltype(completenessPercentage)>(static_cast<double>(params.stats->totalCount)
  242. / params.stats->comboCount * 100.0);
  243.  
  244. if (completenessPercentage > params.lastCompletenessPercentage &&
  245. !(completenessPercentage % 10U))
  246. { // step = 10U
  247. std::cout << ' ' << completenessPercentage;
  248. params.lastCompletenessPercentage = completenessPercentage;
  249. }
  250. }
  251. }
  252. }
  253. } // 'for currChar' END
  254. return true;
  255. } // 'createAndTestCharSeqA' END
  256.  
  257. private:
  258.  
  259. static bool openFile(std::ofstream& outFile, char* const fileNameBuf,
  260. const size_t fileNameBufSize) throw()
  261. {
  262. if (!fileNameBuf || fileNameBufSize < 2U) return false;
  263. //// Create unique file name (ALSO clock ticks can be replaced with the time AND/OR random)
  264. //// i. e. C++11 'std::chrono' AND/OR 'std::uniform_int_distribution'
  265. //// ALSO C 'tmpnam' / 'tmpfile'
  266. strncat(fileNameBuf, OUT_FILE_NAME_, fileNameBufSize - 1U);
  267. static const auto STR_TIME_BUF_SIZE_ = 32U;
  268. char strTimeBuf[STR_TIME_BUF_SIZE_] = {0};
  269. // Better use C++11 http://e...content-available-to-author-only...e.com/w/cpp/chrono/high_resolution_clock
  270. const auto nowTime = time(nullptr);
  271. sprintf(strTimeBuf, "%lli", nowTime);
  272. strncat(fileNameBuf, strTimeBuf, fileNameBufSize - 1U);
  273. strncat(fileNameBuf, OUT_FILE_NAME_EXT_, fileNameBufSize - 1U);
  274.  
  275. outFile.open(fileNameBuf, std::ios::out | std::ios::trunc);
  276. return !(!outFile);
  277. }
  278. };
  279.  
  280. const char* const HashTester::OUT_FILE_NAME_ = "collison_test_results#";
  281. const char* const HashTester::OUT_FILE_NAME_EXT_ = ".txt ";
  282.  
  283. // Functor
  284. #define ADD_HASHER_FOR(ClassName, HasherName) \
  285. template <typename T = ClassName>\
  286. struct HasherName {\
  287.   auto operator()(const T& obj) const throw() -> decltype(obj.hash()) {\
  288.   return obj.hash();\
  289.   }\
  290. };
  291.  
  292. // Functor
  293. #define ADD_STD_HASHER_FOR(ClassName) \
  294. namespace std {\
  295.   template <>\
  296.   class hash<ClassName> {\
  297.   public:\
  298.   size_t operator()(const ClassName& obj) const throw() {\
  299.   return obj.hash();\
  300.   }\
  301.   };\
  302. };
  303.  
  304. template <class TStrType>
  305. struct FNV1aHashFunctor {
  306. auto operator()(const TStrType& str) throw() -> decltype(MathUtils::getFNV1aHash(str.c_str())) {
  307. return MathUtils::getFNV1aHash(str.c_str());
  308. }
  309. };
  310.  
  311. template <class TStrType>
  312. struct STLHashFunctor {
  313. auto operator()(const TStrType& str) throw() -> decltype(std::hash<std::string>()(str)) {
  314. return std::hash<std::string>()(str);
  315. }
  316. };
  317.  
  318. int main() {
  319. bool error = false;
  320. const auto FNV1aHashCollisionProbability_
  321. = HashTester::getCollisionProbability<FNV1aHashFunctor<std::string>>(error, false);
  322. std::cout << std::endl << "FNV1a Collision Probability: "
  323. << FNV1aHashCollisionProbability_ << ", error: " << error << std::endl;
  324.  
  325. // 'std::hash<std::string>' can be used directly
  326. error = false;
  327. const auto STLHashCollisionProbability_
  328. = HashTester::getCollisionProbability<STLHashFunctor<std::string>>(error, false);
  329. std::cout << std::endl << "STL Collision Probability: "
  330. << STLHashCollisionProbability_ << ", error: " << error << std::endl;
  331. return 0;
  332. }
Success #stdin #stdout 0.63s 10616KB
stdin
Standard input is empty
stdout
 10 20 30 40 50 60 70 80 90 100

FNV1a Collision Probability: 0, error: 0

 10 20 30 40 50 60 70 80 90 100

STL Collision Probability: 0, error: 0