#include <iostream>
#include <cpuid.h> // GCC-provided
#include <stdio.h>
#include <stdint.h>
/*
 * CompressStringLib.h
 *
 *  Created on: Mar 5, 2022
 *      Author: tugrul
 */

#ifndef COMPRESSSTRINGLIB_H_
#define COMPRESSSTRINGLIB_H_

#include <algorithm>
#include <vector>
#include <map>
#include <iostream>
#include <functional>
#include <string>
#include <memory>
#include <chrono>
#include <cstring>
#include <mutex>

namespace CompressedStringLib
{

// RAII timer: writes the elapsed nanoseconds of its enclosing scope
// into the pointed-to variable on destruction
class Bench
{
public:
    Bench(size_t * targetPtr)
    {
        target = targetPtr;
        t1 = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch());
    }

    ~Bench()
    {
        t2 = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch());
        *target = t2.count() - t1.count();
    }
private:
    size_t * target;
    std::chrono::nanoseconds t1, t2;
};
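
// Usage sketch (illustrative, not part of the library): time a scope with Bench.
// The workload below is a hypothetical placeholder.
inline size_t benchExample()
{
    size_t nanoseconds = 0;
    {
        Bench timer(&nanoseconds);  // destructor stores the elapsed time
        volatile int sink = 0;      // hypothetical workload
        for (int i = 0; i < 1000; i++)
            sink += i;
    }
    return nanoseconds;
}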

// stores a bit in a byte at a position
inline void storeBit(unsigned char & data, const unsigned char value, const int pos) noexcept
{
    data = (value << pos) | (data & ~(1 << pos));
}

// stores a bit in a size_t at a position
inline void storeBit(size_t & data, size_t value, const int pos) noexcept
{
    data = (value << pos) | (data & ~(1ull << pos));
}

// stores a bit in a uint32_t at a position
inline void storeBit(uint32_t & data, uint32_t value, const int pos) noexcept
{
    data = (value << pos) | (data & ~(((uint32_t)1) << pos));
}

// stores a bit in a uint16_t at a position
inline void storeBit(uint16_t & data, uint16_t value, const int pos) noexcept
{
    data = (value << pos) | (data & ~(((uint16_t)1) << pos));
}

// loads a bit from a byte at a position
inline unsigned char loadBit(const unsigned char & data, const int pos) noexcept
{
    return (data >> pos) & 1;
}

// loads a bit from a size_t at a position
inline size_t loadBit(const size_t & data, const int pos) noexcept
{
    return (data >> pos) & 1;
}

// loads a bit from a uint32_t at a position
inline uint32_t loadBit(const uint32_t & data, const int pos) noexcept
{
    return (data >> pos) & 1;
}

// loads a bit from a uint16_t at a position
inline uint16_t loadBit(const uint16_t & data, const int pos) noexcept
{
    return (data >> pos) & 1;
}

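// Usage sketch (illustrative, not part of the library): packing eight flags
// into one byte with storeBit and reading one back with loadBit. The helpers
// packFlags/unpackFlag are examples, not library API.
inline unsigned char packFlags(const bool (&flags)[8]) noexcept
{
    unsigned char packed = 0;
    for (int i = 0; i < 8; i++)
        storeBit(packed, flags[i] ? 1 : 0, i); // set bit i to flags[i]
    return packed;
}

inline bool unpackFlag(unsigned char packed, int i) noexcept
{
    return loadBit(packed, i) != 0; // extract bit i
}
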
// Node structure for Huffman tree on a std::vector
struct Node
{
    size_t count;
    int self;
    int leaf1;
    int leaf2;
    unsigned char data;
    unsigned char isLeaf;
};

// Huffman tree that works on a std::vector for more cached reads
class HuffmanTree
{
public:
    HuffmanTree()
    {
        for(int i=0;i<256;i++)
        {
            referenceMapDirect[i]=0;
            encodeDirectSize[i]=0;
        }
    }

    // accumulate the character histogram (4 accumulator banks, 8 chars per step)
    void add(std::string str)
    {
        const int sz = str.size();
        const int sz8 = sz - (sz % 8);

        size_t parallelAccumulator[4][256];
        for(int i=0;i<4;i++)
            for(int j=0;j<256;j++)
                parallelAccumulator[i][j]=0;

        for(int i=0;i<sz8;i+=8)
        {
            parallelAccumulator[0][(unsigned char)str[i]]++;
            parallelAccumulator[1][(unsigned char)str[i+1]]++;
            parallelAccumulator[2][(unsigned char)str[i+2]]++;
            parallelAccumulator[3][(unsigned char)str[i+3]]++;
            parallelAccumulator[0][(unsigned char)str[i+4]]++;
            parallelAccumulator[1][(unsigned char)str[i+5]]++;
            parallelAccumulator[2][(unsigned char)str[i+6]]++;
            parallelAccumulator[3][(unsigned char)str[i+7]]++;
        }
        for(int i=0;i<4;i++)
            for(int j=0;j<256;j++)
                referenceMapDirect[j]+=parallelAccumulator[i][j];
        for(int i=sz8;i<sz;i++)
        {
            referenceMapDirect[(unsigned char)str[i]]++;
        }
    }

    // count a single character
    void add(unsigned char data)
    {
        referenceMapDirect[data]++;
    }

    // builds the Huffman tree from the accumulated counts and derives per-character bit codes
    void generateTree(const unsigned char debug=false)
    {
        std::vector<Node> sortedNodes;

        int ctr=0;
        int singleCharDetected = 0;
        int detectedNodeIndex = -1;
        for(int i=0;i<256;i++)
        {
            size_t ct = referenceMapDirect[i];
            if(ct>0)
            {
                detectedNodeIndex=i;
                singleCharDetected++;
                Node node;

                node.data=i;
                node.count=ct;

                node.self=ctr;
                node.leaf1=-1;
                node.leaf2=-1;
                node.isLeaf=true;
                referenceVec.push_back(node);
                sortedNodes.push_back(node);
                ctr++;
            }
        }

        // a tree needs at least two leaves; add a dummy neighbor for single-character input
        if(singleCharDetected == 1)
        {
            Node node;
            referenceMapDirect[(detectedNodeIndex+1)&255]++;
            node.data=(detectedNodeIndex+1)&255;
            node.count=referenceMapDirect[(detectedNodeIndex+1)&255];

            node.self=ctr;
            node.leaf1=-1;
            node.leaf2=-1;
            node.isLeaf=true;
            referenceVec.push_back(node);
            sortedNodes.push_back(node);
            ctr++;
        }

        std::sort(sortedNodes.begin(), sortedNodes.end(),[](const Node & n1, const Node & n2){ return n1.count<n2.count;});

        // repeatedly merge the two least-frequent nodes until one root remains
        while(sortedNodes.size()>1)
        {
            Node node1 = sortedNodes[0];
            Node node2 = sortedNodes[1];
            Node newNode;
            newNode.count = node1.count + node2.count;
            newNode.data=0;
            newNode.leaf1 = node1.self;
            newNode.leaf2 = node2.self;
            newNode.self = ctr;
            newNode.isLeaf=false;
            sortedNodes.erase(sortedNodes.begin());
            sortedNodes.erase(sortedNodes.begin());
            sortedNodes.push_back(newNode);

            referenceVec.push_back(newNode);
            std::sort(sortedNodes.begin(), sortedNodes.end(),[](const Node & n1, const Node & n2){ return n1.count<n2.count;});
            ctr++;
        }

        root = sortedNodes[0];

        // walk the tree and record the bit path (0=left, 1=right) of every leaf
        std::function<void(Node,std::vector<unsigned char>)> g = [&](Node node, std::vector<unsigned char> path){
            if(node.leaf1!=-1)
            {
                std::vector<unsigned char> path1 = path;
                path1.push_back(false);
                g(referenceVec[node.leaf1],path1);
            }

            if(node.leaf2!=-1)
            {
                std::vector<unsigned char> path2 = path;
                path2.push_back(true);
                g(referenceVec[node.leaf2],path2);
            }

            if((node.leaf1 == -1) && (node.leaf2 == -1))
            {
                encodeDirect[node.data]=path;
                encodeDirectSize[node.data]=path.size();
            }
        };

        std::vector<unsigned char> rootPath;
        g(root,rootPath);

        if(debug)
        {
            std::cout<<"-------------------------------------"<<std::endl;
            for(int i=0;i<256;i++)
            {
                if(encodeDirect[i].size()>0)
                {
                    std::cout<<(unsigned char)i<<": ";
                    for(const auto & f: encodeDirect[i])
                    {
                        std::cout<<(bool)f<<" ";
                    }
                    std::cout<<std::endl;
                }
            }
            std::cout<<"-------------------------------------"<<std::endl;
        }
    }

    size_t getCount(unsigned char data)
    {
        return referenceMapDirect[data];
    }

    // bit code of a character
    inline
    const std::vector<unsigned char> & generateBits(unsigned char data) const noexcept
    {
        return encodeDirect[data];
    }

    // bit code length of a character
    inline
    const int & generateBitsSize(unsigned char data) const noexcept
    {
        return encodeDirectSize[data];
    }

    // decodes one character starting at bit index idx, advancing idx past the consumed bits
    inline
    const unsigned char followBitsDirect(const Node * __restrict__ const refVec, const unsigned char * __restrict__ const path, size_t & idx, const size_t & ofs) const
    {
        unsigned char result;
        const Node * curNode=&root;
        unsigned char work=true;

        while(work)
        {
            int p = loadBit(path[(idx>>3)-ofs],idx&7);
            if(curNode->isLeaf)
            {
                result=curNode->data;
                work=false;
            }
            else
            {
                curNode = refVec+(curNode->leaf2*p + curNode->leaf1*(1-p));
                idx++;
            }
        }
        return result;
    }

    const Node * getRefData() const { return referenceVec.data(); }

    /*
     * changes this object's checkpoint vector, to be used by consume() later
     * (the same object must call consume on the produced data)
     */
    std::vector<unsigned char> produce(std::string str, size_t& szTotal, const size_t checkResolutionPrm=256)
    {
        checkPointResolution = checkResolutionPrm;
        checkPoint.clear();
        const size_t checkResolutionM1 = checkPointResolution-1;
        szTotal=0;
        std::vector<unsigned char> result;

        unsigned char data = 0;
        unsigned char needWrite = false;
        int pos = 0;
        const size_t szStr = str.size();
        checkPoint.reserve(szStr/checkPointResolution);

        std::vector<unsigned char> vec;
        size_t sz=0;

        // total number of encoded bits
        for(size_t s = 0;s<szStr;s++)
        {
            sz+=generateBitsSize(str[s]);
        }

        vec.resize(sz); // resize, not reserve: elements are written by index below
        size_t szCur=0;
        {
            // todo: fix 256-bit long stream

            for(size_t s = 0;s<szStr;s++)
            {
                const int szTmp = generateBitsSize(str[s]);
                if((s&checkResolutionM1) == 0)
                {
                    checkPoint.push_back(szCur); // bit offset of every checkPointResolution-th character
                }
                const auto & vecTmp = generateBits(str[s]);
                for(int i=0;i<szTmp;i++)
                {
                    vec[szCur+i]=vecTmp[i];
                }
                szCur+=szTmp;
            }
        }

        szTotal+=sz;
        result.reserve(szTotal);
        const size_t sz8 = sz-sz%8;

        // pack 8 bits per output byte
        for(size_t i=0;i<sz8;i+=8)
        {
            unsigned char data = 0;

            storeBit(data,vec[i], 0);
            storeBit(data,vec[i+1], 1);
            storeBit(data,vec[i+2], 2);
            storeBit(data,vec[i+3], 3);
            storeBit(data,vec[i+4], 4);
            storeBit(data,vec[i+5], 5);
            storeBit(data,vec[i+6], 6);
            storeBit(data,vec[i+7], 7);

            result.push_back(data);
        }

        // remaining bits
        for(size_t i=sz8;i<sz;i++)
        {
            storeBit(data,vec[i], pos++);
            needWrite=true;
            if(pos==8)
            {
                result.push_back(data);
                pos=0;
                data=0;
                needWrite=false;
            }
        }

        if(needWrite)
            result.push_back(data);

        return result;
    }

    // decodes the whole stream (numChars==0) or numChars characters starting at character `index`
    std::string consume(std::vector<unsigned char> bits, size_t szTotal, const size_t& index=0, const size_t& numChars=0)
    {
        const Node * const nodePtr = getRefData();
        std::string resultStr;
        size_t idx = 0;
        size_t ofs = 0;
        size_t checkPointResolutionClosestLeft = checkPoint.empty() ? 0 : checkPoint[index / checkPointResolution];

        const size_t szLast = szTotal + idx;
        const unsigned char * const ptr = bits.data();
        if(numChars == 0)
        {
            resultStr.reserve((checkPoint.size()+1)*checkPointResolution);
            while(idx<szLast)
            {
                resultStr += followBitsDirect(nodePtr, ptr, idx,ofs);
            }
        }
        else
        {
            resultStr.reserve(numChars);
            // skip from the nearest checkpoint to the requested character
            size_t charCtr = index%checkPointResolution;
            while(charCtr>0)
            {
                followBitsDirect(nodePtr, ptr, checkPointResolutionClosestLeft,ofs);
                charCtr--;
            }
            charCtr=0;
            while(charCtr<numChars)
            {
                resultStr += followBitsDirect(nodePtr,ptr, checkPointResolutionClosestLeft,ofs);
                charCtr++;
            }
        }
        return resultStr;
    }

    inline
    const std::vector<unsigned char> serializeReferenceMapDirect()
    {
        std::vector<unsigned char> result(256*sizeof(size_t));
        std::memcpy(result.data(),&referenceMapDirect[0],256*sizeof(size_t));
        return result;
    }

    inline
    const std::vector<unsigned char> serializeEncodeDirectSize()
    {
        std::vector<unsigned char> result(256*sizeof(int));
        std::memcpy(result.data(),&encodeDirectSize[0],256*sizeof(int));
        return result;
    }

    inline
    const std::vector<unsigned char> serializeEncodeDirect()
    {
        std::vector<unsigned char> result;
        size_t totSz = 0;
        for(int i=0;i<256;i++)
        {
            unsigned char sz = encodeDirect[i].size();
            totSz+=(sz+1);
        }

        result.resize(totSz);
        size_t ctr=0;
        for(int i=0;i<256;i++)
        {
            unsigned char sz = encodeDirect[i].size();
            std::memcpy(result.data()+ctr,&sz,1);
            ctr++;
            if(sz>0)
            {
                std::memcpy(result.data()+ctr,encodeDirect[i].data(),sz);
                ctr+=sz;
            }
        }
        return result;
    }

    inline
    const std::vector<unsigned char> serializeRoot()
    {
        std::vector<unsigned char> result(sizeof(Node));
        std::memcpy(result.data(),&root,sizeof(Node));
        return result;
    }

    inline
    const std::vector<unsigned char> serializeReferenceVec()
    {
        std::vector<unsigned char> result(sizeof(size_t) + (referenceVec.size()*sizeof(Node)));
        size_t sz = referenceVec.size();
        std::memcpy(result.data(),&sz,sizeof(size_t));
        std::memcpy(result.data()+sizeof(size_t),referenceVec.data(),referenceVec.size()*sizeof(Node));
        return result;
    }

    inline
    const std::vector<unsigned char> serializeCheckpoint()
    {
        size_t sz = checkPoint.size();
        std::vector<unsigned char> result(sizeof(size_t)*(sz+2));

        std::memcpy(result.data(),&sz,sizeof(size_t));
        std::memcpy(result.data()+sizeof(size_t),&checkPointResolution,sizeof(size_t));
        std::memcpy(result.data()+(2*sizeof(size_t)),checkPoint.data(),sz*sizeof(size_t));
        return result;
    }

    inline
    void deserializeReferenceMapDirect(const std::vector<unsigned char>& data, size_t& ofs)
    {
        std::memcpy(&referenceMapDirect[0],data.data()+ofs,256*sizeof(size_t));
        ofs+=256*sizeof(size_t);
    }

    inline
    void deserializeEncodeDirectSize(const std::vector<unsigned char>& data, size_t& ofs)
    {
        std::memcpy(&encodeDirectSize[0],data.data()+ofs,256*sizeof(int));
        ofs+=256*sizeof(int);
    }

    inline
    void deserializeEncodeDirect(const std::vector<unsigned char>& data, size_t& ofs)
    {
        for(int i=0;i<256;i++)
        {
            unsigned char sz = data[ofs];
            ofs++;
            encodeDirect[i].resize(sz);
            if(sz>0)
            {
                std::memcpy(encodeDirect[i].data(),data.data()+ofs,sz);
                ofs+=sz;
            }
        }
    }

    inline
    void deserializeRoot(const std::vector<unsigned char>& data, size_t& ofs)
    {
        std::memcpy(&root,data.data()+ofs,sizeof(Node));
        ofs+=sizeof(Node);
    }

    inline
    void deserializeReferenceVec(const std::vector<unsigned char>& data, size_t& ofs)
    {
        size_t sz;
        std::memcpy(&sz,data.data()+ofs,sizeof(size_t));
        referenceVec.resize(sz);
        std::memcpy(&referenceVec[0],data.data()+ofs+sizeof(size_t),sz*sizeof(Node));
        ofs+=(sizeof(size_t) + (sz*sizeof(Node)));
    }

    inline
    void deserializeCheckpoint(const std::vector<unsigned char>& data, size_t& ofs)
    {
        size_t sz;
        std::memcpy(&sz,data.data()+ofs,sizeof(size_t));
        ofs+=sizeof(size_t);
        checkPoint.resize(sz);
        std::memcpy(&checkPointResolution,data.data()+ofs,sizeof(size_t));
        ofs+=sizeof(size_t);
        // checkpoint data immediately follows the two header words
        std::memcpy(checkPoint.data(),data.data()+ofs,sz*sizeof(size_t));
        ofs+=(sz*sizeof(size_t));
    }

    inline void clearRef(){ referenceVec.clear(); }
    ~HuffmanTree(){}
private:
    size_t referenceMapDirect[256];
    int encodeDirectSize[256];
    std::vector<unsigned char> encodeDirect[256];
    Node root;
    std::vector<Node> referenceVec;

    // check-point data for multiple-entrance support into the compressed string
    // resolution=256 means every 256th character's bit offset is retained in a vector
    std::vector<size_t> checkPoint;
    size_t checkPointResolution;
};
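
// Usage sketch (illustrative, not part of the library): build a tree from a
// string, encode it, and decode it back.
inline bool huffmanTreeRoundTripExample()
{
    HuffmanTree tree;
    std::string text = "abracadabra abracadabra";
    tree.add(text);            // build the character histogram
    tree.generateTree();       // derive per-character bit codes
    size_t bitLength = 0;
    std::vector<unsigned char> packed = tree.produce(text, bitLength);
    return tree.consume(packed, bitLength) == text; // round trip
}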

/*
 * DirectMappedCache.h
 *
 *  Created on: Oct 8, 2021
 *      Author: tugrul
 */

#ifndef DIRECTMAPPEDMULTITHREADCACHE_H_
#define DIRECTMAPPEDMULTITHREADCACHE_H_

/* Direct-mapped cache implementation with granular locking (per-tag).
 * Only usable for integer-type keys in range [0,maxPositive-1] (if the key is int then "-1" is not usable; if the key is uint16_t then "65535" is not usable).
 * Since locking protects only items/keys, the user should also make the cache-miss functions thread-safe (i.e. by adding a lock-guard),
 * unless the backing store is already thread-safe (or already has multi-thread support).
 * Intended to be used as an LLC (last-level cache) for CacheThreader instances
 * to optimize contention away in multithreaded read-only scenarios.
 * Can be used alone as a read+write multi-threaded cache via the getThreadSafe/setThreadSafe methods, but the cache-hit ratio will not be good.
 * CacheKey: type of key (only integers: int, char, size_t)
 * CacheValue: type of value that is bound to the key (same as above)
 * InternalKeyTypeInteger: type of tag found after the modulo operation (important for the maximum cache size: unsigned char = 255, unsigned int = 1024*1024*1024*4)
 */
template<typename CacheKey, typename CacheValue, typename InternalKeyTypeInteger=size_t>
class DirectMappedMultiThreadCache
{
public:
    // allocates buffers for numElements number of cache slots/lanes
    // readMiss:  cache-miss for read operations. The user provides this function
    //            to let the cache automatically get data from the backing store.
    //            example: [&](MyClass key){ return redis.get(key); }
    //            takes a CacheKey as key, returns CacheValue as value
    // writeMiss: cache-miss for write operations. The user provides this function
    //            to let the cache automatically set data on the backing store.
    //            example: [&](MyClass key, MyAnotherClass value){ redis.set(key,value); }
    //            takes a CacheKey as key and a CacheValue as value
    // numElements: has to be an integer power of 2 (e.g. 2,4,8,16,...)
    // prepareForMultithreading: by default (true) it allocates an array of structs, each with its own mutex, to evade false sharing during getThreadSafe/setThreadSafe calls.
    //            With a given "false" value it does not allocate the mutex array, and getThreadSafe/setThreadSafe become undefined behavior under multithreaded use.
    //            true: allocates at least an extra 64 bytes per cache tag
    DirectMappedMultiThreadCache(CacheKey numElements,
            const std::function<CacheValue(CacheKey)> & readMiss,
            const std::function<void(CacheKey,CacheValue)> & writeMiss,
            const bool prepareForMultithreading = true):size(numElements),sizeM1(numElements-1),loadData(readMiss),saveData(writeMiss)
    {
        if(prepareForMultithreading)
            mut = std::vector<MutexWithoutFalseSharing>(numElements);
        // initialize buffers
        for(size_t i=0;i<numElements;i++)
        {
            valueBuffer.push_back(CacheValue());
            isEditedBuffer.push_back(0);
            keyBuffer.push_back(CacheKey()-1);
        }
    }

    // get element from cache
    // if the cache doesn't find it in its buffers,
    // it gets the data from the backing store,
    // returns the result to the user,
    // and serves it from RAM on the next get/set access with the same key
    inline
    const CacheValue get(const CacheKey & key) noexcept
    {
        return accessDirect(key,nullptr);
    }

    // only a syntactic difference
    inline
    const std::vector<CacheValue> getMultiple(const std::vector<CacheKey> & key) noexcept
    {
        const int n = key.size();
        std::vector<CacheValue> result(n);

        for(int i=0;i<n;i++)
        {
            result[i]=accessDirect(key[i],nullptr);
        }
        return result;
    }

    // thread-safe but slower version of get()
    inline
    const CacheValue getThreadSafe(const CacheKey & key) noexcept
    {
        return accessDirectLocked(key,nullptr);
    }

    // set element in cache
    // if the cache doesn't find it in its buffers,
    // it sets the data only in the cache;
    // writing to the backing store happens only when
    // another access evicts the cache slot containing this key/value,
    // or when the cache is flushed by the flush() method;
    // afterwards the item is served from RAM on the next get/set access with the same key
    inline
    void set(const CacheKey & key, const CacheValue & val) noexcept
    {
        accessDirect(key,&val,1);
    }

    // thread-safe but slower version of set()
    inline
    void setThreadSafe(const CacheKey & key, const CacheValue & val) noexcept
    {
        accessDirectLocked(key,&val,1);
    }

    // use this before closing the backing store to persist the latest bits of data
    void flush()
    {
        try
        {
            if(mut.size()>0)
            {
                for (size_t i=0;i<size;i++)
                {
                    std::lock_guard<std::mutex> lg(mut[i].mut);
                    if (isEditedBuffer[i] == 1)
                    {
                        isEditedBuffer[i]=0;
                        auto oldKey = keyBuffer[i];
                        auto oldValue = valueBuffer[i];
                        saveData(oldKey,oldValue);
                    }
                }
            }
            else
            {
                for (size_t i=0;i<size;i++)
                {
                    if (isEditedBuffer[i] == 1)
                    {
                        isEditedBuffer[i]=0;
                        auto oldKey = keyBuffer[i];
                        auto oldValue = valueBuffer[i];
                        saveData(oldKey,oldValue);
                    }
                }
            }
        }catch(std::exception &ex){ std::cout<<ex.what()<<std::endl; }
    }

    // direct-mapped cache element access, locked
    // opType=0: get
    // opType=1: set
    CacheValue const accessDirectLocked(const CacheKey & key,const CacheValue * value, const bool opType = 0)
    {
        // find tag mapped to the key
        CacheKey tag = key & sizeM1;
        std::lock_guard<std::mutex> lg(mut[tag].mut); // N parallel locks in-flight = less contention in multi-threading

        // compare keys
        if(keyBuffer[tag] == key)
        {
            // cache-hit

            // "set"
            if(opType == 1)
            {
                isEditedBuffer[tag]=1;
                valueBuffer[tag]=*value;
            }

            // cache-hit value
            return valueBuffer[tag];
        }
        else // cache-miss
        {
            CacheValue oldValue = valueBuffer[tag];
            CacheKey oldKey = keyBuffer[tag];

            // eviction algorithm start
            if(isEditedBuffer[tag] == 1)
            {
                // if it is "get"
                if(opType==0)
                {
                    isEditedBuffer[tag]=0;
                }

                saveData(oldKey,oldValue);

                // "get"
                if(opType==0)
                {
                    const CacheValue && loadedData = loadData(key);
                    valueBuffer[tag]=loadedData;
                    keyBuffer[tag]=key;
                    return loadedData;
                }
                else /* "set" */
                {
                    valueBuffer[tag]=*value;
                    keyBuffer[tag]=key;
                    return *value;
                }
            }
            else // not edited
            {
                // "set"
                if(opType == 1)
                {
                    isEditedBuffer[tag]=1;
                }

                // "get"
                if(opType == 0)
                {
                    const CacheValue && loadedData = loadData(key);
                    valueBuffer[tag]=loadedData;
                    keyBuffer[tag]=key;
                    return loadedData;
                }
                else // "set"
                {
                    valueBuffer[tag]=*value;
                    keyBuffer[tag]=key;
                    return *value;
                }
            }
        }
    }

    // direct-mapped cache element access (no locking)
    // opType=0: get
    // opType=1: set
    CacheValue const accessDirect(const CacheKey & key,const CacheValue * value, const bool opType = 0)
    {
        // find tag mapped to the key
        CacheKey tag = key & sizeM1;

        // compare keys
        if(keyBuffer[tag] == key)
        {
            // cache-hit

            // "set"
            if(opType == 1)
            {
                isEditedBuffer[tag]=1;
                valueBuffer[tag]=*value;
            }

            // cache-hit value
            return valueBuffer[tag];
        }
        else // cache-miss
        {
            CacheValue oldValue = valueBuffer[tag];
            CacheKey oldKey = keyBuffer[tag];

            // eviction algorithm start
            if(isEditedBuffer[tag] == 1)
            {
                // if it is "get"
                if(opType==0)
                {
                    isEditedBuffer[tag]=0;
                }

                saveData(oldKey,oldValue);

                // "get"
                if(opType==0)
                {
                    const CacheValue && loadedData = loadData(key);
                    valueBuffer[tag]=loadedData;
                    keyBuffer[tag]=key;
                    return loadedData;
                }
                else /* "set" */
                {
                    valueBuffer[tag]=*value;
                    keyBuffer[tag]=key;
                    return *value;
                }
            }
            else // not edited
            {
                // "set"
                if(opType == 1)
                {
                    isEditedBuffer[tag]=1;
                }

                // "get"
                if(opType == 0)
                {
                    const CacheValue && loadedData = loadData(key);
                    valueBuffer[tag]=loadedData;
                    keyBuffer[tag]=key;
                    return loadedData;
                }
                else // "set"
                {
                    valueBuffer[tag]=*value;
                    keyBuffer[tag]=key;
                    return *value;
                }
            }
        }
    }

private:
    struct MutexWithoutFalseSharing
    {
        std::mutex mut;
        char padding[64-sizeof(std::mutex) <= 0 ? 4:64-sizeof(std::mutex)]; // pad each mutex to its own cache line
    };
    const CacheKey size;
    const CacheKey sizeM1;
    std::vector<MutexWithoutFalseSharing> mut;

    std::vector<CacheValue> valueBuffer;
    std::vector<unsigned char> isEditedBuffer;
    std::vector<CacheKey> keyBuffer;
    const std::function<CacheValue(CacheKey)> loadData;
    const std::function<void(CacheKey,CacheValue)> saveData;
};

#endif /* DIRECTMAPPEDMULTITHREADCACHE_H_ */
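
// Usage sketch (illustrative, not part of the library): a tiny cache in front
// of a std::map backing store. "backingStore" and "demoCache" are example
// names, not library API.
inline size_t directMappedCacheExample()
{
    std::map<size_t, size_t> backingStore;
    backingStore[5] = 42;
    DirectMappedMultiThreadCache<size_t, size_t> demoCache(
        16,                                                           // 16 slots (power of 2)
        [&](size_t key) { return backingStore[key]; },                // read-miss: fetch from store
        [&](size_t key, size_t value) { backingStore[key] = value; }  // write-miss: persist on eviction
    );
    demoCache.set(5, 100);  // cached write; the store is updated on eviction or flush
    demoCache.flush();      // push edited slots to the backing store
    return backingStore[5]; // 100
}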

// data side of HuffmanString: the tree plus the packed bit stream
class HuffmanFields
{
public:
    HuffmanTree tree;
    unsigned char useMultiThread = 0;
    size_t bitLength = 0;
    std::vector<unsigned char> bits; // compacted bits

    inline
    const std::vector<unsigned char> serializeBitLength()
    {
        std::vector<unsigned char> result(sizeof(size_t));
        std::memcpy(result.data(),&bitLength,sizeof(size_t));
        return result;
    }

    inline
    const std::vector<unsigned char> serializeBits()
    {
        std::vector<unsigned char> result(bits.size()+sizeof(size_t));
        size_t sz=bits.size();

        std::memcpy(result.data(),&sz,sizeof(size_t));
        std::memcpy(result.data()+sizeof(size_t),bits.data(),bits.size());
        return result;
    }

    inline
    void deserializeBitLength(const std::vector<unsigned char>& data, size_t& ofs)
    {
        std::memcpy(&bitLength,data.data()+ofs,sizeof(size_t));
        ofs+=sizeof(size_t);
    }

    /*
     * input is packed binary
     */
    inline
    void deserializeBits(const std::vector<unsigned char>& data, size_t& ofs)
    {
        size_t sz;
        std::memcpy(&sz,data.data()+ofs,sizeof(size_t));

        bits.resize(sz);
        std::memcpy(bits.data(),data.data()+ofs+sizeof(size_t),sz);
        ofs+=(sizeof(size_t)+sz);
    }

    /*
     * output is packed binary
     */
    inline
    const std::vector<unsigned char> serialize()
    {
        std::vector<unsigned char> result;
        auto v1 = tree.serializeEncodeDirect();
        auto v2 = tree.serializeEncodeDirectSize();
        auto v3 = tree.serializeReferenceMapDirect();
        auto v4 = tree.serializeRoot();
        auto v5 = tree.serializeReferenceVec();
        auto v6 = tree.serializeCheckpoint();
        auto v7 = serializeBitLength();
        auto v8 = serializeBits();
        std::copy(v1.begin(),v1.end(),std::back_inserter(result));
        std::copy(v2.begin(),v2.end(),std::back_inserter(result));
        std::copy(v3.begin(),v3.end(),std::back_inserter(result));
        std::copy(v4.begin(),v4.end(),std::back_inserter(result));
        std::copy(v5.begin(),v5.end(),std::back_inserter(result));
        std::copy(v6.begin(),v6.end(),std::back_inserter(result));
        std::copy(v7.begin(),v7.end(),std::back_inserter(result));
        std::copy(v8.begin(),v8.end(),std::back_inserter(result));
        return result;
    }

    inline
    void deserialize(const std::vector<unsigned char>& data)
    {
        size_t ofs=0;

        tree.deserializeEncodeDirect(data,ofs);
        tree.deserializeEncodeDirectSize(data,ofs);
        tree.deserializeReferenceMapDirect(data,ofs);
        tree.deserializeRoot(data,ofs);
        tree.deserializeReferenceVec(data,ofs);
        tree.deserializeCheckpoint(data,ofs);
        deserializeBitLength(data,ofs);
        deserializeBits(data,ofs);
    }
};

/*
 * Huffman-encoded string wrapper for fast + compressed access to text
 * subString() calls are cached (LRU approximation + direct mapped)
 * decoding has parallelism support (including subString())
 * encoding is single-threaded
 */
class HuffmanString
{
public:

    /*
     * str: initializer string data
     * useMultiThreading: optional decoding speedup by multiple threads for long strings
     * cacheSizePrm: direct-mapped cache size, for increasing redundant indexed-access performance
     * checkPointResolutionPrm: resolution of entrance points within the compressed bits, to optimize subString decoding performance (and multi-threaded decoding performance)
     * checkPointResolutionPrm=256 means there is an entrance for every 256th character in the string
     * (which also means a worst case of O(256) complexity to extract a character)
     */
    HuffmanString( const std::string & str=std::string(""),
            const bool useMultiThreading=false,
            const size_t cacheSizePrm=0,
            const size_t checkPointResolutionPrm=256):fields(std::make_shared<HuffmanFields>())
    {
        cacheSize = std::make_shared<size_t>(cacheSizePrm);
        // round the checkpoint resolution up to a power of 2
        size_t pickSizeCheck = 1;
        while(pickSizeCheck < checkPointResolutionPrm)
        {
            pickSizeCheck *= 2;
        }
        checkPointResolution = std::make_shared<size_t>(pickSizeCheck);
        if(*cacheSize>0) // compare the value, not the shared_ptr
        {
            size_t pickSize = 1;
            while(pickSize < *cacheSize)
            {
                pickSize *= 2;
            }
            *cacheSize = pickSize;
            cacheL1 = std::make_shared<DirectMappedMultiThreadCache<size_t,unsigned char>>(
                    pickSize,
                    [&,this](size_t index)
                    {
                        return this->fields->tree.consume(this->fields->bits,this->fields->bitLength,index,1)[0];
                    },
                    [&](size_t index, unsigned char val){ } /* string is immutable, so no cache-write */
            );
        }
        else
        {
            *cacheSize=0;
        }

        if(str.size()>0)
        {
            fields->useMultiThread = useMultiThreading;
            fields->tree.add(str);
            fields->tree.generateTree();
            size_t nano;
            {
                Bench bench(&nano);
                fields->bits = fields->tree.produce(str,fields->bitLength, *checkPointResolution);
            }
            //std::cout<<"produce:"<<nano<<"ns"<<std::endl;
            //fields->tree.clearRef();
        }
        else
        {
            fields->bitLength=0;
        }
    }

    HuffmanString operator +=(const std::string & str)
    {
        HuffmanString result(this->string() + str);
        *this=result;
        return *this;
    }

    HuffmanString operator +=(HuffmanString & str)
    {
        HuffmanString result(this->string() + str.string());
        *this=result;
        return *this;
    }

    // get full string without caching
    const std::string string()
    {
        return fields->tree.consume(fields->bits,fields->bitLength);
    }

    // get cached/uncached substring (the constructor must be given cacheSize>0 for cached reads to work)
    const std::string subString(size_t offset, size_t length, const bool cachedRead = false)
    {
        std::string result;

        result.reserve(length);
        const size_t last = offset+length;
        if(cachedRead && cacheL1) // fall back to a direct decode when no cache was allocated
        {
            for(size_t i=offset;i<last;i++)
            {
                result += cacheL1->get(i);
            }
        }
        else
        {
            result += fields->tree.consume(fields->bits,fields->bitLength,offset,length);
        }
        return result;
    }

    // get char
    const unsigned char operator[](size_t index)
    {
        if(*cacheSize>0)
            return cacheL1->get(index);
        else
            return fields->tree.consume(fields->bits,fields->bitLength,index,1)[0];
    }

    // find position of character (not implemented yet; always returns size_t max)
    const size_t find(const unsigned char character, const bool cachedSearch=false)
    {
        size_t result = -1;
        return result;
    }

    /*
     * Serialize internal data into a byte vector, to be saved to a file and loaded by deserializeFrom() later
     */
    const std::vector<unsigned char> serialize()
    {
        return fields->serialize();
    }

    /*
     * To rebuild a HuffmanString instance from serialized data
     */
    void deserializeFrom(const std::vector<unsigned char>& data)
    {
        fields->deserialize(data);
    }

    size_t compressedBytes()
    {
        return fields->bits.size();
    }

    ~HuffmanString(){}
private:
    std::shared_ptr<HuffmanFields> fields;
    std::shared_ptr<DirectMappedMultiThreadCache<size_t,unsigned char>> cacheL1;
    std::shared_ptr<size_t> cacheSize;
    std::shared_ptr<size_t> checkPointResolution;
};
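
// Usage sketch (illustrative, not part of the library): compress a string,
// read a substring, and round-trip through serialize()/deserializeFrom().
inline bool huffmanStringExample()
{
    std::string text = "compressed strings can still be indexed";
    HuffmanString hs(text);
    bool ok = (hs.string() == text);             // full decode
    ok = ok && (hs.subString(11, 7) == "strings"); // partial decode via checkpoints
    HuffmanString copy;
    copy.deserializeFrom(hs.serialize());        // rebuild from the byte vector
    return ok && (copy.string() == text);
}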

template<typename PrefixType=size_t>
class PredictorFields
{
public:

    // decodes the whole stream; offset/length/cachedRead are currently unused
    // (the full string is rebuilt and trimmed to the original size)
    const std::string decompress(size_t offset=0,size_t length=0,bool cachedRead=false)
    {
        std::string result;
        const int n = 256;
        unsigned char dict[(n*n)/8]; // todo: 8bit->1byte compression
        int predict[n];
        std::memset(&dict[0],0,(n*n)/8);
        std::memset(&predict[0],-1,n*sizeof(int));

        const size_t sz = prefix.size();
        constexpr int szTileIn = 64;

        if(hstr)
        {
            // the literal bytes were stored Huffman-encoded; decode them first
            const std::string& hs = hstr->string();
            compressed.resize(hs.size());
            std::copy(hs.begin(),hs.end(),compressed.begin());
        }

        alignas(64)
        unsigned char in[szTileIn];

        unsigned char last = 0;

        int inCtr = 0;
        size_t ctr=0;
        for(size_t i=0;i<sz;i++)
        {
            PrefixType mask = prefix[i];

            int numWork = sizeof(PrefixType)*8 - 2; /* 8 bits x 8 bytes, saving 2 bits for control logic */

            // lsb==0: the whole mask was a pure run (all predictions or all stream)
            if((mask&1) == 0)
            {
                numWork = mask >> 2;

                // if all prediction happened
                if(((mask>>1)&1) == 1)
                {
                    for(int j=0;j<numWork;j++)
                    {
                        unsigned char cur = predict[last];
                        in[inCtr++]=cur;
                        if(inCtr == szTileIn)
                        {
                            result.append(in,in+szTileIn);
                            inCtr=0;
                        }
                        last = cur;
                    }
                }
                else // all stream happened
                {
                    for(int j=0;j<numWork;j++)
                    {
                        unsigned char cur = compressed[ctr++];
                        in[inCtr++]=cur;
                        if(inCtr == szTileIn)
                        {
                            result.append(in,in+szTileIn);
                            inCtr=0;
                        }
                        if(predict[last]!=-1)
                        {
                            storeBit(dict[(last*(unsigned int)256+predict[last])>>3],0,(last*(int)256+predict[last])&7);
                        }

                        predict[last]=cur;
                        last = cur;
                    }
                }
            }
            else // mixed mask: one bit per character (1=predicted, 0=stream)
            {
                mask = mask>>2;
                for(int j=0;j<numWork;j++)
                {
                    // if predicted
                    if(loadBit(mask,j))
                    {
                        unsigned char cur = predict[last];
                        in[inCtr++]=cur;
                        if(inCtr == szTileIn)
                        {
                            result.append(in,in+szTileIn);
                            inCtr=0;
                        }
                        last = cur;
                    }
                    else
                    {
                        // guard: a mixed tail mask may carry padding bits past the last literal
                        unsigned char cur = (ctr < compressed.size()) ? compressed[ctr++] : 0;
                        in[inCtr++]=cur;
                        if(inCtr == szTileIn)
                        {
                            result.append(in,in+szTileIn);
                            inCtr=0;
                        }
                        if(predict[last]!=-1)
                        {
                            storeBit(dict[(last*(unsigned int)256+predict[last])>>3],0,(last*(int)256+predict[last])&7);
                        }

                        predict[last]=cur;
                        last = cur;
                    }
                }
            }
        }

        if(inCtr>0)
        {
            result.append(in,in+inCtr); // append only the filled part of the tile
        }

        if(hstr)
        {
            compressed = std::vector<unsigned char>();
        }

        return result.substr(0,szStr);
    }

    // compressed character stream (literal bytes); if Huffman optimization is on,
    // returns the serialized HuffmanString instead
    const std::string bits()
    {
        if(hstr)
        {
            std::vector<unsigned char> ser = hstr->serialize();
            return std::string(ser.begin(),ser.end());
        }
        else
        {
            return std::string((const char *)compressed.data(),compressed.size());
        }
    }

    // prefix (mask) stream for decompression
    const std::string bits2()
    {
        return std::string((const char *)prefix.data(),prefix.size());
    }

    void compress(const std::string & str=std::string(""))
    {
        const int n = 256;

        unsigned char dict[(n*n)/8]; // todo: 8bit->1byte compression
        int predict[n];
        std::memset(&dict[0],0,(n*n)/8);
        std::memset(&predict[0],-1,n*sizeof(int));

        const size_t sz = str.size();
        szStr = sz;
        unsigned char last = 0;

        PrefixType mask = 0;
        int pos = 0;

        constexpr int szTileTmp = 64;
        constexpr int szTileOut = 64;

        alignas(64)
        unsigned char tmp[szTileTmp];

        alignas(64)
        unsigned char out[szTileOut];

        int outCtr = 0;

        bool hasPrediction = false;
        bool hasStream = false;
        PrefixType countStream = 0;
        PrefixType countPrediction = 0;

        for(size_t i=0;i<sz;i+=szTileTmp)
        {
            for(int j=0;j<szTileTmp;j++)
            {
                tmp[j] = (i+j<sz) ? str[i+j] : 0; // zero-pad the last tile instead of reading past the end
            }

            for(int j=0;j<szTileTmp;j++)
            {
                const unsigned char cur = tmp[j];
                // if predicted, write 1 to bit
                if(loadBit(dict[(last*(unsigned int)256+cur)>>3],(last*(unsigned int)256+cur)&7))
                {
                    storeBit(mask,1,pos++);
                    hasPrediction=true;
                    countPrediction++;
                }
                else
                {
                    storeBit(mask,0,pos++);
                    hasStream=true;
                    countStream++;
                    out[outCtr++]=cur;
                    if(outCtr==szTileOut)
                    {
                        compressed.insert(compressed.end(),out,out+szTileOut);
                        outCtr=0;
                    }

                    if(predict[last]!=-1)
                    {
                        storeBit(dict[(last*(unsigned int)256+predict[last])>>3],0,(last*(unsigned int)256+predict[last])&7);
                    }

                    predict[last]=cur;
                    storeBit(dict[(last*(unsigned int)256+cur)>>3],1,(last*(unsigned int)256+cur)&7);
                }

                last = cur;

                if(pos==(sizeof(PrefixType)*8 - 2)) // -2: saving 2 bits for control logic
                {
                    if(hasPrediction && !hasStream)
                    {
                        // only prediction happened, save only the number of predictions
                        mask=(countPrediction<<2)|2; // lsb+1 = 1 means all predictions
                    }
                    else if(!hasPrediction && hasStream)
                    {
                        // only stream happened, save only the number of stream bytes
                        mask=(countStream<<2); // lsb+1 = 0 means all stream
                    }
                    else
                    {
                        // no possibility to have nothing:
                        // both prediction & stream happened, save the bitmap normally
                        mask = (mask<<2)|1; // lsb=1 means mixed work
                    }

                    prefix.push_back(mask);
                    pos=0;
                    // reset the per-mask state so the next mask is classified on its own
                    hasPrediction=false;
                    hasStream=false;
                    countStream=0;
                    countPrediction=0;
                }
            }
        }

        if(outCtr>0)
        {
            compressed.insert(compressed.end(),out,out+outCtr); // flush the remaining literal bytes
        }

        if(pos>0)
        {
            if(hasPrediction && !hasStream)
            {
                mask=(countPrediction<<2)|2; // all predictions
            }
            else if(!hasPrediction && hasStream)
            {
                mask=(countStream<<2); // all stream
            }
            else
            {
                mask = (mask<<2)|1; // mixed work
            }

            prefix.push_back(mask);
            pos=0;
        }
        prefix.shrink_to_fit();
        compressed.shrink_to_fit();

        // if optimized with Huffman encoding, re-compress the literal bytes
        if(hstr)
        {
            std::string hs;
            hs.resize(compressed.size());
            std::copy(compressed.begin(),compressed.end(),hs.begin());
            *hstr = HuffmanString(hs);

            // release unused internal data
            compressed = std::vector<unsigned char>();
        }
    }
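
    // Worked example of the prefix-mask format above (illustrative): with
    // PrefixType=size_t there are 62 payload bits plus 2 control bits per mask.
    //   pure prediction run of 62 chars -> (62<<2)|2  (bit 1 set: all predictions)
    //   pure stream run of 62 chars     -> (62<<2)|0  (bit 1 clear: all literals)
    //   mixed run                       -> (bitmap<<2)|1 (bit 0 set: one map bit per char)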
    std::shared_ptr<HuffmanString> hstr;
private:
    std::vector<PrefixType> prefix;        // mask stream
    std::vector<unsigned char> compressed; // literal (non-predicted) bytes
    size_t szStr;                          // original string length
};

template<typename PrefixType=size_t>
class PredictorString
{
public:

    enum {
        // only dictionary-based "prediction" of char-to-char transitions to compress data
        OPTIMIZE_NONE,

        // the compressed string's character-literal buffer is further compressed by Huffman encoding
        OPTIMIZE_WITH_HUFFMAN_ENCODING
    };

    /*
     * str: string to be saved compressed in internal data
     * cacheSize: size of the direct-mapped cache for accelerating indexed [] access of characters
     * optimization_level: OPTIMIZE_NONE or OPTIMIZE_WITH_HUFFMAN_ENCODING
     */
    PredictorString(const std::string & str=std::string(""),
            const size_t cacheSize=256,
            const int optimization_level=OPTIMIZE_NONE):fields(std::make_shared<PredictorFields<PrefixType>>())
    {
        if(optimization_level==OPTIMIZE_NONE)
        {
            fields->hstr = nullptr;
        }
        else if(optimization_level==OPTIMIZE_WITH_HUFFMAN_ENCODING)
        {
            fields->hstr = std::make_shared<HuffmanString>();
        }

        fields->compress(str);
        if(cacheSize>0)
        {
            size_t pickSize = 1;
            while(pickSize < cacheSize)
            {
                pickSize *= 2;
            }

            cacheL1 = std::make_shared<DirectMappedMultiThreadCache<size_t,unsigned char>>(
                    pickSize,
                    [&,this](size_t index)
                    {
                        return this->string()[index];
                    },
                    [&](size_t index, unsigned char val){ } /* string is immutable, so no cache-write */
            );
        }
        else
        {
            cacheL1=nullptr;
        }
    }

    // gets the compressed character stream without prefixes
    const std::string bits()
    {
        return fields->bits();
    }

    // gets the prefix stream for decompression
    const std::string bits2()
    {
        return fields->bits2();
    }

    const std::string string()
    {
        return fields->decompress();
    }

    // do not use this to add small strings; it is inefficient
    PredictorString operator +=(const std::string & str)
    {
        PredictorString result(this->string() + str);
        *this=result;
        return *this;
    }

    // do not use this to add small strings; it is inefficient
    PredictorString operator +=(PredictorString & str)
    {
        PredictorString result(this->string() + str.string());
        *this=result;
        return *this;
    }

    // todo: implement cached load & multithreading
    const std::string subString(size_t offset, size_t length, const bool cachedRead = true)
    {
        std::string result=this->fields->decompress(offset,length,cachedRead).substr(offset,length);
        return result;
    }

    // todo: implement multithreading
    const unsigned char operator[](size_t index)
    {
        if(cacheL1)
            return cacheL1->get(index);
        else
            return this->fields->decompress(index,1,true)[index];
    }

    // todo: implement multithreaded find
    // returns size_t max if not found
    const size_t find(const unsigned char character, const bool cachedSearch=false)
    {
        const std::string v = this->fields->decompress(0,0,true);
        auto test = v.find(character);
        if(test == v.npos)
            return (size_t)-1;
        else
            return test;
    }

private:
    std::shared_ptr<PredictorFields<PrefixType>> fields;
    std::shared_ptr<DirectMappedMultiThreadCache<size_t,unsigned char>> cacheL1;
};
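
// Usage sketch (illustrative, not part of the library): round-trip a string
// with PredictorString and read one character through the direct-mapped cache.
inline bool predictorStringExample()
{
    std::string text(4096, 'a'); // highly predictable input
    PredictorString<size_t> ps(text, 256, PredictorString<size_t>::OPTIMIZE_NONE);
    return ps.string() == text && ps[10] == 'a';
}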
} // namespace CompressedStringLib

// work in progress: collects digram (2-character substring) statistics;
// the actual run-length encoding is not implemented yet
class RunLengthEncoderString
{
public:
    RunLengthEncoderString(std::string str=std::string(""))
    {
        const size_t sz = str.size();

        for(size_t i=0;i+2<sz;i++) // i+2<sz avoids size_t underflow for strings shorter than 2 characters
        {
            if(sub.find(str.substr(i,2)) == sub.end())
            {
                sub[str.substr(i,2)] = 0;
            }
            else
            {
                sub[str.substr(i,2)]++;
            }
        }
    }

    std::vector<unsigned char> string()
    {
        std::map<std::string,size_t> tmp;
        size_t ct=0;
        for(auto & it:sub)
        {
            if(it.second>=2)
            {
                tmp[it.first]=it.second;
                std::cout<<it.first<<":"<<it.second<<std::endl;
                ct++;
            }
        }
        std::cout<<"="<<ct<<std::endl;
        return rle; // currently always empty
    }
private:
    std::vector<unsigned char> rle;
    std::map<std::string,size_t> sub;
};

#endif /* COMPRESSSTRINGLIB_H_ */

int main()
{
    // 1 MiB of repeated 'a' (20 doublings of "a")
    std::string s="a";
    for(int i=0;i<20;i++)
        s += s;

    size_t nano,nano2;
    CompressedStringLib::PredictorString<size_t> str;
    for(int i=0;i<100;i++)
    {
        {
            CompressedStringLib::Bench bench(&nano);
            str=CompressedStringLib::PredictorString<size_t>(s);
        }
        {
            CompressedStringLib::Bench bench2(&nano2);
            const auto & v = str.string();
        }
        // bytes per nanosecond == GB/s
        std::cout<<s.size()/(double)nano<<"GB/s"<<std::endl;
        std::cout<<s.size()/(double)nano2<<"GB/s"<<std::endl;
        std::cout<<"--------"<<std::endl;
    }

    using namespace std;
    char CPUBrandString[0x40];
    unsigned int CPUInfo[4] = {0,0,0,0};

    // query the CPU brand string via GCC's __cpuid macro from <cpuid.h>
    __cpuid(0x80000000, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
    unsigned int nExIds = CPUInfo[0];

    memset(CPUBrandString, 0, sizeof(CPUBrandString));

    for (unsigned int i = 0x80000000; i <= nExIds; ++i)
    {
        __cpuid(i, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);

        if (i == 0x80000002)
            memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
        else if (i == 0x80000003)
            memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
        else if (i == 0x80000004)
            memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
    }

    cout << "CPU Type: " << CPUBrandString << endl;
    return 0;
}

stdout
0.276594GB/s
0.234391GB/s
--------
0.283145GB/s
0.260878GB/s
--------
0.271857GB/s
0.246849GB/s
--------
0.279702GB/s
0.240445GB/s
--------
0.19671GB/s
0.24637GB/s
--------
0.277969GB/s
0.262129GB/s
--------
0.276237GB/s
0.252144GB/s
--------
0.280172GB/s
0.26371GB/s
--------
0.277675GB/s
0.254182GB/s
--------
0.28039GB/s
0.246155GB/s
--------
0.268787GB/s
0.239719GB/s
--------
0.274194GB/s
0.246859GB/s
--------
0.270674GB/s
0.240893GB/s
--------
0.279003GB/s
0.260461GB/s
--------
0.27614GB/s
0.252047GB/s
--------
0.280074GB/s
0.263157GB/s
--------
0.273042GB/s
0.249836GB/s
--------
0.280634GB/s
0.264501GB/s
--------
0.2786GB/s
0.254132GB/s
--------
0.281703GB/s
0.263807GB/s
--------
0.278527GB/s
0.254824GB/s
--------
0.281451GB/s
0.254877GB/s
--------
0.272134GB/s
0.242194GB/s
--------
0.277505GB/s
0.2586GB/s
--------
0.270515GB/s
0.239984GB/s
--------
0.276923GB/s
0.247921GB/s
--------
0.272256GB/s
0.246289GB/s
--------
0.280953GB/s
0.261078GB/s
--------
0.278502GB/s
0.251862GB/s
--------
0.278823GB/s
0.263534GB/s
--------
0.274721GB/s
0.239079GB/s
--------
0.276661GB/s
0.246837GB/s
--------
0.272762GB/s
0.246839GB/s
--------
0.282059GB/s
0.261797GB/s
--------
0.278529GB/s
0.252691GB/s
--------
0.278163GB/s
0.264774GB/s
--------
0.277244GB/s
0.255282GB/s
--------
0.283138GB/s
0.266435GB/s
--------
0.278708GB/s
0.254342GB/s
--------
0.282434GB/s
0.265536GB/s
--------
0.27805GB/s
0.256324GB/s
--------
0.281045GB/s
0.265061GB/s
--------
0.279441GB/s
0.256366GB/s
--------
0.281888GB/s
0.266915GB/s
--------
0.251761GB/s
0.245098GB/s
--------
0.278635GB/s
0.248707GB/s
--------
0.269387GB/s
0.240102GB/s
--------
0.232649GB/s
0.202346GB/s
--------
0.273945GB/s
0.249294GB/s
--------
0.279617GB/s
0.262778GB/s
--------
0.276097GB/s
0.251259GB/s
--------
0.277888GB/s
0.250375GB/s
--------
0.272063GB/s
0.243944GB/s
--------
0.278983GB/s
0.252076GB/s
--------
0.268488GB/s
0.247889GB/s
--------
0.280789GB/s
0.262028GB/s
--------
0.278377GB/s
0.180743GB/s
--------
0.216814GB/s
0.254783GB/s
--------
0.274058GB/s
0.249415GB/s
--------
0.280116GB/s
0.26292GB/s
--------
0.271896GB/s
0.252075GB/s
--------
0.280649GB/s
0.264949GB/s
--------
0.278819GB/s
0.253694GB/s
--------
0.28258GB/s
0.265414GB/s
--------
0.278612GB/s
0.254864GB/s
--------
0.283037GB/s
0.265871GB/s
--------
0.277449GB/s
0.253437GB/s
--------
0.282536GB/s
0.263343GB/s
--------
0.274581GB/s
0.242442GB/s
--------
0.279351GB/s
0.25463GB/s
--------
0.273093GB/s
0.236013GB/s
--------
0.274678GB/s
0.241903GB/s
--------
0.268596GB/s
0.235663GB/s
--------
0.279775GB/s
0.257614GB/s
--------
0.27716GB/s
0.250091GB/s
--------
0.281915GB/s
0.260277GB/s
--------
0.270681GB/s
0.240768GB/s
--------
0.281036GB/s
0.262245GB/s
--------
0.279014GB/s
0.254245GB/s
--------
0.282461GB/s
0.267503GB/s
--------
0.279912GB/s
0.257558GB/s
--------
0.282012GB/s
0.267531GB/s
--------
0.279549GB/s
0.257551GB/s
--------
0.281967GB/s
0.267308GB/s
--------
0.278081GB/s
0.256195GB/s
--------
0.280318GB/s
0.266774GB/s
--------
0.278519GB/s
0.25709GB/s
--------
0.282675GB/s
0.267013GB/s
--------
0.279554GB/s
0.247438GB/s
--------
0.276626GB/s
0.253295GB/s
--------
0.269697GB/s
0.249224GB/s
--------
0.280409GB/s
0.252118GB/s
--------
0.268609GB/s
0.241379GB/s
--------
0.278993GB/s
0.262428GB/s
--------
0.277268GB/s
0.255564GB/s
--------
0.280618GB/s
0.267882GB/s
--------
0.277535GB/s
0.244814GB/s
--------
0.275678GB/s
0.253515GB/s
--------
0.270153GB/s
0.253856GB/s
--------
0.280477GB/s
0.265566GB/s
--------
CPU Type:       Intel(R) Xeon(R) CPU E3-1270 V2 @ 3.50GHz