fork download
  1. #include <iostream>
  2. #include <random>
  3. #include <chrono>
  4. #include <vector>
  5. #include <deque>
  6. #include <array>
  7. #include <algorithm>
  8. #include <numeric>
  9. #include <iterator>
  10. using namespace std;
  11.  
  12. enum {
  13. kPageW = 10000,
  14. kSortingBits = 10,
  15. kTooLarge = 1u << kSortingBits,
  16. };
  17.  
  18. using Data = uint16_t;
  19. using Index = uint32_t;
  20. using Vec = vector<Data>;
  21. using SVec = vector<Index>;
  22. using Counters = array<Index, 1 + kTooLarge>;
  23.  
  24. uint64_t g_complexity = 0ull;
  25.  
  26. constexpr uint64_t ceilDiv(uint64_t dividend, uint64_t divisor)
  27. {
  28. return (dividend + divisor - 1) / divisor;
  29. }
  30.  
  31. Index getMinRows(const Vec& input)
  32. {
  33. const auto sum = accumulate(begin(input), end(input), 0ull);
  34. return max<Index>(1u, Index(ceilDiv(sum, kPageW)));
  35. }
  36.  
  37. uint32_t getMaxColumns(const Vec& input)
  38. {
  39. const auto sum = accumulate(begin(input), end(input), 0ull);
  40. return uint32_t(input.size() / max(1ull, sum / kPageW));
  41. }
  42.  
  43. uint64_t preproc(const Vec& input, SVec& sorted)
  44. {
  45. const auto size = input.size();
  46. sorted.clear();
  47. sorted.resize(size);
  48. auto sum = 0ull;
  49. Counters counters{};
  50.  
  51. for (Index i = 0; i != size; ++i)
  52. {
  53. ++counters[1 + input[i]];
  54. }
  55.  
  56. partial_sum(begin(counters), end(counters), begin(counters));
  57.  
  58. for (Index i = 0; i != size; ++i)
  59. {
  60. sum += input[i];
  61. auto& cnt = counters[input[i]];
  62. sorted[cnt] = i;
  63. __builtin_prefetch(4 + &sorted[cnt], 1);
  64. ++cnt;
  65. }
  66.  
  67. return sum;
  68.  
  69. }
  70.  
  71. uint32_t arrangeAcrossG(const Vec& input) // iterate by width
  72. {
  73. SVec sorted;
  74. vector<bool> seen_columns;
  75. const auto total = preproc(input, sorted);
  76.  
  77. auto columns = static_cast<uint32_t>(
  78. input.size() / max<uint64_t>(1u, total / kPageW));
  79.  
  80. for (; columns; --columns)
  81. {
  82. seen_columns.clear();
  83. seen_columns.resize(columns);
  84. uint32_t sum = 0;
  85. uint32_t use_cnt = columns;
  86. Index i_sorted = static_cast<Index>(sorted.size() - 1);
  87.  
  88. do {
  89. ++g_complexity;
  90. const auto index = sorted[i_sorted];
  91. const auto width = input[index];
  92. const auto column = index % columns;
  93.  
  94. if (!seen_columns[column])
  95. {
  96. seen_columns[column] = true;
  97. sum += width;
  98. --use_cnt;
  99.  
  100. if (sum > kPageW)
  101. break;
  102. }
  103.  
  104. if (!use_cnt)
  105. {
  106. return columns;
  107. }
  108. } while (i_sorted--);
  109. }
  110.  
  111. return 0;
  112. }
  113.  
  114. uint32_t arrangeDownG(const Vec& input) // iterate by width
  115. {
  116. SVec sorted;
  117. vector<bool> seen_columns;
  118. const auto total = preproc(input, sorted);
  119. auto rows = max<Index>(1u, static_cast<Index>(ceilDiv(total, kPageW)));
  120.  
  121. for (; rows <= input.size(); ++rows)
  122. {
  123. auto columns = uint32_t(ceilDiv(input.size(), rows));
  124. seen_columns.clear();
  125. seen_columns.resize(columns);
  126. uint32_t sum = 0;
  127. uint32_t use_cnt = columns;
  128. Index i_sorted = static_cast<Index>(sorted.size() - 1);
  129.  
  130. do {
  131. ++g_complexity;
  132. const auto index = sorted[i_sorted];
  133. const auto width = input[index];
  134. const auto column = index / rows;
  135.  
  136. if (!seen_columns[column])
  137. {
  138. seen_columns[column] = true;
  139. sum += width;
  140. --use_cnt;
  141.  
  142. if (sum > kPageW)
  143. break;
  144. }
  145.  
  146. if (!use_cnt)
  147. {
  148. return columns;
  149. }
  150. } while (i_sorted--);
  151. }
  152.  
  153. return 0;
  154. }
  155.  
  156. uint32_t arrangeAcrossC(const Vec& input) // iterate by columns
  157. {
  158. for (auto columns = getMaxColumns(input); columns; --columns)
  159. {
  160. uint32_t sum = 0;
  161.  
  162. for (uint32_t c = 0; sum <= kPageW && c < columns; ++c)
  163. {
  164. Data cw = 0;
  165.  
  166. for (uint32_t i = c; i < input.size(); i += columns)
  167. {
  168. cw = max(cw, input[i]);
  169. ++g_complexity;
  170. }
  171.  
  172. sum += cw;
  173. }
  174.  
  175. if (sum <= kPageW)
  176. return columns;
  177. }
  178.  
  179. return 0;
  180. }
  181.  
  182. uint32_t arrangeAcrossR(const Vec& input) // iterate by rows
  183. {
  184. auto columns = getMaxColumns(input);
  185. std::vector<uint32_t> widths(columns);
  186.  
  187. for (; columns; --columns)
  188. {
  189. uint32_t sum = 0;
  190. uint32_t column = 0;
  191.  
  192. for (size_t i = 0; sum <= kPageW && i < input.size(); ++i, ++column)
  193. {
  194. if (column == columns)
  195. {
  196. column = 0;
  197. }
  198.  
  199. if (input[i] > widths[column])
  200. {
  201. sum += input[i] - widths[column];
  202. widths[column] = input[i];
  203. }
  204.  
  205. ++g_complexity;
  206. }
  207.  
  208. if (sum <= kPageW)
  209. {
  210. return columns;
  211. }
  212.  
  213. fill(begin(widths), begin(widths) + columns, 0u);
  214. }
  215.  
  216. return 0;
  217. }
  218.  
  219. Data maximum(Data l, Data r)
  220. {
  221. return max(l, r);
  222. }
  223.  
  224. template<typename I, typename Op>
  225. auto transform_partial_sum(I lbegin, I lend, I rbegin, I out, Op op)
  226. { // maximum of the element in first enterval and prefix of second interval
  227. auto sum = typename I::value_type{};
  228.  
  229. for (; lbegin != lend; ++lbegin, ++rbegin, ++out)
  230. {
  231. sum = op(sum, *rbegin);
  232. *lbegin = op(*lbegin, sum);
  233. }
  234.  
  235. return sum;
  236. }
  237.  
  238. template<typename I>
  239. void reverse_max(I b, I e)
  240. { // for each element: maximum of the suffix starting from this element
  241. partial_sum(make_reverse_iterator(e),
  242. make_reverse_iterator(b),
  243. make_reverse_iterator(e),
  244. maximum);
  245. }
  246.  
  247. struct PreprocRMQ
  248. {
  249. Index operator () (Vec& input, Index rows)
  250. {
  251. if (rows < 4)
  252. { // no preprocessing needed
  253. return 1;
  254. }
  255.  
  256. Index ranges = 1;
  257. auto b = begin(input);
  258.  
  259. while (rows >>= 1)
  260. {
  261. ranges <<= 1;
  262. }
  263.  
  264. for (; b + 2 * ranges <= end(input); b += ranges)
  265. {
  266. reverse_max(b, b + ranges);
  267. transform_partial_sum(b, b + ranges, b + ranges, b, maximum);
  268. }
  269.  
  270. // preprocess inconvenient tail part of the array
  271. reverse_max(b, b + ranges);
  272. const auto d = end(input) - b - ranges;
  273. const auto z = transform_partial_sum(b, b + d, b + ranges, b, maximum);
  274. transform(b + d, b + ranges, b + d, [&](Data x){return max(x, z);});
  275. reverse_max(b + ranges, end(input));
  276. return ranges;
  277. }
  278. };
  279.  
  280. struct NoPreproc
  281. {
  282. Index operator () (Vec&, Index)
  283. {
  284. return 1;
  285. }
  286. };
  287.  
  288. template<class PP>
  289. uint32_t arrangeDownRMQ(Vec& input)
  290. {
  291. auto rows = getMinRows(input);
  292. auto ranges = PP{}(input, rows);
  293.  
  294. while (rows <= input.size())
  295. {
  296. if (rows >= ranges * 2)
  297. { // lazily update RMQ data
  298. transform(begin(input) + ranges, end(input), begin(input),
  299. begin(input), maximum
  300. );
  301.  
  302. ranges *= 2;
  303. }
  304. else
  305. { // use RMQ to get widths of columns and check if all columns fit
  306. uint32_t sum = 0;
  307.  
  308. for (Index i = 0; sum <= kPageW && i < input.size(); i += rows)
  309. {
  310. ++g_complexity;
  311. auto j = i + rows - ranges;
  312.  
  313. if (j < input.size())
  314. sum += max(input[i], input[j]);
  315. else
  316. sum += input[i];
  317. }
  318.  
  319. if (sum <= kPageW)
  320. {
  321. return uint32_t(ceilDiv(input.size(), rows));
  322. }
  323.  
  324. ++rows;
  325. }
  326. }
  327.  
  328. return 0;
  329. }
  330.  
  331. //#define TESTING
  332. #ifndef TESTING
  333. int main()
  334. {
  335. mt19937_64 rng(random_device{}());
  336. geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000
  337.  
  338. for (unsigned size = 3; size < 1000000u; size *= 2)
  339. {
  340. const unsigned repeat = max(5u, 1000u / size);
  341. chrono::duration<double> time {};
  342. g_complexity = 0ull;
  343. Vec input(size);
  344. uint32_t res = 0u;
  345.  
  346. for (unsigned i = 0; i < repeat; ++i)
  347. {
  348. generate(begin(input), end(input),
  349. [&]{return 1 + min(distrib(rng), kTooLarge - 2u);});
  350.  
  351. auto start = chrono::steady_clock::now();
  352. res = arrangeDownRMQ<PreprocRMQ>(input);
  353. time += chrono::steady_clock::now() - start;
  354. }
  355.  
  356. cout << size << ' '
  357. << time.count() / repeat << ' '
  358. << g_complexity / repeat << ' '
  359. << res << '\n';
  360. }
  361. }
  362. #else // TESTING
  363. int main()
  364. {
  365. mt19937_64 rng(random_device{}());
  366. geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000
  367.  
  368. for (unsigned size = 12; size < 10000000u; size *= 4)
  369. {
  370. Vec input(size);
  371.  
  372. generate(begin(input), end(input),
  373. [&]{return 1 + min(distrib(rng), kTooLarge - 2u);});
  374.  
  375. cout << arrangeAcrossR(input) << ' ' << arrangeAcrossG(input) << '\n';
  376.  
  377. cout << arrangeAcrossR(input) << ' ' << arrangeAcrossC(input) << '\n';
  378.  
  379. cout << arrangeDownG(input) << ' ';
  380. cout << arrangeDownRMQ<PreprocRMQ>(input) << '\n';
  381. }
  382. }
  383. #endif // TESTING
Success #stdin #stdout 1.57s 3424KB
stdin
Standard input is empty
stdout
3 3.19462e-07 3 3
6 3.18018e-07 6 6
12 3.48241e-07 12 12
24 4.31854e-07 24 24
48 5.677e-07 48 48
96 8.22e-07 96 96
192 1.809e-06 134 96
384 6.8894e-06 359 96
768 2.34202e-05 715 64
1536 4.38732e-05 1414 48
3072 8.29704e-05 2852 46
6144 0.000170349 5926 33
12288 0.000312167 12124 31
24576 0.000589112 23997 29
49152 0.00118567 49336 24
98304 0.00242333 97191 23
196608 0.00461064 196098 21
393216 0.00995426 400043 20
786432 0.0216677 802068 18