fork download
  1. #include <iostream>
  2. #include <random>
  3. #include <chrono>
  4. #include <vector>
  5. #include <deque>
  6. #include <array>
  7. #include <algorithm>
  8. #include <numeric>
  9. using namespace std;
  10.  
  11. enum {
  12. kPageW = 10000,
  13. kSortingBits = 8,
  14. kTooLarge = 1u << kSortingBits,
  15. };
  16.  
  17. struct WidthPos
  18. {
  19. uint32_t width = 0;
  20. uint32_t pos = 0;
  21.  
  22. WidthPos(uint32_t w = 0, uint32_t p = 0)
  23. : width(w)
  24. , pos(p)
  25. {}
  26. };
  27.  
  28. bool operator < (WidthPos l, WidthPos r)
  29. { return l.width > r.width; }
  30.  
  31. using Vec = vector<uint32_t>;
  32. using SVec = vector<WidthPos>;
  33.  
  34. uint64_t g_complexity = 0ull;
  35.  
  36. constexpr uint64_t ceilDiv(uint64_t dividend, uint64_t divisor)
  37. {
  38. return (dividend + divisor - 1) / divisor;
  39. }
  40.  
  41. uint32_t getMinRows(const Vec& input)
  42. {
  43. const auto sum = accumulate(begin(input), end(input), 0ull);
  44. return max(1u, uint32_t(ceilDiv(sum, kPageW)));
  45. }
  46.  
  47. uint32_t getMaxColumns(const Vec& input)
  48. {
  49. const auto sum = accumulate(begin(input), end(input), 0ull);
  50. return uint32_t(input.size() / max(1ull, sum / kPageW));
  51. }
  52.  
  53. uint32_t preproc(const Vec& input, SVec& sorted)
  54. {
  55. sorted.clear();
  56. sorted.resize(input.size());
  57.  
  58. auto sum = 0ull;
  59. array<uint32_t, 1 + kTooLarge> cnt_1 {};
  60.  
  61. for (uint32_t i = 0; i != input.size(); ++i)
  62. {
  63. sum += input[i];
  64.  
  65. if (input[i] >= kTooLarge)
  66. sorted[cnt_1[0]++] = {input[i], i};
  67. else
  68. ++cnt_1[kTooLarge - input[i]];
  69. }
  70.  
  71. sort(begin(sorted), begin(sorted) + cnt_1[0]);
  72. partial_sum(begin(cnt_1), end(cnt_1), begin(cnt_1));
  73.  
  74. for (uint32_t i = 0; i != input.size(); ++i)
  75. {
  76. if (input[i] < kTooLarge)
  77. {
  78. auto& cnt = cnt_1[kTooLarge - 1 - input[i]];
  79. sorted[cnt++] = {input[i], i};
  80. __builtin_prefetch(4 + &sorted[cnt], 1);
  81. }
  82. }
  83.  
  84. return uint32_t(input.size() / max(1ull, sum / kPageW));
  85. }
  86.  
  87. uint32_t arrangeAcrossG(const Vec& input) // iterate by width
  88. {
  89. SVec sorted;
  90. vector<bool> seen_columns;
  91.  
  92. for (auto columns = preproc(input, sorted); columns; --columns)
  93. {
  94. seen_columns.clear();
  95. seen_columns.resize(columns);
  96. uint32_t sum = 0;
  97. uint32_t use_cnt = columns;
  98.  
  99. for (auto x: sorted)
  100. {
  101. ++g_complexity;
  102. const auto column = x.pos % columns;
  103.  
  104. if (!seen_columns[column])
  105. {
  106. sum += x.width;
  107. --use_cnt;
  108.  
  109. if (sum > kPageW)
  110. break;
  111. }
  112.  
  113. seen_columns[column] = true;
  114.  
  115. if (!use_cnt)
  116. return columns;
  117. }
  118. }
  119.  
  120. return 0;
  121. }
  122.  
  123. uint32_t preprocX(const Vec& input, SVec& sorted)
  124. {
  125. sorted.clear();
  126. sorted.resize(input.size());
  127.  
  128. auto sum = 0ull;
  129. array<uint32_t, 1 + kTooLarge> cnt_1 {};
  130.  
  131. for (uint32_t i = 0; i != input.size(); ++i)
  132. {
  133. sum += input[i];
  134.  
  135. if (input[i] >= kTooLarge)
  136. sorted[cnt_1[0]++] = {input[i], i};
  137. else
  138. ++cnt_1[kTooLarge - input[i]];
  139. }
  140.  
  141. sort(begin(sorted), begin(sorted) + cnt_1[0]);
  142. partial_sum(begin(cnt_1), end(cnt_1), begin(cnt_1));
  143.  
  144. for (uint32_t i = 0; i != input.size(); ++i)
  145. {
  146. if (input[i] < kTooLarge)
  147. {
  148. auto& cnt = cnt_1[kTooLarge - 1 - input[i]];
  149. sorted[cnt++] = {input[i], i};
  150. __builtin_prefetch(4 + &sorted[cnt], 1);
  151. }
  152. }
  153.  
  154. return max(1u, uint32_t(ceilDiv(sum, kPageW)));
  155. }
  156.  
  157. uint32_t arrangeDownG(const Vec& input) // iterate by width
  158. {
  159. SVec sorted;
  160. vector<bool> seen_columns;
  161.  
  162. for (auto rows = preprocX(input, sorted); rows <= input.size(); ++rows)
  163. {
  164. auto columns = uint32_t(ceilDiv(input.size(), rows));
  165. seen_columns.clear();
  166. seen_columns.resize(columns);
  167. uint32_t sum = 0;
  168. uint32_t use_cnt = columns;
  169.  
  170. for (auto x: sorted)
  171. {
  172. ++g_complexity;
  173. const auto column = x.pos / rows;
  174.  
  175. if (!seen_columns[column])
  176. {
  177. sum += x.width;
  178. --use_cnt;
  179.  
  180. if (sum > kPageW)
  181. break;
  182. }
  183.  
  184. seen_columns[column] = true;
  185.  
  186. if (!use_cnt)
  187. return columns;
  188. }
  189. }
  190.  
  191. return 0;
  192. }
  193.  
  194. uint32_t arrangeAcrossC(const Vec& input) // iterate by columns
  195. {
  196. for (auto columns = getMaxColumns(input); columns; --columns)
  197. {
  198. uint32_t sum = 0;
  199.  
  200. for (uint32_t c = 0; sum <= kPageW && c < columns; ++c)
  201. {
  202. uint32_t cw = 0;
  203.  
  204. for (uint32_t i = c; i < input.size(); i += columns)
  205. {
  206. cw = max(cw, input[i]);
  207. ++g_complexity;
  208. }
  209.  
  210. sum += cw;
  211. }
  212.  
  213. if (sum <= kPageW)
  214. return columns;
  215. }
  216.  
  217. return 0;
  218. }
  219.  
  220. uint32_t arrangeAcrossR(const Vec& input) // iterate by rows
  221. {
  222. auto columns = getMaxColumns(input);
  223. std::vector<uint32_t> widths(columns);
  224.  
  225. for (; columns; --columns)
  226. {
  227. uint32_t sum = 0;
  228. size_t column = 0;
  229.  
  230. for (size_t i = 0; sum <= kPageW && i < input.size(); ++i, ++column)
  231. {
  232. if (column == columns)
  233. {
  234. column = 0;
  235. }
  236.  
  237. if (input[i] > widths[column])
  238. {
  239. sum += input[i] - widths[column];
  240. widths[column] = input[i];
  241. }
  242.  
  243. ++g_complexity;
  244. }
  245.  
  246. if (sum <= kPageW)
  247. {
  248. return columns;
  249. }
  250.  
  251. fill(begin(widths), begin(widths) + columns, 0u);
  252. }
  253.  
  254. return 0;
  255. }
  256.  
  257. uint32_t arrangeDownR(const Vec& input) // iterate by rows
  258. {
  259. auto rows = getMinRows(input);
  260. auto columns = uint32_t(ceilDiv(input.size(), rows));
  261. std::vector<uint32_t> widths(columns);
  262.  
  263. while (rows <= input.size())
  264. {
  265. uint32_t sum = 0;
  266.  
  267. for (uint32_t row = 0; row < rows; ++row)
  268. {
  269. uint32_t column = 0;
  270. uint32_t i = row;
  271.  
  272. for (; sum <= kPageW && i < input.size(); i += rows, ++column)
  273. {
  274. if (input[i] > widths[column])
  275. {
  276. sum += input[i] - widths[column];
  277. widths[column] = input[i];
  278. }
  279.  
  280. ++g_complexity;
  281. }
  282.  
  283. if (sum > kPageW)
  284. {
  285. break;
  286. }
  287. }
  288.  
  289. if (sum <= kPageW)
  290. {
  291. return columns;
  292. }
  293.  
  294. ++rows;
  295. columns = uint32_t(ceilDiv(input.size(), rows));
  296. fill(begin(widths), begin(widths) + columns, 0u);
  297. }
  298.  
  299. return 0;
  300. }
  301.  
  302. uint32_t arrangeDownNaive(const Vec& input)
  303. {
  304. const size_t size = input.size();
  305.  
  306. for (size_t rows = 1; rows <= size; ++rows)
  307. {
  308. size_t index = 0;
  309. uint32_t text_w = 0;
  310.  
  311. for (uint32_t c = 1; ; ++c)
  312. {
  313. uint32_t column_w = 0;
  314.  
  315. for (uint32_t r = 0; r < rows && index < size; ++r, ++index)
  316. column_w = max(column_w, input[index]);
  317.  
  318. text_w += column_w;
  319.  
  320. if (text_w > kPageW)
  321. break;
  322.  
  323. if (index >= size)
  324. return c;
  325. }
  326. }
  327.  
  328. return 0;
  329. }
  330.  
  331. uint32_t arrangeDownRMQ(Vec& input)
  332. {
  333. auto rows = getMinRows(input);
  334. uint32_t ranges = 1;
  335.  
  336. while (rows <= input.size())
  337. {
  338. if (rows >= ranges * 2)
  339. { // lazily update RMQ data
  340. transform(begin(input) + ranges, end(input), begin(input),
  341. begin(input), [](auto l, auto r) {return max(l, r);}
  342. );
  343.  
  344. ranges *= 2;
  345. }
  346. else
  347. { // use RMQ to get widths of columns and check if all columns fit
  348. uint32_t sum = 0;
  349.  
  350. for (uint32_t i = 0; sum <= kPageW && i < input.size(); i += rows)
  351. {
  352. ++g_complexity;
  353. auto j = i + rows - ranges;
  354.  
  355. if (j < input.size())
  356. sum += max(input[i], input[j]);
  357. else
  358. sum += input[i];
  359. }
  360.  
  361. if (sum <= kPageW)
  362. {
  363. return uint32_t(ceilDiv(input.size(), rows));
  364. }
  365.  
  366. ++rows;
  367. }
  368. }
  369.  
  370. return 0;
  371. }
  372.  
  373. struct DeckEntry
  374. {
  375. uint32_t v;
  376. uint32_t p;
  377.  
  378. DeckEntry(uint32_t value = 0, uint32_t pos = 0)
  379. : v(value)
  380. , p(pos)
  381. {}
  382. };
  383.  
  384. using Deck = deque<DeckEntry>;
  385.  
  386. void push(Deck& deck, uint32_t value, uint32_t pos)
  387. {
  388. while (!deck.empty() && deck.back().v <= value)
  389. deck.pop_back();
  390.  
  391. deck.emplace_back(value, pos);
  392. }
  393.  
  394. uint32_t maxAfterPos(Deck& deck, uint32_t pos)
  395. {
  396. if (pos == deck.front().p)
  397. deck.pop_front();
  398.  
  399. return deck.front().v;
  400. }
  401.  
  402. uint32_t preprocRMQ(Vec& input, uint32_t rows)
  403. {
  404. if (rows < 4)
  405. return 1;
  406.  
  407. uint32_t back_pos = 0;
  408. size_t front_pos = 0;
  409. uint32_t ranges = 1;
  410. Deck deck;
  411.  
  412. while (rows >>= 1)
  413. ranges <<= 1;
  414.  
  415. for (; back_pos != ranges - 1; ++back_pos)
  416. {
  417. push(deck, input[back_pos], back_pos);
  418. }
  419.  
  420. for (; back_pos != input.size(); ++front_pos, ++back_pos)
  421. {
  422. push(deck, input[back_pos], back_pos);
  423. input[front_pos] = maxAfterPos(deck, back_pos - ranges);
  424. }
  425.  
  426. for (; front_pos != input.size(); ++front_pos, ++back_pos)
  427. {
  428. input[front_pos] = maxAfterPos(deck, back_pos - ranges);
  429. }
  430.  
  431. return ranges;
  432. }
  433.  
  434. uint32_t arrangeDownRMQ2(Vec& input)
  435. {
  436. auto rows = getMinRows(input);
  437. uint32_t ranges = preprocRMQ(input, rows);
  438.  
  439. while (rows <= input.size())
  440. {
  441. if (rows >= ranges * 2)
  442. { // lazily update RMQ data
  443. transform(begin(input) + ranges, end(input), begin(input),
  444. begin(input), [](auto l, auto r) {return max(l, r);}
  445. );
  446.  
  447. ranges *= 2;
  448. //g_complexity += input.size() - ranges;
  449. }
  450. else
  451. { // use RMQ to get widths of columns and check if all columns fit
  452. uint32_t sum = 0;
  453.  
  454. for (uint32_t i = 0; sum <= kPageW && i < input.size(); i += rows)
  455. {
  456. ++g_complexity;
  457. auto j = i + rows - ranges;
  458.  
  459. if (j < input.size())
  460. sum += max(input[i], input[j]);
  461. else
  462. sum += input[i];
  463. }
  464.  
  465. if (sum <= kPageW)
  466. {
  467. return uint32_t(ceilDiv(input.size(), rows));
  468. }
  469.  
  470. ++rows;
  471. }
  472. }
  473.  
  474. return 0;
  475. }
  476.  
  477. class TwoStk
  478. {
  479. vector<uint32_t> in_;
  480. vector<uint32_t> out_;
  481. uint32_t in_max_;
  482.  
  483. void inout()
  484. {
  485. in_max_ = 0;
  486. uint32_t out_max = 0;
  487.  
  488. while (!in_.empty())
  489. {
  490. const auto x = in_.back();
  491. in_.pop_back();
  492. out_max = max(out_max, x);
  493. out_.push_back(out_max);
  494. }
  495. }
  496.  
  497. public:
  498. TwoStk(uint32_t size)
  499. : in_max_(0)
  500. {
  501. in_.reserve(size);
  502. out_.reserve(size);
  503. }
  504.  
  505. void push(uint32_t x)
  506. {
  507. in_.push_back(x);
  508. in_max_ = max(in_max_, x);
  509. }
  510.  
  511. uint32_t pop()
  512. {
  513. if (out_.empty())
  514. {
  515. inout();
  516. }
  517.  
  518. const auto res = out_.back();
  519. out_.pop_back();
  520. return max(res, in_max_);
  521. }
  522. };
  523.  
  524. uint32_t preprocRMQ3(Vec& input, uint32_t rows)
  525. {
  526. if (rows < 4)
  527. return 1;
  528.  
  529. uint32_t back_pos = 0;
  530. size_t front_pos = 0;
  531. uint32_t ranges = 1;
  532.  
  533. while (rows >>= 1)
  534. ranges <<= 1;
  535.  
  536. TwoStk ts(ranges);
  537.  
  538. for (; back_pos != ranges - 1; ++back_pos)
  539. {
  540. ts.push(input[back_pos]);
  541. }
  542.  
  543. for (; back_pos != input.size(); ++front_pos, ++back_pos)
  544. {
  545. ts.push(input[back_pos]);
  546. input[front_pos] = ts.pop();
  547. }
  548.  
  549. for (; front_pos != input.size(); ++front_pos, ++back_pos)
  550. {
  551. input[front_pos] = ts.pop();
  552. }
  553.  
  554. return ranges;
  555. }
  556.  
  557. uint32_t arrangeDownRMQ3(Vec& input)
  558. {
  559. auto rows = getMinRows(input);
  560. uint32_t ranges = preprocRMQ3(input, rows);
  561.  
  562. while (rows <= input.size())
  563. {
  564. if (rows >= ranges * 2)
  565. { // lazily update RMQ data
  566. transform(begin(input) + ranges, end(input), begin(input),
  567. begin(input), [](auto l, auto r) {return max(l, r);}
  568. );
  569.  
  570. ranges *= 2;
  571. //g_complexity += input.size() - ranges;
  572. }
  573. else
  574. { // use RMQ to get widths of columns and check if all columns fit
  575. uint32_t sum = 0;
  576.  
  577. for (uint32_t i = 0; sum <= kPageW && i < input.size(); i += rows)
  578. {
  579. ++g_complexity;
  580. auto j = i + rows - ranges;
  581.  
  582. if (j < input.size())
  583. sum += max(input[i], input[j]);
  584. else
  585. sum += input[i];
  586. }
  587.  
  588. if (sum <= kPageW)
  589. {
  590. return uint32_t(ceilDiv(input.size(), rows));
  591. }
  592.  
  593. ++rows;
  594. }
  595. }
  596.  
  597. return 0;
  598. }
  599.  
  600. int main()
  601. {
  602. mt19937_64 rng(random_device{}());
  603. geometric_distribution<uint32_t> distrib(0.02); // max value up to ~1000
  604.  
  605. for (unsigned size = 3; size < 1000000u; size *= 2)
  606. {
  607. const unsigned repeat = max(1u, 100000u / size);
  608. chrono::duration<double> time {};
  609. g_complexity = 0ull;
  610. Vec input(size);
  611. uint32_t res = 0u;
  612.  
  613. for (unsigned i = 0; i < repeat; ++i)
  614. {
  615. generate(begin(input), end(input), [&] {return 1 + distrib(rng);});
  616. auto start = chrono::steady_clock::now();
  617.  
  618. res = arrangeDownG(input);
  619.  
  620. time += chrono::steady_clock::now() - start;
  621. }
  622.  
  623. cout << size << ' '
  624. << time.count() / repeat << ' '
  625. << g_complexity / repeat << ' '
  626. << res << '\n';
  627. }
  628. }
  629.  
Success #stdin #stdout 0.71s 4048KB
stdin
Standard input is empty
stdout
3 9.64697e-07 3 3
6 1.05353e-06 6 6
12 1.48308e-06 12 12
24 1.87463e-06 24 24
48 2.38875e-06 48 48
96 3.78629e-06 96 96
192 6.47739e-06 187 96
384 1.55096e-05 531 96
768 3.12119e-05 1069 64
1536 6.25127e-05 2050 50
3072 0.000130512 4205 44
6144 0.00027456 8693 34
12288 0.000574379 17883 32
24576 0.00129296 39036 28
49152 0.0027272 79801 24
98304 0.00620303 177045 22
196608 0.0130799 380504 22
393216 0.0247719 605727 19
786432 0.0571786 1637286 20