fork download
  1. #pragma GCC optimize("Ofast")
  2. #include <bits/stdc++.h>
  3. static unsigned fast_mod(uint64_t x, unsigned m) {
  4. // Optimized mod for Codeforces 32-bit machines.
  5. // x must be less than 2^32 * m for this to work, so that x / m fits in an unsigned 32-bit int.
  6. unsigned x_high = unsigned(x >> 32), x_low = unsigned(x);
  7. unsigned quot, rem;
  8. asm("divl %4\n"
  9. : "=a" (quot), "=d" (rem)
  10. : "d" (x_high), "a" (x_low), "r" (m));
  11. return rem;
  12. }
  13.  
  14. class Timer {
  15. std::chrono::time_point<std::chrono::steady_clock> timePoint;
  16. size_t value;
  17. public:
  18. void start() { timePoint = std::chrono::steady_clock::now(); }
  19. void finish() {
  20. auto curr = std::chrono::steady_clock::now();
  21. auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(curr - timePoint);
  22. value = elapsed.count();
  23. }
  24. size_t operator()() const { return value; }
  25. } timer;
  26.  
  27. uint32_t x32 = 314159265;
  28. uint32_t xorshift32()
  29. { // fastest random generator
  30. x32 ^= x32 << 13;
  31. x32 ^= x32 >> 17;
  32. x32 ^= x32 << 5;
  33. return x32;
  34. }
  35.  
  36. int main() {
  37. unsigned mod; std::cin >> mod;
  38. std::vector<uint32_t> data(1 << 16);
  39. for (auto& it : data) it = xorshift32();
  40. timer.start();
  41. uint32_t prod=1;
  42. for (int iter = 0; iter < 1000; iter++) {
  43. for (auto it : data) {
  44. prod = fast_mod(uint64_t(prod)*it,mod);
  45. }
  46. }
  47. timer.finish();
  48. std::cout << "prod1 = " << prod << ", runtime1 = " << timer() << "ms" << std::endl;
  49. timer.start();
  50. prod=1;
  51. for (int iter = 0; iter < 1000; iter++) {
  52. for (auto it : data) {
  53. prod = uint64_t(prod) * it % mod;
  54. }
  55. }
  56. timer.finish();
  57. std::cout << "prod2 = " << prod << ", runtime2 = " << timer() << "ms" << std::endl;
  58. }
Success #stdin #stdout 1.47s 5404KB
stdin
1000000007
stdout
prod1 = 49955945, runtime1 = 650ms
prod2 = 49955945, runtime2 = 821ms