fork(9) download
  1. #pragma GCC optimize ("O3")
  2. #pragma GCC target ("avx")
  3.  
  4. #include <cstdio>
  5. #include <cassert>
  6. #include <cmath>
  7. #include <cstring>
  8.  
  9. #include <algorithm>
  10. #include <iostream>
  11. #include <vector>
  12. #include <functional>
  13.  
  14. #ifdef __x86_64__
  15. #define NTT64
  16. #endif
  17.  
  18. #define _rep(_1, _2, _3, _4, name, ...) name
  19. #define rep2(i, n) rep3(i, 0, n)
  20. #define rep3(i, a, b) rep4(i, a, b, 1)
  21. #define rep4(i, a, b, c) for (int i = int(a); i < int(b); i += int(c))
  22. #define rep(...) _rep(__VA_ARGS__, rep4, rep3, rep2, _)(__VA_ARGS__)
  23.  
  24. using namespace std;
  25.  
  26. using i64 = long long;
  27. using u32 = unsigned;
  28. using u64 = unsigned long long;
  29. using f80 = long double;
  30.  
  31. namespace ntt {
  32.  
  33. #ifdef NTT64
  34. using word_t = u64;
  35. using dword_t = __uint128_t;
  36. #else
  37. using word_t = u32;
  38. using dword_t = u64;
  39. #endif
  40. static const int word_bits = 8 * sizeof(word_t);
  41.  
  42. template <word_t mod, word_t prim_root>
  43. class Mod {
  44. private:
  45. static constexpr word_t mul_inv(word_t n, int e=6, word_t x=1) {
  46. return e == 0 ? x : mul_inv(n, e-1, x*(2-x*n));
  47. }
  48. public:
  49. static constexpr word_t inv = mul_inv(mod);
  50. static constexpr word_t r2 = -dword_t(mod) % mod;
  51. static constexpr int level = __builtin_ctzll(mod - 1);
  52. static_assert(inv * mod == 1, "invalid 1/M modulo 2^@.");
  53.  
  54. Mod() {}
  55. Mod(word_t n) : x(init(n)) {};
  56. static word_t modulus() { return mod; }
  57. static word_t init(word_t w) { return reduce(dword_t(w) * r2); }
  58. static word_t reduce(const dword_t w) { return word_t(w >> word_bits) + mod - word_t((dword_t(word_t(w) * inv) * mod) >> word_bits); }
  59. static Mod omega() { return Mod(prim_root).pow((mod - 1) >> level); }
  60. Mod& operator += (Mod rhs) { this->x += rhs.x; return *this; }
  61. Mod& operator -= (Mod rhs) { this->x += 3 * mod - rhs.x; return *this; }
  62. Mod& operator *= (Mod rhs) { this->x = reduce(dword_t(this->x) * rhs.x); return *this; }
  63. Mod operator + (Mod rhs) const { return Mod(*this) += rhs; }
  64. Mod operator - (Mod rhs) const { return Mod(*this) -= rhs; }
  65. Mod operator * (Mod rhs) const { return Mod(*this) *= rhs; }
  66. word_t get() const { return reduce(this->x) % mod; }
  67. void set(word_t n) const { this->x = n; }
  68. Mod pow(word_t exp) const {
  69. Mod ret = Mod(1);
  70. for (Mod base = *this; exp; exp >>= 1, base *= base) if (exp & 1) ret *= base;
  71. return ret;
  72. }
  73. Mod inverse() const { return pow(mod - 2); }
  74. friend ostream& operator << (ostream& os, const Mod& m) { return os << m.get(); }
  75. static void debug() {
  76. printf("%llu %llu %llu %llu\n", mod, inv, r2, omega().get());
  77. }
  78. word_t x;
  79. };
  80.  
  81. const int size = 1 << 16;
  82.  
  83. #ifdef NTT64
  84. using m64_1 = ntt::Mod<709143768229478401, 31>;
  85. using m64_2 = ntt::Mod<711416664922521601, 19>; // <= 712e15 (sub.D = 3)
  86. m64_1 f1[size], g1[size];
  87. m64_2 f2[size], g2[size];
  88. #else
  89. using m32_1 = ntt::Mod<138412033, 5>;
  90. using m32_2 = ntt::Mod<155189249, 6>;
  91. using m32_3 = ntt::Mod<163577857, 23>; // <= 16579e4 (sub.D = 3)
  92. m32_1 f1[size], g1[size];
  93. m32_2 f2[size], g2[size];
  94. m32_3 f3[size], g3[size];
  95. #endif
  96.  
  97. template <typename mod_t>
  98. void convolve(mod_t* A, int s1, mod_t* B, int s2, bool cyclic=false) {
  99. int s = (cyclic ? max(s1, s2) : s1 + s2 - 1);
  100. int size = 1;
  101. while (size < s) size <<= 1;
  102. mod_t roots[mod_t::level] = { mod_t::omega() };
  103. rep(i, 1, mod_t::level) roots[i] = roots[i - 1] * roots[i - 1];
  104. fill(A + s1, A + size, 0); ntt_dit4(A, size, 1, roots);
  105. if (A == B && s1 == s2) {
  106. rep(i, size) A[i] *= A[i];
  107. } else {
  108. fill(B + s2, B + size, 0); ntt_dit4(B, size, 1, roots);
  109. rep(i, size) A[i] *= B[i];
  110. }
  111. ntt_dit4(A, size, -1, roots);
  112. mod_t inv = mod_t(size).inverse();
  113. rep(i, cyclic ? size : s) A[i] *= inv;
  114. }
  115.  
  116. template <typename mod_t>
  117. void rev_permute(mod_t* A, int n) {
  118. int r = 0, nh = n >> 1;
  119. rep(i, 1, n) {
  120. for (int h = nh; !((r ^= h) & h); h >>= 1);
  121. if (r > i) swap(A[i], A[r]);
  122. }
  123. }
  124.  
  125. template <typename mod_t>
  126. void ntt_dit4(mod_t* A, int n, int sign, mod_t* roots) {
  127. rev_permute(A, n);
  128. int logn = __builtin_ctz(n);
  129. assert(logn <= mod_t::level);
  130.  
  131. if (logn & 1) rep(i, 0, n, 2) {
  132. mod_t a = A[i], b = A[i + 1];
  133. A[i] = a + b; A[i + 1] = a - b;
  134. }
  135. mod_t imag = roots[mod_t::level - 2];
  136. if (sign < 0) imag = imag.inverse();
  137.  
  138. mod_t one = mod_t(1);
  139. rep(e, 2 + (logn & 1), logn + 1, 2) {
  140. const int m = 1 << e;
  141. const int m4 = m >> 2;
  142. mod_t dw = roots[mod_t::level - e];
  143. if (sign < 0) dw = dw.inverse();
  144.  
  145. const int block_size = min(n, max(m, (1 << 15) / int(sizeof(A[0]))));
  146. rep(k, 0, n, block_size) {
  147. mod_t w = one, w2 = one, w3 = one;
  148. rep(j, m4) {
  149. rep(i, k + j, k + block_size, m) {
  150. mod_t a0 = A[i + m4 * 0] * one, a2 = A[i + m4 * 1] * w2;
  151. mod_t a1 = A[i + m4 * 2] * w, a3 = A[i + m4 * 3] * w3;
  152. mod_t t02 = a0 + a2, t13 = a1 + a3;
  153. A[i + m4 * 0] = t02 + t13; A[i + m4 * 2] = t02 - t13;
  154. t02 = a0 - a2, t13 = (a1 - a3) * imag;
  155. A[i + m4 * 1] = t02 + t13; A[i + m4 * 3] = t02 - t13;
  156. }
  157. w *= dw; w2 = w * w; w3 = w2 * w;
  158. }
  159. }
  160. }
  161. }
  162.  
  163. } // namespace ntt
  164.  
  165. using R = int;
  166. using R64 = i64;
  167.  
  168. class poly {
  169. public:
  170. #ifdef NTT64
  171. static const int ntt_threshold = 900; // deg(f * g)
  172. #else
  173. static const int ntt_threshold = 1500; // deg(f * g)
  174. #endif
  175.  
  176. static R add_mod(R a, R b) { return int(a += b - mod) < 0 ? a + mod : a; }
  177. static R sub_mod(R a, R b) { return int(a -= b) < 0 ? a + mod : a; }
  178. static R64 sub_mul_mod(R64 a, R b, R c) {
  179. i64 t = i64(a) - i64(int(b)) * int(c);
  180. return t < 0 ? t + lmod : t;
  181. }
  182. static R mul_mod(R a, R b) { return R64(a) * b % fast_mod; }
  183. static R pow_mod(R a, int e) {
  184. R ret = 1 % fast_mod;
  185. for (; e; e >>= 1, a = mul_mod(a, a)) {
  186. if (e & 1) ret = mul_mod(ret, a);
  187. }
  188. return ret;
  189. }
  190. static R mod_inv(R a) {
  191. R b = mod, s = 1, t = 0;
  192. while (b > 0) {
  193. swap(s -= t * (a / b), t);
  194. swap(a %= b, b);
  195. }
  196. if (a > 1) { fprintf(stderr, "Error: invalid modular inverse\n"); exit(1); };
  197. return int(s) < 0 ? s + mod : s;
  198. }
  199. inline static void vec_add(R64* res, int s, const R* f, R c) {
  200. rep(i, s) res[i] = sub_mul_mod(res[i], mod - c, f[i]);
  201. }
  202. inline static void vec_sub(R64* res, int s, const R* f, R c) {
  203. rep(i, s) res[i] = sub_mul_mod(res[i], c, f[i]);
  204. }
  205.  
  206. #ifdef NTT64
  207. struct fast_div {
  208. using u128 = __uint128_t;
  209. fast_div() {}
  210. fast_div(u64 n) : m(n) {
  211. s = (n == 1) ? 0 : 127 - __builtin_clzll(n - 1);
  212. x = ((u128(1) << s) + n - 1) / n;
  213. }
  214. friend u64 operator / (u64 n, fast_div d) { return u128(n) * d.x >> d.s; }
  215. friend u64 operator % (u64 n, fast_div d) { return n - n / d * d.m; }
  216. u64 m, s, x;
  217. };
  218. #else
  219. struct fast_div {
  220. fast_div() {}
  221. fast_div(u32 n) : m(n) {}
  222. friend u32 operator % (u64 n, fast_div d) { return n % d.m; }
  223. u32 m;
  224. };
  225. #endif
  226.  
  227. public:
  228. poly() {}
  229. poly(int n) : coefs(n) {}
  230. poly(int n, int c) : coefs(n, c % mod) {}
  231. poly(const R* ar, int s) : coefs(ar, ar + s) {}
  232. poly(const vector<R>& v) : coefs(v) {}
  233. poly(const poly& f, int beg, int end=-1) {
  234. if (end < 0) end = beg, beg = 0;
  235. resize(end - beg);
  236. rep(i, beg, end) if (i < f.size()) coefs[i - beg] = f[i];
  237. }
  238.  
  239. static int ilog2(u64 n) {
  240. return 63 - __builtin_clzll(n);
  241. }
  242. int size() const { return coefs.size(); }
  243. void resize(int s) { coefs.resize(s); }
  244. void push_back(R c) { coefs.push_back(c); }
  245.  
  246. const R* data() const { return coefs.data(); }
  247. R* data() { return coefs.data(); }
  248. const R& operator [] (int i) const { return coefs[i]; }
  249. R& operator [] (int i) { return coefs[i]; }
  250.  
  251. void reverse() { std::reverse(coefs.begin(), coefs.end()); }
  252.  
  253. poly operator - () {
  254. poly ret = *this;
  255. rep(i, ret.size()) ret[i] = (ret[i] == 0 ? 0 : mod - ret[i]);
  256. return ret;
  257. }
  258. poly& operator += (const poly& rhs) {
  259. if (size() < rhs.size()) resize(rhs.size());
  260. rep(i, rhs.size()) coefs[i] = add_mod(coefs[i], rhs[i]);
  261. return *this;
  262. }
  263. poly& operator -= (const poly& rhs) {
  264. if (size() < rhs.size()) resize(rhs.size());
  265. rep(i, rhs.size()) coefs[i] = sub_mod(coefs[i], rhs[i]);
  266. return *this;
  267. }
  268. poly& operator *= (const poly& rhs) { return *this = *this * rhs; }
  269.  
  270. poly& rev_add(const poly& rhs) {
  271. if (size() < rhs.size()) {
  272. int s = size();
  273. resize(rhs.size());
  274. rep(i, s) coefs[size() - 1 - i] = coefs[s - 1 - i];
  275. rep(i, size() - s) coefs[i] = 0;
  276. }
  277. rep(i, rhs.size()) coefs[size() - 1 - i] = \
  278. add_mod(coefs[size() - 1 - i], rhs.coefs[rhs.size() - 1 - i]);
  279. return *this;
  280. }
  281.  
  282. poly operator + (const poly& rhs) const { return poly(*this) += rhs; }
  283. poly operator - (const poly& rhs) const { return poly(*this) -= rhs; }
  284. poly operator * (const poly& rhs) const { return this->mul(rhs); }
  285.  
  286. static void set_mod(R m, int N=2) {
  287. mod = m;
  288. lmod = R64(m) << 32;
  289. N = max(2, N);
  290. fast_mod = fast_div(mod);
  291. invs.assign(N + 1, 1);
  292. facts.assign(N + 1, 1);
  293. ifacts.assign(N + 1, 1);
  294. invs[1] = 1;
  295. rep(i, 2, N + 1) {
  296. invs[i] = mul_mod(invs[mod % i], mod - mod / i);
  297. facts[i] = mul_mod(facts[i - 1], i);
  298. ifacts[i] = mul_mod(ifacts[i - 1], invs[i]);
  299. }
  300. }
  301.  
  302. private:
  303.  
  304. #ifdef NTT64
  305. static poly mul_crt(int beg, int end) {
  306. using namespace ntt;
  307. auto inv = m64_2(m64_1::modulus()).inverse();
  308. auto mod1 = m64_1::modulus() % fast_mod;
  309. poly ret(end - beg);
  310. rep(i, ret.size()) {
  311. u64 r1 = f1[i + beg].get(), r2 = f2[i + beg].get();
  312. ret[i] = (r1 + (m64_2(r2 + m64_2::modulus() - r1) * inv).get() % fast_mod * mod1) % fast_mod;
  313. }
  314. return ret;
  315. }
  316.  
  317. static void mul2(const poly& f, const poly& g, bool cyclic=false) {
  318. using namespace ntt;
  319. if (&f == &g) {
  320. rep(i, f.size()) f1[i] = f[i];
  321. convolve(f1, f.size(), f1, f.size(), cyclic);
  322. rep(i, f.size()) f2[i] = f[i];
  323. convolve(f2, f.size(), f2, f.size(), cyclic);
  324. } else {
  325. rep(i, f.size()) f1[i] = f[i]; rep(i, g.size()) g1[i] = g[i];
  326. convolve(f1, f.size(), g1, g.size(), cyclic);
  327. rep(i, f.size()) f2[i] = f[i]; rep(i, g.size()) g2[i] = g[i];
  328. convolve(f2, f.size(), g2, g.size(), cyclic);
  329. }
  330. }
  331. #else
  332. static poly mul_crt(int beg, int end) {
  333. using namespace ntt;
  334. auto m1 = m32_1::modulus();
  335. auto m2 = m32_2::modulus();
  336. auto m3 = m32_3::modulus();
  337. auto m12 = u64(m1) * m2;
  338.  
  339. poly ret(end - beg);
  340. u32 m12m = m12 % mod;
  341. u32 inv1 = m32_2(m1).inverse().get();
  342. u32 inv12 = m32_3(m12 % m3).inverse().get();
  343.  
  344. rep(i, ret.size()) {
  345. u32 r1 = f1[i + beg].get(), r2 = f2[i + beg].get(), r3 = f3[i + beg].get();
  346. u64 r = r1 + u64(r2 + m2 - r1) * inv1 % m2 * m1;
  347. ret[i] = (r + u64(r3 + m3 - r % m3) * inv12 % m3 * m12m) % mod;
  348. }
  349. return ret;
  350. }
  351.  
  352. static void mul2(const poly& f, const poly& g, bool cyclic=false) {
  353. using namespace ntt;
  354. if (&f == &g) {
  355. rep(i, f.size()) f1[i] = f[i] % m32_1::modulus();
  356. convolve(f1, f.size(), f1, f.size(), cyclic);
  357. rep(i, f.size()) f2[i] = f[i] % m32_2::modulus();
  358. convolve(f2, f.size(), f2, f.size(), cyclic);
  359. rep(i, f.size()) f3[i] = f[i] % m32_3::modulus();
  360. convolve(f3, f.size(), f3, f.size(), cyclic);
  361. } else {
  362. rep(i, f.size()) f1[i] = f[i] % m32_1::modulus();
  363. rep(i, g.size()) g1[i] = g[i] % m32_1::modulus();
  364. convolve(f1, f.size(), g1, g.size(), cyclic);
  365. rep(i, f.size()) f2[i] = f[i] % m32_2::modulus();
  366. rep(i, g.size()) g2[i] = g[i] % m32_2::modulus();
  367. convolve(f2, f.size(), g2, g.size(), cyclic);
  368. rep(i, f.size()) f3[i] = f[i] % m32_3::modulus();
  369. rep(i, g.size()) g3[i] = g[i] % m32_3::modulus();
  370. convolve(f3, f.size(), g3, g.size(), cyclic);
  371. }
  372. }
  373. #endif
  374.  
  375. public:
  376. static void amul(const R* f, int s1, const R* g, int s2, R* res) {
  377. int s = s1 + s2 - 1;
  378. tmp64.assign(s, 0);
  379. rep(i, s2) if (g[i]) vec_add(tmp64.data() + i, s1, f, g[i]);
  380. rep(i, s) res[i] = tmp64[i] % fast_mod;
  381. }
  382.  
  383. poly mul_basecase(const poly& g) const {
  384. const auto& f = *this;
  385. int s = size() + g.size() - 1;
  386. poly ret(s);
  387. amul(f.data(), f.size(), g.data(), g.size(), ret.data());
  388. return ret;
  389. }
  390.  
  391. // 1.0 * M(n)
  392. poly mul(const poly& g) const {
  393. const auto& f = *this;
  394. if (f.size() == 0 || g.size() == 0) return poly();
  395. if (f.size() + g.size() <= ntt_threshold) {
  396. return f.mul_basecase(g);
  397. } else {
  398. mul2(f, g, false);
  399. return mul_crt(0, f.size() + g.size() - 1);
  400. }
  401. }
  402.  
  403. // 1.0 * M(n)
  404. poly middle_product(const poly& g) const {
  405. const poly& f = *this;
  406. if (f.size() == 0 || g.size() == 0) return poly();
  407. mul2(f, g, true);
  408. return mul_crt(f.size(), g.size());
  409. }
  410.  
  411. void print() const {
  412. printf("[");
  413. if (size()) {
  414. printf("%u", coefs[0]);
  415. rep(i, 1, size()) printf(", %u", coefs[i]);
  416. }
  417. puts("]");
  418. }
  419.  
  420. public:
  421. vector<R> coefs;
  422. static vector<R> tmp32;
  423. static vector<R64> tmp64;
  424. static vector<R> invs, facts, ifacts;
  425. static R mod;
  426. static R64 lmod;
  427. static fast_div fast_mod;
  428. };
  429. R poly::mod;
  430. R64 poly::lmod;
  431. poly::fast_div poly::fast_mod;
  432. vector<R> poly::tmp32;
  433. vector<R64> poly::tmp64;
  434. vector<R> poly::invs, poly::facts, poly::ifacts;
  435.  
  436. int pow_mod(int b, int e, int mod) {
  437. int ret = 1;
  438. for (; e; e >>= 1, b = i64(b) * b % mod) {
  439. if (e & 1) ret = i64(ret) * b % mod;
  440. }
  441. return ret;
  442. }
  443.  
  444. int binomial_sum_mod_p(int N, int K, int mod) {
  445. if (K == 0) return 1 % mod;
  446. if (N <= K) return pow_mod(2, N, mod);
  447. if (i64(K) * 2 > N) {
  448. return (pow_mod(2, N, mod) + i64(mod) - binomial_sum_mod_p(N, N - K - 1, mod)) % mod;
  449. }
  450. assert(N < mod);
  451.  
  452. const int sqrt_K = sqrt(K);
  453. poly::set_mod(mod, sqrt_K);
  454.  
  455. auto mod_invs = [&] (vector<int>& f) {
  456. int n = f.size();
  457. vector<int> ret(f);
  458. if (n > 0) {
  459. rep(i, 1, n) ret[i] = i64(ret[i - 1]) * ret[i] % mod;
  460. int inv = poly::mod_inv(ret[n - 1]);
  461. for (int i = n - 1; i > 0; --i) {
  462. ret[i] = i64(ret[i - 1]) * inv % mod;
  463. inv = i64(inv) * f[i] % mod;
  464. }
  465. ret[0] = inv;
  466. }
  467. return ret;
  468. };
  469.  
  470. auto conv = [&] (vector<int>& f) -> poly {
  471. int n = f.size();
  472. const auto& ifacts = poly::ifacts;
  473. auto g = poly(f);
  474. rep(i, n) {
  475. int d = i64(ifacts[i]) * ifacts[(n - 1) - i] % mod;
  476. if ((n - 1 - i) & 1) d = mod - d;
  477. g[i] = i64(g[i]) * d % mod;
  478. }
  479. return g;
  480. };
  481.  
  482. auto shift = [&] (const poly& cf, const poly& f, i64 dx) {
  483. if ((dx %= mod) < 0) dx += mod;
  484. const int n = f.size();
  485. const int a = i64(dx) * poly::mod_inv(sqrt_K) % mod;
  486.  
  487. auto g = poly(2 * n);
  488. rep(i, g.size()) g[i] = (i64(mod) + a + i - n) % mod;
  489. rep(i, g.size()) if (g[i] == 0) g[i] = 1;
  490.  
  491. g.coefs = mod_invs(g.coefs);
  492. auto ret = cf.middle_product(g);
  493. int prod = 1;
  494. rep(i, n) prod = i64(prod) * (i64(mod) + a + n - 1 - i) % mod;
  495. for (int i = n - 1; i >= 0; --i) {
  496. ret[i] = i64(ret[i]) * prod % mod;
  497. prod = i64(prod) * g[n + i] % mod * (i64(mod) + a + i - n) % mod;
  498. }
  499. if (dx % sqrt_K == 0) {
  500. int k = n - dx / sqrt_K;
  501. rep(i, k) ret[i] = f[n - k + i];
  502. }
  503. return ret.coefs;
  504. };
  505.  
  506. using Pair = pair< vector<int>, vector<int> >;
  507. function< Pair(int) > rec = [&] (int n) -> Pair {
  508. if (n == 1) {
  509. return Pair({N, N - sqrt_K}, {1, sqrt_K + 1});
  510. }
  511. int nh = n >> 1;
  512. auto res = rec(nh);
  513. auto& f11 = res.first, &g11 = res.second;
  514.  
  515. auto f = conv(f11), g = conv(g11);
  516. auto g12 = shift(g, g11, nh);
  517. auto g21 = shift(g, g11, i64(sqrt_K) * nh);
  518. auto g22 = shift(g, g11, i64(sqrt_K) * nh + nh);
  519.  
  520. auto f12 = shift(f, f11, N - nh * i64(sqrt_K + 2));
  521. auto f21 = shift(f, f11, i64(sqrt_K) * nh);
  522. auto f22 = shift(f, f11, N - i64(2) * nh * (sqrt_K + 1));
  523. rep(i, nh + 1) {
  524. g11[i] = (i64(g11[i]) * f12[nh - i] + i64(g12[i]) * f11[i]) % mod;
  525. }
  526. rep(i, 1, nh + 1) {
  527. g11.push_back( (i64(g21[i]) * f22[nh - i] + i64(g22[i]) * f21[i]) % mod );
  528. }
  529.  
  530. f12 = shift(f, f11, nh);
  531. f22 = shift(f, f11, i64(sqrt_K) * nh + nh);
  532.  
  533. rep(i, nh + 1) f11[i] = i64(f11[i]) * f12[i] % mod;
  534. rep(i, 1, nh + 1) f11.push_back(i64(f21[i]) * f22[i] % mod);
  535.  
  536. if (n & 1) {
  537. rep(i, n) {
  538. g11[i] = (i64(g11[i]) + f11[i]) * (n + i64(i) * sqrt_K) % mod;
  539. }
  540. rep(i, n) {
  541. f11[i] = i64(f11[i]) * (i64(N) + mod - sqrt_K * i - n + 1) % mod;
  542. }
  543. vector<int> vals(n);
  544. rep(i, n) vals[i] = (i64(sqrt_K) * n + i + 1) % mod;
  545. if (i64(sqrt_K + 1) * n < mod) {
  546. int prod = 1;
  547. rep(i, n) prod = i64(prod) * vals[i] % mod;
  548. auto invs = mod_invs(vals);
  549. i64 s = 0;
  550. rep(i, n) {
  551. s += prod;
  552. prod = i64(prod) * invs[i] % mod * (i64(N) + mod - i64(sqrt_K) * n - i) % mod;
  553. }
  554. g11.push_back(s % mod);
  555. f11.push_back(prod);
  556. } else {
  557. g11.push_back(0);
  558. f11.push_back(0);
  559. }
  560. }
  561. return {f11, g11};
  562. };
  563.  
  564. auto res = rec(sqrt_K);
  565. auto &f1 = res.first, &g1 = res.second;
  566. auto f2 = shift(conv(f1), f1, N - i64(sqrt_K) * (sqrt_K + 1));
  567. reverse(f2.begin(), f2.end());
  568. f2.resize(f2.size() - 1);
  569. f2 = mod_invs(f2);
  570.  
  571. i64 ret = 0;
  572. rep(i, sqrt_K) {
  573. ret = (ret * f1[sqrt_K - 1 - i] + g1[sqrt_K - 1 - i]) % mod;
  574. ret = ret * f2[sqrt_K - 1 - i] % mod;
  575. }
  576.  
  577. int prod = 1;
  578. rep(i, sqrt_K) {
  579. prod = i64(prod) * f1[i] % mod * f2[i] % mod;
  580. }
  581.  
  582. const int rest = max(0, K - sqrt_K * sqrt_K);
  583. ret += prod;
  584. vector<int> invs(rest);
  585. rep(i, rest) invs[i] = i + 1 + sqrt_K * sqrt_K;
  586. invs = mod_invs(invs);
  587. rep(i, rest) {
  588. prod = i64(prod) * (N - sqrt_K * sqrt_K - i) % mod * invs[i] % mod;
  589. ret += prod;
  590. }
  591. ret %= mod;
  592. return ret;
  593. }
  594.  
  595. void solve() {
  596. const u32 p = u32(-1) >> 1;
  597. printf("%d\n", binomial_sum_mod_p(2e9, 1e9, p));
  598. }
  599.  
  600. int main() {
  601. clock_t beg = clock();
  602. solve();
  603. clock_t end = clock();
  604. fprintf(stderr, "%.3f sec\n", double(end - beg) / CLOCKS_PER_SEC);
  605. return 0;
  606. }
  607.  
Success #stdin #stdout #stderr 0.12s 19552KB
stdin
Standard input is empty
stdout
1236371654
stderr
0.123 sec