fork download
  1. #pragma GCC optimize("Ofast")
  2. #pragma GCC target("sse,sse2,sse3,ssse3,sse4")
  3. #include <bits/stdc++.h>
  4.  
  5. using namespace std;
  6. #define link alflak
  7. #define fpos ladla
  8. #define all(x) begin(x), end(x)
  9. #define rall(x) x.rbegin(), x.rend()
  10.  
  11.  
  12. /*
  13.  * Our first step is to note that the standard Radix-2 FFT has the problem that
  14.  * its inverse transform requires division by 2, which is not invertible in R.
  15.  *
  16.  * We solve this by employing a Radix-3 FFT, which handles arrays whose size is
  17.  * a power of 3, and whose inverse transform requires division by 3.
  18.  *
  19.  * Now for FFT to work, the ring has to have a sufficiently powerful 3^m'th
  20.  * root of unity, and since the unit ring of R is Z_2 x Z_{2^62}, it only has
  21.  * roots of unity of order 2^m.
  22.  *
  23.  * The first step to solving this is by expanding the ring to have a 3rd root
  24.  * of unity. This extension can be realized as the ring R[omega]/(omega^2 +
  25.  * omega + 1), that is polynomials of the form a + b*omega, with the property
  26.  * that omega^2 = - omega - 1. It follows that omega^3 = 1.
  27.  *
  28.  * We call this new ring T and define the following type for its elements.
  29.  */
  30.  
  31. struct T {
  32. uint64_t a, b;
  33.  
  34. T() : a(0), b(0) { }
  35. T(uint64_t x) : a(x), b(0) { }
  36. T(uint64_t a, uint64_t b) : a(a), b(b) { }
  37.  
  38. //The conjugate of a + b*omega is given by mapping omega -> omega^2
  39. T conj() {
  40. return T{a - b, -b};
  41. }
  42.  
  43. T operator-() {
  44. return T{-a, -b};
  45. }
  46. };
  47.  
  48. /*
  49.  * A couple of useful constants: `OMEGA` is a third root of unity, `OMEGA2` is
  50.  * its square and `INV3` is the multiplicative inverse of 3.
  51.  */
  52.  
  53. const T OMEGA = {0, 1};
  54. const T OMEGA2 = {-1ull, -1ull};
  55. const T INV3 = {12297829382473034411ull, 0};
  56.  
  57. /*
  58.  * Standard operators.
  59.  */
  60.  
  61. T operator+(const T &u, const T &v) {
  62. return {u.a + v.a, u.b + v.b};
  63. }
  64.  
  65. T operator-(const T &u, const T &v) {
  66. return {u.a - v.a, u.b - v.b};
  67. }
  68.  
  69. T operator*(const T &u, const T &v) {
  70. return {u.a*v.a - u.b*v.b, u.b*v.a + u.a*v.b - u.b*v.b};
  71. }
  72.  
  73. void operator+=(T &u, const T &v) {
  74. u.a += v.a;
  75. u.b += v.b;
  76. }
  77.  
  78. void operator-=(T &u, const T &v) {
  79. u.a -= v.a;
  80. u.b -= v.b;
  81. }
  82.  
  83. void operator*=(T &u, const T &v) {
  84. uint64_t tmp=u.a;
  85. u.a=u.a*v.a - u.b*v.b;
  86. u.b=u.b*v.a + tmp*v.b - u.b*v.b;
  87. }
  88.  
  89. // We pack the main algorithm in a class of its own, mainly because we will have
  90. // to allocate some temporary working memory.
  91. class Conv64 {
  92. public:
  93.  
  94. // Returns the product of two polynomials from the ring R[x].
  95. vector<int64_t> multiply(const vector<int64_t> &p,
  96. const vector<int64_t> &q) {
  97. vector<uint64_t> pp(p.size()), qq(q.size());
  98. for (uint64_t i = 0; i < p.size(); ++i) {
  99. pp[i] = p[i];
  100. }
  101. for (uint64_t i = 0; i < q.size(); ++i) {
  102. qq[i] = q[i];
  103. }
  104. uint64_t s = 1;
  105. while (s < p.size() + q.size() - 1) {
  106. s *= 3;
  107. }
  108. pp.resize(s);
  109. qq.resize(s);
  110. vector<int64_t> res(s);
  111. multiply_cyclic_raw(pp.data(), qq.data(), pp.size(), (uint64_t*)res.data());
  112. res.resize(p.size() + q.size() - 1);
  113. return res;
  114. }
  115.  
  116. private:
  117.  
  118. // Temporary space.
  119. T *tmp;
  120.  
  121. // Returns the product of a polynomial and the monomial x^t in the ring
  122. // T[x]/(x^m - omega). The result is placed in `to`.
  123. // NOTE: t must be in the range [0,3m]
  124. void twiddle(T *p, uint64_t m, uint64_t t, T *to) {
  125. if (t == 0 || t == 3*m) {
  126. for (uint64_t j = 0; j < m; ++j) {
  127. to[j] = p[j];
  128. }
  129. return;
  130. }
  131. uint64_t tt;
  132. T mult = 1;
  133. if (t < m) {
  134. tt = t;
  135. } else if (t < 2*m) {
  136. tt = t - m;
  137. mult = OMEGA;
  138. } else {
  139. tt = t - 2*m;
  140. mult = OMEGA2;
  141. }
  142. for (uint64_t j=0;j<tt;j++) {
  143. to[j] = p[m - tt + j]*OMEGA*mult;
  144. }
  145. for (uint64_t j=tt;j<m;j++) {
  146. to[j] = p[j-tt]*mult;
  147. }
  148. }
  149.  
  150. // A "Decimation In Frequency" In-Place Radix-3 FFT Routine.
  151. // Input: A polynomial from (T[x]/(x^m - omega))[y]/(y^r - 1).
  152. // Output: Its Fourier transform (w.r.t. y) in 3-reversed order.
  153. void fftdif(T *p, uint64_t m, uint64_t r) {
  154. if (r == 1) return;
  155. uint64_t rr = r/3;
  156. uint64_t pos1 = m*rr, pos2 = 2*m*rr;
  157. for (uint64_t i = 0; i < rr; ++i) {
  158. for (uint64_t j = 0; j < m; ++j) {
  159. tmp[j] = p[i*m + j] + p[pos1 + i*m + j] + p[pos2 + i*m + j];
  160. tmp[m + j] = p[i*m + j] + OMEGA*p[pos1 + i*m + j] + OMEGA2*p[pos2 + i*m + j];
  161. tmp[2*m + j] = p[i*m + j] + OMEGA2*p[pos1 + i*m + j] + OMEGA*p[pos2 + i*m + j];
  162. p[i*m + j] = tmp[j];
  163. }
  164. twiddle(tmp + m, m, 3*i*m/r, p + pos1 + i*m);
  165. twiddle(tmp + 2*m, m, 6*i*m/r, p + pos2 + i*m);
  166. }
  167. fftdif(p, m, rr);
  168. fftdif(p + pos1, m, rr);
  169. fftdif(p + pos2, m, rr);
  170. }
  171.  
  172. // A "Decimation In Time" In-Place Radix-3 Inverse FFT Routine.
  173. // Input: A polynomial in (T[x]/(x^m - omega))[y]/(y^r - 1) with coefficients
  174. // in 3-reversed order.
  175. // Output: Its inverse Fourier transform in normal order.
  176. void fftdit(T *p, uint64_t m, uint64_t r) {
  177. if (r == 1) return;
  178. uint64_t rr = r/3;
  179. uint64_t pos1 = m*rr, pos2 = 2*m*rr;
  180. fftdit(p, m, rr);
  181. fftdit(p + pos1, m, rr);
  182. fftdit(p + pos2, m, rr);
  183. for (uint64_t i = 0; i < rr; ++i) {
  184. twiddle(p + pos1 + i*m, m, 3*m - 3*i*m/r, tmp + m);
  185. twiddle(p + pos2 + i*m, m, 3*m - 6*i*m/r, tmp + 2*m);
  186. for(uint64_t j = 0; j < m; ++j) {
  187. tmp[j] = p[i*m + j];
  188. p[i*m + j] = tmp[j] + tmp[m + j] + tmp[2*m + j];
  189. p[i*m + pos1 + j] = tmp[j] + OMEGA2*tmp[m + j] + OMEGA*tmp[2*m + j];
  190. p[i*m + pos2 + j] = tmp[j] + OMEGA*tmp[m + j] + OMEGA2*tmp[2*m + j];
  191. }
  192. }
  193. }
  194.  
  195. // Computes the product of two polynomials in T[x]/(x^n - omega), where n is
  196. // a power of 3. The result is placed in `to`.
  197. void mul(T *p, T *q, uint64_t n, T *to) {
  198. if (n <= 27) {
  199. // O(n^2) grade-school multiplication
  200. for (uint64_t i = 0; i < n; ++i) {
  201. to[i]=0;
  202. }
  203. for (uint64_t i = 0; i < n; ++i) {
  204. for (uint64_t j = 0; j < n - i; ++j) {
  205. to[i + j] += p[i]*q[j];
  206. }
  207. for (uint64_t j = n - i; j < n; ++j) {
  208. to[i + j - n] += p[i]*q[j]*OMEGA;
  209. }
  210. }
  211. return;
  212. }
  213.  
  214. uint64_t m = 1;
  215. while (m*m < n) {
  216. m *= 3;
  217. }
  218. uint64_t r = n/m;
  219.  
  220. T inv = 1;
  221. for (uint64_t i = 1; i < r; i *= 3) {
  222. inv *= INV3;
  223. }
  224.  
  225. /**********************************************************
  226.   * THE PRODUCT IN (T[x]/(x^m - omega))[y] / (y^r - omega) *
  227.   **********************************************************/
  228.  
  229. // Move to the ring (T[x]/(x^m - omega))[y]/(y^r - 1) via the map y -> x^(m/r) y
  230. for (uint64_t i = 0; i < r; ++i) {
  231. twiddle(p + m*i, m, m/r*i, to + m*i);
  232. twiddle(q + m*i, m, m/r*i, to + n + m*i);
  233. }
  234.  
  235. // Multiply using FFT
  236. fftdif(to, m, r);
  237. fftdif(to + n, m, r);
  238. for (uint64_t i = 0; i < r; ++i) {
  239. mul(to + m*i, to + n + m*i, m, to +2*n + m*i);
  240. }
  241. fftdit(to + 2*n, m, r);
  242. for (uint64_t i = 0; i < n; ++i) {
  243. to[2*n + i] *= inv;
  244. }
  245.  
  246. // Return to the ring (T[x]/(x^m - omega))[y]/(y^r - omega)
  247. for (uint64_t i = 0; i < r; ++i) {
  248. twiddle(to + 2*n + m*i, m, 3*m - m/r*i, to + n + m*i);
  249. }
  250.  
  251. /************************************************************
  252.   * THE PRODUCT IN (T[x]/(x^m - omega^2))[y] / (y^r - omega) *
  253.   ************************************************************/
  254.  
  255. // Use conjugation to move to the ring (T[x]/(x^m - omega))[y]/(y^r - omega^2).
  256. // Then move to (T[x]/(x^m - omega))[y]/(y^r - 1) via the map y -> x^(2m/r) y
  257. for (uint64_t i = 0; i < r; ++i) {
  258. for (uint64_t j = 0; j < m; ++j) {
  259. p[m*i + j] = p[m*i + j].conj();
  260. q[m*i + j] = q[m*i + j].conj();
  261. }
  262. twiddle(p + m*i, m, 2*m/r*i, to + m*i);
  263. twiddle(q + m*i, m, 2*m/r*i, p + m*i);
  264. }
  265.  
  266. fftdif(to, m, r);
  267. fftdif(p, m, r);
  268. for (uint64_t i = 0; i < r; ++i) {
  269. mul(to + m*i, p + m*i, m, to + 2*n + m*i);
  270. }
  271. fftdit(to + 2*n, m, r);
  272. for (uint64_t i = 0; i < n; ++i) {
  273. to[2*n + i] *= inv;
  274. }
  275.  
  276. for (uint64_t i = 0; i < r; ++i) {
  277. twiddle(to + 2*n + m*i, m, 3*m - 2*m/r*i, q + m*i);
  278. }
  279.  
  280. /**************************************************************************
  281.   * The product in (T[x]/(x^(2m) + x^m + 1))[y]/(y^r - omega) via CRT, and *
  282.   * unravelling the substitution y = x^m at the same time. *
  283.   **************************************************************************/
  284.  
  285. for (uint64_t i = 0; i < n; ++i) {
  286. to[i] = 0;
  287. }
  288. for (uint64_t i = 0; i < r; ++i) {
  289. for (uint64_t j = 0; j < m; ++j) {
  290. to[i*m + j] += (1 - OMEGA)*to[n + i*m + j] + (1 - OMEGA2)*q[i*m + j].conj();
  291. if (i*m + m + j < n) {
  292. to[i*m + m + j] += (OMEGA2 - OMEGA)*(to[n + i*m + j] - q[i*m + j].conj());
  293. } else {
  294. to[i*m + m + j - n] += (1 - OMEGA2)*(to[n + i*m + j] - q[i*m + j].conj());
  295. }
  296. }
  297. }
  298. for (uint64_t i = 0; i < n; ++i) {
  299. to[i] *= INV3;
  300. }
  301. }
  302.  
  303. // Computes the product of two polynomials from the ring R[x]/(x^n - 1), where
  304. // n must be a power of three. The result is placed in target which must have
  305. // space for n elements.
  306. void multiply_cyclic_raw(uint64_t *p, uint64_t *q, uint64_t n,
  307. uint64_t *target) {
  308. // If n = 3^k, let m = 3^(floor(k/2)) and r = 3^(ceil(k/2))
  309. uint64_t m = 1;
  310. while (m*m <= n) {
  311. m *= 3;
  312. }
  313. m /= 3;
  314. uint64_t r = n/m;
  315.  
  316. // Compute 3^(-r)
  317. T inv = 1;
  318. for (uint64_t i = 1; i < r; i *= 3) {
  319. inv *= INV3;
  320. }
  321.  
  322. // Allocate some working memory, the layout is as follows:
  323. // pp: length n
  324. // qq: length n
  325. // to: length n + 3*m
  326. // tmp: length 3*m
  327. T *buf = new T[3*n + 6*m];
  328. T *pp = buf;
  329. T *qq = buf + n;
  330. T *to = buf + 2*n;
  331. tmp = buf + 3*n + 3*m;
  332.  
  333. for (uint64_t i = 0; i < n; ++i) {
  334. pp[i] = p[i];
  335. qq[i] = q[i];
  336. }
  337.  
  338. // By setting y = x^m, we may write our polynomials in the form
  339. // (p_0 + p_1 x + ... + p_{m-1} x^{m-1})
  340. // + (p_m + ... + p_{2m-1} x^{m-1}) y
  341. // + ...
  342. // + (p_{(r-1)m} + ... + p_{rm - 1} x^{m-1}) y^r
  343. //
  344. // In this way we can view p and q as elements of the ring S[y]/(y^r - 1),
  345. // where S = R[x]/(x^m - omega), and since r <= 3m, we know that x^{3m/r} is
  346. // an rth root of unity. We can therefore use FFT to calculate the product
  347. // in S[y]/(y^r - 1).
  348. fftdif(pp, m, r);
  349. fftdif(qq, m, r);
  350. for (uint64_t i = 0; i < r; ++i) {
  351. mul(pp + i*m, qq + i*m, m, to + i*m);
  352. }
  353. fftdit(to, m, r);
  354. for (uint64_t i = 0; i<n; ++i) {
  355. pp[i] = to[i]*inv;
  356. }
  357.  
  358. // Now, the product in (T[x]/(x^m - omega^2))[y](y^r - 1) is simply the
  359. // conjugate of the product in (T[x]/(x^m - omega))[y]/(y^r - 1), because
  360. // there is no omega-component in the data.
  361. //
  362. // By the Chinese Remainder Theorem we can obtain the product in the
  363. // ring (T[x]/(x^(2m) + x^m + x))[y]/(y^r - 1), and then set y=x^m to get
  364. // the result.
  365. for (uint64_t i = 0; i < n; ++i) to[i] = 0;
  366. for (uint64_t i = 0; i < r; ++i) {
  367. for (uint64_t j = 0; j < m; ++j) {
  368. to[i*m + j] += (1 - OMEGA)*pp[i*m + j] + (1 - OMEGA2)*pp[i*m + j].conj();
  369. if (i*m + m + j < n) {
  370. to[i*m + m + j] += (OMEGA2 - OMEGA)*(pp[i*m + j] - pp[i*m + j].conj());
  371. } else {
  372. to[i*m + m + j - n] += (OMEGA2 - OMEGA) * (pp[i*m + j] - pp[i*m + j].conj());
  373. }
  374. }
  375. }
  376. for (uint64_t i = 0; i < n; ++i) {
  377. target[i] = (to[i]*INV3).a;
  378. }
  379.  
  380. delete[] buf;
  381. }
  382. };
  383.  
  384.  
  385.  
  386. template<typename T>
  387. inline T sqr(T x) {
  388. return x * x;
  389. }
  390.  
  391. inline int normalize(int x, int m) {
  392. return x < 0 ? x + m : x >= m ? x - m : x;
  393. }
  394. inline int mul(int a, int b, int m) {
  395. return int64_t(a) * b % m;
  396. }
  397. inline int add(int a, int b, int m) {
  398. return normalize(a + b, m);
  399. }
  400. inline int sub(int a, int b, int m) {
  401. return normalize(a - b, m);
  402. }
  403. inline int bpow(int x, int n, int m) {
  404. return n ? (n & 1) ? mul(x, bpow(x, n - 1, m), m) : bpow(mul(x, x, m), n >> 1, m) : 1;
  405. }
  406. inline int inv(int x, int m) {
  407. return bpow(x, m - 2, m);
  408. }
  409. const int mod = 2 * 491 * 499 + 1;
  410.  
  411. vector<int64_t> mul(vector<int64_t> a, vector<int64_t> b, int m) {
  412. Conv64 c;
  413. auto res = c.multiply(a, b);
  414. for(auto &it: res) {
  415. it %= m;
  416. }
  417. return res;
  418. }
  419.  
  420. int PW[mod], IPW[mod];
  421.  
  422. vector<int64_t> u(mod);
  423. vector<int64_t> v(2 * mod - 1);
  424. vector<int64_t> chirpz(vector<int> p, int root, int m, bool rev = 0) {
  425. for(auto &it: p) {
  426. it = normalize(it, m);
  427. }
  428. int n = p.size();
  429. if(rev) {
  430. for(int i = 0; i < n; i++) {
  431. u[i] = u[i] * PW[i] % mod;
  432. }
  433. } else {
  434. for(int i = 0; i < n; i++) {
  435. int t = PW[1LL * i * i % (m - 1)];
  436. u[i] = 1LL * p[i] * t % m;
  437. v[(n - 1) - i] = v[(n - 1) + i] = IPW[1LL * i * i % (m - 1)];
  438. }
  439. }
  440. auto w = mul(u, v, m);
  441. vector<int64_t> ans(n);
  442. for(int i = 0; i < n; i++) {
  443. ans[i] = mul(bpow(root, mul(i, i, m - 1), m), w[(n - 1) + i], m);
  444. }
  445. return ans;
  446. }
  447.  
  448. const int phi = mod - 1;
  449.  
  450. void fail() {
  451. cout << "NO" << endl;
  452. exit(0);
  453. }
  454.  
  455. bool check(int g) {
  456. return !(bpow(g, 2 * 491, phi) == 1 || bpow(g, 2 * 499, phi) == 1 || bpow(g, 491 * 499, phi) == 1);
  457. }
  458.  
  459. signed main() {
  460. //freopen("input.txt", "r", stdin);
  461. ios::sync_with_stdio(0);
  462. cin.tie(0);
  463. int i2 = inv(2, mod);
  464. PW[0] = IPW[0] = 1;
  465. for(int i = 1; i < mod; i++) {
  466. PW[i] = 1LL * PW[i - 1] * 2 % mod;
  467. IPW[i] = 1LL * IPW[i - 1] * i2 % mod;
  468. }
  469.  
  470. int n, m, c;
  471. cin >> n >> m >> c;
  472. int a[n], b[m];
  473. for(int i = 0; i < n; i++) {
  474. cin >> a[i];
  475. }
  476. vector<int> p(mod - 1);
  477. for(int j = 0; j < m; j++) {
  478. cin >> b[j];
  479. p[1LL * j * j * j % phi] += b[j];
  480. p[1LL * j * j * j % phi] %= mod;
  481. }
  482. auto np = p;
  483. for(int j = phi / 2; j < phi; j++) {
  484. p[j - phi / 2] += p[j];
  485. p[j - phi / 2] %= mod;
  486. }
  487. p.resize(phi / 2);
  488. int root = 2;
  489. vector<int64_t> eval[2];
  490. eval[0] = chirpz(p, root, mod);
  491. p = np;
  492. for(int i = 0; i < phi; i++) {
  493. p[i] = 1LL * p[i] * PW[i] % mod;
  494. }
  495. for(int j = phi / 2; j < phi; j++) {
  496. p[j - phi / 2] += p[j];
  497. p[j - phi / 2] %= mod;
  498. }
  499. p.resize(phi / 2);
  500. eval[1] = chirpz(p, root, mod);
  501. int rv[mod];
  502. for(int i = 0; i < phi; i++) {
  503. rv[PW[i]] = i;
  504. }
  505. for(int i = 0; i < 0; i++) {
  506. int x = bpow(2, i, mod);
  507. int cur = 0;
  508. for(int j = 0; j < mod; j++) {
  509. cur += 1LL * np[j] * bpow(x, j, mod) % mod;
  510. cur %= mod;
  511. }
  512. assert(cur == eval[i % 2][i / 2]);
  513. }
  514. int ans = 0;
  515. for(int i = 0; i < n; i++) {
  516. int x = bpow(c, 1LL * i * i % phi, mod);
  517. int t = rv[x];
  518. ans += 1LL * a[i] * eval[t % 2][t / 2] % mod;
  519. ans %= mod;
  520. }
  521. cout << ans << endl;
  522. return 0;
  523. }
  524.  
Success #stdin #stdout 1.89s 25896KB
stdin
3 4 1
1 1 1
1 1 1 1
stdout
12