fork download
  1. #include <bits/stdc++.h>
  2. using namespace std;
  3.  
  4. using ull = uint64_t;
  5. using ll = int64_t;
  6.  
  7. template<typename complex_t>
  8. class Fft{
  9. static constexpr double PI(){return 3.1415926535897932384626;};
  10. static constexpr int SPLIT_BITS = 15;
  11. using cvec = std::vector<complex_t>;
  12. using ivec = std::vector<int64_t>;
  13.  
  14. static int lg(int x){
  15. return __builtin_clz(1) - __builtin_clz(x);
  16. }
  17. static int next_2pow(int x){
  18. return x>1 ? 1<<lg(2*x-1): 1;
  19. }
  20.  
  21. static std::vector<complex_t> roots;
  22. static void gen_roots(int const&depth){
  23. if(roots.size() < (1u<<depth)){
  24. if(roots.empty()){
  25. roots.emplace_back(std::polar(0.0, 0.0));
  26. roots.emplace_back(std::polar(1.0, 0.0));
  27. }
  28. int cur_d = lg(roots.size())-1;
  29. roots.resize(1<<(depth+1));
  30. for(int i = cur_d;i<depth;++i){
  31. int n = 1 << i;
  32. complex_t ome = std::polar(1.0, PI()/2.0/n);
  33. for(int j=n;j<2*n;++j){
  34. roots[2*j] = roots[j];
  35. roots[2*j+1] = roots[j]*ome;
  36. }
  37. }
  38. }
  39. }
  40. static void bitflip(cvec &v){
  41. int n = v.size();
  42. for(int i=1, j=0;i<n;++i){
  43. int m = n >> 1;
  44. for(;j>=m;m >>= 1)
  45. j -= m;
  46. j += m;
  47. if(i < j) std::swap(v[i], v[j]);
  48. }
  49. }
  50. template<bool is_rev>
  51. static void fft_regular(cvec& vec){
  52. bitflip(vec);
  53. int n = vec.size();
  54. gen_roots(lg(n));
  55. for(int iter=1, sh=0;iter<n;iter*=2, ++sh){
  56. for(int x=0;x<n;x+=2*iter){
  57. for(int y=0;y<iter;++y){
  58. complex_t ome = roots[(1 << sh) + y];
  59. if (is_rev) ome = conj(ome);
  60. complex_t v = vec[x+y], w=vec[x+y+iter];
  61. vec[x+y] = v+ome*w;
  62. vec[x+y+iter] = v-ome*w;
  63. }
  64. }
  65. }
  66. }
  67. template<bool is_rev>
  68. static void fft_inplace(cvec&v){
  69. fft_regular<is_rev>(v);
  70. }
  71. public:
  72. static void three_mul(ivec &vout, ivec const&v1, ivec const& v2, const size_t mod = 0){
  73. constexpr int BITS = 17, MASK = (1<<BITS) - 1;
  74. const size_t out_size = v1.size() + v2.size() - 1;
  75. const size_t n = next_2pow(out_size);
  76.  
  77. cvec c1(n, complex_t(0.0, 0.0)), c2(n, complex_t(0.0, 0.0)), c3(n, complex_t(0.0, 0.0));
  78. for(size_t i=0;i<v1.size();++i){
  79. int64_t a = v1[i] >> (2*BITS), b = (v1[i] >> BITS) & MASK, c = v1[i] & MASK;
  80. if(MASK-c < c){
  81. c-= 1<<BITS;
  82. ++b;
  83. }
  84. if(MASK-b < b){
  85. b-= 1<<BITS;
  86. ++a;
  87. }
  88. c1[i].real(a);
  89. c1[i].imag(b);
  90. c2[i].real(c);
  91. }
  92. for(size_t i=0;i<v2.size();++i){
  93. int64_t a = v2[i] >> (2*BITS), b = (v2[i] >> BITS) & MASK, c = v2[i] & MASK;
  94. if(MASK-c < c){
  95. c-= 1<<BITS;
  96. ++b;
  97. }
  98. if(MASK-b < b){
  99. b-= 1<<BITS;
  100. ++a;
  101. }
  102. c2[i].imag(a);
  103. c3[i].real(b);
  104. c3[i].imag(c);
  105. }
  106. fft_inplace<false>(c1);
  107. fft_inplace<false>(c2);
  108. fft_inplace<false>(c3);
  109.  
  110. const double factor = 0.25 / static_cast<double>(c1.size());
  111. const complex_t IMAG (0.0, 1.0);
  112. const complex_t NIMAG = conj(IMAG);
  113.  
  114. for(int i=0;i<=n/2;++i){
  115. int j = (n-i)&(n-1); //reverse index
  116. complex_t rx = (c1[i] + conj(c1[j]));
  117. complex_t ix = (c1[i] - conj(c1[j])) * NIMAG;
  118. complex_t ry = (c2[i] + conj(c2[j]));
  119. complex_t iy = (c2[i] - conj(c2[j])) * NIMAG;
  120. complex_t rz = (c3[i] + conj(c3[j]));
  121. complex_t iz = (c3[i] - conj(c3[j])) * NIMAG;
  122. //cerr << rx << " " << ix << " " << ry << "\n";
  123. c1[i] = (ry*iz + (ix*iz + ry*rz) * IMAG) * factor;
  124. c2[i] = (rx*iz + ix*rz + ry*iy) * factor;
  125. c1[j] = conj(ry*iz + (ix*iz + ry*rz) * NIMAG) * factor;
  126. c2[j] = conj(rx*iz + ix*rz + ry*iy) * factor;
  127. }
  128. fft_inplace<true>(c1);
  129. fft_inplace<true>(c2);
  130. vout.resize(out_size);
  131. //cerr << c1[0] << " " << c2[0] << "\n";
  132. for(int i=0;i<out_size;++i){
  133. uint64_t a = llround(c1[i].real()), b = llround(c1[i].imag()), c = llround(c2[i].real());
  134. vout[i] = (a + (b<<BITS) + (c<<(2*BITS))) % mod;
  135. }
  136.  
  137. }
  138. };
  139. template<typename complex_t>
  140. std::vector<complex_t> Fft<complex_t>::roots;
  141.  
  142. using fft = Fft<std::complex<double>>;
  143.  
  144.  
  145. template<int64_t mod>
  146. struct NT64 {
  147. static_assert(mod >= 0, "mod should be non-negative.");
  148. static int64_t add(int64_t const&a, int64_t const&b){
  149. int64_t ret = a+b;
  150. if(ret>=mod) ret-=mod;
  151. return ret;
  152. }
  153. static int64_t& xadd(int64_t& a, int64_t const&b){
  154. a+=b;
  155. if(a>=mod) a-=mod;
  156. return a;
  157. }
  158. static int64_t sub(int64_t const&a, int64_t const&b){
  159. return add(a, mod-b);
  160. }
  161. static int64_t& xsub(int64_t& a, int64_t const&b){
  162. return xadd(a, mod-b);
  163. }
  164. static int64_t mul(int64_t const&a, int64_t const&b){
  165. return a*static_cast<int64_t>(b)%mod;
  166. }
  167. static int64_t& xmul(int64_t &a, int64_t const&b){
  168. return a=mul(a, b);
  169. }
  170. static int64_t inv_rec(int64_t const&a, int64_t const&m){
  171. assert(a!=0);
  172. if(a==1) return 1;
  173. int64_t ret = m+(1-inv_rec(m%a, a)*static_cast<int64_t>(m))/a;
  174. return ret;
  175. }
  176. // this is soooo great, can even be used for a sieve
  177. static int64_t inv_rec_2(int64_t const&a, int64_t const&m){
  178. assert(a!=0);
  179. if(a==1) return 1;
  180. int64_t ret = m-mul((m/a), inv_rec_2(m%a, m));
  181. return ret;
  182. }
  183. static int64_t inv(int64_t const&a){
  184. return inv_rec(a, mod);
  185. }
  186. static int64_t div(int64_t const&a, int64_t const&b){
  187. return mul(a, inv(b));
  188. }
  189. };
  190.  
  191.  
  192. using nt = NT64<1ll<<32>;
  193.  
  194.  
  195.  
  196. const int max_log = 32 + 18;
  197. const ll MODMOD = 1ll << max_log;
  198.  
  199.  
  200. int lg(int x){
  201. return __builtin_clz(1) - __builtin_clz(x);
  202. }
  203.  
  204.  
  205. vector<unsigned int> facts, ifacts, v2;
  206.  
  207. void precalc_facts(int lim){
  208. facts.resize(lim, 1);
  209. ifacts.resize(lim, 1);
  210. v2.resize(lim, 0);
  211. for(int i=1;i<lim;++i){
  212. const int j = i >> __builtin_ctz(i);
  213. v2[i] = v2[i-1] + __builtin_ctz(i);
  214. facts[i] = facts[i-1] * j;
  215. }
  216. ifacts.back() = nt::inv(facts.back());
  217. for(int i=lim-1;i>0;--i){
  218. const int j = i >> __builtin_ctz(i);
  219. ifacts[i-1] = ifacts[i] * j;
  220. }
  221. }
  222.  
  223.  
  224. void twofy(vector<ll> &v){
  225. for(size_t i=0;i<v.size();++i){
  226. v[i] *= 1ll<<(i-v2[i]);
  227. v[i] *= ifacts[i];
  228. v[i] %= MODMOD;
  229. }
  230. }
  231.  
  232.  
  233. void vec_mul(vector<ll> &c, vector<ll> const&a, vector<ll> const&b){
  234. const int bits = 25;
  235. fft::three_mul(c, a, b, MODMOD);
  236. }
  237.  
  238. signed main()
  239. {
  240. cin.tie(0); cout.tie(0); ios_base::sync_with_stdio(false);
  241. precalc_facts(2e5+10);
  242. int n;
  243. cin >> n;
  244. ++n;
  245. const int m = lg(n)+1;
  246. vector<ll> a(n), b(n), c;
  247. for(int i=0;i<n;++i)
  248. cin >> a[i];
  249. for(int i=0;i<n;++i)
  250. cin >> b[i];
  251. twofy(a);
  252. twofy(b);
  253. vec_mul(c, a, b);
  254. for(int i=0;i<n;++i){
  255. ll val = c[i];
  256. val >>= (i-v2[i]);
  257. uint32_t out = val;
  258. out*= facts[i];
  259. cout << out << " ";
  260. }
  261. cout << "\n";
  262. return 0;
  263. }
  264.  
Success #stdin #stdout 0s 15248KB
stdin
5
0 1 2 3 4 5
6 7 8 9 10 11
stdout
0 6 26 84 240 640