//Annotated Version
#include<bits/stdc++.h>
using namespace std;
#define PI (atan(1)*4)
#define double long double
#define PRECISION 0.000000001
complex<double>xx,yy;
vector<int>seq=vector<int>{0};//butterfly sequence transform container
//generate the sequence(butterfly transform?) required to do iterative fft
//butterfly transform essentially takes care of all the odd/even ordering in the fft
//the ordering needs the resulting indices differences to be n/2k for the difference of two adjacent blocks of lengths k=2^a
//so instead of thinking of it as sorting as reverse(~x) or whatever the "correct" formula is
//we can recursively generate it, where each time we multiply the first half of the sequence by 2(since length of sequence is doubled, n/2k doubles -> differences also double)
//then the second half is just the first half shifted by one, following the (2n)/(2*(n/2))=1 formula
//note that given a sequence of length n and we need it for a polynomial of length n/2^k,
//we can just divide the first n/2^m terms by 2^m, dividing all the differences to correctly mesh together(n/2k/2^m=(n/2^m)/2k)
//we also know that the first n/2^m terms are divisble by 2^m, due to having sorted in the first half at least m times, gaining a factor of 2 each time it got sorted into the first half
//this ends up being O(2*n) time, which i think is just as fast as the other methods but remains much more intuitive and cleaner
//additionally you only really have to generate it "once"
void seqgen(int n){
if(seq.size()<n){seq.reserve(n);}
while(seq.size()<n){
//we double the sequence length in each iteration
int cur=seq.size();
for(register int a=0;a<cur;++a){
seq[a]*=2;//add the factor of two to the first number as described earlier
seq.push_back(seq[a]+1);//the second half of the sequence is just 1 + each corresponding number of the first half,
//as described earlier the two blocks are of length 2^(n-1) in wihch case n/(2*2^(n-1))=1 and is the valid difference
//no overlap because first half is even, second half is odd
}
}
}
vector<complex<double>>cpy;//temp vector to hold contents of poly
//returns results of plugging in roots of unity without inverse
//with inverse returns coefficients given results from roots of unity
vector<complex<double>> fft(vector<complex<double>>poly,bool flip=false){//polynomial + whether or not is inverse fft
//perform preprocessing necessary for butterfly transform
//i think that preprocessing before ends up being faster than the standard butterfly transform by a slight amount for repeated uses of fft
seqgen(poly.size());//generate sequence
for(int a=0;a<poly.size();++a){
cpy.push_back(poly[((long long)seq[a]*(long long)poly.size())/seq.size()]);//references from earlier(line 16), so we only have to generate at least one sequence greater than or equal to poly length
//butterfly transform in linear time
}
cpy.swap(poly);cpy.clear();
for(register int l=2;l<=poly.size();l*=2){//block size/root of unity
complex<double>root=polar(1.0,2*PI/l);//lth root of unity = w
complex<double>running=1;//running root of unity
//merge two loops into one, hopefully more efficient?
for(register int a=0;a<poly.size();++a){
if(a%l==l/2){a+=l/2;}//we only really want to iterate from [kl,kl+l/2) for all k
if(a==poly.size()){break;}
if(!(a%l)){running=1;}//reset running every cycle of length l
//we use the thing here now
//A(a)=A^even(a^2)+a*A^odd(a^2) because we simply shift the terms together
//plug in w^x=a
//A(w^x)=A^even(w^2x)+(w^x)A^odd(w^2x)
//A_even(w^2x)=A_old(w_0^x) where w_0 is the root of unity from the previous iteration
// because w_0=w^2 b/c w_0 is e^iπ/(l/2) but w is e^iπ/l
//A(w^x)=A_even(w_0^x)+(w^x)A_odd(w_0^x)
// for numbers in [kl,kl+l/2) we can just iterate over
//for numbers in [kl+l/2,(k+1)l) we can do it at the same time as [kl,kl+l/2) with the formula A(w^x)=A_even(w_0^x)-(w^x)A_odd(w_0^x)
// because w^(kl+a)=-w^((k+1)l-a) due to them being conjugates(product is magnitude squared)
//so here x=A_even(w_0^x) and y=w^x A_odd(w_0^2x) where w^x is held as the value running
//when l=1 we simply multiply it by the 1st root of unity=1 so we can skip l=1
xx=poly[a];yy=running*poly[a+l/2];
poly[a]=xx+yy;poly[a+l/2]=xx-yy;
running*=root;
}
}
if(flip){
//inverse fft
//same as normal fft, but you add this at the end
//if you think of terms of matrices you need to find inverse of vandermonde matrix
//which functions the exact same way but you must reverse the nonconstant terms
//and you must divide by n because of things that happen when simplifying
reverse(poly.begin()+1,poly.end());
for(int a=0;a<poly.size();++a){
poly[a]/=poly.size();
}
}
return poly;
}
vector<int>pmul(vector<int>p1,vector<int>p2){
int cap=p1.size()+p2.size()-1;//here because loj requires you to include leading 0's, is bullshit
int rn=p1.size()+p2.size()-1;//first find the total size
rn=(1<<(int)ceil(log2(rn)));//round to the first power of 2 above
//add cushioning zeroes
for(int a=p1.size();a<rn;++a){
p1.push_back(0);
}
for(int a=p2.size();a<rn;++a){
p2.push_back(0);
}
vector<complex<double>>f1,f2;//copy over p1 and p2 into a more appropriate datastructure
//i could use pointers in fft but fft has other uses i think and i dont want to ruin those
//so this is a bit more general?
for(int a=0;a<rn;++a){
f1.push_back(complex<double>(p1[a],0));
f2.push_back(complex<double>(p2[a],0));
}
f1=fft(f1),f2=fft(f2);//transformed point sets
//merge the two results together
//C(x)=A(x)B(x)
for(int a=0;a<rn;++a){
f1[a]*=f2[a];
}
//f1 now holds the point form of the product of p1 and p2
//so now we can just apply inverse fft
vector<complex<double>>ret=fft(f1,true);
//add the contents into an integer vector and prune the ending 0's off
vector<int>ans;
for(int a=0;a<ret.size();++a){
ans.push_back((int)round(ret[a].real()));//results should be integers for product of integer vector
}
//used to be prune off leading 0's, but loj needs those so uh
while(ans.size()>cap){
ans.pop_back();
}
return ans;
}