不会
本来想写三模数NTT,发现常数太TM大了,还不知道怎么优化,于是就写拆系数FFT吧
首先一个简单的方法就是直接FFT开long double 去跑,发现精度掉得十分严重(值域大于long long),于是考虑如何解决
通过学习bitset的经验我们知道要压位
考虑拆系数
假设多项式 A ∗ B , 模 数 是 P A * B, 模数是P A∗B,模数是P
确 定 一 个 常 数 C 一 般 为 2 15 = 32 , 768 或 P 确定一个常数 C 一般为2^{15}= 32,768 或 \sqrt{P} 确定一个常数C一般为215=32,768或P
考 虑 把 A i 拆 成 a i ∗ C + b i 考虑把A_i 拆成 a_i *C+b_i 考虑把Ai拆成ai∗C+bi
把 B i 拆 成 c i ∗ C + d i 把B_i 拆成 c_i *C+d_i 把Bi拆成ci∗C+di
即
A i = a i ∗ C + b i A_i = a_i *C+b_i Ai=ai∗C+bi
B i = c i ∗ C + d i B_i = c_i *C+d_i Bi=ci∗C+di
考虑两个数 X , Y X,Y X,Y相乘
设 X = a ∗ C + b 设X = a*C+b 设X=a∗C+b
Y = c ∗ C + d Y= c *C+d Y=c∗C+d
X ∗ Y = ( a ∗ C + b ) ∗ ( c ∗ C + d ) X*Y=(a *C+b)*(c *C+d) X∗Y=(a∗C+b)∗(c∗C+d)
= a c ∗ C 2 + ( a d + b c ) C + b d =ac*C^2 + (ad+bc)C+bd =ac∗C2+(ad+bc)C+bd
发现C先不用管,要求的是 a c , a d + b c , b d ac,ad+bc,bd ac,ad+bc,bd
然后把 a , b , c , d a,b,c,d a,b,c,d分别求值(DFT),乘起来后再对 a c , a d + b c , b d ac,ad+bc,bd ac,ad+bc,bd做插值(IDFT)就行了
一共是4次DFT+3次IDFT=7次FFT
常数巨大
发现一开始的虚数部并没有用到,参考FFT3次变2次的优化,试试这个能不能行
设 X = a + b i 设X = a+bi 设X=a+bi
Y = c + d i Y= c +di Y=c+di
X ∗ Y = ( a + b i ) ∗ ( c + d i ) = a c − b d + ( b c + a d ) i X*Y=(a+bi)*(c+di)=ac-bd + (bc+ad)i X∗Y=(a+bi)∗(c+di)=ac−bd+(bc+ad)i
然鹅和我们要求的 a c , b d , a d + b c ac,bd,ad+bc ac,bd,ad+bc
现在已经就出来了 a d + b c ad+bc ad+bc
考虑如何求剩下两个
设 X ′ = a − b i 设X' = a - bi 设X′=a−bi
Y = c + d i Y= c + di Y=c+di
X ′ ∗ Y = ( a − b i ) ∗ ( c + d i ) = a c + b d + ( a d − b c ) i X'*Y=(a-bi)*(c+di)=ac+bd + (ad-bc)i X′∗Y=(a−bi)∗(c+di)=ac+bd+(ad−bc)i
然 后 就 知 道 了 实 数 部 然后就知道了实数部 然后就知道了实数部ac+bd 的 值 的值 的值
结 合 上 面 a c − b d 的 值 就 可 以 求 出 来 a c 和 b d 了 结合上面ac-bd的值就可以求出来ac和bd了 结合上面ac−bd的值就可以求出来ac和bd了
这 样 貌 似 只 用 对 X , Y , X ′ 这样貌似只用对X,Y,X' 这样貌似只用对X,Y,X′做3次求值(DFT)
再 对 X ∗ Y , X ′ ∗ Y 再对X*Y,X'*Y 再对X∗Y,X′∗Y做2次插值就可以了
也就是说一共5次
据说myy有4次的%%%%% orz
code:
#include
#define N 4000005
#define ll long long
#define double long double
using namespace std;
const double pi = acos(-1);
const double eps = 0.49;
const int C = 32768;
struct cp {
double a, b;
}X[N], Y[N], Xd[N];
cp operator +(cp x, cp y) {return cp{x.a + y.a, x.b + y.b};}
cp operator -(cp x, cp y) {return cp{x.a - y.a, x.b - y.b};}
cp operator *(cp x, cp y) {return cp{x.a * y.a - x.b * y.b, x.b * y.a + x.a * y.b};}
int rev[N], n, m, P;
void fft(cp *a, int len, int o) {
for(int i = 1; i <= len; i ++) rev[i] = (rev[i >> 1] >> 1) | ((i&1) * len >> 1);
for(int i = 1; i <= len; i ++) if(i < rev[i]) swap(a[i], a[rev[i]]);
for(int i = 2; i <= len; i <<= 1) {
cp wn = cp{cos(2 * pi / i), o * sin(2 * pi / i)};
for(int j = 0, p = i / 2; j + i - 1 <= len; j += i) {
cp w0 = cp{1, 0};
for(int k = j; k < j + p; k ++, w0 = w0 * wn) {
cp x = a[k], y = w0 * a[k + p];
a[k] = x + y;
a[k + p] = x - y;
}
}
}
if(o == -1) for(int i = 0; i <= len; i ++) a[i].a /= len, a[i].b /= len;
}
int main() {
scanf("%d%d%d", &n, &m, &P);
for(int i = 0; i <= n; i ++) {
int x;
scanf("%d", &x);
X[i].a = x / C;
X[i].b = x % C;
Xd[i].a = x / C;
Xd[i].b = -(x % C);
}
for(int i = 0; i <= m; i ++) {
int x;
scanf("%d", &x);
Y[i].a = x / C;
Y[i].b = x % C;
}
int len = 1;
for(; len <= n + m;) len <<= 1;
fft(X, len, 1), fft(Xd, len, 1), fft(Y, len, 1);
for(int i = 0; i <= len; i ++) X[i] = X[i] * Y[i], Xd[i] = Xd[i] * Y[i];
fft(X, len, -1), fft(Xd, len, -1);
for(int i = 0; i <= n + m; i ++) {
ll ac = (X[i].a + Xd[i].a) / 2 + eps;
ll bd = Xd[i].a - ac + eps;
ll bcad = X[i].b + eps;
printf("%lld ", (ac * C % P * C % P + bcad * C % P + bd) % P);
}
return 0;
}
板子一遍过,感动.jpg
code:
#include
#define N 4000005
#define ll long long
#define double long double
using namespace std;
const double pi = acos(-1);
const double eps = 0.49;
const int C = 32768;
struct cp {
double a, b;
}X[N], Y[N], Xd[N];
cp operator +(cp x, cp y) {return cp{x.a + y.a, x.b + y.b};}
cp operator -(cp x, cp y) {return cp{x.a - y.a, x.b - y.b};}
cp operator *(cp x, cp y) {return cp{x.a * y.a - x.b * y.b, x.b * y.a + x.a * y.b};}
int rev[N], P;
void fft(cp *a, int len, int o) {
for(int i = 1; i <= len; i ++) rev[i] = (rev[i >> 1] >> 1) | ((i&1) * len >> 1);
for(int i = 1; i <= len; i ++) if(i < rev[i]) swap(a[i], a[rev[i]]);
for(int i = 2; i <= len; i <<= 1) {
cp wn = cp{cos(2 * pi / i), o * sin(2 * pi / i)};
for(int j = 0, p = i / 2; j + i - 1 <= len; j += i) {
cp w0 = cp{1, 0};
for(int k = j; k < j + p; k ++, w0 = w0 * wn) {
cp x = a[k], y = w0 * a[k + p];
a[k] = x + y;
a[k + p] = x - y;
}
}
}
if(o == -1) for(int i = 0; i <= len; i ++) a[i].a /= len, a[i].b /= len;
}
void mul(ll *a, ll *b, int n, int m) {
for(int i = 0; i <= n; i ++) {
X[i].a = a[i] / C;
X[i].b = a[i] % C;
Xd[i].a = a[i] / C;
Xd[i].b = -(a[i] % C);
}
for(int i = 0; i <= m; i ++) {
Y[i].a = b[i] / C;
Y[i].b = b[i] % C;
}
int len = 1;
for(; len <= n + m;) len <<= 1;
fft(X, len, 1), fft(Xd, len, 1), fft(Y, len, 1);
for(int i = 0; i <= len; i ++) X[i] = X[i] * Y[i], Xd[i] = Xd[i] * Y[i];
fft(X, len, -1), fft(Xd, len, -1);
for(int i = 0; i <= n + m; i ++) {
ll ac = (X[i].a + Xd[i].a) / 2 + eps;
ll bd = Xd[i].a - ac + eps;
ll bcad = X[i].b + eps;
a[i] = (ac % P * C % P * C % P + bcad % P * C % P + bd % P) % P;
}
for(int i = 0; i <= len; i ++) X[i].a = X[i].b = Y[i].a = Y[i].b = Xd[i].a = Xd[i].b = 0;
}
int qpow(ll x, int y) {
ll ret = 1;
for(; y; y >>= 1, x = x * x % P) if(y & 1) ret = ret * x % P;
return ret;
}
ll c[N], d[N], e[N], n;
void inv(ll *a, ll *b, int sz){
if(sz == 0) {b[0] = qpow(a[0], P - 2); return;}
inv(a, b, sz / 2);
int len = 1;
for(; len <= sz + sz; len <<= 1);
for(int i = 0; i <= sz; i ++) c[i] = a[i];
for(int i = sz + 1; i <= len; i ++) c[i] = 0;
//for(int i = 0; i <= len; i ++) b[i] = (b[i] * 2 % mod - b[i] * b[i] % mod * c[i] % mod + mod) % mod;//Ö±½ÓËã
for(int i = 0; i <= sz; i ++) e[i] = b[i] * 2 % P;
for(int i = sz + 1; i <= len; i ++) e[i] = 0;
for(int i = 0; i <= sz; i ++) d[i] = b[i];
for(int i = sz + 1; i <= len; i ++) d[i] = 0;
mul(b, d, sz, sz); mul(b, c, sz, sz);
for(int i = 0; i <= sz; i ++) b[i] = (e[i] - b[i] + P) % P;
for(int i = sz + 1; i <= len; i ++) b[i] = 0;
}
ll a[N], b[N];
int main() {
scanf("%d", &n);
n --;
P = 1000000007;
for(int i = 0; i <= n; i ++) scanf("%lld", &a[i]);
inv(a, b, n);
for(int i = 0; i <= n; i ++) printf("%lld ", b[i]);
return 0;
}