Last active
December 10, 2022 15:51
-
-
Save mizar/97aba1eb23a29afa03ee9e92c7906567 to your computer and use it in GitHub Desktop.
高速なエラトステネスの篩 https://github.com/peria/primes/tree/master/eratosthenes の Rust版
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- coding:utf-8-unix -*- | |
//! エラトステネスの篩 & 生成した素数の試し割りによる素因数分解 | |
//! | |
//! ## 素因数分解の実行例 | |
//! | |
//! [`main`] を参照 | |
//! | |
//! ## ベンチマーク実行例 | |
//! | |
//! ```text | |
//! cargo test --release -- tests::er --exact --nocapture | |
//! cargo test --release -- tests::er_pow10_7 --exact --nocapture | |
//! cargo test --release -- tests::er_pow10_8 --exact --nocapture | |
//! cargo test --release -- tests::er_pow10_9 --exact --nocapture | |
//! cargo test --release -- tests::er_pow2_32 --exact --nocapture | |
//! cargo test --release -- tests::er_pow10_10 --exact --nocapture | |
//! cargo test --release -- tests::factor_plain --exact --nocapture | |
//! cargo test --release -- tests::factor_prediv --exact --nocapture | |
//! ``` | |
//! | |
//! ## ドキュメント生成例 | |
//! | |
//! ```text | |
//! cargo +stable doc --no-deps --open | |
//! ``` | |
// ## 標準ドキュメント参照例 | |
// | |
// ```text | |
// rustup doc --std --toolchain stable | |
// rustup doc --std --toolchain 1.42.0 | |
// ``` | |
/// 除算の事前計算 | |
/// 有効にすると、事前計算に6秒程の計算時間+メモリ約3GBの消費と引き換えに、素因数分解時の整除判定が5倍~10倍程度高速化 | |
const USE_PRE_COMPUTAION_OF_DIVISION: bool = true; | |
/// エラトステネスの篩の実装バージョン (V2: 初期版, V6: 高速版) | |
pub enum SieveVersion { V2, V6 } | |
/// エラトステネスの篩の実装バージョン (V2: 初期版, V6: 高速版) | |
const USE_SIEVE_VERSION: SieveVersion = SieveVersion::V6; | |
/// 標準入力で入力された数列の素因数分解(入力先頭行は数列の長さ) | |
/// | |
/// ## 入力書式 | |
/// | |
/// `N`: 入力数, {`A_0`, `A_1`, `A_2`, ..., `A_{N-1}`}: 素因数分解する自然数 | |
/// | |
/// ```text | |
/// N | |
/// A_0 | |
/// A_1 | |
/// A_2 | |
/// ... | |
/// A_{N-1} | |
/// ``` | |
/// | |
/// ## 入力例 | |
/// | |
/// ```text | |
/// 27 | |
/// 3 | |
/// 11 | |
/// 111 | |
/// 1111 | |
/// 11111 | |
/// 111111 | |
/// 1111111 | |
/// 11111111 | |
/// 111111111 | |
/// 1111111111 | |
/// 11111111111 | |
/// 111111111111 | |
/// 1111111111111 | |
/// 11111111111111 | |
/// 111111111111111 | |
/// 1111111111111111 | |
/// 11111111111111111 | |
/// 111111111111111111 | |
/// 1111111111111111111 | |
/// 6111111111111111111 | |
/// 9223371915520229671 | |
/// 9223372036854775783 | |
/// 11111111111111111111 | |
/// 11297479344276717331 | |
/// 18446744065119616769 | |
/// 18446744073709551557 | |
/// 18446744073709551615 | |
/// ``` | |
/// | |
/// ## 出力例 | |
/// | |
/// ```text | |
/// - 0.796486sec sieve 4294967295 | |
/// - 1.846150sec divider 4294967295 | |
/// - 0.000008sec `3 : 3` | |
/// - 0.000002sec `11 : 11` | |
/// - 0.000005sec `111 : 3 37` | |
/// - 0.000007sec `1111 : 11 101` | |
/// - 0.000008sec `11111 : 41 271` | |
/// - 0.000003sec `111111 : 3 7 11 13 37` | |
/// - 0.000004sec `1111111 : 239 4649` | |
/// - 0.000011sec `11111111 : 11 73 101 137` | |
/// - 0.000005sec `111111111 : 3 3 37 333667` | |
/// - 0.000005sec `1111111111 : 11 41 271 9091` | |
/// - 0.000008sec `11111111111 : 21649 513239` | |
/// - 0.000010sec `111111111111 : 3 7 11 13 37 101 9901` | |
/// - 0.000007sec `1111111111111 : 53 79 265371653` | |
/// - 0.000008sec `11111111111111 : 11 239 4649 909091` | |
/// - 0.000011sec `111111111111111 : 3 31 37 41 271 2906161` | |
/// - 0.000017sec `1111111111111111 : 11 17 73 101 137 5882353` | |
/// - 0.000206sec `11111111111111111 : 2071723 5363222357` | |
/// - 0.000027sec `111111111111111111 : 3 3 7 11 13 19 37 52579 333667` | |
/// - 0.063563sec `1111111111111111111 : 1111111111111111111` | |
/// - 0.082739sec `6111111111111111111 : 3 2037037037037037037` | |
/// - 0.172398sec `9223371915520229671 : 3037000453 3037000507` | |
/// - 0.164838sec `9223372036854775783 : 9223372036854775783` | |
/// - 0.000037sec `11111111111111111111 : 11 41 101 271 3541 9091 27961` | |
/// - 0.181135sec `11297479344276717331 : 3263519417 3461747243` | |
/// - 0.242474sec `18446744065119616769 : 4294967279 4294967311` | |
/// - 0.233096sec `18446744073709551557 : 18446744073709551557` | |
/// - 0.000043sec `18446744073709551615 : 3 5 17 257 641 65537 6700417` | |
/// ``` | |
pub fn main() { | |
let a = { | |
use std::io::BufRead; | |
let stdin = std::io::stdin(); | |
let mut lines = stdin.lock().lines(); | |
let n = lines.next().unwrap().unwrap().parse::<usize>().unwrap(); | |
lines.take(n) | |
.map(|r| r.unwrap().parse::<u64>().unwrap()) | |
.collect::<Vec<u64>>() | |
}; | |
bench_factor(a, USE_PRE_COMPUTAION_OF_DIVISION); | |
} | |
/// ベンチマーク: 試し割り法での素因数分解 | |
pub fn bench_factor(a: Vec<u64>, prediv: bool) -> Vec<Vec<u64>> { | |
use std::io::Write; | |
let stdout = std::io::stdout(); | |
let mut out = std::io::BufWriter::new(stdout.lock()); | |
let tins = std::time::Instant::now(); | |
let amax = a.iter().cloned().max().unwrap(); | |
let amaxsqrt = sqrt_floor(amax); | |
let stime = tins.elapsed(); | |
let er = Eratosthenes::generate(amaxsqrt); | |
let etime = tins.elapsed(); | |
writeln!( | |
&mut out, | |
"- {:.6}sec {} {}", | |
(etime - stime).as_secs_f64(), | |
"sieve", | |
amaxsqrt, | |
).unwrap(); | |
out.flush().unwrap(); | |
let mut result = vec![]; | |
if prediv { // 逆元事前計算あり | |
let stime = tins.elapsed(); | |
let divider = InvDivider::generate(&er); | |
let etime = tins.elapsed(); | |
writeln!( | |
&mut out, | |
"- {:.6}sec {} {}", | |
(etime - stime).as_secs_f64(), | |
"divider", | |
amaxsqrt | |
).unwrap(); | |
out.flush().unwrap(); | |
for e in a.iter().cloned() { | |
let stime = tins.elapsed(); | |
let pv = divider.factor(e); | |
let etime = tins.elapsed(); | |
writeln!( | |
&mut out, | |
"- {:.6}sec `{} : {}`", | |
(etime - stime).as_secs_f64(), | |
e, | |
pv.iter().cloned().map(|p| p.to_string()).collect::<Vec<String>>().join(" "), | |
).unwrap(); | |
out.flush().unwrap(); | |
result.push(pv); | |
} | |
} else { // 逆元事前計算なし | |
for e in a.iter().cloned() { | |
let stime = tins.elapsed(); | |
let pv = InvDivider::factor_plain(&er, e); | |
let etime = tins.elapsed(); | |
writeln!( | |
&mut out, | |
"- {:.6}sec `{} : {}`", | |
(etime - stime).as_secs_f64(), | |
e, | |
pv.iter().cloned().map(|p| p.to_string()).collect::<Vec<String>>().join(" "), | |
).unwrap(); | |
out.flush().unwrap(); | |
result.push(pv); | |
} | |
} | |
result | |
} | |
/// ベンチマーク: エラトステネスの篩 V2:初期版 | |
pub fn bench_er_v2(s: &str, x: u64, pix: u64) { | |
use std::io::Write; | |
let stdout = std::io::stdout(); | |
let mut out = std::io::BufWriter::new(stdout.lock()); | |
writeln!(&mut out, "pi({})≃{};", s, approx_prime_pi(x)).unwrap(); | |
out.flush().unwrap(); | |
let tinstant = std::time::Instant::now(); | |
let er = Eratosthenes::generate_v2(x); | |
let el_sieve = tinstant.elapsed(); | |
let count = er.count(); | |
assert_eq!(count, pix); | |
writeln!(&mut out, "pi({})={}; {:.6}sec", s, count, el_sieve.as_secs_f64()).unwrap(); | |
out.flush().unwrap(); | |
} | |
/// ベンチマーク: エラトステネスの篩 V6:高速版 | |
pub fn bench_er_v6(s: &str, x: u64, pix: u64) { | |
use std::io::Write; | |
let stdout = std::io::stdout(); | |
let mut out = std::io::BufWriter::new(stdout.lock()); | |
writeln!(&mut out, "pi({})≃{};", s, approx_prime_pi(x)).unwrap(); | |
out.flush().unwrap(); | |
let tinstant = std::time::Instant::now(); | |
let er = Eratosthenes::generate_v6(x); | |
let el_sieve = tinstant.elapsed(); | |
let count = er.count(); | |
assert_eq!(count, pix); | |
writeln!(&mut out, "pi({})={}; {:.6}sec", s, count, el_sieve.as_secs_f64()).unwrap(); | |
out.flush().unwrap(); | |
} | |
/// ニュートン・ラフソン法による整数平方根 $`\left\lfloor\sqrt{x}\right\rfloor`$ | |
/// | |
/// - $`s_{n+1}=\lfloor(s_n+\lfloor a/s_n\rfloor)/2\rfloor`$ とする。 $`s_n\in\mathbb{N},\ a\in\mathbb{N},\ s_n\ge 1,\ a\ge 1`$ であるとする。 | |
/// - (補題) $`s_n`$ は自然数であるから、 | |
/// ```math | |
/// s_{n+1}=\left\lfloor\left(s_n+\left\lfloor\frac{a}{s_n}\right\rfloor\right)/2\right\rfloor=\left\lfloor\left\lfloor s_n+\frac{a}{s_n}\right\rfloor/2\right\rfloor=\left\lfloor\left(s_n+\frac{a}{s_n}\right)/2\right\rfloor=\left\lfloor\frac{s_n^2+a}{2s_n}\right\rfloor | |
/// ``` | |
/// - (1) $`s_n\gt\left\lfloor\sqrt{a}\right\rfloor`$ のとき、 $`\left\lfloor\sqrt{a}\right\rfloor\le s_{n+1}\lt s_n`$ である。(この条件の間は単調減少する保証) | |
/// - $`s_n\gt\left\lfloor\sqrt{a}\right\rfloor`$ より $`s_n\gt\sqrt{a}`$ であり、 $`\varepsilon\gt 0`$ を使って $`s_n=(1+\varepsilon)\sqrt{a}`$ とすると、 | |
/// ```math | |
/// \left\lfloor\sqrt{a}\right\rfloor\le\left\lfloor\frac{2+2\varepsilon+\varepsilon^2}{2(1+\varepsilon)}\sqrt{a}\right\rfloor=\left\lfloor\frac{s_n^2+a}{2s_n}\right\rfloor=s_{n+1}\le\frac{s_n^2+a}{2s_n}\lt\frac{s_n^2+s_n^2}{2s_n}=s_n | |
/// ``` | |
/// - (2) $`s_n=\left\lfloor\sqrt{a}\right\rfloor`$ のとき、 $`\left\lfloor\sqrt{a}\right\rfloor\le s_{n+1}\le\left\lfloor\sqrt{a}\right\rfloor+1`$ である。($`\left\lfloor\sqrt{a}\right\rfloor`$ 到達時の挙動の保証) | |
/// - $`s_n=\left\lfloor\sqrt{a}\right\rfloor,\quad\sqrt{a}-1\lt s_n\le\sqrt{a}`$ より、 $`s_n^2\le a\lt(s_n+1)^2`$ | |
/// - $`s_n`$ は自然数であるから $`\lfloor s_n\rfloor=s_n`$ および $`\displaystyle{\frac{1}{2s_n}\lt 1}`$ より、 | |
/// ```math | |
/// \left\lfloor\sqrt{a}\right\rfloor=\left\lfloor\frac{s_n^2+s_n^2}{2s_n}\right\rfloor\le s_{n+1}\le\left\lfloor\frac{s_n^2+(s_n+1)^2}{2s_n}\right\rfloor=\left\lfloor s_n+1+\frac{1}{2s_n}\right\rfloor=\left\lfloor\sqrt{a}\right\rfloor+1 | |
/// ``` | |
pub fn sqrt_floor(x: u64) -> u64 { | |
if x <= 1 { return x; } | |
let k = 32 - ((x - 1).leading_zeros() >> 1); | |
let mut s = 1u64 << k; // s = 2^k | |
let mut t = (s + (x >> k)) >> 1; // t = (s + x/s)/2 | |
// while loop count (= divide count) may be { u64: max 6 times, u32: max 5 times } | |
// s > floor(sqrt(x)) -> floor(sqrt(x)) <= t < s | |
// s == floor(sqrt(x)) -> s == floor(sqrt(x)) <= t <= floor(sqrt(x)) + 1 | |
while t < s { | |
s = t; | |
t = (s + x/s) >> 1; | |
} | |
s | |
} | |
/// $`\lceil\sqrt{x}\rceil`$ | |
/// | |
/// See [`sqrt_floor`] | |
pub fn sqrt_ceil(x: u64) -> u64 { | |
let s = sqrt_floor(x); | |
if x > s * s { s + 1 } else { s } | |
} | |
/// $`\operatorname{mod} 2^{64}`$での乗法逆元, $`N \times n \mod 2^{64} \equiv 1`$ | |
/// | |
/// - 入力値の整数 $`N`$ は奇数に限る、さもなくば $`N \times n \mod 2^{64} \equiv 1`$を満たす $`n`$ が存在しない | |
/// - $`\operatorname{mod}`$ が累乗数の場合、ニュートン法によって乗法逆元を計算できる | |
/// - 特に $`\operatorname{mod}`$ が $`2`$ の累乗数の場合、自身を $`\operatorname{mod}8`$ における乗法逆元としての初期値とする事が出来る $`N \mod 2 \equiv 1 ⇔ N^2 \mod 8 \equiv 1`$ | |
/// | |
/// - 2次収束 | |
/// - 整数 $`N`$ に対して 整数 $`n_j`$ が、 $`nx_j\mod m\equiv 1`$ を満たす時、 | |
/// $`n_{j+1}`$ を $`n_{j+1}=n_j(2-Nx_j)`$ と定義するなら、 | |
/// $`Nn_{j+1}\mod m^2\equiv 1`$ を満たす。 | |
/// - $`Nn_j=1+km`$ とおくと、 $`Nn_{j+1}=Nn_j(2-Nn_j)=1-(Nn_j-1)^2=1-k^2m^2\equiv 1\quad(\operatorname{mod}\ m^2)`$ | |
/// - 奇数 $`N`$ を $`N=1+2k`$ とおくと、 $`\left(k(1+k)\right)`$ は偶数であるから | |
/// - $`n_0=N,\quad n_{j+1}=n_j(2-N\,n_j)`$ とした時 | |
/// - $`Nn_0=1+2^3\cdot \left(k(1+k)/2\right)\ \ \to\ \ Nn_0\mod 2^3\equiv 1`$ | |
/// - $`Nn_1=1-2^6\cdot \left(k(1+k)/2\right)^2\ \ \to\ \ Nn_1\mod 2^6\equiv 1`$ | |
/// - $`Nn_2=1-2^{12}\cdot \left(k(1+k)/2\right)^4\ \ \to\ \ Nn_2\mod 2^{12}\equiv 1`$ | |
/// - $`Nn_3=1-2^{24}\cdot \left(k(1+k)/2\right)^8\ \ \to\ \ Nn_3\mod 2^{24}\equiv 1`$ | |
/// - $`Nn_4=1-2^{48}\cdot \left(k(1+k)/2\right)^{16}\ \ \to\ \ Nn_4\mod 2^{48}\equiv 1`$ | |
/// - $`Nn_5=1-2^{96}\cdot \left(k(1+k)/2\right)^{32}\ \ \to\ \ Nn_5\mod 2^{96}\equiv 1`$ | |
/// - 3次収束 | |
/// - 整数 $`N`$ に対して 整数 $`n_j`$ が、 $`Nn_j\mod m\equiv 1`$ を満たす時、 | |
/// $`n_{j+1}`$ を $`n_{j+1}=n_j(Nn_j(Nn_j-3)+3)`$ と定義するなら、 | |
/// $`Nn_{j+1}\mod m^3\equiv 1`$ を満たす。 | |
/// - $`Nn_j=1+km`$ とおくと、 $`Nn_{j+1}=Nn_j(Nn_j(Nn_j-3)+3)=1+(Nn_j-1)^3=1+k^3m^3\equiv 1\quad(\operatorname{mod}\ m^3)`$ | |
/// - 奇数 $`N`$ を $`N=1+2k`$ とおくと、 $`\left(k(1+k)\right)`$ は偶数であるから | |
/// - $`n_0=N,\quad n_{j+1}=n_j(Nn_j(Nn_j-3)+3)`$ とした時 | |
/// - $`Nn_0=1+2^3\cdot \left(k(1+k)/2\right)\ \ \to\ \ Nn_0\mod 2^3\equiv 1`$ | |
/// - $`Nn_1=1+2^9\cdot \left(k(1+k)/2\right)^3\ \ \to\ \ Nn_1\mod 2^9\equiv 1`$ | |
/// - $`Nn_2=1+2^{27}\cdot \left(k(1+k)/2\right)^9\ \ \to\ \ Nn_2\mod 2^{27}\equiv 1`$ | |
/// - $`Nn_3=1+2^{81}\cdot \left(k(1+k)/2\right)^{27}\ \ \to\ \ Nn_3\mod 2^{81}\equiv 1`$ | |
/// | |
/// See [`u64max_quot`], [`InvDivider::factor`] | |
pub fn u64mod_inv(n: u64) -> u64 { | |
debug_assert_eq!(n & 1, 1); // 入力値は奇数に限る | |
let n32 = n as u32; | |
let ninv = n32; | |
let t = n32.wrapping_mul(ninv); | |
let ninv = ninv.wrapping_mul(t.wrapping_sub(3).wrapping_mul(t).wrapping_add(3)); | |
let t = n32.wrapping_mul(ninv); | |
let ninv = ninv.wrapping_mul(t.wrapping_sub(3).wrapping_mul(t).wrapping_add(3)) as u64; | |
let t = n.wrapping_mul(ninv); | |
let ninv = ninv.wrapping_mul(t.wrapping_sub(3).wrapping_mul(t).wrapping_add(3)); | |
debug_assert_eq!(n.wrapping_mul(ninv), 1); // n * ni mod 2^64 == 1 | |
ninv | |
} | |
/// $`\lfloor (2^{64}-1)/n \rfloor`$ | |
/// | |
/// See [`u64mod_inv`], [`InvDivider::factor`] | |
pub fn u64max_quot(n: u64) -> u64 { | |
debug_assert_ne!(n, 0); | |
// u64 / NonZeroU64 (ゼロ除算のエラーチェックを省略) は rust 1.51.0 以降 | |
// unsafe { (!0u64) / std::num::NonZeroU64::new_unchecked(n) } | |
(!0u64) / n | |
} | |
/// 素数計数関数 $`\pi(x)`$ の近似式 (Dusart) | |
/// | |
/// 確保が必要なメモリ領域の見積もり用に、 $`\pi(x)`$ 以上になる近似式を用いる: 下記の(1)式 | |
/// | |
/// ```math | |
/// \begin{align} | |
/// \pi(x) &\lt \frac{x}{\ln(x)}\biggl(1+\frac{1}{\ln(x)}+\frac{2}{\ln^2(x)}+\frac{7.59}{\ln^3(x)}\biggr) \quad (x \gt 1) \\ | |
/// \pi(x) &\gt \frac{x}{\ln(x)}\biggl(1+\frac{1}{\ln(x)}+\frac{2}{\ln^2(x)}\biggr) \quad (x \ge 88789) \\ | |
/// \end{align} | |
/// ``` | |
/// | |
/// <https://en.wikipedia.org/wiki/Prime-counting_function#Inequalities> | |
pub fn approx_prime_pi(x: u64) -> u64 { | |
match x { | |
0..=1 => 0, | |
2..=2 => 1, | |
3..=4 => 2, | |
5..=6 => 3, | |
7..=10 => 4, | |
11..=12 => 5, | |
13..=16 => 6, | |
17..=18 => 7, | |
19..=22 => 8, | |
23..=28 => 9, | |
29..=30 => 10, | |
_ => { | |
let xf = x as f64; | |
let iln = 1./xf.ln(); | |
((1.+(1.+(2.+7.59*iln)*iln)*iln)*iln*xf).ceil() as u64 | |
}, | |
} | |
} | |
/// 高速なエラトステネスの篩 | |
/// | |
/// * <https://qiita.com/peria/items/a4ff4ddb3336f7b81d50> | |
/// * <https://github.com/peria/primes/tree/master/eratosthenes> | |
pub struct Eratosthenes { | |
x: u64, | |
flags: Vec<u8>, | |
} | |
impl Eratosthenes { | |
/// CPUキャッシュサイズを考慮した処理単位 | |
/// | |
/// - 最適パラメータの例: | |
/// - Intel Core i7 7500U (L1dキャッシュ: 32kiB/instance, L2キャッシュ: 256kiB/instance): SEGMENT_SIZE 128kiB が最適 | |
/// ```text | |
/// Caches (sum of all): | |
/// L1d: 64 KiB (2 instances) | |
/// L1i: 64 KiB (2 instances) | |
/// L2: 512 KiB (2 instances) | |
/// L3: 4 MiB (1 instance) | |
/// ``` | |
/// - AMD Ryzen Threadripper 3970X (L1dキャッシュ: 16kiB/instance, L2キャッシュ: 512kiB/instance): SEGMENT_SIZE 512kiB が最適 | |
/// ```text | |
/// Caches (sum of all): | |
/// L1d: 1 MiB (32 instances) | |
/// L1i: 1 MiB (32 instances) | |
/// L2: 16 MiB (32 instances) | |
/// L3: 16 MiB (1 instance) | |
/// ``` | |
//const SEGMENT_SIZE: usize = 65536; // 64kiB | |
//const SEGMENT_SIZE: usize = 131072; // 128kiB | |
//const SEGMENT_SIZE: usize = 262144; // 256kiB | |
const SEGMENT_SIZE: usize = 524288; // 512kiB | |
//const SEGMENT_SIZE: usize = 1048576; // 1MiB | |
//const SEGMENT_SIZE: usize = 2097152; // 2MiB | |
//const SEGMENT_SIZE: usize = 4194304; // 4MiB | |
//const SEGMENT_SIZE: usize = 8388608; // 8MiB | |
//const SEGMENT_SIZE: usize = 16777216; // 16MiB | |
//const SEGMENT_SIZE: usize = 33554432; // 32MiB | |
//const SEGMENT_SIZE: usize = 67108864; // 64MiB | |
//const SEGMENT_SIZE: usize = 134217728; // 128MiB | |
//const SEGMENT_SIZE: usize = 268435456; // 256MiB | |
/// `[1, 7, 11, 13, 17, 19, 23, 29]` | |
const MOD_30: [usize; 8] = [1, 7, 11, 13, 17, 19, 23, 29]; | |
/// `[n0-m0 for (n0,m0) in zip(n, m)]` | |
const C1: [u16; 8] = [6, 4, 2, 4, 2, 4, 6, 2]; | |
/// `[[m0*n1/30-m0*m1/30 for (n1,m1) in zip(n, m)] for m0 in m]` | |
const C0: [[u16; 8]; 8] = [ | |
[0, 0, 0, 0, 0, 0, 0, 1], [1, 1, 1, 0, 1, 1, 1, 1], | |
[2, 2, 0, 2, 0, 2, 2, 1], [3, 1, 1, 2, 1, 1, 3, 1], | |
[3, 3, 1, 2, 1, 3, 3, 1], [4, 2, 2, 2, 2, 2, 4, 1], | |
[5, 3, 1, 4, 1, 3, 5, 1], [6, 4, 2, 4, 2, 4, 6, 1], | |
]; | |
/// `[[bitoff(m0*m1%30) for m1 in m] for m0 in m]` | |
const MASK: [[u8; 8]; 8] = [ | |
[0xfe, 0xfd, 0xfb, 0xf7, 0xef, 0xdf, 0xbf, 0x7f], | |
[0xfd, 0xdf, 0xef, 0xfe, 0x7f, 0xf7, 0xfb, 0xbf], | |
[0xfb, 0xef, 0xfe, 0xbf, 0xfd, 0x7f, 0xf7, 0xdf], | |
[0xf7, 0xfe, 0xbf, 0xdf, 0xfb, 0xfd, 0x7f, 0xef], | |
[0xef, 0x7f, 0xfd, 0xfb, 0xdf, 0xbf, 0xfe, 0xf7], | |
[0xdf, 0xf7, 0x7f, 0xfd, 0xbf, 0xfe, 0xef, 0xfb], | |
[0xbf, 0xfb, 0xf7, 0x7f, 0xfe, 0xef, 0xdf, 0xfd], | |
[0x7f, 0xbf, 0xdf, 0xef, 0xf7, 0xfb, 0xfd, 0xfe], | |
]; | |
const BIT_TO_INDEX: [u8; 256] = [ | |
0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, | |
]; | |
fn bit_to_index(x: u8) -> usize { | |
Self::BIT_TO_INDEX[x as usize] as usize // == if x == 0 { 0 } else { x.trailing_zeros() as usize } | |
} | |
/// エラトステネスの篩の高速化(2) に近い実装 | |
/// | |
/// * <https://qiita.com/peria/items/54499b9ce9d5c1e93e5a> | |
pub fn generate_v2(x: u64) -> Self { | |
let flags_size = (x as usize / 30 + if x % 30 != 0 { 1 } else { 0 }).max(1); | |
// mod 30 == {1, 7, 11, 13, 17, 19, 23, 29} となるビット列 (mod 2,3,5 == 0 をあらかじめリストから除外) | |
let mut flags = vec![!0u8; flags_size]; | |
// {1} を篩から削除 | |
flags[0] &= 0xfe; | |
// 末端の調整 | |
flags[flags_size - 1] &= match x % 30 { | |
0..= 0 => 0xff, | |
1..= 6 => 0x01, | |
7..=10 => 0x03, | |
11..=12 => 0x07, | |
13..=16 => 0x0f, | |
17..=18 => 0x1f, | |
19..=22 => 0x3f, | |
23..=28 => 0x7f, | |
29..=29 => 0xff, | |
_ => unreachable!(), | |
}; | |
// ceil(平方根) の計算 | |
let sqrt_x = sqrt_ceil(x); | |
let sqrt_xi = sqrt_x as usize / 30 + 1; | |
// 篩 | |
for i in 0..sqrt_xi { | |
let mut f = flags[i]; | |
while f != 0 { | |
let ibit = Self::bit_to_index(f); // == if f == 0 { 0 } else { f.trailing_zeros() as usize }; | |
let m = Self::MOD_30[ibit]; | |
let pm = 30 * i + 2 * m; | |
let mut j = i * pm + (m * m) / 30; | |
let mut k = ibit; | |
let mask = Self::MASK[ibit]; | |
let c0 = Self::C0[ibit]; | |
while j < flags_size { | |
flags[j] &= mask[k]; | |
j += i * Self::C1[k] as usize + c0[k] as usize; | |
k = (k + 1) % 8; | |
} | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
} | |
// 結果出力 | |
Self { x, flags } | |
} | |
/// エラトステネスの篩の高速化 (6) に近い実装 | |
/// | |
/// * <https://qiita.com/peria/items/c1d8523342e81bb23375> | |
/// | |
/// (4)の省メモリ化・区間篩は未実装 | |
pub fn generate_v6(x: u64) -> Self { | |
let flags_size = ((x / 30) as usize + if x % 30 != 0 { 1 } else { 0 }).max(1); | |
let (initial_loop, initial_size) = match x { | |
0..= 209 => (0, 1), | |
210..= 2309 => (1, 7), | |
2310..= 30029 => (2, 7 * 11), | |
30030..= 510509 => (3, 7 * 11 * 13), | |
510510..= 9699689 => (4, 7 * 11 * 13 * 17), | |
_ => (5, 7 * 11 * 13 * 17 * 19), | |
}; | |
assert!(flags_size >= initial_size); | |
// mod 30 == {1, 7, 11, 13, 17, 19, 23, 29} となるビット列 (mod 2,3,5 == 0 をあらかじめリストから除外) | |
// Rust 1.53.0 以降であれば、flagsを [`std::vec::Vec::with_capacity`] で作成してから 1要素だけ `255u8` を追加し、 | |
// [`slice::copy_within`] の代わりに [`std::vec::Vec::extend_from_within`] でコピーする方が良さそう。 | |
let mut flags = vec![0u8; flags_size]; | |
flags[0] = !0u8; | |
// {7, 11, 13, 17, 19, 23} による篩 | |
let mut size = 1; | |
for ibit in 1..=initial_loop { | |
let p = Self::MOD_30[ibit]; | |
let nsize = size * p; | |
while size < nsize { | |
flags.as_mut_slice().copy_within(0..(size.min(nsize - size)), size); | |
size = size.saturating_mul(2).min(nsize); | |
} | |
size = nsize; | |
let mut j = 0; | |
let mut k = 0; | |
let mask = Self::MASK[ibit]; | |
let c0 = Self::C0[ibit]; | |
while j < size { | |
flags[j] &= mask[k]; | |
j += c0[k] as usize; | |
k = (k + 1) % 8; | |
} | |
} | |
// {7, 11, 13, 17, 19} による篩結果の繰り返しコピー | |
assert_eq!(size, initial_size); | |
while size < flags_size { | |
flags.as_mut_slice().copy_within(0..(size.min(flags_size - size)), size); | |
size = size.saturating_add(size).min(flags_size); | |
} | |
assert_eq!(flags.len(), flags_size); | |
// {1} を篩から削除 | |
assert_eq!(flags[0] & 1, 1); | |
flags[0] &= 0xfe; | |
assert_eq!(flags[0] & 1, 0); | |
// 末端の調整 | |
flags[flags_size - 1] &= match x % 30 { | |
0..= 0 => 0xff, | |
1..= 6 => 0x01, | |
7..=10 => 0x03, | |
11..=12 => 0x07, | |
13..=16 => 0x0f, | |
17..=18 => 0x1f, | |
19..=22 => 0x3f, | |
23..=28 => 0x7f, | |
29..=29 => 0xff, | |
_ => unreachable!(), | |
}; | |
// ceil(平方根),ceil(4乗根) の計算 | |
let sqrt_x = sqrt_ceil(x); | |
let qurt_x = sqrt_ceil(sqrt_x); | |
let sqrt_xi = sqrt_x as usize / 30 + 1; | |
let qurt_xi = qurt_x as usize / 30 + 1; | |
// 初期篩(4乗根までの素数から2乗根までの素数を生成) | |
for i in 0..qurt_xi { | |
let mut f = flags[i]; | |
while f != 0 { | |
let ibit = Self::bit_to_index(f); // == if f == 0 { 0 } else { f.trailing_zeros() as usize }; | |
let m = Self::MOD_30[ibit]; | |
let pm = 30 * i + 2 * m; | |
let mut j = i * pm + (m * m) / 30; | |
let mut k = ibit; | |
let mask = Self::MASK[ibit]; | |
let c0 = Self::C0[ibit]; | |
while j < sqrt_xi { | |
flags[j] &= mask[k]; | |
j += i * Self::C1[k] as usize + c0[k] as usize; | |
k = (k + 1) % 8; | |
} | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
} | |
// 区間篩用の篩位置リスト | |
let mut indecies = vec![0usize; sqrt_xi << 3]; | |
for i in 0..sqrt_xi { | |
let mut f = flags[i]; | |
while f != 0 { | |
let ibit = Self::bit_to_index(f); // == if f == 0 { 0 } else { f.trailing_zeros() as usize }; | |
let iind = (i << 3) | ibit; | |
let m = Self::MOD_30[ibit]; | |
let pm = 30 * i + 2 * m; | |
let j = i * pm + (m * m) / 30; | |
let k = ibit; | |
indecies[iind] = (j << 3) | k; | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
} | |
// SEGMENT_SIZE毎に区間篩の繰り返し | |
// SEGMENT_SIZE毎に処理し、CPUのキャッシュミスによる遅延を減らす | |
for limit in | |
(Self::SEGMENT_SIZE..flags_size) | |
.step_by(Self::SEGMENT_SIZE) | |
.chain([flags_size].iter().cloned()) | |
{ | |
let sqrt_l = sqrt_floor(limit as u64 * 30); | |
let sqrt_li = sqrt_l as usize / 30 + 1; | |
for i in 0..sqrt_li { | |
let mut f = unsafe { *flags.get_unchecked(i) }; | |
while f != 0 { | |
let ibit = Self::bit_to_index(f); // == if f == 0 { 0 } else { f.trailing_zeros() as usize }; | |
let mask = unsafe { *Self::MASK.get_unchecked(ibit) }; | |
let c0 = unsafe { *Self::C0.get_unchecked(ibit) }; | |
let iind = (i << 3) | ibit; | |
let mut j = unsafe { *indecies.get_unchecked(iind) / 8 }; | |
let mut k = unsafe { *indecies.get_unchecked(iind) % 8 }; | |
while k != 0 && j < limit { | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[k]; j += i * Self::C1[k] as usize + c0[k] as usize; | |
k = (k + 1) % 8; | |
} | |
// loop unrolling | |
while j + i * 28 + Self::MOD_30[ibit] - 1 < limit { | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[0]; j += i * Self::C1[0] as usize + c0[0] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[1]; j += i * Self::C1[1] as usize + c0[1] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[2]; j += i * Self::C1[2] as usize + c0[2] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[3]; j += i * Self::C1[3] as usize + c0[3] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[4]; j += i * Self::C1[4] as usize + c0[4] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[5]; j += i * Self::C1[5] as usize + c0[5] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[6]; j += i * Self::C1[6] as usize + c0[6] as usize; | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[7]; j += i * Self::C1[7] as usize + c0[7] as usize; | |
} | |
while j < limit { | |
*unsafe { flags.get_unchecked_mut(j) } &= mask[k]; j += i * Self::C1[k] as usize + c0[k] as usize; | |
k = (k + 1) % 8; | |
} | |
*unsafe{ indecies.get_unchecked_mut(iind) } = (j << 3) | k; | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
} | |
} | |
// {7, 11, 13, 17, 19, 23, 29} を素数リストに追加 | |
flags[0] = match x { | |
0..= 6 => 0x00, | |
7..=10 => 0x02, | |
11..=12 => 0x06, | |
13..=16 => 0x0e, | |
17..=18 => 0x1e, | |
19..=22 => 0x3e, | |
23..=28 => 0x7e, | |
29..=29 => 0xfe, | |
_ => 0xfe, | |
}; | |
// 結果出力 | |
Self { x, flags } | |
} | |
pub fn generate(x: u64) -> Self { | |
match USE_SIEVE_VERSION { | |
SieveVersion::V2 => Self::generate_v2(x), | |
SieveVersion::V6 => Self::generate_v6(x), | |
} | |
} | |
fn count(&self) -> u64 { | |
let mut count = match self.x { | |
0..=1 => 0, // count {} | |
2..=2 => 1, // count {2} | |
3..=4 => 2, // count {2, 3} | |
_ => 3, // count {2, 3, 5} | |
}; | |
for &e in self.flags.iter() { | |
count += e.count_ones() as u64; | |
} | |
count | |
} | |
fn _write_primes(&self, out: &mut dyn std::io::Write) { | |
for p in [2,3,5].iter().cloned().filter(|&p| p <= self.x) { | |
out.write_fmt(format_args!("{}\n", p)).unwrap(); | |
} | |
for i in 0..self.flags.len() { | |
let mut f = self.flags[i]; | |
while f != 0 { | |
let lsb = f & f.wrapping_neg(); // fetch least one bit | |
let ibit = Eratosthenes::bit_to_index(lsb); | |
let p = i as u64 * 30 + Eratosthenes::MOD_30[ibit] as u64; | |
out.write_fmt(format_args!("{}\n", p)).unwrap(); | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
} | |
out.flush().unwrap(); | |
} | |
fn _write_last_primes(&self, out: &mut dyn std::io::Write) { | |
if let Some(i) = (0..self.flags.len()).rfind(|&i| self.flags[i] != 0) { | |
let mut f = self.flags[i]; | |
while f != 0 { | |
let lsb = f & f.wrapping_neg(); // fetch least one bit | |
let ibit = Eratosthenes::bit_to_index(lsb); | |
let p = i as u64 * 30 + Eratosthenes::MOD_30[ibit] as u64; | |
out.write_fmt(format_args!("{}\n", p)).unwrap(); | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
} | |
out.flush().unwrap(); | |
} | |
} | |
/// 素数による整除判定用定数 | |
pub struct InvDivider { | |
/// 使用した素数篩の上限 | |
x: u64, | |
/// $`x`$ 以下の奇素数 $`P`$ について、 $`\operatorname{mod}2^{64}`$ での乗法逆元 $`Q`$ ($`P \times Q \mod 2^{64}`$)、 $`P`$ での除法の商の上限 $`\lfloor(2^{64}-1)/P\rfloor`$ の組 | |
/// | |
/// See [`u64mod_inv`], [`u64max_quot`], [`InvDivider::factor`] | |
id: Vec<(u64,u64)>, | |
} | |
impl InvDivider { | |
/// 素数による整除判定用定数の生成 | |
pub fn generate(er: &Eratosthenes) -> Self { | |
let x = er.x; | |
let mut id = Vec::with_capacity(approx_prime_pi(x) as usize); | |
id.push((u64mod_inv(3), u64max_quot(3))); | |
id.push((u64mod_inv(5), u64max_quot(5))); | |
let mut i30 = 0; | |
for i in 0..er.flags.len() { | |
let mut f = er.flags[i]; | |
while f != 0 { | |
let lsb = f & f.wrapping_neg(); // fetch least one bit | |
let ibit = Eratosthenes::bit_to_index(lsb); | |
let p = i30 + Eratosthenes::MOD_30[ibit] as u64; | |
// ゼロ除算エラーチェックの分岐生成防止に 1 と bitor して非ゼロ化 (元々奇数なので) | |
id.push((u64mod_inv(p), u64max_quot(p|1))); | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
i30 += 30; | |
} | |
Self { x, id } | |
} | |
/// 単純な除算での試し割り法による素因数分解 | |
pub fn factor_plain(er: &Eratosthenes, n: u64) -> Vec<u64> { | |
// sqrt(n) 以下の素因数は全て事前生成済みであること | |
assert!(er.x.saturating_mul(er.x.saturating_add(2)) >= n); | |
let mut m = n; | |
let mut pv = vec![]; | |
if m == 0 { return pv; } | |
while (m % 2) == 0 { pv.push(2); m /= 2; } // 2の素因数 | |
while (m % 3) == 0 { pv.push(3); m /= 3; } // 3の素因数 | |
while (m % 5) == 0 { pv.push(5); m /= 5; } // 5の素因数 | |
let mut sqrtm30 = sqrt_floor(m) / 30; // (mの平方根)/30 を計算 | |
let mut i30 = 0; | |
for (i, f) in er.flags.iter().cloned().enumerate() { | |
let mut f = f; | |
while f != 0 { | |
let lsb = f & f.wrapping_neg(); // fetch least one bit | |
let ibit = Eratosthenes::bit_to_index(lsb); | |
let p = i30 + Eratosthenes::MOD_30[ibit] as u64; | |
if m % p == 0 { // 整除判定 | |
loop { | |
pv.push(p); // 素因数リストに追加 | |
m /= p; // 素数p で除算 | |
if m % p != 0 { break; } // 非整除判定 | |
} | |
sqrtm30 = sqrt_floor(m) / 30; // (mの平方根)/30 を再計算 | |
} | |
f &= f.wrapping_sub(1); // elase least one bit | |
} | |
if i as u64 > sqrtm30 { break; } // mの平方根 より 素数p が大きければ終了 | |
i30 += 30; | |
} | |
if m > 1 { pv.push(m); } // 余った素因数をリストに追加 | |
pv | |
} | |
/// ($`\operatorname{mod}2^{64}`$ 以外での)除法を極力用いない試し割り法による素因数分解 | |
/// | |
/// * $`P \times Q \mod 2^{64} = 1`$ を満たす 整数 $`Q`$ が存在するのは 整数 $`P`$ が奇数である場合である事に注意 | |
/// * 整数 $`P,Q,M`$ について、 $`P \times Q \mod 2^64 = 1, 1 \le P \lt 2^{64}, 0 \le M \lt 2^{64}`$ ならば | |
/// $`M \times Q \mod 2^{64} \le \lfloor(2^{64} - 1)/P\rfloor ⇔ M \mod P = 0 ⇔ (M × Q) \mod 2^{64} = M / P`$ | |
/// * 奇素数 $`P`$ `{p}` に対応する $`\operatorname{mod}2^{64}`$ における乗法逆元の整数 $`Q`$ `{pinv}`, $`\lfloor(2^{64} - 1)/P\rfloor`$ `{pdiv}` の値は `generate()` にて事前生成 | |
/// | |
/// See [`u64mod_inv`], [`u64max_quot`] | |
pub fn factor(&self, n: u64) -> Vec<u64> { | |
// sqrt(n) 以下の素因数は全て事前生成済みであること | |
assert!(self.x.saturating_mul(self.x.saturating_add(2)) >= n); | |
let mut m = n; | |
let mut pv = vec![]; | |
if m == 0 { return pv; } | |
while (m & 1) == 0 { pv.push(2); m >>= 1; } // 2の素因数は別途処理 | |
assert_eq!(m & 1, 1); // m は 奇数 | |
let mut i = 0; | |
while m > 1 { | |
// (2^64-1)/(mの平方根) を計算 | |
let isqrtm = u64max_quot(sqrt_floor(m).max(3)); | |
let limit_i = self.id | |
// pinv: 法2^64での乗法逆元, pdiv: floor((2^64-1)/p) | |
.binary_search_by(|(_pinv, pdiv)| isqrtm.cmp(pdiv)) | |
.unwrap_or_else(|e| e - 1) | |
.max(i); | |
if let Some((di, (pinv, pdiv))) = | |
self.id[i..=limit_i].iter().cloned().enumerate() | |
.find(|&(_di, (pinv, pdiv))| | |
// 整除判定, m % p == 0 | |
m.wrapping_mul(pinv) <= pdiv | |
) | |
{ | |
i += di + 1; | |
// 乗法逆元の乗法逆元は元の数 | |
let p = u64mod_inv(pinv); | |
loop { | |
// 素因数リストに追加 | |
pv.push(p); | |
// 素数p で除算, m /= p | |
m = m.wrapping_mul(pinv); | |
// 非整除判定, if m % p != 0 | |
if m.wrapping_mul(pinv) > pdiv { break; } | |
} | |
} else { | |
break; | |
} | |
} | |
if m > 1 { pv.push(m); } // 余った素因数をリストに追加 | |
pv | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn er_v2() { | |
bench_er_v2("10^2", 100, 25); | |
bench_er_v2("10^3", 1_000, 168); | |
bench_er_v2("10^4", 10_000, 1229); | |
bench_er_v2("10^5", 100_000, 9_592); | |
bench_er_v2("10^6", 1_000_000, 78_498); | |
bench_er_v2("10^7", 10_000_000, 664_579); | |
bench_er_v2("10^8", 100_000_000, 5_761_455); | |
bench_er_v2("10^9", 1_000_000_000, 50_847_534); | |
bench_er_v2("2^32", 1<<32, 203_280_221); | |
bench_er_v2("10^10", 10_000_000_000, 455_052_511); | |
} | |
#[test] fn er_v2_pow10_2() { bench_er_v2("10^2", 100, 25); } | |
#[test] fn er_v2_pow10_3() { bench_er_v2("10^3", 1_000, 168); } | |
#[test] fn er_v2_pow10_4() { bench_er_v2("10^4", 10_000, 1229); } | |
#[test] fn er_v2_pow10_5() { bench_er_v2("10^5", 100_000, 9_592); } | |
#[test] fn er_v2_pow10_6() { bench_er_v2("10^6", 1_000_000, 78_498); } | |
#[test] fn er_v2_pow10_7() { bench_er_v2("10^7", 10_000_000, 664_579); } | |
#[test] fn er_v2_pow10_8() { bench_er_v2("10^8", 100_000_000, 5_761_455); } | |
#[test] fn er_v2_pow10_9() { bench_er_v2("10^9", 1_000_000_000, 50_847_534); } | |
#[test] fn er_v2_pow2_32() { bench_er_v2("2^32", 1<<32, 203_280_221); } | |
#[test] fn er_v2_pow2_33() { bench_er_v2("2^33", 1<<33, 393_615_806); } | |
#[test] fn er_v2_pow10_10() { bench_er_v2("10^10", 10_000_000_000, 455_052_511); } | |
#[test] | |
fn er() { | |
bench_er_v6("10^2", 100, 25); | |
bench_er_v6("10^3", 1_000, 168); | |
bench_er_v6("10^4", 10_000, 1229); | |
bench_er_v6("10^5", 100_000, 9_592); | |
bench_er_v6("10^6", 1_000_000, 78_498); | |
bench_er_v6("10^7", 10_000_000, 664_579); | |
/* | |
bench_er_v6("2*10^7", 20_000_000, 1_270_607); | |
bench_er_v6("3*10^7", 30_000_000, 1_857_859); | |
bench_er_v6("4*10^7", 40_000_000, 2_433_654); | |
bench_er_v6("5*10^7", 50_000_000, 3_001_134); | |
bench_er_v6("6*10^7", 60_000_000, 3_562_115); | |
bench_er_v6("7*10^7", 70_000_000, 4_118_064); | |
bench_er_v6("8*10^7", 80_000_000, 4_669_382); | |
bench_er_v6("9*10^7", 90_000_000, 5_216_954); | |
*/ | |
bench_er_v6("10^8", 100_000_000, 5_761_455); | |
/* | |
bench_er_v6("2*10^8", 200_000_000, 11_078_937); | |
bench_er_v6("3*10^8", 300_000_000, 16_252_325); | |
bench_er_v6("4*10^8", 400_000_000, 21_336_326); | |
bench_er_v6("5*10^8", 500_000_000, 26_355_867); | |
bench_er_v6("6*10^8", 600_000_000, 31_324_703); | |
bench_er_v6("7*10^8", 700_000_000, 36_252_931); | |
bench_er_v6("8*10^8", 800_000_000, 41_146_179); | |
bench_er_v6("9*10^8", 900_000_000, 46_009_215); | |
*/ | |
bench_er_v6("10^9", 1_000_000_000, 50_847_534); | |
bench_er_v6("2^32", 1<<32, 203_280_221); | |
//bench_er_v6("2^33", 1<<33, 393_615_806); | |
bench_er_v6("10^10", 10_000_000_000, 455_052_511); | |
} | |
#[test] fn er_pow10_2() { bench_er_v6("10^2", 100, 25); } | |
#[test] fn er_pow10_3() { bench_er_v6("10^3", 1_000, 168); } | |
#[test] fn er_pow10_4() { bench_er_v6("10^4", 10_000, 1229); } | |
#[test] fn er_pow10_5() { bench_er_v6("10^5", 100_000, 9_592); } | |
#[test] fn er_pow10_6() { bench_er_v6("10^6", 1_000_000, 78_498); } | |
#[test] fn er_pow10_7() { bench_er_v6("10^7", 10_000_000, 664_579); } | |
#[test] fn er_pow10_8() { bench_er_v6("10^8", 100_000_000, 5_761_455); } | |
#[test] fn er_pow10_9() { bench_er_v6("10^9", 1_000_000_000, 50_847_534); } | |
#[test] fn er_pow2_30() { bench_er_v6("2^30", 1<<30, 54_400_028); } | |
#[test] fn er_pow2_31() { bench_er_v6("2^31", 1<<31, 105_097_565); } | |
#[test] fn er_pow2_32() { bench_er_v6("2^32", 1<<32, 203_280_221); } | |
#[test] fn er_pow2_33() { bench_er_v6("2^33", 1<<33, 393_615_806); } | |
#[test] fn er_pow10_10() { bench_er_v6("10^10", 10_000_000_000, 455_052_511); } | |
fn factor_test_cases() -> Vec<(u64, Vec<u64>)> { | |
vec![ | |
(3, vec![3]), | |
(11, vec![11]), | |
(111, vec![3, 37]), | |
(1111, vec![11, 101]), | |
(11111, vec![41, 271]), | |
(111111, vec![3, 7, 11, 13, 37]), | |
(1111111, vec![239, 4649]), | |
(11111111, vec![11, 73, 101, 137]), | |
(111111111, vec![3, 3, 37, 333667]), | |
(1111111111, vec![11, 41, 271, 9091]), | |
(11111111111, vec![21649, 513239]), | |
(111111111111, vec![3, 7, 11, 13, 37, 101, 9901]), | |
(1111111111111, vec![53, 79, 265371653]), | |
(11111111111111, vec![11, 239, 4649, 909091]), | |
(111111111111111, vec![3, 31, 37, 41, 271, 2906161]), | |
(1111111111111111, vec![11, 17, 73, 101, 137, 5882353]), | |
(11111111111111111, vec![2071723, 5363222357]), | |
(111111111111111111, vec![3, 3, 7, 11, 13, 19, 37, 52579, 333667]), | |
(1111111111111111111, vec![1111111111111111111]), | |
(6111111111111111111, vec![3, 2037037037037037037]), | |
(9223371915520229671, vec![3037000453, 3037000507]), | |
(9223372036854775783, vec![9223372036854775783]), | |
(11111111111111111111, vec![11, 41, 101, 271, 3541, 9091, 27961]), | |
(11297479344276717331, vec![3263519417, 3461747243]), | |
(18446744065119616769, vec![4294967279, 4294967311]), | |
(18446744073709551557, vec![18446744073709551557]), | |
(18446744073709551615, vec![3, 5, 17, 257, 641, 65537, 6700417]), | |
] | |
} | |
#[test] | |
fn factor_plain() { | |
let result = bench_factor(factor_test_cases().iter().cloned().map(|(a, _b)| a).collect(), false); | |
for (i, (_n, answer)) in factor_test_cases().iter().cloned().enumerate() { | |
assert_eq!(result[i], answer); | |
} | |
} | |
#[test] | |
fn factor_prediv() { | |
let result = bench_factor(factor_test_cases().iter().cloned().map(|(a, _b)| a).collect(), true); | |
for (i, (_n, answer)) in factor_test_cases().iter().cloned().enumerate() { | |
assert_eq!(result[i], answer); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment