149 lines
4.6 KiB
Rust
149 lines
4.6 KiB
Rust
use bevy_tasks::{ParallelIterator, TaskPoolBuilder};
|
|
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
|
|
|
struct ParChunks<'a, T>(std::slice::Chunks<'a, T>);
|
|
impl<'a, T> ParallelIterator<std::slice::Iter<'a, T>> for ParChunks<'a, T>
|
|
where
|
|
T: 'a + Send + Sync,
|
|
{
|
|
type Item = &'a T;
|
|
|
|
fn next_batch(&mut self) -> Option<std::slice::Iter<'a, T>> {
|
|
self.0.next().map(|s| s.iter())
|
|
}
|
|
}
|
|
|
|
struct ParChunksMut<'a, T>(std::slice::ChunksMut<'a, T>);
|
|
impl<'a, T> ParallelIterator<std::slice::IterMut<'a, T>> for ParChunksMut<'a, T>
|
|
where
|
|
T: 'a + Send + Sync,
|
|
{
|
|
type Item = &'a mut T;
|
|
|
|
fn next_batch(&mut self) -> Option<std::slice::IterMut<'a, T>> {
|
|
self.0.next().map(|s| s.iter_mut())
|
|
}
|
|
}
|
|
|
|
fn bench_overhead(c: &mut Criterion) {
|
|
fn noop(_: &mut usize) {};
|
|
|
|
let mut v = (0..10000).collect::<Vec<usize>>();
|
|
c.bench_function("overhead_iter", |b| {
|
|
b.iter(|| {
|
|
v.iter_mut().for_each(noop);
|
|
})
|
|
});
|
|
|
|
let mut v = (0..10000).collect::<Vec<usize>>();
|
|
let mut group = c.benchmark_group("overhead_par_iter");
|
|
for thread_count in &[1, 2, 4, 8, 16, 32] {
|
|
let pool = TaskPoolBuilder::new().num_threads(*thread_count).build();
|
|
group.bench_with_input(
|
|
BenchmarkId::new("threads", thread_count),
|
|
thread_count,
|
|
|b, _| {
|
|
b.iter(|| {
|
|
ParChunksMut(v.chunks_mut(100)).for_each(&pool, noop);
|
|
})
|
|
},
|
|
);
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
fn bench_for_each(c: &mut Criterion) {
|
|
fn busy_work(n: usize) {
|
|
let mut i = n;
|
|
while i > 0 {
|
|
i = black_box(i - 1);
|
|
}
|
|
}
|
|
|
|
let mut v = (0..10000).collect::<Vec<usize>>();
|
|
c.bench_function("for_each_iter", |b| {
|
|
b.iter(|| {
|
|
v.iter_mut().for_each(|x| {
|
|
busy_work(10000);
|
|
*x *= *x;
|
|
});
|
|
})
|
|
});
|
|
|
|
let mut v = (0..10000).collect::<Vec<usize>>();
|
|
let mut group = c.benchmark_group("for_each_par_iter");
|
|
for thread_count in &[1, 2, 4, 8, 16, 32] {
|
|
let pool = TaskPoolBuilder::new().num_threads(*thread_count).build();
|
|
group.bench_with_input(
|
|
BenchmarkId::new("threads", thread_count),
|
|
thread_count,
|
|
|b, _| {
|
|
b.iter(|| {
|
|
ParChunksMut(v.chunks_mut(100)).for_each(&pool, |x| {
|
|
busy_work(10000);
|
|
*x *= *x;
|
|
});
|
|
})
|
|
},
|
|
);
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
fn bench_many_maps(c: &mut Criterion) {
|
|
fn busy_doubles(mut x: usize, n: usize) -> usize {
|
|
for _ in 0..n {
|
|
x = black_box(x.wrapping_mul(2));
|
|
}
|
|
x
|
|
}
|
|
|
|
let v = (0..10000).collect::<Vec<usize>>();
|
|
c.bench_function("many_maps_iter", |b| {
|
|
b.iter(|| {
|
|
v.iter()
|
|
.map(|x| busy_doubles(*x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.for_each(drop);
|
|
})
|
|
});
|
|
|
|
let v = (0..10000).collect::<Vec<usize>>();
|
|
let mut group = c.benchmark_group("many_maps_par_iter");
|
|
for thread_count in &[1, 2, 4, 8, 16, 32] {
|
|
let pool = TaskPoolBuilder::new().num_threads(*thread_count).build();
|
|
group.bench_with_input(
|
|
BenchmarkId::new("threads", thread_count),
|
|
thread_count,
|
|
|b, _| {
|
|
b.iter(|| {
|
|
ParChunks(v.chunks(100))
|
|
.map(|x| busy_doubles(*x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.map(|x| busy_doubles(x, 1000))
|
|
.for_each(&pool, drop);
|
|
})
|
|
},
|
|
);
|
|
}
|
|
group.finish();
|
|
}
|
|
|
|
criterion_group!(benches, bench_overhead, bench_for_each, bench_many_maps);
|
|
criterion_main!(benches);
|