Browse Source

Recombine mass, s and child data into a node tuple

With the values spread across three Vecs, this means that we touch three
cache lines when we need all values. That would be fine if we went
through the data sequentially, but since the octree makes us jump around
wildly in memory, we're likely only using a single value from each cache
line.

Since we need all four values in the hot path, and all four togehter
nicely fit into a single cache line, it's more efficient to stuff them
together.

Really this should be a struct, but I'm lazy...
opt
Björn Steinbrink 8 months ago
parent
commit
636a7cf356
1 changed files with 14 additions and 23 deletions
  1. +14
    -23
      src/barnes_hut.rs

+ 14
- 23
src/barnes_hut.rs View File

@ -7,25 +7,21 @@ const G: f64 = 0.00667408;
const THETA: f64 = 0.5;
pub struct Octree {
children: Vec<(isize, u8)>, // TODO make this an Option<[usize; 8]>
mass: Vec<f64>,
nodes: Vec<(f64, f64, usize, u8)>, // mass, s, child base, child mask ~ 1 cache line
min_coord: Vec<Vector3D>,
max_coord: Vec<Vector3D>,
avg_weighted_position: Vec<Vector3D>, // Divide by mass for this to be accurate!
center_of_mass: Vec<Vector3D>, // Calculated once from avg_weighted_position
s: Vec<f64>,
}
impl Octree {
pub fn new(min: Vector3D, max: Vector3D, n: usize) -> Self {
let mut o = Octree {
children: Vec::with_capacity(n),
mass: Vec::with_capacity(n),
nodes: Vec::with_capacity(n),
min_coord: Vec::with_capacity(n),
max_coord: Vec::with_capacity(n),
avg_weighted_position: Vec::with_capacity(n),
center_of_mass: Vec::with_capacity(n),
s: Vec::with_capacity(n),
};
let width = max.x - min.x;
let height = max.y - min.y;
@ -33,12 +29,10 @@ impl Octree {
let s = if width > height { width } else { height };
let s = if depth > s { depth } else { s };
o.children.push((-1, 0));
o.mass.push(0.0);
o.nodes.push((0.0, s, 0, 0));
o.min_coord.push(min);
o.max_coord.push(max);
o.avg_weighted_position.push(Vector3D::new(0.0, 0.0, 0.0));
o.s.push(s);
o
}
@ -46,8 +40,8 @@ impl Octree {
self.add_particles_to_specific_node(&particles, 0);
//calculate center of masses
for (&mass, &awp) in self.mass.iter().zip(&self.avg_weighted_position) {
self.center_of_mass.push(awp / mass);
for (node, &awp) in self.nodes.iter().zip(&self.avg_weighted_position) {
self.center_of_mass.push(awp / node.0);
}
}
@ -62,8 +56,8 @@ impl Octree {
let dis_vec = self.center_of_mass[cur_node] - particle.position;
let d_squared = dis_vec.dist_squared();
if self.children[cur_node].1 == 0 || {
let s = self.s[cur_node];
let (mass, s, mut child, mask) = self.nodes[cur_node];
if mask == 0 || {
let s_squared = s * s;
let theta_squared = THETA * THETA;
s_squared < theta_squared * d_squared
@ -74,13 +68,12 @@ impl Octree {
if d_squared < 0.1 { return; } // Worse, but higher-performance, method of smoothing
let dist = d_squared.sqrt();
let v = G * self.mass[cur_node] * delta / (dist * d_squared); // / (d_squared + 0.1).sqrt(); // Uncomment this to enable smoothing for short distances
let v = G * mass * delta / (dist * d_squared); // / (d_squared + 0.1).sqrt(); // Uncomment this to enable smoothing for short distances
particle.velocity += v * dis_vec;
}
else {
//println!("A{}", self.nodes[cur_node].num_particles);
//Divide and conquer
let (mut child, mask) = self.children[cur_node];
for c in 0..8 {
if mask & (1 << c) != 0 {
self.barnes_hut_specific_node(particle, child as usize, delta);
@ -96,7 +89,7 @@ impl Octree {
}
if particles.len() == 1 {
self.mass[cur_node] += particles[0].mass;
self.nodes[cur_node].0 += particles[0].mass;
self.avg_weighted_position[cur_node] += particles[0].position * particles[0].mass;
return;
}
@ -116,7 +109,7 @@ impl Octree {
for &particle in particles {
// Update ourself
self.mass[cur_node] += particle.mass;
self.nodes[cur_node].0 += particle.mass;
self.avg_weighted_position[cur_node] += particle.position * particle.mass;
let octant_index = Octree::get_id_from_center(center_point, particle.position);
@ -124,11 +117,11 @@ impl Octree {
}
//Recurse
let idx = self.mass.len();
self.children[cur_node].0 = idx as isize;
let idx = self.nodes.len();
self.nodes[cur_node].2 = idx;
for (i, particle_oct) in particle_octs.iter().enumerate() {
if !particle_oct.is_empty() {
self.children[cur_node].1 |= 1 << i;
self.nodes[cur_node].3 |= 1 << i;
let (min, max) = Octree::get_octant_bounding_box_from_id(self.min_coord[cur_node],
self.max_coord[cur_node],
@ -141,12 +134,10 @@ impl Octree {
let s = if width > height { width } else { height };
let s = if depth > s { depth } else { s };
self.children.push((-1, 0));
self.mass.push(0.0);
self.nodes.push((0.0, s, 0, 0));
self.min_coord.push(min);
self.max_coord.push(max);
self.avg_weighted_position.push(Vector3D::new(0.0, 0.0, 0.0));
self.s.push(s);
}
}


Loading…
Cancel
Save