diff --git a/sub_crates/oct32norm/benches/bench.rs b/sub_crates/oct32norm/benches/bench.rs
index 18a6940..012d7fb 100644
--- a/sub_crates/oct32norm/benches/bench.rs
+++ b/sub_crates/oct32norm/benches/bench.rs
@@ -1,26 +1,38 @@
 use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
-use oct32norm::{decode, encode};
+use oct32norm::{decode, encode, encode_precise};
 use rand::{rngs::SmallRng, FromEntropy, Rng};
 
 //----
 
-fn encode_100_values(bench: &mut Bencher) {
+fn encode_1000_values(bench: &mut Bencher) {
     let mut rng = SmallRng::from_entropy();
     bench.iter(|| {
         let x = rng.gen::<f32>() - 0.5;
         let y = rng.gen::<f32>() - 0.5;
         let z = rng.gen::<f32>() - 0.5;
-        for _ in 0..100 {
+        for _ in 0..1000 {
             black_box(encode(black_box((x, y, z))));
         }
     });
 }
 
-fn decode_100_values(bench: &mut Bencher) {
+fn encode_precise_1000_values(bench: &mut Bencher) {
+    let mut rng = SmallRng::from_entropy();
+    bench.iter(|| {
+        let x = rng.gen::<f32>() - 0.5;
+        let y = rng.gen::<f32>() - 0.5;
+        let z = rng.gen::<f32>() - 0.5;
+        for _ in 0..1000 {
+            black_box(encode_precise(black_box((x, y, z))));
+        }
+    });
+}
+
+fn decode_1000_values(bench: &mut Bencher) {
     let mut rng = SmallRng::from_entropy();
     bench.iter(|| {
         let v = rng.gen::<u32>();
-        for _ in 0..100 {
+        for _ in 0..1000 {
             black_box(decode(black_box(v)));
         }
     });
@@ -28,5 +40,10 @@ fn decode_100_values(bench: &mut Bencher) {
 
 //----
 
-benchmark_group!(benches, encode_100_values, decode_100_values,);
+benchmark_group!(
+    benches,
+    encode_1000_values,
+    encode_precise_1000_values,
+    decode_1000_values,
+);
 benchmark_main!(benches);
diff --git a/sub_crates/oct32norm/src/lib.rs b/sub_crates/oct32norm/src/lib.rs
index 46c1086..6f54ebb 100644
--- a/sub_crates/oct32norm/src/lib.rs
+++ b/sub_crates/oct32norm/src/lib.rs
@@ -4,29 +4,60 @@
 //! of Efficient Representations for Independent Unit Vectors" by
 //! Cigolle et al.
 
+const STEP_SIZE: f32 = 1.0 / STEPS;
+const STEPS: f32 = ((1 << (16 - 1)) - 1) as f32;
+
 /// Encodes a vector of three floats to the oct32 format.
 ///
 /// The input vector does not need to be normalized--only the direction
 /// matters to the encoding process, not the length.
 #[inline]
 pub fn encode(vec: (f32, f32, f32)) -> u32 {
-    let l1_norm = vec.0.abs() + vec.1.abs() + vec.2.abs();
-    let v0_norm = vec.0 / l1_norm;
-    let v1_norm = vec.1 / l1_norm;
+    let (u, v) = vec3_to_oct(vec);
+    ((to_snorm_16(u) as u32) << 16) | to_snorm_16(v) as u32
+}
 
-    let (u, v) = if vec.2 < 0.0 {
-        (
-            u32::from(to_snorm_16((1.0 - v1_norm.abs()) * sign(vec.0))),
-            u32::from(to_snorm_16((1.0 - v0_norm.abs()) * sign(vec.1))),
-        )
-    } else {
-        (
-            u32::from(to_snorm_16(v0_norm)),
-            u32::from(to_snorm_16(v1_norm)),
-        )
+/// Encodes a vector of three floats to the oct32 format.
+///
+/// This is the same as `encode()` except that it is slower and encodes
+/// with slightly better precision.
+pub fn encode_precise(vec: (f32, f32, f32)) -> u32 {
+    #[inline(always)]
+    fn dot_norm(a: (f32, f32, f32), b: (f32, f32, f32)) -> f64 {
+        let l = ((a.0 as f64 * a.0 as f64) + (a.1 as f64 * a.1 as f64) + (a.2 as f64 * a.2 as f64))
+            .sqrt();
+        ((a.0 as f64 * b.0 as f64) + (a.1 as f64 * b.1 as f64) + (a.2 as f64 * b.2 as f64)) / l
+    }
+
+    // Calculate the initial floored version.
+    let s = {
+        let mut s = vec3_to_oct(vec); // Remap to the square.
+        s.0 = (s.0.max(-1.0).min(1.0) * STEPS).floor() * STEP_SIZE;
+        s.1 = (s.1.max(-1.0).min(1.0) * STEPS).floor() * STEP_SIZE;
+        s
     };
 
-    (u << 16) | v
+    // Test all combinations of floor and ceil and keep the best.
+    // Note that at +/- 1, this will exit the square, but that
+    // will be a worse encoding and never win.
+    let mut best_rep = s;
+    let mut max_dot = 0.0;
+    for &(i, j) in &[
+        (0.0, 0.0),
+        (0.0, STEP_SIZE),
+        (STEP_SIZE, 0.0),
+        (STEP_SIZE, STEP_SIZE),
+    ] {
+        let candidate = (s.0 + i, s.1 + j);
+        let oct = oct_to_vec3(candidate);
+        let dot = dot_norm(oct, vec);
+        if dot > max_dot {
+            best_rep = candidate;
+            max_dot = dot;
+        }
+    }
+
+    ((to_snorm_16(best_rep.0) as u32) << 16) | to_snorm_16(best_rep.1) as u32
 }
 
 /// Decodes from an oct32 to a vector of three floats.
@@ -35,27 +66,45 @@ pub fn encode(vec: (f32, f32, f32)) -> u32 {
 /// needs a normalized vector should normalize the returned vector.
 #[inline]
 pub fn decode(n: u32) -> (f32, f32, f32) {
-    let mut vec0 = from_snorm_16((n >> 16) as u16);
-    let mut vec1 = from_snorm_16(n as u16);
-    let vec2 = 1.0 - (vec0.abs() + vec1.abs());
+    oct_to_vec3((from_snorm_16((n >> 16) as u16), from_snorm_16(n as u16)))
+}
+
+#[inline(always)]
+fn vec3_to_oct(vec: (f32, f32, f32)) -> (f32, f32) {
+    let l1_norm = vec.0.abs() + vec.1.abs() + vec.2.abs();
+    let u = vec.0 / l1_norm;
+    let v = vec.1 / l1_norm;
+
+    if vec.2 > 0.0 {
+        (u, v)
+    } else {
+        ((1.0 - v.abs()) * sign(vec.0), (1.0 - u.abs()) * sign(vec.1))
+    }
+}
+
+#[inline(always)]
+fn oct_to_vec3(oct: (f32, f32)) -> (f32, f32, f32) {
+    let vec2 = 1.0 - (oct.0.abs() + oct.1.abs());
 
     if vec2 < 0.0 {
-        let old_x = vec0;
-        vec0 = (1.0 - vec1.abs()) * sign(old_x);
-        vec1 = (1.0 - old_x.abs()) * sign(vec1);
+        (
+            (1.0 - oct.1.abs()) * sign(oct.0),
+            (1.0 - oct.0.abs()) * sign(oct.1),
+            vec2,
+        )
+    } else {
+        (oct.0, oct.1, vec2)
     }
-
-    (vec0, vec1, vec2)
 }
 
 #[inline(always)]
 fn to_snorm_16(n: f32) -> u16 {
-    (n * ((1u32 << (16 - 1)) - 1) as f32).round() as i16 as u16
+    (n * STEPS).round() as i16 as u16
 }
 
 #[inline(always)]
 fn from_snorm_16(n: u16) -> f32 {
-    f32::from(n as i16) * (1.0f32 / ((1u32 << (16 - 1)) - 1) as f32)
+    f32::from(n as i16) * STEP_SIZE
 }
 
 #[inline(always)]
@@ -75,21 +124,27 @@ mod tests {
     fn axis_directions() {
         let px = (1.0, 0.0, 0.0);
         let px_oct = encode(px);
+        let px_octp = encode_precise(px);
 
         let nx = (-1.0, 0.0, 0.0);
         let nx_oct = encode(nx);
+        let nx_octp = encode_precise(nx);
 
         let py = (0.0, 1.0, 0.0);
         let py_oct = encode(py);
+        let py_octp = encode_precise(py);
 
         let ny = (0.0, -1.0, 0.0);
         let ny_oct = encode(ny);
+        let ny_octp = encode_precise(ny);
 
         let pz = (0.0, 0.0, 1.0);
         let pz_oct = encode(pz);
+        let pz_octp = encode_precise(pz);
 
         let nz = (0.0, 0.0, -1.0);
         let nz_oct = encode(nz);
+        let nz_octp = encode_precise(nz);
 
         assert_eq!(px, decode(px_oct));
         assert_eq!(nx, decode(nx_oct));
@@ -97,5 +152,12 @@ mod tests {
         assert_eq!(ny, decode(ny_oct));
         assert_eq!(pz, decode(pz_oct));
         assert_eq!(nz, decode(nz_oct));
+
+        assert_eq!(px, decode(px_octp));
+        assert_eq!(nx, decode(nx_octp));
+        assert_eq!(py, decode(py_octp));
+        assert_eq!(ny, decode(ny_octp));
+        assert_eq!(pz, decode(pz_octp));
+        assert_eq!(nz, decode(nz_octp));
     }
 }
diff --git a/sub_crates/oct32norm/tests/proptest_tests.rs b/sub_crates/oct32norm/tests/proptest_tests.rs
index 7cf8d71..3234055 100644
--- a/sub_crates/oct32norm/tests/proptest_tests.rs
+++ b/sub_crates/oct32norm/tests/proptest_tests.rs
@@ -2,7 +2,7 @@
 extern crate proptest;
 extern crate oct32norm;
 
-use oct32norm::{decode, encode};
+use oct32norm::{decode, encode, encode_precise};
 use proptest::test_runner::Config;
 
 /// Calculates the cosine of the angle between the two vectors,
@@ -56,18 +56,23 @@ proptest! {
     #[test]
     fn pt_roundtrip_angle_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) {
         let oct = encode(v);
-        let v2 = decode(oct);
+        let octp = encode_precise(v);
 
         // Check if the angle between the original and the roundtrip
         // is less than 0.004 degrees
-        assert!(cos_gt(v, v2, 0.9999999976));
+        assert!(cos_gt(v, decode(oct), 0.9999999976));
+
+        // Check if the angle between the original and the roundtrip
+        // is less than 0.003 degrees
+        assert!(cos_gt(v, decode(octp), 0.9999999986));
     }
 
     #[test]
     fn pt_roundtrip_component_precision(v in (-1.0f32..1.0, -1.0f32..1.0, -1.0f32..1.0)) {
         let oct = encode(v);
-        let v2 = decode(oct);
+        let octp = encode_precise(v);
 
-        assert!(l1_delta_lt(v, v2, 0.00005));
+        assert!(l1_delta_lt(v, decode(oct), 0.00005));
+        assert!(l1_delta_lt(v, decode(octp), 0.00003));
     }
 }