Compare commits


66 Commits

Author SHA1 Message Date
a9c2fb1d01 Name a constant better. 2023-08-04 00:51:02 +02:00
79b9bd5f49 Fix broken tests that I forgot to update. 2023-08-04 00:46:16 +02:00
f4b7767198 Move ulp increment/decrement functions into rmath utils. 2023-08-04 00:45:44 +02:00
911542c534 Handle NaNs in a reasonable way in rmath's ulp float functions. 2023-08-03 23:55:26 +02:00
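(For illustration only: a minimal sketch of a ulp-increment that passes NaNs through unchanged. Names and behavior here are assumptions, not rmath's actual code.)

    /// Next representable f32 above `x` (illustrative sketch, not rmath's code).
    pub fn ulp_inc(x: f32) -> f32 {
        if x.is_nan() || x == f32::INFINITY {
            // Pass NaNs (and +inf) through unchanged rather than producing garbage.
            return x;
        }
        if x == 0.0 {
            // Covers both +0.0 and -0.0: the next float up is the smallest subnormal.
            return f32::from_bits(1);
        }
        let bits = x.to_bits();
        if x > 0.0 {
            f32::from_bits(bits + 1)
        } else {
            // Negative values move toward zero as their bit pattern decreases.
            f32::from_bits(bits - 1)
        }
    }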
f9acc8096c Added more tests and documentation for float ulps functions in rmath. 2023-08-03 23:34:30 +02:00
76c56e16f9 Add some (commented out) experiments with Halton.
Halton has fewer correlation artifacts than Sobol for DoF, but
converges slower.
2022-08-17 15:24:20 -07:00
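(For reference, the radical-inverse construction behind a Halton sequence is tiny; this sketch is illustrative and not the repository's halton sub-crate.)

    // Radical inverse in the given base: digit-reverse `index` around the radix point.
    fn radical_inverse(base: u32, mut index: u32) -> f32 {
        let mut result = 0.0f64;
        let mut digit_value = 1.0f64 / base as f64;
        while index > 0 {
            result += digit_value * (index % base) as f64;
            index /= base;
            digit_value /= base as f64;
        }
        result as f32
    }

    // A 2D Halton point pairs two coprime bases, e.g. 2 and 3.
    fn halton_2d(i: u32) -> (f32, f32) {
        (radical_inverse(2, i), radical_inverse(3, i))
    }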
4058c63637 Add DoF back to PsychoBlend exporter. 2022-08-17 13:43:39 -07:00
cb01d1aaea Add an f32() method to Rng. 2022-08-17 12:13:29 -07:00
f0e5d538b7 Use non-Owen-scrambled samples for motion blur and DoF.
This gives notably better results because it avoids what I'm
calling "sample overlap", which is an issue with jittering
approaches like Owen scrambling.  In general, of course, Owen
scrambling improves things.  But particularly for motion blur
it seems to cause issues.
2022-08-17 00:09:02 -07:00
76781eb639 Use new fast hash for base-4 Owen scrambling. 2022-08-14 13:29:14 -07:00
c603e24633 Make World own its own memory, along with distant disk lights. 2022-08-07 13:54:18 -07:00
fc7b8da17d Remove unused imports. 2022-08-07 13:29:40 -07:00
b5bf580b96 Make Camera own its own memory. 2022-08-07 13:28:00 -07:00
6decc48648 PsychoBlend: fix broken material ui panel. 2022-08-07 11:17:25 -07:00
e244664b32 Move shader bindings to objects rather than instances. 2022-08-07 11:05:34 -07:00
1c801ee605 PsychoBlend: implement object and material export.
Material bindings don't work, since they're now on the objects
themselves rather than the instances, and I haven't updated
Psychopath itself for that yet.
2022-08-07 10:23:54 -07:00
d132e6a015 Some minor cleanup in the Blender/Psychopath communication code. 2022-08-07 08:25:10 -07:00
69ace90689 Continue WIP update PsychoBlend for Blender 3.x.
It exports and renders successfully... except there are no objects.
Just a blank background.
2022-08-06 21:40:13 -07:00
6d7b8b280f WIP update PsychoBlend for Blender 3.x.
This just makes the UI not break.  Exporting/rendering still
doesn't work.
2022-08-06 13:50:40 -07:00
1d05063190 Poking at what the new file/streaming format might be. 2022-08-04 15:40:09 -07:00
2bb45a9876 Add streaming data tree parser sub-crate.
Not used yet.
2022-08-04 13:50:13 -07:00
e1c983a7e6 Added benchmarks to rrand sub-crate.
Also misc naming cleanup.
2022-08-04 11:52:49 -07:00
a12de4c3d7 Wrap sampling logic/tracking in a struct. 2022-08-04 11:11:25 -07:00
1f7c412e25 Benchmarks and precision tests for RMath sub-crate. 2022-08-03 17:10:27 -07:00
167d70b8df Made the hilbert spiral order a little more pleasant. 2022-08-03 10:55:51 -07:00
15cd261026 Fix bug resulting in cracks between triangles in some cases. 2022-08-03 10:23:26 -07:00
9569f772f0 Fix incorrect background color handling in glossy reflections.
Bug introduced in the previous refactor removing the LightPath struct.
2022-08-03 00:07:03 -07:00
77ac8ef9f2 Implement "hilbert spiral" bucket rendering order. 2022-08-02 23:58:56 -07:00
7c750dcded Directly specify bucket size instead of inferring from sample count.
Since we aren't doing breadth-first ray tracing anymore, this makes
a lot more sense.
2022-08-02 19:41:23 -07:00
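(A rough sketch of fixed-size bucketing; names and structure are assumptions, not the renderer's actual code.)

    // Walk the image in fixed-size buckets, clamping the last row/column of
    // buckets to the image edge. Assumes bucket_size > 0.
    fn for_each_bucket(width: u32, height: u32, bucket_size: u32, mut f: impl FnMut(u32, u32, u32, u32)) {
        let mut y = 0;
        while y < height {
            let bh = bucket_size.min(height - y);
            let mut x = 0;
            while x < width {
                let bw = bucket_size.min(width - x);
                f(x, y, bw, bh); // render the bucket at (x, y) with size (bw, bh)
                x += bucket_size;
            }
            y += bucket_size;
        }
    }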
72b8397e9d Finished getting rid of the LightPath struct.
Also misc cleanup of related code.
2022-08-02 18:55:25 -07:00
6ccd4e306d WIP getting rid of LightPath struct.
Committing at this point because:
1. It compiles.
2. Rendering is totally wrong, but in a cool way.
2022-08-02 17:29:05 -07:00
181f7a6b85 Add convenience script for building with target native cpu. 2022-08-02 15:13:52 -07:00
608fe8bda1 Switch to colorbox and jakob upsampling for color handling. 2022-08-02 00:18:12 -07:00
5d246e66fa Remove stats that we can't reasonably collect anymore. 2022-08-01 22:57:13 -07:00
8bc6b24004 Switch to CIE XYZ lookup tables. 2022-08-01 22:04:14 -07:00
caed4c67de Do depth-first instead of breadth-first ray tracing.
This simplifies a lot of code, and will make experimenting with
other things a lot more straightforward.
2022-08-01 15:26:38 -07:00
98a9aeb374 Minor tweaks to Owen scrambling functions. 2022-07-25 15:43:07 -07:00
ef489c1ca2 Minor cleanup of the Owen scramble code. 2022-07-23 14:17:38 -07:00
f95e869848 Give Owen scramble functions their own hash.
This lets us move the seeding overhead outside the main loop,
which in turn lets us avoid incurring it every round.
2022-07-23 13:24:24 -07:00
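(The gist of the change, as a hedged sketch: hash the seed once, outside the per-bit loop. The mixer below is a generic integer hash used as a stand-in, not necessarily the hash this commit adds.)

    // Generic 32-bit mixer (constants from Chris Wellons' "lowbias32" hash),
    // used here only as a stand-in.
    fn mix(mut n: u32) -> u32 {
        n ^= n >> 16;
        n = n.wrapping_mul(0x7feb352d);
        n ^= n >> 15;
        n = n.wrapping_mul(0x846ca68b);
        n ^= n >> 16;
        n
    }

    // Per-bit Owen-style scramble: hash the seed once up front, so the per-bit
    // loop only mixes already-hashed state instead of re-seeding every round.
    fn owen_scramble_sketch(x: u32, seed: u32) -> u32 {
        let seed = mix(seed);
        let mut result = x;
        for i in 0..31 {
            // Each bit's flip decision depends only on the bits above it plus the seed.
            let high_bits = x >> (i + 1);
            let flip = mix(high_bits ^ seed) & 1;
            result ^= flip << i;
        }
        // The top bit has no higher bits, so flip it from the seed alone.
        result ^ ((seed & 1) << 31)
    }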
40d643b334 Put hilbert and morton code into one module. 2022-07-23 12:57:57 -07:00
0df18ce908 New hash seeding approach.
There didn't seem to be any issues in practice with the last
approach, but I thought of some other ways things could in theory
interact badly.  This fixes that.
2022-07-21 14:18:55 -07:00
570878d052 Eliminate branch in per-bit Owen scrambling functions. 2022-07-21 12:28:47 -07:00
f5a0210cdf More principled seeding approach in the hash functions. 2022-07-21 12:15:36 -07:00
7082f2d7f4 Update hash functions to known good ones instead of bespoke ones. 2022-07-21 05:22:22 -07:00
0d71ae86db Noticed that z-scrambling is actually just base-4 Owen scrambling.
Updated function name and comments to reflect that.
2022-07-21 04:44:05 -07:00
6b7538e25f Make the z-scrambling table smaller with bit fiddling.
This gets it down to 24 bytes.
2022-07-21 04:20:03 -07:00
ec9a121e72 Implement screen-space blue-noise sampling properly. 2022-07-21 03:59:47 -07:00
83b48f722d Simpler way to implement screen-space blue-noise sampling.
We now do the index scrambling at the top of the sampling loop,
which is also faster since we only have to run it once per pixel
instead of once per sample.
2022-07-20 18:54:38 -07:00
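(In sketch form, with hypothetical names and a stand-in scramble: the point is that the index scramble runs once per pixel, above the sample loop.)

    fn scramble_index(i: u32, seed: u32) -> u32 {
        // Stand-in for the real index scrambling; any seeded bijection of the
        // index works for illustration.
        i ^ seed
    }

    fn sample_pixel(pixel_index: u32, seed: u32, spp: u32) {
        // Scramble once per pixel, outside the sample loop...
        let scrambled = scramble_index(pixel_index, seed);
        for s in 0..spp {
            // ...so each sample only does cheap work with the result.
            let sample_index = scrambled.wrapping_add(s);
            let _ = sample_index; // hand this to the sampler
        }
    }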
86814dbf8f Minor rearrangement of ray generation sampling code. 2022-07-17 21:18:46 -07:00
89429ed9f0 Fix silly bug in light transforms introduced during the switch to RMath. 2022-07-17 17:40:14 -07:00
d55ec9b025 Update psychopath code to work with RMath changes. 2022-07-17 17:24:58 -07:00
6dbdcba91a Whole bunch of cleanup on RMath. 2022-07-17 16:37:15 -07:00
e2044e6579 Implement simple screen-space blue-noise diffusion sampling.
From the paper "Screen-Space Blue-Noise Diffusion of Monte Carlo
Sampling Error via Hierarchical Ordering of Pixels" by Ahmed et al.
2022-07-16 19:35:23 -07:00
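(The hierarchical pixel ordering such approaches build on is a Z-order/Morton-style traversal of the image; a small illustrative sketch of Morton encoding, not the paper's full method.)

    // Interleave the bits of x and y into a Morton (Z-order) index, giving a
    // hierarchical ordering of pixels.
    fn morton_encode(x: u16, y: u16) -> u32 {
        fn spread_bits(mut v: u32) -> u32 {
            v &= 0x0000_ffff;
            v = (v | (v << 8)) & 0x00ff_00ff;
            v = (v | (v << 4)) & 0x0f0f_0f0f;
            v = (v | (v << 2)) & 0x3333_3333;
            v = (v | (v << 1)) & 0x5555_5555;
            v
        }
        spread_bits(x as u32) | (spread_bits(y as u32) << 1)
    }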
ea4ba81110 Use faster routines where precision isn't needed. 2022-07-16 01:09:33 -07:00
8dcf093dbb RMath: first pass at an SSE implementation. 2022-07-16 00:03:09 -07:00
08e2e6eb06 Convert Psychopath over to use new RMath library. 2022-07-15 21:42:35 -07:00
a84da943d0 RMath: implement transform composition. 2022-07-15 17:51:57 -07:00
5535775006 RMath: implement Bool4 type. 2022-07-15 15:20:44 -07:00
fa7be4e58c RMath: change fallback Float4 to be a tuple-struct. 2022-07-15 00:49:37 -07:00
a93a3f09da RMath: implement cross product and bring back some unit tests. 2022-07-15 00:39:14 -07:00
42cd282c47 RMath: implement transforms for Vector, Point, and Normal. 2022-07-14 23:23:22 -07:00
d8e1437db1 RMath: implement vector-matrix multiplication. 2022-07-14 19:02:08 -07:00
c398387b55 Implement dot products and 3x3 matrix inversion.
Both precise and fast versions.  But untested, so might be
incorrect!
2022-07-14 15:30:30 -07:00
8a695a7694 Some shuffling of the math sub-crate's organization. 2022-07-14 12:31:32 -07:00
732dee958e Remove Mat3x3 from math3d lib.
It was an extraneous abstraction.
2022-07-14 00:33:38 -07:00
658e4746ca Start work on new linear algebra library. 2022-07-13 18:54:44 -07:00
80 changed files with 7619 additions and 4067 deletions

Cargo.lock generated

@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ansi_term"
version = "0.11.0"
@ -9,15 +11,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "approx"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f2a05fd1bd10b2527e20a2cd32d8873d115b8b39fe219ee25f42a8aca6ba278"
dependencies = [
"num-traits",
]
[[package]]
name = "arrayvec"
version = "0.5.2"
@ -133,6 +126,15 @@ dependencies = [
[[package]]
name = "color"
version = "0.1.0"
dependencies = [
"colorbox",
]
[[package]]
name = "colorbox"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d27d55561009760957654f467735e73806f8bc2d081cc4a22e93403ecd156fc"
[[package]]
name = "compact"
@ -155,6 +157,10 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19"
[[package]]
name = "data_tree"
version = "0.1.0"
[[package]]
name = "fastapprox"
version = "0.3.0"
@ -184,12 +190,6 @@ dependencies = [
"wasi",
]
[[package]]
name = "glam"
version = "0.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "411e0584defa447c328f25c756ba3d0685727ecc126b46c3c1176001141cd4b6"
[[package]]
name = "half"
version = "1.7.1"
@ -240,14 +240,6 @@ version = "0.2.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e"
[[package]]
name = "math3d"
version = "0.1.0"
dependencies = [
"approx",
"glam",
]
[[package]]
name = "memchr"
version = "2.4.0"
@ -350,20 +342,22 @@ dependencies = [
"bvh_order",
"clap",
"color",
"colorbox",
"compact",
"copy_in_place",
"crossbeam",
"data_tree",
"fastapprox",
"glam",
"half",
"halton",
"kioku",
"lazy_static",
"math3d",
"nom",
"num_cpus",
"openexr",
"png_encode_mini",
"rmath",
"rrand",
"rustc-serialize",
"scoped_threadpool",
"sobol_burley",
@ -569,6 +563,22 @@ dependencies = [
"winapi",
]
[[package]]
name = "rmath"
version = "0.1.0"
dependencies = [
"bencher",
"rand 0.6.5",
]
[[package]]
name = "rrand"
version = "0.1.0"
dependencies = [
"bencher",
"rand 0.6.5",
]
[[package]]
name = "rustc-serialize"
version = "0.3.24"
@ -607,15 +617,15 @@ checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
[[package]]
name = "sobol_burley"
version = "0.3.0"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26e3528b09b1f1b1e152342a4462d1e80d568dc5623a0772252a6e584a53d550"
checksum = "3441b32bbb896e372f1b8e7eb51a983713aef99599c32c0eb69183aa490cb6a0"
[[package]]
name = "spectral_upsampling"
version = "0.1.0"
dependencies = [
"glam",
"rmath",
]
[[package]]


@ -3,8 +3,10 @@ members = [
"sub_crates/bvh_order",
"sub_crates/color",
"sub_crates/compact",
"sub_crates/data_tree",
"sub_crates/halton",
"sub_crates/math3d",
"sub_crates/rmath",
"sub_crates/rrand",
"sub_crates/spectral_upsampling",
]
@ -30,13 +32,14 @@ nom = "5"
num_cpus = "1.8"
openexr = "0.7"
kioku = "0.3"
sobol_burley = "0.3"
sobol_burley = "0.4"
png_encode_mini = "0.1.2"
rustc-serialize = "0.3"
scoped_threadpool = "0.1"
time = "0.1"
glam = "0.15"
fastapprox = "0.3"
colorbox = "0.3"
# Local crate dependencies
[dependencies.bvh_order]
@ -47,12 +50,18 @@ path = "sub_crates/color"
[dependencies.compact]
path = "sub_crates/compact"
[dependencies.halton]
[dependencies.data_tree]
path = "sub_crates/data_tree"
[dependencies.halton]
path = "sub_crates/halton"
[dependencies.math3d]
path = "sub_crates/math3d"
[dependencies.rmath]
path = "sub_crates/rmath"
[dependencies.rrand]
path = "sub_crates/rrand"
[dependencies.spectral_upsampling]
path = "sub_crates/spectral_upsampling"


@ -13,9 +13,7 @@ efficiently handle very large data sets, complex shading, motion blur, color
management, etc. presents a much richer and more challenging problem space to
explore than just writing a basic path tracer.
## Building
Psychopath is written in [Rust](https://www.rust-lang.org), and is pretty
straightforward to build except for its OpenEXR dependency.
@ -36,7 +34,6 @@ documented in the [OpenEXR-rs readme](https://github.com/cessen/openexr-rs/blob/
Once those environment variables are set, then you should be able to build using
the same simple cargo command above.
# PsychoBlend
Included in the repository is an add-on for [Blender](http://www.blender.org)
@ -53,6 +50,15 @@ doesn't support them yet.
- Exports dupligroups with full hierarchical instancing
- Limited auto-detection of instanced meshes
# Contributing
I'm not looking for contributions right now, and I'm likely to reject pull
requests. This is currently a solo project and I like it that way.
However, if you're looking for projects _related_ to Psychopath to contribute to,
[OpenEXR-rs](https://github.com/cessen/openexr-rs) is definitely a
collaborative project that I would love more help with! And I fully expect more
such projects to come out of Psychopath in the future.
# License
@ -63,13 +69,3 @@ See LICENSE.md for details. But the gist is:
* Most crates under the `sub_crates` directory are dual-licensed under MIT and Apache 2.0 (but with some exceptions--see each crate for its respective licenses).
The intent of this scheme is to keep Psychopath itself copyleft, while allowing smaller reusable components to be licensed more liberally.
# Contributing
This is a personal, experimental, for-fun project, and I am specifically
not looking for contributions of any kind. All PRs will be rejected
without review.
However, feel free to fork this into an entirely new project, or examine
the code for ideas for a project of your own.

build_native.sh Executable file

@ -0,0 +1,2 @@
#!/bin/sh
RUSTFLAGS="-C target-cpu=native" cargo build --release


@ -1,60 +1,73 @@
Scene $Scene_fr1 {
Output {
Path ["test_renders/cube.png"]
}
RenderSettings {
Resolution [960 540]
SamplesPerPixel [16]
Seed [1]
}
Camera {
Fov [49.134342]
FocalDistance [9.559999]
ApertureRadius [0.250000]
Transform [0.685881 0.727634 -0.010817 0.000000 -0.317370 0.312469 0.895343 0.000000 -0.654862 0.610666 -0.445245 0.000000 7.481132 -6.507640 5.343665 1.000000]
}
World {
BackgroundShader {
Type [Color]
Color [rec709, 0.050876 0.050876 0.050876]
}
}
Shaders {
SurfaceShader $Material {
Type [Lambert]
Color [rec709, 0.800000 0.800000 0.800000]
}
}
Objects {
MeshSurface $__Plane_ {
SurfaceShaderBind [$Material]
Vertices [-1.000000 -1.000000 0.000000 1.000000 -1.000000 0.000000 -1.000000 1.000000 0.000000 1.000000 1.000000 0.000000]
FaceVertCounts [4 ]
FaceVertIndices [0 1 3 2 ]
}
MeshSurface $__Cube_ {
SurfaceShaderBind [$Material]
Vertices [1.000000 1.000000 -1.000000 1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 1.000000 -1.000000 1.000000 0.999999 1.000000 0.999999 -1.000001 1.000000 -1.000000 -1.000000 1.000000 -1.000000 1.000000 1.000000 ]
FaceVertCounts [4 4 4 4 4 4 ]
FaceVertIndices [0 1 2 3 4 7 6 5 0 4 5 1 1 5 6 2 2 6 7 3 4 0 3 7 ]
}
SphereLight $__Lamp {
Color [rec709, 50.000000 50.000000 50.000000]
Radius [0.100000]
}
}
Assembly {
Instance {
Data [$__Plane_]
Transform [0.078868 -0.000000 0.000000 -0.000000 -0.000000 0.078868 -0.000000 0.000000 0.000000 -0.000000 0.078868 -0.000000 -0.000000 0.000000 -0.000000 1.000000]
}
Instance {
Data [$__Cube_]
Transform [1.000000 -0.000000 0.000000 -0.000000 -0.000000 1.000000 -0.000000 0.000000 0.000000 -0.000000 1.000000 -0.000000 -0.000000 0.000000 -1.000000 1.000000]
}
Instance {
Data [$__Lamp]
Transform [0.019856 -0.060763 0.000000 -0.000000 0.015191 0.079422 -0.000000 0.000000 0.000000 -0.000000 1.000000 -0.000000 -0.026851 -0.125233 -4.432303 1.000000]
}
ClearScene {}
Output {
Path ["test_renders/cube.png"]
}
RenderSettings {
Resolution [960 540]
SamplesPerPixel [16]
Seed [1]
}
Camera {
Fov [49.134342]
FocalDistance [9.559999]
ApertureRadius [0.250000]
Transform [0.685881 0.727634 -0.010817 -0.317370 0.312469 0.895343 -0.654862 0.610666 -0.445245 7.481132 -6.507640 5.343665]
}
World {
BackgroundShader {
Type [Color]
Color [rec709, 0.050876 0.050876 0.050876]
}
}
AddShader {
SurfaceShader {
Name [Material]
Type [Lambert]
Color [rec709, 0.800000 0.800000 0.800000]
}
}
AddObject {
MeshSurface {
Name [__Plane_]
SurfaceShaderBind [Material]
Vertices [-1.000000 -1.000000 0.000000 1.000000 -1.000000 0.000000 -1.000000 1.000000 0.000000 1.000000 1.000000 0.000000]
FaceVertCounts [4 ]
FaceVertIndices [0 1 3 2 ]
}
MeshSurface {
Name [__Cube_]
SurfaceShaderBind [Material]
Vertices [1.000000 1.000000 -1.000000 1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 1.000000 -1.000000 1.000000 0.999999 1.000000 0.999999 -1.000001 1.000000 -1.000000 -1.000000 1.000000 -1.000000 1.000000 1.000000 ]
FaceVertCounts [4 4 4 4 4 4 ]
FaceVertIndices [0 1 2 3 4 7 6 5 0 4 5 1 1 5 6 2 2 6 7 3 4 0 3 7 ]
}
SphereLight {
Name [__Lamp]
Color [rec709, 50.000000 50.000000 50.000000]
Radius [0.100000]
}
}
AddInstance {
Instance {
Name [__Plane_]
Data [__Plane_]
Transform [0.078868 0.000000 0.000000 0.000000 0.078868 0.000000 0.000000 0.000000 0.078868 0.000000 0.000000 0.000000]
}
Instance {
Name [__Cube_]
Data [__Cube_]
Transform [1.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 -1.000000]
}
Instance {
Name [__Lamp]
Data [__Lamp]
Transform [0.019856 -0.060763 0.000000 0.015191 0.079422 0.000000 0.000000 0.000000 1.000000 -0.026851 -0.125233 -4.432303]
}
}


@ -1,22 +1,17 @@
bl_info = {
"name": "PsychoBlend",
"version": (0, 1),
"version": (0, 1, 0),
"author": "Nathan Vegdahl",
"blender": (2, 70, 0),
"blender": (3, 1, 0),
"description": "Psychopath renderer integration",
"location": "",
"wiki_url": "https://github.com/cessen/psychopath/wiki",
"tracker_url": "https://github.com/cessen/psychopath/issues",
"category": "Render"}
# "wiki_url": "https://github.com/cessen/psychopath/wiki",
# "tracker_url": "https://github.com/cessen/psychopath/issues",
"category": "Render"
}
if "bpy" in locals():
import imp
imp.reload(ui)
imp.reload(psy_export)
imp.reload(render)
else:
from . import ui, psy_export, render
from . import ui, psy_export, render
import bpy
from bpy.types import (AddonPreferences,
@ -33,53 +28,46 @@ from bpy.props import (StringProperty,
)
# Custom Scene settings
# Custom Scene settings.
class RenderPsychopathSettingsScene(PropertyGroup):
spp = IntProperty(
spp: IntProperty(
name="Samples Per Pixel", description="Total number of samples to take per pixel",
min=1, max=65536, default=16
)
max_samples_per_bucket = IntProperty(
name="Max Samples Per Bucket", description="How many samples to simultaneously calculate per thread; indirectly determines bucket size",
min=1, max=2**28, soft_max=2**16, default=4096
bucket_size: IntProperty(
name="Bucket Size", description="The height and width of each render bucket in pixels.",
min=1, max=4096, soft_max=256, default=32
)
dicing_rate = FloatProperty(
dicing_rate: FloatProperty(
name="Dicing Rate", description="The target microgeometry width in pixels",
min=0.0001, max=100.0, soft_min=0.125, soft_max=1.0, default=0.25
)
motion_blur_segments = IntProperty(
motion_blur_segments: IntProperty(
name="Motion Segments", description="The number of segments to use in motion blur. Zero means no motion blur. Will be rounded down to the nearest power of two.",
min=0, max=256, default=0
)
shutter_start = FloatProperty(
shutter_start: FloatProperty(
name="Shutter Open", description="The time during the frame that the shutter opens, for motion blur",
min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.0
)
shutter_end = FloatProperty(
shutter_end: FloatProperty(
name="Shutter Close", description="The time during the frame that the shutter closes, for motion blur",
min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.5
)
export_path = StringProperty(
export_path: StringProperty(
name="Export Path", description="The path to where the .psy files should be exported when rendering. If left blank, /tmp or the equivalent is used.",
subtype='FILE_PATH'
)
# Custom Camera properties
class PsychopathCamera(bpy.types.PropertyGroup):
aperture_radius = FloatProperty(
name="Aperture Radius", description="Size of the camera's aperture, for DoF",
min=0.0, max=10000.0, soft_min=0.0, soft_max=2.0, default=0.0
)
# Psychopath material
# Psychopath material.
class PsychopathLight(bpy.types.PropertyGroup):
color_type = EnumProperty(
color_type: EnumProperty(
name="Color Type", description="",
items=[
('Rec709', 'Rec709', ""),
@ -89,27 +77,27 @@ class PsychopathLight(bpy.types.PropertyGroup):
default="Rec709"
)
color_blackbody_temp = FloatProperty(
color_blackbody_temp: FloatProperty(
name="Temperature", description="Blackbody temperature in kelvin",
min=0.0, soft_min=800.0, soft_max=6500.0, default=1200.0
)
# Custom Mesh properties
# Custom Mesh properties.
class PsychopathMesh(bpy.types.PropertyGroup):
is_subdivision_surface = BoolProperty(
is_subdivision_surface: BoolProperty(
name="Is Subdivision Surface", description="Whether this is a sibdivision surface or just a normal mesh",
default=False
)
# Psychopath material
class PsychopathMaterial(bpy.types.PropertyGroup):
surface_shader_type = EnumProperty(
surface_shader_type: EnumProperty(
name="Surface Shader Type", description="",
items=[('Emit', 'Emit', ""), ('Lambert', 'Lambert', ""), ('GGX', 'GGX', "")],
default="Lambert"
)
color_type = EnumProperty(
color_type: EnumProperty(
name="Color Type", description="",
items=[
('Rec709', 'Rec709', ""),
@ -119,39 +107,47 @@ class PsychopathMaterial(bpy.types.PropertyGroup):
default="Rec709"
)
color = FloatVectorProperty(
color: FloatVectorProperty(
name="Color", description="",
subtype='COLOR',
min=0.0, soft_min=0.0, soft_max = 1.0,
default=[0.8,0.8,0.8]
)
color_blackbody_temp = FloatProperty(
color_blackbody_temp: FloatProperty(
name="Temperature", description="Blackbody temperature in kelvin",
min=0.0, soft_min=800.0, soft_max=6500.0, default=1200.0
)
roughness = FloatProperty(
roughness: FloatProperty(
name="Roughness", description="",
min=-1.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.1
)
tail_shape = FloatProperty(
tail_shape: FloatProperty(
name="Tail Shape", description="",
min=0.0, max=8.0, soft_min=1.0, soft_max=3.0, default=2.0
)
fresnel = FloatProperty(
fresnel: FloatProperty(
name="Fresnel", description="",
min=0.0, max=1.0, soft_min=0.0, soft_max=1.0, default=0.9
)
# Psychopath world.
class PsychopathWorld(bpy.types.PropertyGroup):
background_color: FloatVectorProperty(
name="Background Color", description="",
subtype='COLOR',
min=0.0, soft_min=0.0, soft_max = 1.0,
default=[0.8,0.8,0.8]
)
# Addon Preferences
# Addon Preferences.
class PsychopathPreferences(AddonPreferences):
bl_idname = __name__
filepath_psychopath = StringProperty(
filepath_psychopath: StringProperty(
name="Psychopath Location",
description="Path to renderer executable",
subtype='DIR_PATH',
@ -166,15 +162,15 @@ class PsychopathPreferences(AddonPreferences):
def register():
bpy.utils.register_class(PsychopathPreferences)
bpy.utils.register_class(RenderPsychopathSettingsScene)
bpy.utils.register_class(PsychopathCamera)
bpy.utils.register_class(PsychopathLight)
bpy.utils.register_class(PsychopathMesh)
bpy.utils.register_class(PsychopathMaterial)
bpy.utils.register_class(PsychopathWorld)
bpy.types.Scene.psychopath = PointerProperty(type=RenderPsychopathSettingsScene)
bpy.types.Camera.psychopath = PointerProperty(type=PsychopathCamera)
bpy.types.Lamp.psychopath = PointerProperty(type=PsychopathLight)
bpy.types.Light.psychopath = PointerProperty(type=PsychopathLight)
bpy.types.Mesh.psychopath = PointerProperty(type=PsychopathMesh)
bpy.types.Material.psychopath = PointerProperty(type=PsychopathMaterial)
bpy.types.World.psychopath = PointerProperty(type=PsychopathWorld)
render.register()
ui.register()
@ -182,14 +178,14 @@ def register():
def unregister():
bpy.utils.unregister_class(PsychopathPreferences)
bpy.utils.unregister_class(RenderPsychopathSettingsScene)
bpy.utils.unregister_class(PsychopathCamera)
bpy.utils.unregister_class(PsychopathLight)
bpy.utils.unregister_class(PsychopathMesh)
bpy.utils.unregister_class(PsychopathMaterial)
bpy.utils.unregister_class(PsychopathWorld)
del bpy.types.Scene.psychopath
del bpy.types.Camera.psychopath
del bpy.types.Lamp.psychopath
del bpy.types.Light.psychopath
del bpy.types.Mesh.psychopath
del bpy.types.Material.psychopath
del bpy.types.World.psychopath
render.unregister()
ui.unregister()


@ -1,398 +0,0 @@
import bpy
from .util import escape_name, mat2str, needs_def_mb, needs_xform_mb, ExportCancelled
class Assembly:
def __init__(self, render_engine, objects, visible_layers, group_prefix="", translation_offset=(0,0,0)):
self.name = group_prefix
self.translation_offset = translation_offset
self.render_engine = render_engine
self.materials = []
self.objects = []
self.instances = []
self.material_names = set()
self.mesh_names = set()
self.assembly_names = set()
# Collect all the objects, materials, instances, etc.
for ob in objects:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
# Check if the object is visible for rendering
vis_layer = False
for i in range(len(ob.layers)):
vis_layer = vis_layer or (ob.layers[i] and visible_layers[i])
if ob.hide_render or not vis_layer:
continue
# Store object data
name = None
if ob.type == 'EMPTY':
if ob.dupli_type == 'GROUP':
name = group_prefix + "__" + escape_name(ob.dupli_group.name)
if name not in self.assembly_names:
self.assembly_names.add(name)
self.objects += [Assembly(self.render_engine, ob.dupli_group.objects, ob.dupli_group.layers, name, ob.dupli_group.dupli_offset*-1)]
elif ob.type == 'MESH':
name = self.get_mesh(ob, group_prefix)
elif ob.type == 'LAMP' and ob.data.type == 'POINT':
name = self.get_sphere_lamp(ob, group_prefix)
elif ob.type == 'LAMP' and ob.data.type == 'AREA':
name = self.get_rect_lamp(ob, group_prefix)
# Store instance
if name != None:
self.instances += [Instance(render_engine, ob, name)]
def export(self, render_engine, w):
if self.name == "":
w.write("Assembly {\n")
else:
w.write("Assembly $%s {\n" % self.name)
w.indent()
for mat in self.materials:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
mat.export(render_engine, w)
for ob in self.objects:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
ob.export(render_engine, w)
for inst in self.instances:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
inst.export(render_engine, w)
w.unindent()
w.write("}\n")
#----------------
def take_sample(self, render_engine, scene, time):
for mat in self.materials:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
mat.take_sample(render_engine, scene, time)
for ob in self.objects:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
ob.take_sample(render_engine, scene, time)
for inst in self.instances:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
inst.take_sample(render_engine, time, self.translation_offset)
def cleanup(self):
for mat in self.materials:
mat.cleanup()
for ob in self.objects:
ob.cleanup()
def get_mesh(self, ob, group_prefix):
# Figure out if we need to export or not and figure out what name to
# export with.
has_modifiers = len(ob.modifiers) > 0
deform_mb = needs_def_mb(ob)
if has_modifiers or deform_mb:
mesh_name = group_prefix + escape_name("__" + ob.name + "__" + ob.data.name + "_")
else:
mesh_name = group_prefix + escape_name("__" + ob.data.name + "_")
has_faces = len(ob.data.polygons) > 0
should_export_mesh = has_faces and (mesh_name not in self.mesh_names)
# Get mesh
if should_export_mesh:
self.mesh_names.add(mesh_name)
self.objects += [Mesh(self.render_engine, ob, mesh_name)]
# Get materials
for ms in ob.material_slots:
if ms != None:
if ms.material.name not in self.material_names:
self.material_names.add(ms.material.name)
self.materials += [Material(self.render_engine, ms.material)]
return mesh_name
else:
return None
def get_sphere_lamp(self, ob, group_prefix):
name = group_prefix + "__" + escape_name(ob.name)
self.objects += [SphereLamp(self.render_engine, ob, name)]
return name
def get_rect_lamp(self, ob, group_prefix):
name = group_prefix + "__" + escape_name(ob.name)
self.objects += [RectLamp(self.render_engine, ob, name)]
return name
#=========================================================================
class Mesh:
""" Holds data for a mesh to be exported.
"""
def __init__(self, render_engine, ob, name):
self.ob = ob
self.name = name
self.needs_mb = needs_def_mb(self.ob)
self.time_meshes = []
def take_sample(self, render_engine, scene, time):
if len(self.time_meshes) == 0 or self.needs_mb:
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.ob.name, time))
self.time_meshes += [self.ob.to_mesh(scene, True, 'RENDER')]
def cleanup(self):
for mesh in self.time_meshes:
bpy.data.meshes.remove(mesh)
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
if self.ob.data.psychopath.is_subdivision_surface == False:
# Exporting normal mesh
w.write("MeshSurface $%s {\n" % self.name)
w.indent()
else:
# Exporting subdivision surface cage
w.write("SubdivisionSurface $%s {\n" % self.name)
w.indent()
# Write vertices and (if it's smooth shaded) normals
for ti in range(len(self.time_meshes)):
w.write("Vertices [")
w.write(" ".join([("%f" % i) for vert in self.time_meshes[ti].vertices for i in vert.co]), False)
w.write("]\n", False)
if self.time_meshes[0].polygons[0].use_smooth and self.ob.data.psychopath.is_subdivision_surface == False:
w.write("Normals [")
w.write(" ".join([("%f" % i) for vert in self.time_meshes[ti].vertices for i in vert.normal]), False)
w.write("]\n", False)
# Write face vertex counts
w.write("FaceVertCounts [")
w.write(" ".join([("%d" % len(p.vertices)) for p in self.time_meshes[0].polygons]), False)
w.write("]\n", False)
# Write face vertex indices
w.write("FaceVertIndices [")
w.write(" ".join([("%d"%v) for p in self.time_meshes[0].polygons for v in p.vertices]), False)
w.write("]\n", False)
# MeshSurface/SubdivisionSurface section end
w.unindent()
w.write("}\n")
class SphereLamp:
""" Holds data for a sphere light to be exported.
"""
def __init__(self, render_engine, ob, name):
self.ob = ob
self.name = name
self.time_col = []
self.time_rad = []
def take_sample(self, render_engine, scene, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.ob.name, time))
if self.ob.data.psychopath.color_type == 'Rec709':
self.time_col += [('Rec709', self.ob.data.color * self.ob.data.energy)]
elif self.ob.data.psychopath.color_type == 'Blackbody':
self.time_col += [('Blackbody', self.ob.data.psychopath.color_blackbody_temp, self.ob.data.energy)]
elif self.ob.data.psychopath.color_type == 'ColorTemperature':
self.time_col += [('ColorTemperature', self.ob.data.psychopath.color_blackbody_temp, self.ob.data.energy)]
self.time_rad += [self.ob.data.shadow_soft_size]
def cleanup(self):
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
w.write("SphereLight $%s {\n" % self.name)
w.indent()
for col in self.time_col:
if col[0] == 'Rec709':
w.write("Color [rec709, %f %f %f]\n" % (col[1][0], col[1][1], col[1][2]))
elif col[0] == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (col[1], col[2]))
elif col[0] == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (col[1], col[2]))
for rad in self.time_rad:
w.write("Radius [%f]\n" % rad)
w.unindent()
w.write("}\n")
class RectLamp:
""" Holds data for a rectangular light to be exported.
"""
def __init__(self, render_engine, ob, name):
self.ob = ob
self.name = name
self.time_col = []
self.time_dim = []
def take_sample(self, render_engine, scene, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.ob.name, time))
if self.ob.data.psychopath.color_type == 'Rec709':
self.time_col += [('Rec709', self.ob.data.color * self.ob.data.energy)]
elif self.ob.data.psychopath.color_type == 'Blackbody':
self.time_col += [('Blackbody', self.ob.data.psychopath.color_blackbody_temp, self.ob.data.energy)]
elif self.ob.data.psychopath.color_type == 'ColorTemperature':
self.time_col += [('ColorTemperature', self.ob.data.psychopath.color_blackbody_temp, self.ob.data.energy)]
if self.ob.data.shape == 'RECTANGLE':
self.time_dim += [(self.ob.data.size, self.ob.data.size_y)]
else:
self.time_dim += [(self.ob.data.size, self.ob.data.size)]
def cleanup(self):
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
w.write("RectangleLight $%s {\n" % self.name)
w.indent()
for col in self.time_col:
if col[0] == 'Rec709':
w.write("Color [rec709, %f %f %f]\n" % (col[1][0], col[1][1], col[1][2]))
elif col[0] == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (col[1], col[2]))
elif col[0] == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (col[1], col[2]))
for dim in self.time_dim:
w.write("Dimensions [%f %f]\n" % dim)
w.unindent()
w.write("}\n")
class Instance:
def __init__(self, render_engine, ob, data_name):
self.ob = ob
self.data_name = data_name
self.needs_mb = needs_xform_mb(self.ob)
self.time_xforms = []
def take_sample(self, render_engine, time, translation_offset):
if len(self.time_xforms) == 0 or self.needs_mb:
render_engine.update_stats("", "Psychopath: Collecting '{}' xforms at time {}".format(self.ob.name, time))
mat = self.ob.matrix_world.copy()
mat[0][3] += translation_offset[0]
mat[1][3] += translation_offset[1]
mat[2][3] += translation_offset[2]
self.time_xforms += [mat]
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
w.write("Instance {\n")
w.indent()
w.write("Data [$%s]\n" % self.data_name)
for mat in self.time_xforms:
w.write("Transform [%s]\n" % mat2str(mat.inverted()))
for ms in self.ob.material_slots:
if ms != None:
w.write("SurfaceShaderBind [$%s]\n" % escape_name(ms.material.name))
break
w.unindent()
w.write("}\n")
class Material:
def __init__(self, render_engine, material):
self.mat = material
def take_sample(self, render_engine, time, translation_offset):
# TODO: motion blur of material settings
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.mat.name)
w.write("SurfaceShader $%s {\n" % escape_name(self.mat.name))
w.indent()
if self.mat.psychopath.surface_shader_type == 'Emit':
w.write("Type [Emit]\n")
if self.mat.psychopath.color_type == 'Rec709':
col = self.mat.psychopath.color
w.write("Color [rec709, %f %f %f]\n" % (
col[0], col[1], col[2],
))
elif self.mat.psychopath.color_type == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.color_type == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.surface_shader_type == 'Lambert':
w.write("Type [Lambert]\n")
if self.mat.psychopath.color_type == 'Rec709':
col = self.mat.psychopath.color
w.write("Color [rec709, %f %f %f]\n" % (
col[0], col[1], col[2],
))
elif self.mat.psychopath.color_type == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.color_type == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.surface_shader_type == 'GGX':
w.write("Type [GGX]\n")
if self.mat.psychopath.color_type == 'Rec709':
col = self.mat.psychopath.color
w.write("Color [rec709, %f %f %f]\n" % (
col[0], col[1], col[2],
))
elif self.mat.psychopath.color_type == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.color_type == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
w.write("Roughness [%f]\n" % self.mat.psychopath.roughness)
w.write("Fresnel [%f]\n" % self.mat.psychopath.fresnel)
else:
raise "Unsupported surface shader type '%s'" % self.mat.psychopath.surface_shader_type
w.unindent()
w.write("}\n")
def cleanup(self):
pass

psychoblend/material.py Normal file

@ -0,0 +1,77 @@
import bpy
from .util import escape_name, mat2str, needs_def_mb, needs_xform_mb, ExportCancelled
class Material:
def __init__(self, render_engine, depsgraph, material):
self.mat = material
def take_sample(self, render_engine, depsgraph, time):
# TODO: motion blur of material settings
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.mat.name)
w.write("SurfaceShader $%s {\n" % escape_name(self.mat.name))
w.indent()
if self.mat.psychopath.surface_shader_type == 'Emit':
w.write("Type [Emit]\n")
if self.mat.psychopath.color_type == 'Rec709':
col = self.mat.psychopath.color
w.write("Color [rec709, %f %f %f]\n" % (
col[0], col[1], col[2],
))
elif self.mat.psychopath.color_type == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.color_type == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.surface_shader_type == 'Lambert':
w.write("Type [Lambert]\n")
if self.mat.psychopath.color_type == 'Rec709':
col = self.mat.psychopath.color
w.write("Color [rec709, %f %f %f]\n" % (
col[0], col[1], col[2],
))
elif self.mat.psychopath.color_type == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.color_type == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.surface_shader_type == 'GGX':
w.write("Type [GGX]\n")
if self.mat.psychopath.color_type == 'Rec709':
col = self.mat.psychopath.color
w.write("Color [rec709, %f %f %f]\n" % (
col[0], col[1], col[2],
))
elif self.mat.psychopath.color_type == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
elif self.mat.psychopath.color_type == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (
self.mat.psychopath.color_blackbody_temp,
1.0,
))
w.write("Roughness [%f]\n" % self.mat.psychopath.roughness)
w.write("Fresnel [%f]\n" % self.mat.psychopath.fresnel)
else:
raise "Unsupported surface shader type '%s'" % self.mat.psychopath.surface_shader_type
w.unindent()
w.write("}\n")
def cleanup(self):
pass

psychoblend/objects.py Normal file

@ -0,0 +1,206 @@
import bpy
from .util import escape_name, mat2str, needs_def_mb, needs_xform_mb, ExportCancelled
from mathutils import Vector, Matrix
def make_object_data_cache(render_engine, depsgraph, ob, name):
if ob.type == 'MESH':
return Mesh(render_engine, depsgraph, ob, name)
elif ob.type == 'LIGHT':
if ob.data.type == 'POINT':
return SphereLamp(render_engine, depsgraph, ob, name)
elif ob.data.type == 'AREA':
return RectLamp(render_engine, depsgraph, ob, name)
class Mesh:
""" Holds data for a mesh to be exported.
"""
def __init__(self, render_engine, depsgraph, ob, name):
self.name = name
self.material_name = None
if len(ob.material_slots) >= 1 and ob.material_slots[0].material != None:
self.material_name = ob.material_slots[0].material.name
self.is_subdiv = ob.data.psychopath.is_subdivision_surface
self.needs_mb = needs_def_mb(ob)
self.time_meshes = []
def take_sample(self, render_engine, depsgraph, ob, time):
if len(self.time_meshes) == 0 or self.needs_mb:
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.name, time))
self.time_meshes += [ob.to_mesh(depsgraph=depsgraph).copy()]
def cleanup(self):
for mesh in self.time_meshes:
bpy.data.meshes.remove(mesh)
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.name)
if self.is_subdiv == False:
# Exporting normal mesh
w.write("MeshSurface $%s {\n" % escape_name(self.name))
w.indent()
else:
# Exporting subdivision surface cage
w.write("SubdivisionSurface $%s {\n" % escape_name(self.name))
w.indent()
# Material bindings.
if self.material_name != None:
w.write("SurfaceShaderBind [${}]\n".format(escape_name(self.material_name)))
# Write vertices and (if it's smooth shaded) normals
for ti in range(len(self.time_meshes)):
w.write("Vertices [")
w.write(" ".join([("%f" % i) for vert in self.time_meshes[ti].vertices for i in vert.co]), False)
w.write("]\n", False)
if self.time_meshes[0].polygons[0].use_smooth and self.is_subdiv == False:
w.write("Normals [")
w.write(" ".join([("%f" % i) for vert in self.time_meshes[ti].vertices for i in vert.normal]), False)
w.write("]\n", False)
# Write face vertex counts
w.write("FaceVertCounts [")
w.write(" ".join([("%d" % len(p.vertices)) for p in self.time_meshes[0].polygons]), False)
w.write("]\n", False)
# Write face vertex indices
w.write("FaceVertIndices [")
w.write(" ".join([("%d"%v) for p in self.time_meshes[0].polygons for v in p.vertices]), False)
w.write("]\n", False)
# MeshSurface/SubdivisionSurface section end
w.unindent()
w.write("}\n")
class SphereLamp:
""" Holds data for a sphere light to be exported.
"""
def __init__(self, render_engine, depsgraph, ob, name):
self.name = name
self.time_col = []
self.time_rad = []
def take_sample(self, render_engine, depsgraph, ob, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(ob.name, time))
if ob.data.psychopath.color_type == 'Rec709':
self.time_col += [('Rec709', ob.data.color * ob.data.energy)]
elif ob.data.psychopath.color_type == 'Blackbody':
self.time_col += [('Blackbody', ob.data.psychopath.color_blackbody_temp, ob.data.energy)]
elif ob.data.psychopath.color_type == 'ColorTemperature':
self.time_col += [('ColorTemperature', ob.data.psychopath.color_blackbody_temp, ob.data.energy)]
self.time_rad += [ob.data.shadow_soft_size]
def cleanup(self):
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.name)
w.write("SphereLight $%s {\n" % escape_name(self.name))
w.indent()
for col in self.time_col:
if col[0] == 'Rec709':
w.write("Color [rec709, %f %f %f]\n" % (col[1][0], col[1][1], col[1][2]))
elif col[0] == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (col[1], col[2]))
elif col[0] == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (col[1], col[2]))
for rad in self.time_rad:
w.write("Radius [%f]\n" % rad)
w.unindent()
w.write("}\n")
class RectLamp:
""" Holds data for a rectangular light to be exported.
"""
def __init__(self, render_engine, depsgraph, ob, name):
self.name = name
self.time_col = []
self.time_dim = []
def take_sample(self, render_engine, depsgraph, ob, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.name, time))
if ob.data.psychopath.color_type == 'Rec709':
self.time_col += [('Rec709', ob.data.color * ob.data.energy / 2)]
elif ob.data.psychopath.color_type == 'Blackbody':
self.time_col += [('Blackbody', ob.data.psychopath.color_blackbody_temp, ob.data.energy)]
elif ob.data.psychopath.color_type == 'ColorTemperature':
self.time_col += [('ColorTemperature', ob.data.psychopath.color_blackbody_temp, ob.data.energy)]
if ob.data.shape == 'RECTANGLE':
self.time_dim += [(ob.data.size, ob.data.size_y)]
else:
self.time_dim += [(ob.data.size, ob.data.size)]
def cleanup(self):
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.name)
w.write("RectangleLight $%s {\n" % escape_name(self.name))
w.indent()
for col in self.time_col:
if col[0] == 'Rec709':
w.write("Color [rec709, %f %f %f]\n" % (col[1][0], col[1][1], col[1][2]))
elif col[0] == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (col[1], col[2]))
elif col[0] == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (col[1], col[2]))
for dim in self.time_dim:
w.write("Dimensions [%f %f]\n" % dim)
w.unindent()
w.write("}\n")
class DistantDiskLamp:
def __init__(self, render_engine, depsgraph, ob, name):
self.name = name
self.time_col = []
self.time_dir = []
self.time_rad = []
def take_sample(self, render_engine, depsgraph, ob, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.name, time))
self.time_dir += [tuple(ob.matrix_world.to_3x3() @ Vector((0, 0, -1)))]
if ob.data.psychopath.color_type == 'Rec709':
self.time_col += [('Rec709', ob.data.color * ob.data.energy)]
elif ob.data.psychopath.color_type == 'Blackbody':
self.time_col += [('Blackbody', ob.data.psychopath.color_blackbody_temp, ob.data.energy)]
elif ob.data.psychopath.color_type == 'ColorTemperature':
self.time_col += [('ColorTemperature', ob.data.psychopath.color_blackbody_temp, ob.data.energy)]
self.time_rad += [ob.data.shadow_soft_size]
def cleanup(self):
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % escape_name(self.name))
w.write("DistantDiskLight $%s {\n" % self.name)
w.indent()
for direc in self.time_dir:
w.write("Direction [%f %f %f]\n" % (direc[0], direc[1], direc[2]))
for col in self.time_col:
if col[0] == 'Rec709':
w.write("Color [rec709, %f %f %f]\n" % (col[1][0], col[1][1], col[1][2]))
elif col[0] == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (col[1], col[2]))
elif col[0] == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (col[1], col[2]))
for rad in self.time_rad:
w.write("Radius [%f]\n" % rad)
w.unindent()
w.write("}\n")


@ -2,9 +2,11 @@ import bpy
from math import log
from .assembly import Assembly
from .material import Material
from .objects import make_object_data_cache, Mesh, DistantDiskLamp
from .util import escape_name, mat2str, ExportCancelled
from .world import World
from .world import World, Camera
from . import bl_info
class IndentedWriter:
@ -29,25 +31,43 @@ class IndentedWriter:
class PsychoExporter:
def __init__(self, f, render_engine, scene):
def __init__(self, f, render_engine, depsgraph):
self.w = IndentedWriter(f)
self.render_engine = render_engine
self.scene = scene
self.depsgraph = depsgraph
self.scene = depsgraph.scene
self.view_layer = depsgraph.view_layer
self.mesh_names = {}
self.group_names = {}
# For camera data.
res_x = int(self.scene.render.resolution_x * (self.scene.render.resolution_percentage / 100))
res_y = int(self.scene.render.resolution_y * (self.scene.render.resolution_percentage / 100))
self.camera = Camera(render_engine, depsgraph.scene.camera, float(res_x) / float(res_y))
# Motion blur segments are rounded down to a power of two
if scene.psychopath.motion_blur_segments > 0:
self.time_samples = (2**int(log(scene.psychopath.motion_blur_segments, 2))) + 1
# For world data.
self.world = World(render_engine, depsgraph)
# For all objects except sun lamps.
self.object_data = {} # name -> cached_data
self.instances = {} # instance_id -> [object_data_name, transform_list]
# For all sun lamps.
self.sun_lamp_data = {} # name -> cached_data
self.sun_lamp_instances = {} # instance_id -> [sun_lamp_data_name, transform_list]
# For all materials.
self.materials = {} # name -> cached_data
# Motion blur segments are rounded down to a power of two.
if self.scene.psychopath.motion_blur_segments > 0:
self.time_samples = (2**int(log(self.scene.psychopath.motion_blur_segments, 2))) + 1
else:
self.time_samples = 1
# pre-calculate useful values for exporting motion blur
self.shutter_start = scene.psychopath.shutter_start
self.shutter_diff = (scene.psychopath.shutter_end - scene.psychopath.shutter_start) / max(1, (self.time_samples-1))
# pre-calculate useful values for exporting motion blur.
self.shutter_start = self.scene.psychopath.shutter_start
self.shutter_diff = (self.scene.psychopath.shutter_end - self.scene.psychopath.shutter_start) / max(1, (self.time_samples-1))
self.fr = scene.frame_current
self.fr = self.scene.frame_current
def set_frame(self, frame, fraction):
@ -70,25 +90,31 @@ class PsychoExporter:
def _export_psy(self):
# Info
self.w.write("# Exported from Blender 2.7x\n")
self.w.write("# Exported from Blender {} with PsychoBlend {}.{}.{}\n".format(
bpy.app.version_string,
bl_info["version"][0],
bl_info["version"][1],
bl_info["version"][2],
))
# Scene begin
self.w.write("\n\nScene $%s_fr%d {\n" % (escape_name(self.scene.name), self.fr))
self.w.indent()
#######################
# Output section begin
#------------------------------------------------------
# Output section.
self.w.write("Output {\n")
self.w.indent()
self.w.write('Path [""]\n')
# Output section end
self.w.unindent()
self.w.write("}\n")
###############################
# RenderSettings section begin
#------------------------------------------------------
# RenderSettings section.
self.w.write("RenderSettings {\n")
self.w.indent()
@ -99,34 +125,145 @@ class PsychoExporter:
self.w.write("DicingRate [%f]\n" % self.scene.psychopath.dicing_rate)
self.w.write('Seed [%d]\n' % self.fr)
# RenderSettings section end
self.w.unindent()
self.w.write("}\n")
###############################
# Export world and object data
world = None
root_assembly = None
#------------------------------------------------------
# Collect materials.
# TODO: handle situations where there are more than one
# material with the same name. This can happen through
# library linking.
for inst in self.depsgraph.object_instances:
ob = inst.object
if ob.type in ['MESH']:
for ms in ob.material_slots:
if ms.material != None:
if ms.material.name not in self.materials:
self.materials[ms.material.name] = Material(self.render_engine, self.depsgraph, ms.material)
#------------------------------------------------------
# Collect world and object data.
try:
# Prep for data collection
world = World(self.render_engine, self.scene, self.scene.layers, float(res_x) / float(res_y))
root_assembly = Assembly(self.render_engine, self.scene.objects, self.scene.layers)
# Collect data for each time sample
for i in range(self.time_samples):
time = self.fr + self.shutter_start + (self.shutter_diff*i)
self.set_frame(self.fr, self.shutter_start + (self.shutter_diff*i))
world.take_sample(self.render_engine, self.scene, time)
root_assembly.take_sample(self.render_engine, self.scene, time)
# Check if render is cancelled
if self.render_engine.test_break():
raise ExportCancelled()
# Export collected data
world.export(self.render_engine, self.w)
root_assembly.export(self.render_engine, self.w)
finally:
if world != None:
world.cleanup()
if root_assembly != None:
root_assembly.cleanup()
subframe = self.shutter_start + (self.shutter_diff*i)
time = self.fr + subframe
self.depsgraph.scene.frame_set(self.fr, subframe=subframe)
self.depsgraph.update()
# Collect camera and world data.
self.camera.take_sample(self.render_engine, self.depsgraph, time)
self.world.take_sample(self.render_engine, self.depsgraph, time)
# Collect renderable objects.
collected_objs = set() # Names of the objects whose data has already been collected.
for inst in self.depsgraph.object_instances:
# Check if render is cancelled
if self.render_engine.test_break():
raise ExportCancelled()
if inst.object.type not in ['MESH', 'LIGHT']:
continue
# We use this a couple of times, so make a shorthand.
is_sun_lamp = inst.object.type == 'LIGHT' and inst.object.data.type == 'SUN'
# TODO: handle situations where there are more than one
# object with the same name. This can happen through
# library linking.
# Get a unique id for the instance. This is surprisingly
# tricky, because the instance's "persistent_id" property
# isn't globally unique, as I would have expected from
# the documentation.
id = None
if inst.is_instance:
id = (
hash((inst.object.name, inst.parent.name)),
# Has to be turned into a tuple, otherwise it doesn't
# work as part of the ID for some reason.
tuple(inst.persistent_id),
)
else:
id = inst.object.name
# Save the instance transforms.
if is_sun_lamp:
if id not in self.sun_lamp_instances:
self.sun_lamp_instances[id] = [inst.object.name, [inst.matrix_world.copy()]]
else:
self.sun_lamp_instances[id][1] += [inst.matrix_world.copy()]
else:
if id not in self.instances:
self.instances[id] = [inst.object.name, [inst.matrix_world.copy()]]
else:
self.instances[id][1] += [inst.matrix_world.copy()]
# Save the object data if it hasn't already been saved.
if inst.object.name not in collected_objs:
collected_objs.add(inst.object.name)
if is_sun_lamp:
if inst.object.name not in self.sun_lamp_data:
self.sun_lamp_data[inst.object.name] = DistantDiskLamp(self.render_engine, self.depsgraph, inst.object, inst.object.name)
self.sun_lamp_data[inst.object.name].take_sample(self.render_engine, self.depsgraph, inst.object, time)
else:
if inst.object.name not in self.object_data:
self.object_data[inst.object.name] = make_object_data_cache(self.render_engine, self.depsgraph, inst.object, inst.object.name)
self.object_data[inst.object.name].take_sample(self.render_engine, self.depsgraph, inst.object, time)
#------------------------------------------------------
# Export world and object data.
self.camera.export(self.render_engine, self.w)
self.world.export(self.render_engine, self.w)
self.w.write("Assembly {\n")
self.w.indent()
# Export materials.
for name in self.materials:
self.materials[name].export(self.render_engine, self.w)
# Export objects.
for name in self.object_data:
self.object_data[name].export(self.render_engine, self.w)
# Export instances.
for id in self.instances:
[obj_name, xforms] = self.instances[id]
self.render_engine.update_stats("", "Psychopath: Exporting %s instance" % obj_name)
prefix = str(hex(hash(id)))
name = "inst_{}__{}".format(prefix, escape_name(obj_name))
self.w.write("Instance {\n")
self.w.indent()
self.w.write("Data [${}]\n".format(escape_name(obj_name)))
for mat in xforms:
self.w.write("Transform [{}]\n".format(mat2str(mat)))
self.w.unindent()
self.w.write("}\n")
self.w.unindent()
self.w.write("}\n")
finally:
#------------------------------------------------------
# Cleanup collected data.
self.camera.cleanup()
self.world.cleanup()
for data in self.sun_lamp_data:
self.sun_lamp_data[data].cleanup()
for data in self.object_data:
self.object_data[data].cleanup()
# Scene end
self.w.unindent()


@ -9,11 +9,37 @@ from . import psy_export
class PsychopathRender(bpy.types.RenderEngine):
bl_idname = 'PSYCHOPATH_RENDER'
bl_label = "Psychopath"
DELAY = 1.0
bl_use_preview = False
def __init__(self):
pass
def __del__(self):
pass
def update(self, data, depsgraph):
pass
def render(self, depsgraph):
self._process = None
try:
self._render(depsgraph)
except:
if self._process != None:
self._process.terminate()
raise
def view_update(self, context, depsgraph):
pass
def view_draw(self, context, depsgraph):
pass
#----------------------------------------------------------
@staticmethod
def _locate_binary():
addon_prefs = bpy.context.user_preferences.addons[__package__].preferences
addon_prefs = bpy.context.preferences.addons[__package__].preferences
# Use the system preference if its set.
psy_binary = addon_prefs.filepath_psychopath
@ -23,7 +49,7 @@ class PsychopathRender(bpy.types.RenderEngine):
else:
print("User Preference to psychopath %r NOT FOUND, checking $PATH" % psy_binary)
# search the path all os's
# Search for the path.
psy_binary_default = "psychopath"
os_path_ls = os.getenv("PATH").split(':') + [""]
@ -45,13 +71,13 @@ class PsychopathRender(bpy.types.RenderEngine):
if crop != None:
args += ["--crop", str(crop[0]), str(self.size_y - crop[3]), str(crop[2] - 1), str(self.size_y - crop[1] - 1)]
if use_stdin:
args += ["--spb", str(scene.psychopath.max_samples_per_bucket), "--serialized_output", "--use_stdin"]
args += ["--bucket_size", str(scene.psychopath.bucket_size), "--serialized_output", "--use_stdin"]
else:
args += ["--spb", str(scene.psychopath.max_samples_per_bucket), "--serialized_output", "-i", psy_filepath]
args += ["--bucket_size", str(scene.psychopath.bucket_size), "--serialized_output", "-i", psy_filepath]
# Start Rendering!
try:
self._process = subprocess.Popen([psy_binary] + args, bufsize=1, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
self._process = subprocess.Popen([psy_binary] + args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
except OSError:
# TODO, report api
print("Psychopath: could not execute '%s'" % psy_binary)
@ -73,28 +99,21 @@ class PsychopathRender(bpy.types.RenderEngine):
height = bucket_info[3] - bucket_info[1]
# Decode pixel data
pixels = [p for p in struct.iter_unpack("ffff", base64.b64decode(pixels_encoded))]
pixels_flipped = []
pixels_unpacked = [p for p in struct.iter_unpack("ffff", base64.b64decode(pixels_encoded))]
pixels = []
for i in range(height):
n = height - i - 1
pixels_flipped += pixels[n*width:(n+1)*width]
pixels += pixels_unpacked[n*width:(n+1)*width]
# Write pixel data to render image
result = self.begin_result(x, y, width, height)
lay = result.layers[0].passes["Combined"]
lay.rect = pixels_flipped
lay.rect = pixels
self.end_result(result)
def render(self, scene):
self._process = None
try:
self._render(scene)
except:
if self._process != None:
self._process.terminate()
raise
def _render(self, depsgraph):
scene = depsgraph.scene
def _render(self, scene):
# has to be called to update the frame on exporting animations
scene.frame_set(scene.frame_current)
@ -130,8 +149,8 @@ class PsychopathRender(bpy.types.RenderEngine):
return
self.update_stats("", "Psychopath: Collecting...")
# Export to Psychopath's stdin
if not psy_export.PsychoExporter(self._process.stdin, self, scene).export_psy():
# Export to Psychopath's stdin.
if not psy_export.PsychoExporter(self._process.stdin, self, depsgraph).export_psy():
# Render cancelled in the middle of exporting,
# so just return.
self._process.terminate()
@ -142,7 +161,7 @@ class PsychopathRender(bpy.types.RenderEngine):
# Export to file
self.update_stats("", "Psychopath: Exporting data from Blender")
with open(export_path, 'w+b') as f:
if not psy_export.PsychoExporter(f, self, scene).export_psy():
if not psy_export.PsychoExporter(f, self, depsgraph).export_psy():
# Render cancelled in the middle of exporting,
# so just return.
return
@ -183,7 +202,7 @@ class PsychopathRender(bpy.types.RenderEngine):
# Get render output from stdin
tmp = self._process.stdout.read1(2**16)
if len(tmp) == 0:
time.sleep(0.0001) # Don't spin on the CPU
time.sleep(0.001) # Don't spin on the CPU
if render_process_finished:
all_output_consumed = True
continue

View File

@ -1,18 +1,291 @@
import bpy
# Use some of the existing buttons.
from bl_ui import properties_render
properties_render.RENDER_PT_render.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
properties_render.RENDER_PT_dimensions.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
properties_render.RENDER_PT_output.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
del properties_render
#--------------------------------------------------------------
# Specify which existing Blender UI panels Psychopath
# uses/is compatible with.
from bl_ui import properties_data_camera
properties_data_camera.DATA_PT_lens.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
properties_data_camera.DATA_PT_camera.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
properties_data_camera.DATA_PT_camera_display.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
properties_data_camera.DATA_PT_custom_props_camera.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')
del properties_data_camera
import bl_ui
def register_engine_with_panels(area, list):
# TODO: reverse this, so we're checking if the list item is
# in the real panels, and throw an error if it's not. That
# way things don't just silently fail.
for p in list:
eval("bl_ui.{}.{}.COMPAT_ENGINES.add('PSYCHOPATH_RENDER')".format(area, p))
register_engine_with_panels(
"properties_render",
[
"RENDER_PT_color_management",
"RENDER_PT_color_management_curves",
]
)
register_engine_with_panels(
"properties_output",
[
"RENDER_PT_encoding",
"RENDER_PT_encoding_audio",
"RENDER_PT_encoding_video",
"RENDER_PT_format",
"RENDER_PT_frame_range",
"RENDER_PT_output",
"RENDER_PT_output_views",
"RENDER_PT_post_processing",
"RENDER_PT_stamp",
"RENDER_PT_stamp_burn",
"RENDER_PT_stamp_note",
# "RENDER_PT_stereoscopy",
"RENDER_PT_time_stretching",
]
)
register_engine_with_panels(
"properties_view_layer",
[
"VIEWLAYER_PT_layer",
]
)
register_engine_with_panels(
"properties_data_camera",
[
"DATA_PT_context_camera",
"DATA_PT_custom_props_camera",
"DATA_PT_camera",
"DATA_PT_lens",
"DATA_PT_camera_dof",
"DATA_PT_camera_dof_aperture",
"DATA_PT_camera_display",
"DATA_PT_camera_display_composition_guides",
"DATA_PT_camera_safe_areas",
"DATA_PT_camera_safe_areas_center_cut",
"DATA_PT_camera_background_image",
]
)
register_engine_with_panels(
"properties_data_mesh",
[
"DATA_PT_context_mesh",
"DATA_PT_custom_props_mesh",
"DATA_PT_customdata",
"DATA_PT_face_maps",
"DATA_PT_mesh_attributes",
"DATA_PT_normals",
"DATA_PT_remesh",
"DATA_PT_shape_keys",
"DATA_PT_texture_space",
"DATA_PT_uv_texture",
"DATA_PT_vertex_colors",
"DATA_PT_vertex_groups",
]
)
register_engine_with_panels(
"properties_particle",
[
"PARTICLE_MT_context_menu",
"PARTICLE_PT_boidbrain",
"PARTICLE_PT_cache",
"PARTICLE_PT_children",
"PARTICLE_PT_children_clumping",
"PARTICLE_PT_children_clumping_noise",
"PARTICLE_PT_children_kink",
"PARTICLE_PT_children_parting",
"PARTICLE_PT_children_roughness",
"PARTICLE_PT_context_particles",
"PARTICLE_PT_custom_props",
"PARTICLE_PT_draw",
"PARTICLE_PT_emission",
"PARTICLE_PT_emission_source",
"PARTICLE_PT_field_weights",
"PARTICLE_PT_force_fields",
"PARTICLE_PT_force_fields_type1",
"PARTICLE_PT_force_fields_type1_falloff",
"PARTICLE_PT_force_fields_type2",
"PARTICLE_PT_force_fields_type2_falloff",
"PARTICLE_PT_hair_dynamics",
"PARTICLE_PT_hair_dynamics_collision",
"PARTICLE_PT_hair_dynamics_presets",
"PARTICLE_PT_hair_dynamics_structure",
"PARTICLE_PT_hair_dynamics_volume",
"PARTICLE_PT_hair_shape",
"PARTICLE_PT_physics",
"PARTICLE_PT_physics_boids_battle",
"PARTICLE_PT_physics_boids_misc",
"PARTICLE_PT_physics_boids_movement",
"PARTICLE_PT_physics_deflection",
"PARTICLE_PT_physics_fluid_advanced",
"PARTICLE_PT_physics_fluid_interaction",
"PARTICLE_PT_physics_fluid_springs",
"PARTICLE_PT_physics_fluid_springs_advanced",
"PARTICLE_PT_physics_fluid_springs_viscoelastic",
"PARTICLE_PT_physics_forces",
"PARTICLE_PT_physics_integration",
"PARTICLE_PT_physics_relations",
"PARTICLE_PT_render",
"PARTICLE_PT_render_collection",
"PARTICLE_PT_render_collection_use_count",
"PARTICLE_PT_render_extra",
"PARTICLE_PT_render_object",
"PARTICLE_PT_render_path",
"PARTICLE_PT_render_path_timing",
"PARTICLE_PT_rotation",
"PARTICLE_PT_rotation_angular_velocity",
"PARTICLE_PT_textures",
"PARTICLE_PT_velocity",
"PARTICLE_PT_vertexgroups",
]
)
# Physics.
# (Why these require renderer compatibility settings
# is beyond me. But they do.)
register_engine_with_panels(
"properties_physics_cloth",
[
"PHYSICS_PT_cloth",
"PHYSICS_PT_cloth_cache",
"PHYSICS_PT_cloth_collision",
"PHYSICS_PT_cloth_damping",
"PHYSICS_PT_cloth_field_weights",
"PHYSICS_PT_cloth_internal_springs",
"PHYSICS_PT_cloth_object_collision",
"PHYSICS_PT_cloth_physical_properties",
"PHYSICS_PT_cloth_pressure",
"PHYSICS_PT_cloth_property_weights",
"PHYSICS_PT_cloth_self_collision",
"PHYSICS_PT_cloth_shape",
"PHYSICS_PT_cloth_stiffness",
],
)
register_engine_with_panels(
"properties_physics_common",
[
"PHYSICS_PT_add",
]
)
register_engine_with_panels(
"properties_physics_dynamicpaint",
[
"PHYSICS_PT_dp_brush_source",
"PHYSICS_PT_dp_brush_source_color_ramp",
"PHYSICS_PT_dp_brush_velocity",
"PHYSICS_PT_dp_brush_velocity_color_ramp",
"PHYSICS_PT_dp_brush_velocity_smudge",
"PHYSICS_PT_dp_brush_wave",
"PHYSICS_PT_dp_cache",
"PHYSICS_PT_dp_canvas_initial_color",
"PHYSICS_PT_dp_canvas_output",
"PHYSICS_PT_dp_canvas_output_paintmaps",
"PHYSICS_PT_dp_canvas_output_wetmaps",
"PHYSICS_PT_dp_effects",
"PHYSICS_PT_dp_effects_drip",
"PHYSICS_PT_dp_effects_drip_weights",
"PHYSICS_PT_dp_effects_shrink",
"PHYSICS_PT_dp_effects_spread",
"PHYSICS_PT_dp_surface_canvas",
"PHYSICS_PT_dp_surface_canvas_paint_dissolve",
"PHYSICS_PT_dp_surface_canvas_paint_dry",
"PHYSICS_PT_dynamic_paint",
"PHYSICS_PT_dynamic_paint_settings",
]
)
register_engine_with_panels(
"properties_physics_field",
[
"PHYSICS_PT_collision",
"PHYSICS_PT_collision_particle",
"PHYSICS_PT_collision_softbody",
"PHYSICS_PT_field",
"PHYSICS_PT_field_falloff",
"PHYSICS_PT_field_falloff_angular",
"PHYSICS_PT_field_falloff_radial",
"PHYSICS_PT_field_settings",
"PHYSICS_PT_field_settings_kink",
"PHYSICS_PT_field_settings_texture_select",
]
)
register_engine_with_panels(
"properties_physics_fluid",
[
"PHYSICS_PT_adaptive_domain",
"PHYSICS_PT_borders",
"PHYSICS_PT_cache",
"PHYSICS_PT_collections",
"PHYSICS_PT_diffusion",
"PHYSICS_PT_export",
"PHYSICS_PT_field_weights",
"PHYSICS_PT_fire",
"PHYSICS_PT_flow_initial_velocity",
"PHYSICS_PT_flow_source",
"PHYSICS_PT_flow_texture",
"PHYSICS_PT_fluid",
"PHYSICS_PT_guide",
"PHYSICS_PT_liquid",
"PHYSICS_PT_mesh",
"PHYSICS_PT_noise",
"PHYSICS_PT_particles",
"PHYSICS_PT_settings",
"PHYSICS_PT_smoke",
"PHYSICS_PT_smoke_dissolve",
"PHYSICS_PT_viscosity",
]
)
register_engine_with_panels(
"properties_physics_rigidbody",
[
"PHYSICS_PT_rigid_body",
"PHYSICS_PT_rigid_body_collisions",
"PHYSICS_PT_rigid_body_collisions_collections",
"PHYSICS_PT_rigid_body_collisions_sensitivity",
"PHYSICS_PT_rigid_body_collisions_surface",
"PHYSICS_PT_rigid_body_dynamics",
"PHYSICS_PT_rigid_body_dynamics_deactivation",
"PHYSICS_PT_rigid_body_settings",
]
)
register_engine_with_panels(
"properties_physics_rigidbody_constraint",
[
"PHYSICS_PT_rigid_body_constraint",
"PHYSICS_PT_rigid_body_constraint_limits",
"PHYSICS_PT_rigid_body_constraint_limits_angular",
"PHYSICS_PT_rigid_body_constraint_limits_linear",
"PHYSICS_PT_rigid_body_constraint_motor",
"PHYSICS_PT_rigid_body_constraint_motor_angular",
"PHYSICS_PT_rigid_body_constraint_motor_linear",
"PHYSICS_PT_rigid_body_constraint_objects",
"PHYSICS_PT_rigid_body_constraint_override_iterations",
"PHYSICS_PT_rigid_body_constraint_settings",
"PHYSICS_PT_rigid_body_constraint_springs",
"PHYSICS_PT_rigid_body_constraint_springs_angular",
"PHYSICS_PT_rigid_body_constraint_springs_linear",
]
)
register_engine_with_panels(
"properties_physics_softbody",
[
"PHYSICS_PT_softbody",
"PHYSICS_PT_softbody_cache",
"PHYSICS_PT_softbody_collision",
"PHYSICS_PT_softbody_edge",
"PHYSICS_PT_softbody_edge_aerodynamics",
"PHYSICS_PT_softbody_edge_stiffness",
"PHYSICS_PT_softbody_field_weights",
"PHYSICS_PT_softbody_goal",
"PHYSICS_PT_softbody_goal_settings",
"PHYSICS_PT_softbody_goal_strengths",
"PHYSICS_PT_softbody_object",
"PHYSICS_PT_softbody_simulation",
"PHYSICS_PT_softbody_solver",
"PHYSICS_PT_softbody_solver_diagnostics",
"PHYSICS_PT_softbody_solver_helpers",
]
)
#--------------------------------------------------------------
class PsychopathPanel():
COMPAT_ENGINES = {'PSYCHOPATH_RENDER'}
@ -20,7 +293,7 @@ class PsychopathPanel():
@classmethod
def poll(cls, context):
rd = context.scene.render
return (rd.use_game_engine is False) and (rd.engine in cls.COMPAT_ENGINES)
return rd.engine in cls.COMPAT_ENGINES
class RENDER_PT_psychopath_render_settings(PsychopathPanel, bpy.types.Panel):
@ -47,7 +320,7 @@ class RENDER_PT_psychopath_render_settings(PsychopathPanel, bpy.types.Panel):
col.prop(scene.psychopath, "shutter_end")
col.label(text="Performance")
col.prop(scene.psychopath, "max_samples_per_bucket")
col.prop(scene.psychopath, "bucket_size")
class RENDER_PT_psychopath_export_settings(PsychopathPanel, bpy.types.Panel):
@ -78,29 +351,7 @@ class WORLD_PT_psychopath_background(PsychopathPanel, bpy.types.Panel):
layout = self.layout
world = context.world
layout.prop(world, "horizon_color", text="Color")
class DATA_PT_psychopath_camera_dof(PsychopathPanel, bpy.types.Panel):
bl_label = "Depth of Field"
bl_space_type = 'PROPERTIES'
bl_region_type = 'WINDOW'
bl_context = "data"
@classmethod
def poll(cls, context):
engine = context.scene.render.engine
return context.camera and PsychopathPanel.poll(context)
def draw(self, context):
ob = context.active_object
layout = self.layout
col = layout.column()
col.prop(ob.data, "dof_object")
col.prop(ob.data, "dof_distance")
col.prop(ob.data.psychopath, "aperture_radius")
layout.prop(world.psychopath, "background_color", text="Color")
class DATA_PT_psychopath_lamp(PsychopathPanel, bpy.types.Panel):
@ -112,7 +363,7 @@ class DATA_PT_psychopath_lamp(PsychopathPanel, bpy.types.Panel):
@classmethod
def poll(cls, context):
engine = context.scene.render.engine
return context.lamp and PsychopathPanel.poll(context)
return context.active_object.type == 'LIGHT' and PsychopathPanel.poll(context)
def draw(self, context):
ob = context.active_object
@ -144,22 +395,23 @@ class DATA_PT_psychopath_area_lamp(PsychopathPanel, bpy.types.Panel):
@classmethod
def poll(cls, context):
lamp = context.lamp
engine = context.scene.render.engine
return (lamp and lamp.type == 'AREA') and (engine in cls.COMPAT_ENGINES)
return context.active_object.type == 'LIGHT' \
and context.active_object.data.type == 'AREA' \
and (engine in cls.COMPAT_ENGINES)
def draw(self, context):
layout = self.layout
lamp = context.lamp
lamp = context.active_object.data
col = layout.column()
col.row().prop(lamp, "shape", expand=True)
sub = col.row(align=True)
if lamp.shape == 'SQUARE':
if lamp.shape == 'SQUARE' or lamp.shape == 'DISK':
sub.prop(lamp, "size")
elif lamp.shape == 'RECTANGLE':
elif lamp.shape == 'RECTANGLE' or lamp.shape == 'ELLIPSE':
sub.prop(lamp, "size", text="Size X")
sub.prop(lamp, "size_y", text="Size Y")
@ -208,10 +460,10 @@ class MATERIAL_PT_psychopath_context_material(PsychopathPanel, bpy.types.Panel):
row.template_list("MATERIAL_UL_matslots", "", ob, "material_slots", ob, "active_material_index", rows=1)
col = row.column(align=True)
col.operator("object.material_slot_add", icon='ZOOMIN', text="")
col.operator("object.material_slot_remove", icon='ZOOMOUT', text="")
col.operator("object.material_slot_add", icon='ADD', text="")
col.operator("object.material_slot_remove", icon='REMOVE', text="")
col.menu("MATERIAL_MT_specials", icon='DOWNARROW_HLT', text="")
col.menu("MATERIAL_MT_context_menu", icon='DOWNARROW_HLT', text="")
if ob.mode == 'EDIT':
row = layout.row(align=True)
@ -219,7 +471,7 @@ class MATERIAL_PT_psychopath_context_material(PsychopathPanel, bpy.types.Panel):
row.operator("object.material_slot_select", text="Select")
row.operator("object.material_slot_deselect", text="Deselect")
split = layout.split(percentage=0.65)
split = layout.split(factor=0.65)
if ob:
split.template_ID(ob, "active_material", new="material.new")
@ -271,7 +523,6 @@ def register():
bpy.utils.register_class(RENDER_PT_psychopath_render_settings)
bpy.utils.register_class(RENDER_PT_psychopath_export_settings)
bpy.utils.register_class(WORLD_PT_psychopath_background)
bpy.utils.register_class(DATA_PT_psychopath_camera_dof)
bpy.utils.register_class(DATA_PT_psychopath_mesh)
bpy.utils.register_class(DATA_PT_psychopath_lamp)
bpy.utils.register_class(DATA_PT_psychopath_area_lamp)
@ -282,8 +533,7 @@ def unregister():
bpy.utils.unregister_class(RENDER_PT_psychopath_render_settings)
bpy.utils.unregister_class(RENDER_PT_psychopath_export_settings)
bpy.utils.unregister_class(WORLD_PT_psychopath_background)
bpy.utils.unregister_class(DATA_PT_psychopath_camera_dof)
bpy.utils.register_class(DATA_PT_psychopath_mesh)
bpy.utils.unregister_class(DATA_PT_psychopath_mesh)
bpy.utils.unregister_class(DATA_PT_psychopath_lamp)
bpy.utils.unregister_class(DATA_PT_psychopath_area_lamp)
bpy.utils.unregister_class(MATERIAL_PT_psychopath_context_material)

View File

@ -1,52 +1,33 @@
import bpy
from math import degrees, tan, atan
from math import degrees, sin, asin, tan, atan
from mathutils import Vector, Matrix
from .util import escape_name, mat2str, ExportCancelled
class World:
def __init__(self, render_engine, scene, visible_layers, aspect_ratio):
def __init__(self, render_engine, depsgraph):
scene = depsgraph.scene
self.background_shader = BackgroundShader(render_engine, scene.world)
self.camera = Camera(render_engine, scene.camera, aspect_ratio)
self.lights = []
# Collect infinite-extent light sources.
# TODO: also get sun lamps inside group instances.
for ob in scene.objects:
if ob.type == 'LAMP' and ob.data.type == 'SUN':
name = escape_name(ob.name)
self.lights += [DistantDiskLamp(ob, name)]
def take_sample(self, render_engine, scene, time):
self.camera.take_sample(render_engine, scene, time)
def take_sample(self, render_engine, depsgraph, time):
if render_engine.test_break():
raise ExportCancelled()
self.background_shader.take_sample(render_engine, depsgraph, time)
for light in self.lights:
# Check if render is cancelled
if render_engine.test_break():
raise ExportCancelled()
light.take_sample(render_engine, scene, time)
def cleanup(self):
pass
def export(self, render_engine, w):
self.camera.export(render_engine, w)
w.write("World {\n")
w.indent()
self.background_shader.export(render_engine, w)
for light in self.lights:
light.export(render_engine, w)
w.unindent()
w.write("}\n")
def cleanup(self):
# For future use. This is run by the calling code when finished,
# even if export did not succeed.
pass
#================================================================
class Camera:
def __init__(self, render_engine, ob, aspect_ratio):
@ -58,31 +39,43 @@ class Camera:
self.focal_distances = []
self.xforms = []
def take_sample(self, render_engine, scene, time):
def take_sample(self, render_engine, depsgraph, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.ob.name, time))
# Fov
if self.aspect_ratio >= 1.0:
self.fovs += [degrees(self.ob.data.angle)]
# Fov.
# TODO: account for the various ways sensor size can be specified.
x_extent = depsgraph.scene.render.resolution_x / depsgraph.scene.render.pixel_aspect_x
y_extent = depsgraph.scene.render.resolution_y / depsgraph.scene.render.pixel_aspect_y
aspect_ratio = x_extent / y_extent
if aspect_ratio >= 1.0:
self.fovs += [degrees(self.ob.data.angle_x)]
else:
self.fovs += [degrees(2.0 * atan(tan(self.ob.data.angle * 0.5) * self.aspect_ratio))]
self.fovs += [degrees(2.0 * atan(tan(self.ob.data.angle_x * 0.5) * aspect_ratio))]
# Aperture radius
self.aperture_radii += [self.ob.data.psychopath.aperture_radius]
if self.ob.data.dof.use_dof:
# Aperture radius.
radius = self.ob.data.lens / 2000.0 / self.ob.data.dof.aperture_fstop
self.aperture_radii += [radius]
# Dof distance
if self.ob.data.dof_object == None:
self.focal_distances += [self.ob.data.dof_distance]
# Dof distance
if self.ob.data.dof.focus_object == None:
self.focal_distances += [self.ob.data.dof.focus_distance]
else:
# TODO: implement DoF object tracking here
self.focal_distances += [0.0]
print("WARNING: DoF object tracking not yet implemented.")
else:
# TODO: implement DoF object tracking here
self.focal_distances += [0.0]
print("WARNING: DoF object tracking not yet implemented.")
self.aperture_radii += [0.0]
self.focal_distances += [1.0]
# Transform
mat = self.ob.matrix_world.copy()
matz = Matrix()
matz[2][2] = -1
self.xforms += [mat * matz]
self.xforms += [(mat @ matz).inverted()]
def cleanup(self):
pass
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
@ -108,55 +101,24 @@ class Camera:
class BackgroundShader:
def __init__(self, render_engine, world):
self.world = world
self.color = []
def take_sample(self, render_engine, depsgraph, time):
if self.world != None:
self.color = (world.horizon_color[0], world.horizon_color[1], world.horizon_color[2])
self.color += [(
self.world.psychopath.background_color[0],
self.world.psychopath.background_color[1],
self.world.psychopath.background_color[2],
)]
def export(self, render_engine, w):
if self.world != None:
w.write("BackgroundShader {\n")
w.indent()
w.write("Type [Color]\n")
w.write("Color [rec709, %f %f %f]\n" % self.color)
for c in self.color:
w.write("Color [rec709, %f %f %f]\n" % c)
w.unindent()
w.write("}\n")
class DistantDiskLamp:
def __init__(self, ob, name):
self.ob = ob
self.name = name
self.time_col = []
self.time_dir = []
self.time_rad = []
def take_sample(self, render_engine, scene, time):
render_engine.update_stats("", "Psychopath: Collecting '{}' at time {}".format(self.ob.name, time))
self.time_dir += [tuple(self.ob.matrix_world.to_3x3() * Vector((0, 0, -1)))]
if self.ob.data.psychopath.color_type == 'Rec709':
self.time_col += [('Rec709', self.ob.data.color * self.ob.data.energy)]
elif self.ob.data.psychopath.color_type == 'Blackbody':
self.time_col += [('Blackbody', self.ob.data.psychopath.color_blackbody_temp, self.ob.data.energy)]
elif self.ob.data.psychopath.color_type == 'ColorTemperature':
self.time_col += [('ColorTemperature', self.ob.data.psychopath.color_blackbody_temp, self.ob.data.energy)]
self.time_rad += [self.ob.data.shadow_soft_size]
def export(self, render_engine, w):
render_engine.update_stats("", "Psychopath: Exporting %s" % self.ob.name)
w.write("DistantDiskLight $%s {\n" % self.name)
w.indent()
for direc in self.time_dir:
w.write("Direction [%f %f %f]\n" % (direc[0], direc[1], direc[2]))
for col in self.time_col:
if col[0] == 'Rec709':
w.write("Color [rec709, %f %f %f]\n" % (col[1][0], col[1][1], col[1][2]))
elif col[0] == 'Blackbody':
w.write("Color [blackbody, %f %f]\n" % (col[1], col[2]))
elif col[0] == 'ColorTemperature':
w.write("Color [color_temperature, %f %f]\n" % (col[1], col[2]))
for rad in self.time_rad:
w.write("Radius [%f]\n" % rad)
w.unindent()
w.write("}\n")

View File

@ -67,7 +67,91 @@ impl<'a> BVH<'a> {
self.depth
}
pub fn traverse<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
pub fn traverse<T, F>(&self, ray: &mut AccelRay, objects: &[T], mut obj_ray_test: F)
where
F: FnMut(&T, &mut AccelRay),
{
if self.root.is_none() {
return;
}
let mut timer = Timer::new();
let mut trav_time: f64 = 0.0;
let mut node_tests: u64 = 0;
let ray_sign = [
ray.dir_inv.x() >= 0.0,
ray.dir_inv.y() >= 0.0,
ray.dir_inv.z() >= 0.0,
];
// +2 of max depth for root and last child
let mut node_stack = [self.root.unwrap(); BVH_MAX_DEPTH + 2];
let mut stack_ptr = 1;
while stack_ptr > 0 && !ray.is_done {
node_tests += 1;
match *node_stack[stack_ptr] {
BVHNode::Internal {
children,
bounds_start,
bounds_len,
split_axis,
} => {
let bounds =
unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) };
let is_hit = lerp_slice(bounds, ray.time).intersect_accel_ray(&ray);
if is_hit {
if ray_sign[split_axis as usize] {
node_stack[stack_ptr] = children.1;
node_stack[stack_ptr + 1] = children.0;
} else {
node_stack[stack_ptr] = children.0;
node_stack[stack_ptr + 1] = children.1;
}
stack_ptr += 1;
} else {
stack_ptr -= 1;
}
}
BVHNode::Leaf {
object_range,
bounds_start,
bounds_len,
} => {
let bounds =
unsafe { std::slice::from_raw_parts(bounds_start, bounds_len as usize) };
let is_hit = lerp_slice(bounds, ray.time).intersect_accel_ray(&ray);
trav_time += timer.tick() as f64;
if is_hit {
for obj in &objects[object_range.0..object_range.1] {
obj_ray_test(obj, ray);
}
}
timer.tick();
stack_ptr -= 1;
}
}
}
trav_time += timer.tick() as f64;
ACCEL_TRAV_TIME.with(|att| {
let v = att.get();
att.set(v + trav_time);
});
ACCEL_NODE_RAY_TESTS.with(|anv| {
let v = anv.get();
anv.set(v + node_tests);
});
}
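A hypothetical call site for the single-ray traverse above (the `bvh`, `objects`, and `narrow_phase_hit_test` names are assumptions; the closure matches the `FnMut(&T, &mut AccelRay)` bound shown in the signature):

// Run the per-object test on everything in each leaf the ray reaches; the
// closure is expected to shorten the ray or mark it done so that later
// nodes can be culled by the `!ray.is_done` check.
bvh.traverse(&mut accel_ray, &objects, |obj, r| {
    narrow_phase_hit_test(obj, r);
});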
pub fn traverse_multi<T, F>(&self, rays: &mut [AccelRay], objects: &[T], mut obj_ray_test: F)
where
F: FnMut(&T, &mut [AccelRay]),
{

View File

@ -1,13 +1,7 @@
//! This BVH4 implementation is based on the ideas from the paper
//! "Efficient Ray Tracing Kernels for Modern CPU Architectures"
//! by Fuetterling et al.
#![allow(dead_code)]
use std::mem::{transmute, MaybeUninit};
use glam::BVec4A;
use kioku::Arena;
use crate::{
@ -16,7 +10,7 @@ use crate::{
boundable::Boundable,
lerp::lerp_slice,
math::Vector,
ray::{RayBatch, RayStack},
ray::{LocalRay, Ray},
};
use super::{
@ -25,6 +19,7 @@ use super::{
};
use bvh_order::{calc_traversal_code, SplitAxes, TRAVERSAL_TABLE};
use rmath::wide4::Float4;
pub fn ray_code(dir: Vector) -> usize {
let ray_sign_is_neg = [dir.x() < 0.0, dir.y() < 0.0, dir.z() < 0.0];
@ -33,6 +28,8 @@ pub fn ray_code(dir: Vector) -> usize {
+ ((ray_sign_is_neg[2] as usize) << 2)
}
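A quick worked example of ray_code above, assuming the lines elided from this hunk pack the x and y sign bits into bits 0 and 1 (matching the shown z term):

// dir = (-1.0, 2.0, -3.0): x and z are negative, y is not, so bits 0 and 2
// are set and the code is 1 + 0 + 4 = 5.
let code = ray_code(Vector::new(-1.0, 2.0, -3.0)); // == 5 under that assumption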
//-------------------------------------------------------------
#[derive(Copy, Clone, Debug)]
pub struct BVH4<'a> {
root: Option<&'a BVH4Node<'a>>,
@ -98,9 +95,9 @@ impl<'a> BVH4<'a> {
self.depth
}
pub fn traverse<F>(&self, rays: &mut RayBatch, ray_stack: &mut RayStack, mut obj_ray_test: F)
pub fn traverse<F>(&self, ray: &mut Ray, local_ray: &LocalRay, mut obj_ray_test: F)
where
F: FnMut(std::ops::Range<usize>, &mut RayBatch, &mut RayStack),
F: FnMut(std::ops::Range<usize>, &mut Ray),
{
if self.root.is_none() {
return;
@ -108,55 +105,48 @@ impl<'a> BVH4<'a> {
let mut node_tests: u64 = 0;
let traversal_table =
&TRAVERSAL_TABLE[ray_code(rays.dir_inv_local(ray_stack.next_task_ray_idx(0)))];
// SIMD-ready ray data: each axis of the origin and inverse direction is
// broadcast across all four lanes, so a single BBox4 test covers all four
// children of a node at once.
let orig4 = [
local_ray.orig.0.aaaa(),
local_ray.orig.0.bbbb(),
local_ray.orig.0.cccc(),
];
let dir_inv4 = [
local_ray.dir_inv.0.aaaa(),
local_ray.dir_inv.0.bbbb(),
local_ray.dir_inv.0.cccc(),
];
let mut max_t4 = Float4::splat(ray.max_t);
// +2 of max depth for root and last child
let mut node_stack = [self.root.unwrap(); (BVH_MAX_DEPTH * 3) + 2];
let mut stack_ptr = 1;
while stack_ptr > 0 {
let traversal_table = &TRAVERSAL_TABLE[ray_code(local_ray.dir_inv)];
while stack_ptr > 0 && !ray.is_done() {
match *node_stack[stack_ptr] {
BVH4Node::Internal {
bounds,
children,
traversal_code,
} => {
node_tests += ray_stack.ray_count_in_next_task() as u64;
let mut all_hits = BVec4A::default();
node_tests += 1;
// Ray testing
ray_stack.pop_do_next_task_and_push_rays(children.len(), |ray_idx| {
if rays.is_done(ray_idx) {
BVec4A::default()
} else {
let hits = if bounds.len() == 1 {
bounds[0].intersect_ray(
rays.orig_local(ray_idx),
rays.dir_inv_local(ray_idx),
rays.max_t(ray_idx),
)
} else {
lerp_slice(bounds, rays.time(ray_idx)).intersect_ray(
rays.orig_local(ray_idx),
rays.dir_inv_local(ray_idx),
rays.max_t(ray_idx),
)
};
all_hits |= hits;
hits
}
});
let hits = if bounds.len() == 1 {
bounds[0].intersect_ray(orig4, dir_inv4, max_t4)
} else {
lerp_slice(bounds, ray.time).intersect_ray(orig4, dir_inv4, max_t4)
};
// If there were any intersections, create tasks.
if all_hits.any() {
// Push child nodes onto the stack if there were any hits.
if hits.any() {
let order_code = traversal_table[traversal_code as usize];
let hits = hits.to_bools();
let mut lane_count = 0;
let mut i = children.len() as u8;
while i > 0 {
i -= 1;
for i in (0..children.len() as u8).rev() {
let child_i = ((order_code >> (i * 2)) & 3) as usize;
if ray_stack.push_lane_to_task(child_i) {
if hits[child_i] {
node_stack[stack_ptr + lane_count] = &children[child_i];
lane_count += 1;
}
@ -169,8 +159,10 @@ impl<'a> BVH4<'a> {
}
BVH4Node::Leaf { object_range } => {
// Do the ray tests.
obj_ray_test(object_range.0..object_range.1, rays, ray_stack);
obj_ray_test(object_range.0..object_range.1, ray);
// Update SIMD max_t in case there was a hit.
max_t4 = Float4::splat(ray.max_t);
stack_ptr -= 1;
}

View File

@ -5,10 +5,9 @@ use std::{
mem::MaybeUninit,
};
use crate::{
hash::hash_u64,
lerp::{lerp_slice, Lerp},
};
use rrand::mix64_seed;
use crate::lerp::{lerp_slice, Lerp};
/// Selects an item from a slice based on a weighting function and a
/// number (n) between 0.0 and 1.0. Returns the index of the selected
@ -209,7 +208,7 @@ where
let mut seed = n as u64;
loop {
let i = left + (hash_u64(right as u64, seed) as usize % (right - left));
let i = left + (mix64_seed(right as u64, seed) as usize % (right - left));
slc.swap(i, right - 1);
let ii = left + {

View File

@ -7,7 +7,7 @@ use std::{
use crate::{
lerp::{lerp, lerp_slice, Lerp},
math::{Point, Transform, Vector},
math::{fast_minf32, Point, Vector, Xform},
};
const BBOX_MAXT_ADJUST: f32 = 1.000_000_24;
@ -41,21 +41,23 @@ impl BBox {
// Returns whether the given ray intersects with the bbox.
pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> bool {
// Calculate slab intersections
let t1 = (self.min.co - orig.co) * dir_inv.co;
let t2 = (self.max.co - orig.co) * dir_inv.co;
let t1 = (self.min.0 - orig.0) * dir_inv.0;
let t2 = (self.max.0 - orig.0) * dir_inv.0;
// Find the far and near intersection
let far_t = t1.max(t2).extend(std::f32::INFINITY);
let near_t = t1.min(t2).extend(0.0);
let far_hit_t = (far_t.min_element() * BBOX_MAXT_ADJUST).min(max_t);
let far_t = t1.max(t2).set_d(std::f32::INFINITY);
let near_t = t1.min(t2).set_d(0.0);
let far_hit_t = fast_minf32(far_t.min_element() * BBOX_MAXT_ADJUST, max_t);
let near_hit_t = near_t.max_element();
// Did we hit?
near_hit_t <= far_hit_t
}
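A quick worked check of the slab test above, with made-up values:

// Ray from the origin with direction (1, 1, 1), so dir_inv is also (1, 1, 1).
let bb = BBox::from_points(Point::new(1.0, 1.0, 1.0), Point::new(2.0, 2.0, 2.0));
let hit = bb.intersect_ray(Point::new(0.0, 0.0, 0.0), Vector::new(1.0, 1.0, 1.0), 100.0);
// t1 = (1, 1, 1) and t2 = (2, 2, 2), so near_hit_t = 1.0 and far_hit_t ~= 2.0;
// 1.0 <= 2.0, so `hit` is true.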
// Creates a new BBox transformed into a different space.
pub fn transformed(&self, xform: Transform) -> BBox {
// Creates a new BBox transformed from its local space to the
// given space.
#[must_use]
pub fn xform(&self, xform: &Xform) -> BBox {
// BBox corners
let vs = [
Point::new(self.min.x(), self.min.y(), self.min.z()),
@ -71,7 +73,7 @@ impl BBox {
// Transform BBox corners and make new bbox
let mut b = BBox::new();
for v in &vs {
let v = *v * xform;
let v = v.xform(xform);
b.min = v.min(b.min);
b.max = v.max(b.max);
}
@ -103,12 +105,8 @@ impl BitOr for BBox {
fn bitor(self, rhs: BBox) -> BBox {
BBox::from_points(
Point {
co: self.min.co.min(rhs.min.co),
},
Point {
co: self.max.co.max(rhs.max.co),
},
Point(self.min.0.min(rhs.min.0)),
Point(self.max.0.max(rhs.max.0)),
)
}
}
@ -124,14 +122,7 @@ impl BitOr<Point> for BBox {
type Output = BBox;
fn bitor(self, rhs: Point) -> BBox {
BBox::from_points(
Point {
co: self.min.co.min(rhs.co),
},
Point {
co: self.max.co.max(rhs.co),
},
)
BBox::from_points(Point(self.min.0.min(rhs.0)), Point(self.max.0.max(rhs.0)))
}
}
@ -150,7 +141,7 @@ impl Lerp for BBox {
}
}
pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Transform], bbs_out: &mut Vec<BBox>) {
pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Xform], bbs_out: &mut Vec<BBox>) {
bbs_out.clear();
// Transform the bounding boxes
@ -158,17 +149,17 @@ pub fn transform_bbox_slice_from(bbs_in: &[BBox], xforms: &[Transform], bbs_out:
bbs_out.extend_from_slice(bbs_in);
} else if bbs_in.len() == xforms.len() {
for (bb, xf) in Iterator::zip(bbs_in.iter(), xforms.iter()) {
bbs_out.push(bb.transformed(xf.inverse()));
bbs_out.push(bb.xform(&xf));
}
} else if bbs_in.len() > xforms.len() {
let s = (bbs_in.len() - 1) as f32;
for (i, bb) in bbs_in.iter().enumerate() {
bbs_out.push(bb.transformed(lerp_slice(xforms, i as f32 / s).inverse()));
bbs_out.push(bb.xform(&lerp_slice(xforms, i as f32 / s)));
}
} else if bbs_in.len() < xforms.len() {
let s = (xforms.len() - 1) as f32;
for (i, xf) in xforms.iter().enumerate() {
bbs_out.push(lerp_slice(bbs_in, i as f32 / s).transformed(xf.inverse()));
bbs_out.push(lerp_slice(bbs_in, i as f32 / s).xform(&xf));
}
}
}

View File

@ -6,19 +6,18 @@ use std::ops::{BitOr, BitOrAssign};
use crate::{
bbox::BBox,
lerp::{lerp, Lerp},
math::{Point, Vector},
};
use glam::{BVec4A, Vec4};
use rmath::wide4::{Bool4, Float4};
const BBOX_MAXT_ADJUST: f32 = 1.000_000_24;
/// A SIMD set of 4 3D axis-aligned bounding boxes.
#[derive(Debug, Copy, Clone)]
pub struct BBox4 {
pub x: (Vec4, Vec4), // (min, max)
pub y: (Vec4, Vec4), // (min, max)
pub z: (Vec4, Vec4), // (min, max)
pub x: (Float4, Float4), // (min, max)
pub y: (Float4, Float4), // (min, max)
pub z: (Float4, Float4), // (min, max)
}
impl BBox4 {
@ -26,16 +25,16 @@ impl BBox4 {
pub fn new() -> BBox4 {
BBox4 {
x: (
Vec4::splat(std::f32::INFINITY),
Vec4::splat(std::f32::NEG_INFINITY),
Float4::splat(std::f32::INFINITY),
Float4::splat(std::f32::NEG_INFINITY),
),
y: (
Vec4::splat(std::f32::INFINITY),
Vec4::splat(std::f32::NEG_INFINITY),
Float4::splat(std::f32::INFINITY),
Float4::splat(std::f32::NEG_INFINITY),
),
z: (
Vec4::splat(std::f32::INFINITY),
Vec4::splat(std::f32::NEG_INFINITY),
Float4::splat(std::f32::INFINITY),
Float4::splat(std::f32::NEG_INFINITY),
),
}
}
@ -45,38 +44,30 @@ impl BBox4 {
pub fn from_bboxes(b1: BBox, b2: BBox, b3: BBox, b4: BBox) -> BBox4 {
BBox4 {
x: (
Vec4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
Vec4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()),
Float4::new(b1.min.x(), b2.min.x(), b3.min.x(), b4.min.x()),
Float4::new(b1.max.x(), b2.max.x(), b3.max.x(), b4.max.x()),
),
y: (
Vec4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
Vec4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()),
Float4::new(b1.min.y(), b2.min.y(), b3.min.y(), b4.min.y()),
Float4::new(b1.max.y(), b2.max.y(), b3.max.y(), b4.max.y()),
),
z: (
Vec4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
Vec4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()),
Float4::new(b1.min.z(), b2.min.z(), b3.min.z(), b4.min.z()),
Float4::new(b1.max.z(), b2.max.z(), b3.max.z(), b4.max.z()),
),
}
}
// Returns whether the given ray intersects with the bboxes.
pub fn intersect_ray(&self, orig: Point, dir_inv: Vector, max_t: f32) -> BVec4A {
// Get the ray data into SIMD format.
let ro_x = Vec4::splat(orig.co[0]);
let ro_y = Vec4::splat(orig.co[1]);
let ro_z = Vec4::splat(orig.co[2]);
let rdi_x = Vec4::splat(dir_inv.co[0]);
let rdi_y = Vec4::splat(dir_inv.co[1]);
let rdi_z = Vec4::splat(dir_inv.co[2]);
let max_t = Vec4::splat(max_t);
#[inline(always)]
pub fn intersect_ray(&self, orig: [Float4; 3], dir_inv: [Float4; 3], max_t: Float4) -> Bool4 {
// Slab tests
let t1_x = (self.x.0 - ro_x) * rdi_x;
let t1_y = (self.y.0 - ro_y) * rdi_y;
let t1_z = (self.z.0 - ro_z) * rdi_z;
let t2_x = (self.x.1 - ro_x) * rdi_x;
let t2_y = (self.y.1 - ro_y) * rdi_y;
let t2_z = (self.z.1 - ro_z) * rdi_z;
let t1_x = (self.x.0 - orig[0]) * dir_inv[0];
let t1_y = (self.y.0 - orig[1]) * dir_inv[1];
let t1_z = (self.z.0 - orig[2]) * dir_inv[2];
let t2_x = (self.x.1 - orig[0]) * dir_inv[0];
let t2_y = (self.y.1 - orig[1]) * dir_inv[1];
let t2_z = (self.z.1 - orig[2]) * dir_inv[2];
// Get the far and near t hits for each axis.
let t_far_x = t1_x.max(t2_x);
@ -87,10 +78,11 @@ impl BBox4 {
let t_near_z = t1_z.min(t2_z);
// Calculate over-all far t hit.
let far_t = (t_far_x.min(t_far_y.min(t_far_z)) * Vec4::splat(BBOX_MAXT_ADJUST)).min(max_t);
let far_t =
(t_far_x.min(t_far_y.min(t_far_z)) * Float4::splat(BBOX_MAXT_ADJUST)).min(max_t);
// Calculate over-all near t hit.
let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Vec4::splat(0.0)));
let near_t = t_near_x.max(t_near_y).max(t_near_z.max(Float4::splat(0.0)));
// Hit results
near_t.cmplt(far_t)

View File

@ -1,31 +1,28 @@
#![allow(dead_code)]
use kioku::Arena;
use crate::{
lerp::lerp_slice,
math::{Point, Transform, Vector},
math::{Point, Vector, Xform},
ray::Ray,
sampling::square_to_circle,
};
#[derive(Copy, Clone, Debug)]
pub struct Camera<'a> {
transforms: &'a [Transform],
fovs: &'a [f32],
tfovs: &'a [f32],
aperture_radii: &'a [f32],
focus_distances: &'a [f32],
#[derive(Debug, Clone)]
pub struct Camera {
transforms: Vec<Xform>,
fovs: Vec<f32>,
tfovs: Vec<f32>,
aperture_radii: Vec<f32>,
focus_distances: Vec<f32>,
}
impl<'a> Camera<'a> {
impl Camera {
pub fn new(
arena: &'a Arena,
transforms: &[Transform],
transforms: &[Xform],
fovs: &[f32],
mut aperture_radii: &[f32],
mut focus_distances: &[f32],
) -> Camera<'a> {
) -> Camera {
assert!(!transforms.is_empty(), "Camera has no transform(s)!");
assert!(!fovs.is_empty(), "Camera has no fov(s)!");
@ -63,20 +60,20 @@ impl<'a> Camera<'a> {
.collect();
Camera {
transforms: arena.copy_slice(&transforms),
fovs: arena.copy_slice(&fovs),
tfovs: arena.copy_slice(&tfovs),
aperture_radii: arena.copy_slice(&aperture_radii),
focus_distances: arena.copy_slice(&focus_distances),
transforms: transforms.into(),
fovs: fovs.into(),
tfovs: tfovs.into(),
aperture_radii: aperture_radii.into(),
focus_distances: focus_distances.into(),
}
}
pub fn generate_ray(&self, x: f32, y: f32, time: f32, wavelength: f32, u: f32, v: f32) -> Ray {
// Get time-interpolated camera settings
let transform = lerp_slice(self.transforms, time);
let tfov = lerp_slice(self.tfovs, time);
let aperture_radius = lerp_slice(self.aperture_radii, time);
let focus_distance = lerp_slice(self.focus_distances, time);
let transform = lerp_slice(&self.transforms, time).to_full_fast().unwrap();
let tfov = lerp_slice(&self.tfovs, time);
let aperture_radius = lerp_slice(&self.aperture_radii, time);
let focus_distance = lerp_slice(&self.focus_distances, time);
// Ray origin
let orig = {
@ -92,12 +89,13 @@ impl<'a> Camera<'a> {
)
.normalized();
Ray {
orig: orig * transform,
dir: dir * transform,
time: time,
wavelength: wavelength,
max_t: std::f32::INFINITY,
}
Ray::new(
orig.xform_inv_fast(&transform),
dir.xform_inv_fast(&transform),
time,
wavelength,
std::f32::INFINITY,
false,
)
}
}
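A hypothetical call site for generate_ray above (all variable names are assumptions; the screen-coordinate and lens-sample conventions are not shown in this hunk):

// px, py: screen-space coordinates; lens_u, lens_v: aperture samples in [0, 1).
let ray = camera.generate_ray(px, py, time, wavelength, lens_u, lens_v);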

View File

@ -1,13 +1,8 @@
use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign};
pub use color::{
rec709_e_to_xyz, rec709_to_xyz, xyz_to_aces_ap0, xyz_to_aces_ap0_e, xyz_to_rec709,
xyz_to_rec709_e,
};
use crate::math::Float4;
use compact::fluv::fluv32;
use glam::Vec4;
use half::f16;
use spectral_upsampling::meng::{spectrum_xyz_to_p_4, EQUAL_ENERGY_REFLECTANCE};
use crate::{lerp::Lerp, math::fast_exp};
@ -31,10 +26,10 @@ fn nth_wavelength(hero_wavelength: f32, n: usize) -> f32 {
}
}
/// Returns all wavelengths of a hero wavelength set as a Vec4
/// Returns all wavelengths of a hero wavelength set as a Float4
#[inline(always)]
fn wavelengths(hero_wavelength: f32) -> Vec4 {
Vec4::new(
fn wavelengths(hero_wavelength: f32) -> Float4 {
Float4::new(
nth_wavelength(hero_wavelength, 0),
nth_wavelength(hero_wavelength, 1),
nth_wavelength(hero_wavelength, 2),
@ -94,7 +89,7 @@ impl Color {
} => {
SpectralSample::from_parts(
// TODO: make this SIMD
Vec4::new(
Float4::new(
plancks_law(temperature, wls[0]) * factor,
plancks_law(temperature, wls[1]) * factor,
plancks_law(temperature, wls[2]) * factor,
@ -109,7 +104,7 @@ impl Color {
} => {
SpectralSample::from_parts(
// TODO: make this SIMD
Vec4::new(
Float4::new(
plancks_law_normalized(temperature, wls[0]) * factor,
plancks_law_normalized(temperature, wls[1]) * factor,
plancks_law_normalized(temperature, wls[2]) * factor,
@ -386,7 +381,7 @@ fn plancks_law_normalized(temperature: f32, wavelength: f32) -> f32 {
#[derive(Copy, Clone, Debug)]
pub struct SpectralSample {
pub e: Vec4,
pub e: Float4,
hero_wavelength: f32,
}
@ -394,7 +389,7 @@ impl SpectralSample {
pub fn new(wavelength: f32) -> SpectralSample {
debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX);
SpectralSample {
e: Vec4::splat(0.0),
e: Float4::splat(0.0),
hero_wavelength: wavelength,
}
}
@ -403,12 +398,12 @@ impl SpectralSample {
pub fn from_value(value: f32, wavelength: f32) -> SpectralSample {
debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX);
SpectralSample {
e: Vec4::splat(value),
e: Float4::splat(value),
hero_wavelength: wavelength,
}
}
pub fn from_parts(e: Vec4, wavelength: f32) -> SpectralSample {
pub fn from_parts(e: Float4, wavelength: f32) -> SpectralSample {
debug_assert!(wavelength >= WL_MIN && wavelength <= WL_MAX);
SpectralSample {
e: e,
@ -599,30 +594,61 @@ impl DivAssign<f32> for XYZ {
/// the method in the paper "Physically Meaningful Rendering using Tristimulus
/// Colours" by Meng et al.
#[inline(always)]
fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Vec4) -> Vec4 {
spectrum_xyz_to_p_4(wavelengths, xyz) * Vec4::splat(1.0 / EQUAL_ENERGY_REFLECTANCE)
// aces_to_spectrum_p4(wavelengths, xyz_to_aces_ap0_e(xyz))
fn xyz_to_spectrum_4(xyz: (f32, f32, f32), wavelengths: Float4) -> Float4 {
use spectral_upsampling as su;
// su::meng::spectrum_xyz_to_p_4(wavelengths, xyz)
// * Float4::splat(1.0 / su::meng::EQUAL_ENERGY_REFLECTANCE)
su::jakob::rec2020_to_spectrum_p4(wavelengths, color::xyz_to_rec2020_e(xyz))
// su::jakob::rec709_to_spectrum_p4(wavelengths, color::xyz_to_rec709_e(xyz))
}
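A hypothetical call of the helper above (the XYZ triple and hero wavelength are illustrative; wavelengths() is the hero-wavelength helper defined earlier in this file):

// Upsample an XYZ triple into a 4-wavelength spectral sample.
let wls = wavelengths(550.0); // assumed to lie within [WL_MIN, WL_MAX]
let spectrum = xyz_to_spectrum_4((0.5, 0.5, 0.5), wls);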
/// Close analytic approximations of the CIE 1931 XYZ color curves.
/// From the paper "Simple Analytic Approximations to the CIE XYZ Color Matching
/// Functions" by Wyman et al.
pub fn x_1931(wavelength: f32) -> f32 {
let t1 = (wavelength - 442.0) * (if wavelength < 442.0 { 0.0624 } else { 0.0374 });
let t2 = (wavelength - 599.8) * (if wavelength < 599.8 { 0.0264 } else { 0.0323 });
let t3 = (wavelength - 501.1) * (if wavelength < 501.1 { 0.0490 } else { 0.0382 });
(0.362 * fast_exp(-0.5 * t1 * t1)) + (1.056 * fast_exp(-0.5 * t2 * t2))
- (0.065 * fast_exp(-0.5 * t3 * t3))
use colorbox::tables::cie_1931_xyz::{MAX_WAVELENGTH, MIN_WAVELENGTH, X};
let norm = 1.0 / (MAX_WAVELENGTH - MIN_WAVELENGTH);
let n = (wavelength - MIN_WAVELENGTH) * norm;
if n < 0.0 {
X[0]
} else if n > 1.0 {
*X.last().unwrap()
} else {
crate::lerp::lerp_slice(X, n)
}
}
pub fn y_1931(wavelength: f32) -> f32 {
let t1 = (wavelength - 568.8) * (if wavelength < 568.8 { 0.0213 } else { 0.0247 });
let t2 = (wavelength - 530.9) * (if wavelength < 530.9 { 0.0613 } else { 0.0322 });
(0.821 * fast_exp(-0.5 * t1 * t1)) + (0.286 * fast_exp(-0.5 * t2 * t2))
use colorbox::tables::cie_1931_xyz::{MAX_WAVELENGTH, MIN_WAVELENGTH, Y};
let norm = 1.0 / (MAX_WAVELENGTH - MIN_WAVELENGTH);
let n = (wavelength - MIN_WAVELENGTH) * norm;
if n < 0.0 {
Y[0]
} else if n > 1.0 {
*Y.last().unwrap()
} else {
crate::lerp::lerp_slice(Y, n)
}
}
pub fn z_1931(wavelength: f32) -> f32 {
let t1 = (wavelength - 437.0) * (if wavelength < 437.0 { 0.0845 } else { 0.0278 });
let t2 = (wavelength - 459.0) * (if wavelength < 459.0 { 0.0385 } else { 0.0725 });
(1.217 * fast_exp(-0.5 * t1 * t1)) + (0.681 * fast_exp(-0.5 * t2 * t2))
use colorbox::tables::cie_1931_xyz::{MAX_WAVELENGTH, MIN_WAVELENGTH, Z};
let norm = 1.0 / (MAX_WAVELENGTH - MIN_WAVELENGTH);
let n = (wavelength - MIN_WAVELENGTH) * norm;
if n < 0.0 {
Z[0]
} else if n > 1.0 {
*Z.last().unwrap()
} else {
crate::lerp::lerp_slice(Z, n)
}
}
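A minimal usage sketch of the table-lookup curves above (the wavelength is illustrative):

// Sample the CIE 1931 matching functions at a single wavelength in nm.
let wl = 550.0;
let xyz_response = (x_1931(wl), y_1931(wl), z_1931(wl));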

View File

@ -4,6 +4,7 @@
//! From Theory to Implementation" 3rd edition by Pharr et al.
use crate::math::{dot, Normal, Point, Vector};
pub use rmath::utils::{decrement_ulp, increment_ulp};
#[inline(always)]
pub fn fp_gamma(n: u32) -> f32 {
@ -12,36 +13,6 @@ pub fn fp_gamma(n: u32) -> f32 {
(e * n as f32) / (1.0 - (e * n as f32))
}
pub fn increment_ulp(v: f32) -> f32 {
if v.is_finite() {
if v > 0.0 {
f32::from_bits(v.to_bits() + 1)
} else if v < -0.0 {
f32::from_bits(v.to_bits() - 1)
} else {
f32::from_bits(0x00_00_00_01)
}
} else {
// Infinity or NaN.
v
}
}
pub fn decrement_ulp(v: f32) -> f32 {
if v.is_finite() {
if v > 0.0 {
f32::from_bits(v.to_bits() - 1)
} else if v < -0.0 {
f32::from_bits(v.to_bits() + 1)
} else {
f32::from_bits(0x80_00_00_01)
}
} else {
// Infinity or NaN.
v
}
}
pub fn robust_ray_origin(pos: Point, pos_err: f32, nor: Normal, ray_dir: Vector) -> Point {
// Get surface normal pointing in the same
// direction as ray_dir.
@ -81,51 +52,7 @@ pub fn robust_ray_origin(pos: Point, pos_err: f32, nor: Normal, ray_dir: Vector)
Point::new(x, y, z)
}
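A hypothetical call site for robust_ray_origin above (all variable names are assumptions):

// Nudge a shading point off the surface before spawning a shadow ray so the
// ray doesn't re-intersect the surface it started on.
let shadow_orig = robust_ray_origin(hit_pos, hit_pos_err, hit_normal, dir_to_light);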
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn inc_ulp() {
assert!(increment_ulp(1.0) > 1.0);
assert!(increment_ulp(-1.0) > -1.0);
}
#[test]
fn dec_ulp() {
assert!(decrement_ulp(1.0) < 1.0);
assert!(decrement_ulp(-1.0) < -1.0);
}
#[test]
fn inc_ulp_zero() {
assert!(increment_ulp(0.0) > 0.0);
assert!(increment_ulp(0.0) > -0.0);
assert!(increment_ulp(-0.0) > 0.0);
assert!(increment_ulp(-0.0) > -0.0);
}
#[test]
fn dec_ulp_zero() {
assert!(decrement_ulp(0.0) < 0.0);
assert!(decrement_ulp(0.0) < -0.0);
assert!(decrement_ulp(-0.0) < 0.0);
assert!(decrement_ulp(-0.0) < -0.0);
}
#[test]
fn inc_dec_ulp() {
assert_eq!(decrement_ulp(increment_ulp(1.0)), 1.0);
assert_eq!(decrement_ulp(increment_ulp(-1.0)), -1.0);
assert_eq!(decrement_ulp(increment_ulp(1.2)), 1.2);
assert_eq!(decrement_ulp(increment_ulp(-1.2)), -1.2);
}
#[test]
fn dec_inc_ulp() {
assert_eq!(increment_ulp(decrement_ulp(1.0)), 1.0);
assert_eq!(increment_ulp(decrement_ulp(-1.0)), -1.0);
assert_eq!(increment_ulp(decrement_ulp(1.2)), 1.2);
assert_eq!(increment_ulp(decrement_ulp(-1.2)), -1.2);
}
}
// #[cfg(test)]
// mod tests {
// use super::*;
// }

View File

@ -1,29 +0,0 @@
pub fn hash_u32(n: u32, seed: u32) -> u32 {
let mut hash = n;
for _ in 0..3 {
hash = hash.wrapping_mul(0x736caf6f);
hash ^= hash.wrapping_shr(16);
hash ^= seed;
}
hash
}
pub fn hash_u64(n: u64, seed: u64) -> u64 {
let mut hash = n;
for _ in 0..4 {
hash = hash.wrapping_mul(32_416_190_071 * 314_604_959);
hash ^= hash.wrapping_shr(32);
hash ^= seed;
}
hash
}
/// Returns a random float in [0, 1] based on 'n' and a seed.
/// Generally use n for getting a bunch of different random
/// numbers, and use seed to vary between runs.
pub fn hash_u32_to_f32(n: u32, seed: u32) -> f32 {
const INV_MAX: f32 = 1.0 / std::u32::MAX as f32;
hash_u32(n, seed) as f32 * INV_MAX
}

View File

@ -1,77 +0,0 @@
#![allow(dead_code)]
const N: u32 = 1 << 16;
// Utility function used by the functions below.
fn hil_rot(n: u32, rx: u32, ry: u32, x: &mut u32, y: &mut u32) {
use std::mem;
if ry == 0 {
if rx == 1 {
*x = (n - 1).wrapping_sub(*x);
*y = (n - 1).wrapping_sub(*y);
}
mem::swap(x, y);
}
}
/// Convert (x,y) to hilbert curve index.
///
/// x: The x coordinate. Must be a positive integer no greater than 2^16-1.
/// y: The y coordinate. Must be a positive integer no greater than 2^16-1.
///
/// Returns the hilbert curve index corresponding to the (x,y) coordinates given.
pub fn xy2d(x: u32, y: u32) -> u32 {
assert!(x < N);
assert!(y < N);
let (mut x, mut y) = (x, y);
let mut d = 0;
let mut s = N >> 1;
while s > 0 {
let rx = if (x & s) > 0 { 1 } else { 0 };
let ry = if (y & s) > 0 { 1 } else { 0 };
d += s * s * ((3 * rx) ^ ry);
hil_rot(s, rx, ry, &mut x, &mut y);
s >>= 1
}
d
}
/// Convert hilbert curve index to (x,y).
///
/// d: The hilbert curve index.
///
/// Returns the (x, y) coords at the given index.
pub fn d2xy(d: u32) -> (u32, u32) {
let (mut x, mut y) = (0, 0);
let mut s = 1;
let mut t = d;
while s < N {
let rx = 1 & (t >> 1);
let ry = 1 & (t ^ rx);
hil_rot(s, rx, ry, &mut x, &mut y);
x += s * rx;
y += s * ry;
t >>= 2;
s <<= 1;
}
(x, y)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn reversible() {
let d = 54;
let (x, y) = d2xy(d);
let d2 = xy2d(x, y);
assert_eq!(d, d2);
}
}

View File

@ -13,7 +13,9 @@ use std::{
use half::f16;
use crate::color::{xyz_to_rec709_e, XYZ};
pub use color::{rec709_e_to_xyz, xyz_to_rec709_e};
use crate::color::XYZ;
#[derive(Debug)]
#[allow(clippy::type_complexity)]
@ -236,22 +238,35 @@ impl<'a> Bucket<'a> {
where
F: Fn((f32, f32, f32)) -> (f32, f32, f32),
{
use std::slice;
let mut data = Vec::with_capacity(
(4 * (self.max.0 - self.min.0) * (self.max.1 - self.min.1)) as usize,
let data_u8 = self.rgba_raw(color_convert);
base64::encode(&data_u8)
}
/// Returns the bucket's contents as a binary string.
///
/// The data is laid out as four-floats-per-pixel in scanline order.
/// The fourth channel is alpha, and is set to 1.0 for all pixels.
///
/// `color_convert` lets you do a colorspace conversion on each pixel
/// before the data is packed, if desired.
pub fn rgba_raw<F>(&mut self, color_convert: F) -> Vec<u8>
where
F: Fn((f32, f32, f32)) -> (f32, f32, f32),
{
let mut data: Vec<u8> = Vec::with_capacity(
std::mem::size_of::<f32>()
* (4 * (self.max.0 - self.min.0) * (self.max.1 - self.min.1)) as usize,
);
for y in self.min.1..self.max.1 {
for x in self.min.0..self.max.0 {
let color = color_convert(self.get(x, y).to_tuple());
data.push(color.0);
data.push(color.1);
data.push(color.2);
data.push(1.0);
data.extend_from_slice(&color.0.to_ne_bytes());
data.extend_from_slice(&color.1.to_ne_bytes());
data.extend_from_slice(&color.2.to_ne_bytes());
data.extend_from_slice(&1.0f32.to_ne_bytes());
}
}
let data_u8 =
unsafe { slice::from_raw_parts(&data[0] as *const f32 as *const u8, data.len() * 4) };
base64::encode(data_u8)
data
}
}
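A hypothetical usage of rgba_raw above (`bucket` is an assumed mutable Bucket):

// Grab the raw pixel bytes with no colorspace conversion (identity closure).
let bytes = bucket.rgba_raw(|rgb| rgb);
assert_eq!(bytes.len() % 16, 0); // 4 channels x 4 bytes per pixel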

View File

@ -1,6 +1,6 @@
#![allow(dead_code)]
use math3d::{Normal, Point, Transform, Vector};
use rmath::{wide4::Float4, Normal, Point, Vector, Xform};
/// Trait for allowing a type to be linearly interpolated.
pub trait Lerp: Copy {
@ -100,36 +100,34 @@ impl<T: Lerp> Lerp for [T; 4] {
}
}
impl Lerp for glam::Vec4 {
fn lerp(self, other: glam::Vec4, alpha: f32) -> glam::Vec4 {
impl Lerp for Float4 {
fn lerp(self, other: Self, alpha: f32) -> Self {
(self * (1.0 - alpha)) + (other * alpha)
}
}
impl Lerp for Transform {
fn lerp(self, other: Transform, alpha: f32) -> Transform {
impl Lerp for Xform {
fn lerp(self, other: Self, alpha: f32) -> Self {
(self * (1.0 - alpha)) + (other * alpha)
}
}
impl Lerp for Normal {
fn lerp(self, other: Normal, alpha: f32) -> Normal {
fn lerp(self, other: Self, alpha: f32) -> Self {
(self * (1.0 - alpha)) + (other * alpha)
}
}
impl Lerp for Point {
fn lerp(self, other: Point, alpha: f32) -> Point {
let s = self;
let o = other;
Point {
co: (s.co * (1.0 - alpha)) + (o.co * alpha),
}
fn lerp(self, other: Self, alpha: f32) -> Self {
let a = self.0;
let b = other.0;
Point((a * (1.0 - alpha)) + (b * alpha))
}
}
impl Lerp for Vector {
fn lerp(self, other: Vector, alpha: f32) -> Vector {
fn lerp(self, other: Self, alpha: f32) -> Self {
(self * (1.0 - alpha)) + (other * alpha)
}
}
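A small worked example of the trait above, with illustrative values:

// Interpolate a Point a quarter of the way from a to b.
let a = Point::new(0.0, 0.0, 0.0);
let b = Point::new(4.0, 8.0, -4.0);
let c = a.lerp(b, 0.25); // (1.0, 2.0, -1.0)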
@ -215,20 +213,18 @@ mod tests {
#[test]
fn lerp_matrix() {
let a = Transform::new_from_values(
0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0,
);
let b = Transform::new_from_values(
let a = Xform::new(0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
let b = Xform::new(
-1.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
);
let c1 = Transform::new_from_values(
let c1 = Xform::new(
-0.25, 1.75, 2.25, 3.25, 4.25, 5.25, 6.25, 7.25, 8.25, 9.25, 10.25, 11.25,
);
let c2 = Transform::new_from_values(
let c2 = Xform::new(
-0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5,
);
let c3 = Transform::new_from_values(
let c3 = Xform::new(
-0.75, 1.25, 2.75, 3.75, 4.75, 5.75, 6.75, 7.75, 8.75, 9.75, 10.75, 11.75,
);

View File

@ -1,7 +1,5 @@
use std::f64::consts::PI as PI_64;
use kioku::Arena;
use crate::{
color::{Color, SpectralSample},
lerp::lerp_slice,
@ -13,24 +11,19 @@ use super::WorldLightSource;
// TODO: handle case where radius = 0.0.
#[derive(Copy, Clone, Debug)]
pub struct DistantDiskLight<'a> {
radii: &'a [f32],
directions: &'a [Vector],
colors: &'a [Color],
#[derive(Debug, Clone)]
pub struct DistantDiskLight {
radii: Vec<f32>,
directions: Vec<Vector>,
colors: Vec<Color>,
}
impl<'a> DistantDiskLight<'a> {
pub fn new(
arena: &'a Arena,
radii: &[f32],
directions: &[Vector],
colors: &[Color],
) -> DistantDiskLight<'a> {
impl DistantDiskLight {
pub fn new(radii: &[f32], directions: &[Vector], colors: &[Color]) -> DistantDiskLight {
DistantDiskLight {
radii: arena.copy_slice(&radii),
directions: arena.copy_slice(&directions),
colors: arena.copy_slice(&colors),
radii: radii.into(),
directions: directions.into(),
colors: colors.into(),
}
}
@ -55,7 +48,7 @@ impl<'a> DistantDiskLight<'a> {
// }
}
impl<'a> WorldLightSource for DistantDiskLight<'a> {
impl WorldLightSource for DistantDiskLight {
fn sample_from_point(
&self,
u: f32,
@ -64,9 +57,9 @@ impl<'a> WorldLightSource for DistantDiskLight<'a> {
time: f32,
) -> (SpectralSample, Vector, f32) {
// Calculate time interpolated values
let radius: f64 = lerp_slice(self.radii, time) as f64;
let direction = lerp_slice(self.directions, time);
let col = lerp_slice(self.colors, time);
let radius: f64 = lerp_slice(&self.radii, time) as f64;
let direction = lerp_slice(&self.directions, time);
let col = lerp_slice(&self.colors, time);
let solid_angle_inv = 1.0 / (2.0 * PI_64 * (1.0 - radius.cos()));
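// (2.0 * PI_64 * (1.0 - radius.cos()) is the solid angle of a spherical cap
// with angular radius `radius`, so this is the reciprocal of the solid angle
// the light subtends.)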
// Create a coordinate system from the vector pointing at the center of

View File

@ -6,7 +6,7 @@ use std::fmt::Debug;
use crate::{
color::SpectralSample,
math::{Normal, Point, Transform, Vector},
math::{Normal, Point, Vector, XformFull},
surface::Surface,
};
@ -34,7 +34,7 @@ pub trait SurfaceLight: Surface {
/// - The pdf of the sample.
fn sample_from_point(
&self,
space: &Transform,
space: &XformFull,
arr: Point,
u: f32,
v: f32,

View File

@ -5,8 +5,8 @@ use crate::{
boundable::Boundable,
color::{Color, SpectralSample},
lerp::lerp_slice,
math::{cross, dot, Normal, Point, Transform, Vector},
ray::{RayBatch, RayStack},
math::{cross, dot, Normal, Point, Vector, XformFull},
ray::{LocalRay, Ray},
sampling::{
spherical_triangle_solid_angle, triangle_surface_area, uniform_sample_spherical_triangle,
uniform_sample_triangle,
@ -51,7 +51,7 @@ impl<'a> RectangleLight<'a> {
// more efficiently by inlining it there.
fn sample_pdf(
&self,
space: &Transform,
space: &XformFull,
arr: Point,
sample_dir: Vector,
hit_point: Point,
@ -64,11 +64,10 @@ impl<'a> RectangleLight<'a> {
let dim = lerp_slice(self.dimensions, time);
// Get the four corners of the rectangle, transformed into world space
let space_inv = space.inverse();
let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv;
let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv;
let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv;
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space);
let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space);
let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space);
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space);
// Get the four corners of the rectangle, projected on to the unit
// sphere centered around arr.
@ -82,7 +81,7 @@ impl<'a> RectangleLight<'a> {
let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3);
// World-space surface normal
let normal = Normal::new(0.0, 0.0, 1.0) * space_inv;
let normal = Normal::new(0.0, 0.0, 1.0).xform_fast(space);
// PDF
if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD {
@ -97,7 +96,7 @@ impl<'a> RectangleLight<'a> {
// fn outgoing(
// &self,
// space: &Transform,
// space: &XformFull,
// dir: Vector,
// u: f32,
// v: f32,
@ -120,7 +119,7 @@ impl<'a> RectangleLight<'a> {
impl<'a> SurfaceLight for RectangleLight<'a> {
fn sample_from_point(
&self,
space: &Transform,
space: &XformFull,
arr: Point,
u: f32,
v: f32,
@ -135,11 +134,10 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
let surface_area_inv: f64 = 1.0 / surface_area;
// Get the four corners of the rectangle, transformed into world space
let space_inv = space.inverse();
let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv;
let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv;
let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv;
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space);
let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space);
let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space);
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space);
// Get the four corners of the rectangle relative to arr.
let lp1 = p1 - arr;
@ -158,7 +156,7 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
let area_2 = spherical_triangle_solid_angle(sp4, sp1, sp3);
// Calculate world-space surface normal
let normal = Normal::new(0.0, 0.0, 1.0) * space_inv;
let normal = Normal::new(0.0, 0.0, 1.0).xform_fast(space);
if (area_1 + area_2) < SIMPLE_SAMPLING_THRESHOLD {
// Simple sampling for more distant lights
@ -215,18 +213,16 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
};
// Project shadow_vec back onto the light's surface
let arr_local = arr * *space;
let shadow_vec_local = shadow_vec * *space;
let arr_local = arr.xform_inv(space);
let shadow_vec_local = shadow_vec.xform_inv(space);
let shadow_vec_local = shadow_vec_local * (-arr_local.z() / shadow_vec_local.z());
let mut sample_point_local = arr_local + shadow_vec_local;
{
let x = sample_point_local.x().max(dim.0 * -0.5).min(dim.0 * 0.5);
let y = sample_point_local.y().max(dim.1 * -0.5).min(dim.1 * 0.5);
sample_point_local.set_x(x);
sample_point_local.set_y(y);
sample_point_local.set_z(0.0);
sample_point_local = Point::new(x, y, 0.0);
}
let sample_point = sample_point_local * space_inv;
let sample_point = sample_point_local.xform(space);
let point_err = 0.0001; // TODO: this is a hack, do properly.
// Calculate pdf and light energy
@ -255,85 +251,77 @@ impl<'a> SurfaceLight for RectangleLight<'a> {
}
impl<'a> Surface for RectangleLight<'a> {
fn intersect_rays(
fn intersect_ray(
&self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection],
shader: &dyn SurfaceShader,
space: &[Transform],
ray: &mut Ray,
_local_ray: &LocalRay,
space: &XformFull,
isect: &mut SurfaceIntersection,
_shaders: &[&dyn SurfaceShader],
) {
let _ = shader; // Silence 'unused' warning
let time = ray.time;
ray_stack.pop_do_next_task(|ray_idx| {
let time = rays.time(ray_idx);
let orig = rays.orig(ray_idx);
let dir = rays.dir(ray_idx);
let max_t = rays.max_t(ray_idx);
// Calculate time interpolated values.
let dim = lerp_slice(self.dimensions, time);
// Calculate time interpolated values
let dim = lerp_slice(self.dimensions, time);
let xform = lerp_slice(space, time);
// Get the four corners of the rectangle, transformed into world space.
let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0).xform(space);
let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0).xform(space);
let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0).xform(space);
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0).xform(space);
let space_inv = xform.inverse();
// Test against two triangles that make up the light.
let ray_pre = triangle::RayTriPrecompute::new(ray.dir);
for tri in &[(p1, p2, p3), (p3, p4, p1)] {
if let Some((t, b0, b1, b2)) =
triangle::intersect_ray(ray.orig, ray_pre, ray.max_t, *tri)
{
if t < ray.max_t {
if ray.is_occlusion() {
*isect = SurfaceIntersection::Occlude;
ray.mark_done();
return;
} else {
let (pos, pos_err) = triangle::surface_point(*tri, (b0, b1, b2));
let normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal();
// Get the four corners of the rectangle, transformed into world space
let p1 = Point::new(dim.0 * 0.5, dim.1 * 0.5, 0.0) * space_inv;
let p2 = Point::new(dim.0 * -0.5, dim.1 * 0.5, 0.0) * space_inv;
let p3 = Point::new(dim.0 * -0.5, dim.1 * -0.5, 0.0) * space_inv;
let p4 = Point::new(dim.0 * 0.5, dim.1 * -0.5, 0.0) * space_inv;
let intersection_data = SurfaceIntersectionData {
incoming: ray.dir,
t: t,
pos: pos,
pos_err: pos_err,
nor: normal,
nor_g: normal,
local_space: *space,
sample_pdf: self.sample_pdf(
space,
ray.orig,
ray.dir,
pos,
ray.wavelength,
time,
),
};
// Test against two triangles that make up the light
let ray_pre = triangle::RayTriPrecompute::new(dir);
for tri in &[(p1, p2, p3), (p3, p4, p1)] {
if let Some((t, b0, b1, b2)) = triangle::intersect_ray(orig, ray_pre, max_t, *tri) {
if t < max_t {
if rays.is_occlusion(ray_idx) {
isects[ray_idx] = SurfaceIntersection::Occlude;
rays.mark_done(ray_idx);
} else {
let (pos, pos_err) = triangle::surface_point(*tri, (b0, b1, b2));
let normal = cross(tri.0 - tri.1, tri.0 - tri.2).into_normal();
let closure = {
let inv_surface_area = (1.0 / (dim.0 as f64 * dim.1 as f64)) as f32;
let color = lerp_slice(self.colors, time) * inv_surface_area;
SurfaceClosure::Emit(color)
};
let intersection_data = SurfaceIntersectionData {
incoming: dir,
t: t,
pos: pos,
pos_err: pos_err,
nor: normal,
nor_g: normal,
local_space: xform,
sample_pdf: self.sample_pdf(
&xform,
orig,
dir,
pos,
rays.wavelength(ray_idx),
time,
),
};
// Fill in intersection.
*isect = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: closure,
};
let closure = {
let inv_surface_area = (1.0 / (dim.0 as f64 * dim.1 as f64)) as f32;
let color = lerp_slice(self.colors, time) * inv_surface_area;
SurfaceClosure::Emit(color)
};
// Fill in intersection
isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: closure,
};
// Set ray's max t
rays.set_max_t(ray_idx, t);
}
break;
ray.max_t = t;
}
break;
}
}
});
}
}
}


@ -7,8 +7,8 @@ use crate::{
boundable::Boundable,
color::{Color, SpectralSample},
lerp::lerp_slice,
math::{coordinate_system_from_vector, dot, Normal, Point, Transform, Vector},
ray::{RayBatch, RayStack},
math::{coordinate_system_from_vector, dot, Normal, Point, Vector, XformFull},
ray::{LocalRay, Ray},
sampling::{uniform_sample_cone, uniform_sample_cone_pdf, uniform_sample_sphere},
shading::surface_closure::SurfaceClosure,
shading::SurfaceShader,
@ -50,7 +50,7 @@ impl<'a> SphereLight<'a> {
// more efficiently by inlining it there.
fn sample_pdf(
&self,
space: &Transform,
space: &XformFull,
arr: Point,
sample_dir: Vector,
sample_u: f32,
@ -61,7 +61,7 @@ impl<'a> SphereLight<'a> {
// We're not using these, silence warnings
let _ = (sample_dir, sample_u, sample_v, wavelength);
let arr = arr * *space;
let arr = arr.xform_inv(space);
let pos = Point::new(0.0, 0.0, 0.0);
let radius: f64 = lerp_slice(self.radii, time) as f64;
@ -84,7 +84,7 @@ impl<'a> SphereLight<'a> {
impl<'a> SurfaceLight for SphereLight<'a> {
fn sample_from_point(
&self,
space: &Transform,
space: &XformFull,
arr: Point,
u: f32,
v: f32,
@ -92,12 +92,9 @@ impl<'a> SurfaceLight for SphereLight<'a> {
time: f32,
) -> (SpectralSample, (Point, Normal, f32), f32) {
// TODO: track fp error due to transforms
let arr = arr * *space;
let arr = arr.xform_inv(space);
let pos = Point::new(0.0, 0.0, 0.0);
// Precalculate local->world space transform matrix
let inv_space = space.inverse();
// Calculate time interpolated values
let radius: f64 = lerp_slice(self.radii, time) as f64;
let col = lerp_slice(self.colors, time);
@ -115,7 +112,7 @@ impl<'a> SurfaceLight for SphereLight<'a> {
// TODO: do this properly. This is a total hack.
let sample_point_err = {
let v = Vector::new(radius as f32, radius as f32, radius as f32);
let v2 = v * inv_space;
let v2 = v.xform(space);
v2.length() * SAMPLE_POINT_FUDGE
};
@ -159,8 +156,8 @@ impl<'a> SurfaceLight for SphereLight<'a> {
let normal = (arr + sample_vec).into_vector().normalized();
let point = normal * radius as f32;
(
point.into_point() * inv_space,
normal.into_normal() * inv_space,
point.into_point().xform(space),
normal.into_normal().xform_fast(space),
)
};
let pdf = uniform_sample_cone_pdf(cos_theta_max);
@ -177,8 +174,8 @@ impl<'a> SurfaceLight for SphereLight<'a> {
let normal = (arr + sample_vec).into_vector().normalized();
let point = normal * radius as f32;
(
point.into_point() * inv_space,
normal.into_normal() * inv_space,
point.into_point().xform(space),
normal.into_normal().xform_fast(space),
)
};
let pdf = 1.0 / (4.0 * PI_64);
@ -204,137 +201,122 @@ impl<'a> SurfaceLight for SphereLight<'a> {
}
impl<'a> Surface for SphereLight<'a> {
fn intersect_rays(
fn intersect_ray(
&self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection],
shader: &dyn SurfaceShader,
space: &[Transform],
ray: &mut Ray,
local_ray: &LocalRay,
space: &XformFull,
isect: &mut SurfaceIntersection,
_shaders: &[&dyn SurfaceShader],
) {
let _ = shader; // Silence 'unused' warning
let time = ray.time;
ray_stack.pop_do_next_task(|ray_idx| {
let time = rays.time(ray_idx);
// Get the radius of the sphere at the ray's time
let radius = lerp_slice(self.radii, time); // Radius of the sphere
// Get the transform space
let xform = lerp_slice(space, time);
// Code adapted to Rust from https://github.com/Tecla/Rayito
// Ray-sphere intersection can result in either zero, one or two points
// of intersection. It turns into a quadratic equation, so we just find
// the solution using the quadratic formula. Note that there is a
// slightly more stable form of it when computing it on a computer, and
// we use that method to keep everything accurate.
// Get the radius of the sphere at the ray's time
let radius = lerp_slice(self.radii, time); // Radius of the sphere
// Calculate quadratic coeffs
let a = local_ray.dir.length2();
let b = 2.0 * dot(local_ray.dir, local_ray.orig.into_vector());
let c = local_ray.orig.into_vector().length2() - (radius * radius);
// Get the ray origin and direction in local space
let orig = rays.orig_local(ray_idx).into_vector();
let dir = rays.dir(ray_idx) * xform;
let discriminant = (b * b) - (4.0 * a * c);
if discriminant < 0.0 {
// Discriminant less than zero? No solution => no intersection.
return;
}
let discriminant = discriminant.sqrt();
// Code adapted to Rust from https://github.com/Tecla/Rayito
// Ray-sphere intersection can result in either zero, one or two points
// of intersection. It turns into a quadratic equation, so we just find
// the solution using the quadratic formula. Note that there is a
// slightly more stable form of it when computing it on a computer, and
// we use that method to keep everything accurate.
// Compute a more stable form of our param t (t0 = q/a, t1 = c/q)
// q = -0.5 * (b - sqrt(b * b - 4.0 * a * c)) if b < 0, or
// q = -0.5 * (b + sqrt(b * b - 4.0 * a * c)) if b >= 0
let q = if b < 0.0 {
-0.5 * (b - discriminant)
} else {
-0.5 * (b + discriminant)
};
// Calculate quadratic coeffs
let a = dir.length2();
let b = 2.0 * dot(dir, orig);
let c = orig.length2() - (radius * radius);
// Get our final parametric values
let mut t0 = q / a;
let mut t1 = if q != 0.0 { c / q } else { ray.max_t };
let discriminant = (b * b) - (4.0 * a * c);
if discriminant < 0.0 {
// Discriminant less than zero? No solution => no intersection.
return;
}
let discriminant = discriminant.sqrt();
// Swap them so they are ordered right
if t0 > t1 {
use std::mem::swap;
swap(&mut t0, &mut t1);
}
// Compute a more stable form of our param t (t0 = q/a, t1 = c/q)
// q = -0.5 * (b - sqrt(b * b - 4.0 * a * c)) if b < 0, or
// q = -0.5 * (b + sqrt(b * b - 4.0 * a * c)) if b >= 0
let q = if b < 0.0 {
-0.5 * (b - discriminant)
} else {
-0.5 * (b + discriminant)
// Check our intersection for validity against this ray's extents
if t0 > ray.max_t || t1 <= 0.0 {
// Didn't hit because sphere is entirely outside of ray's extents
return;
}
let t = if t0 > 0.0 {
t0
} else if t1 <= ray.max_t {
t1
} else {
// Didn't hit because ray is entirely within the sphere, and
// therefore doesn't hit its surface.
return;
};
// We hit the sphere, so calculate intersection info.
if ray.is_occlusion() {
*isect = SurfaceIntersection::Occlude;
ray.mark_done();
} else {
// Position is calculated from the local-space ray and t, and then
// re-projected onto the surface of the sphere.
let t_pos = local_ray.orig + (local_ray.dir * t);
let unit_pos = t_pos.into_vector().normalized();
let pos = (unit_pos * radius).xform(space).into_point();
// TODO: proper error bounds.
let pos_err = 0.001;
let normal = unit_pos.into_normal().xform_fast(space);
let intersection_data = SurfaceIntersectionData {
incoming: ray.dir,
t: t,
pos: pos,
pos_err: pos_err,
nor: normal,
nor_g: normal,
local_space: *space,
sample_pdf: self.sample_pdf(
space,
ray.orig,
ray.dir,
0.0,
0.0,
ray.wavelength,
time,
),
};
// Get our final parametric values
let mut t0 = q / a;
let mut t1 = if q != 0.0 { c / q } else { rays.max_t(ray_idx) };
// Swap them so they are ordered right
if t0 > t1 {
use std::mem::swap;
swap(&mut t0, &mut t1);
}
// Check our intersection for validity against this ray's extents
if t0 > rays.max_t(ray_idx) || t1 <= 0.0 {
// Didn't hit because sphere is entirely outside of ray's extents
return;
}
let t = if t0 > 0.0 {
t0
} else if t1 <= rays.max_t(ray_idx) {
t1
} else {
// Didn't hit because ray is entirely within the sphere, and
// therefore doesn't hit its surface.
return;
let closure = {
let inv_surface_area = (1.0 / (4.0 * PI_64 * radius as f64 * radius as f64)) as f32;
let color = lerp_slice(self.colors, time) * inv_surface_area;
SurfaceClosure::Emit(color)
};
// We hit the sphere, so calculate intersection info.
if rays.is_occlusion(ray_idx) {
isects[ray_idx] = SurfaceIntersection::Occlude;
rays.mark_done(ray_idx);
} else {
let inv_xform = xform.inverse();
// Fill in intersection
*isect = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: closure,
};
// Position is calculated from the local-space ray and t, and then
// re-projected onto the surface of the sphere.
let t_pos = orig + (dir * t);
let unit_pos = t_pos.normalized();
let pos = (unit_pos * radius * inv_xform).into_point();
// TODO: proper error bounds.
let pos_err = 0.001;
let normal = unit_pos.into_normal() * inv_xform;
let intersection_data = SurfaceIntersectionData {
incoming: rays.dir(ray_idx),
t: t,
pos: pos,
pos_err: pos_err,
nor: normal,
nor_g: normal,
local_space: xform,
sample_pdf: self.sample_pdf(
&xform,
rays.orig(ray_idx),
rays.dir(ray_idx),
0.0,
0.0,
rays.wavelength(ray_idx),
time,
),
};
let closure = {
let inv_surface_area =
(1.0 / (4.0 * PI_64 * radius as f64 * radius as f64)) as f32;
let color = lerp_slice(self.colors, time) * inv_surface_area;
SurfaceClosure::Emit(color)
};
// Fill in intersection
isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: closure,
};
// Set ray's max t
rays.set_max_t(ray_idx, t);
}
});
ray.max_t = t;
}
}
}


@ -22,8 +22,6 @@ mod boundable;
mod camera;
mod color;
mod fp_utils;
mod hash;
mod hilbert;
mod image;
mod lerp;
mod light;
@ -34,11 +32,13 @@ mod ray;
mod renderer;
mod sampling;
mod scene;
mod scramble;
mod shading;
mod space_fill;
mod surface;
mod timer;
mod tracer;
mod transform_stack;
// mod transform_stack;
use std::{fs::File, io, io::Read, mem, path::Path, str::FromStr};
@ -51,7 +51,6 @@ use crate::{
accel::BVH4Node,
bbox::BBox,
parse::{parse_scene, DataTree},
renderer::LightPath,
surface::SurfaceIntersection,
timer::Timer,
};
@ -89,11 +88,11 @@ fn main() {
}),
)
.arg(
Arg::with_name("max_bucket_samples")
Arg::with_name("bucket_size")
.short("b")
.long("spb")
.long("bucket_size")
.value_name("N")
.help("Target number of samples per bucket (determines bucket size)")
.help("Height and width of each render bucket in pixels.")
.takes_value(true)
.validator(|s| {
usize::from_str(&s)
@ -163,7 +162,6 @@ fn main() {
"SurfaceIntersection size: {} bytes",
mem::size_of::<SurfaceIntersection>()
);
println!("LightPath size: {} bytes", mem::size_of::<LightPath>());
println!("BBox size: {} bytes", mem::size_of::<BBox>());
// println!("BVHNode size: {} bytes", mem::size_of::<BVHNode>());
println!("BVH4Node size: {} bytes", mem::size_of::<BVH4Node>());
@ -259,12 +257,11 @@ fn main() {
r.spp = usize::from_str(spp).unwrap();
}
let max_samples_per_bucket =
if let Some(max_samples_per_bucket) = args.value_of("max_bucket_samples") {
u32::from_str(max_samples_per_bucket).unwrap()
} else {
4096
};
let bucket_size = if let Some(bucket_size) = args.value_of("bucket_size") {
u32::from_str(bucket_size).unwrap()
} else {
32
};
let thread_count = if let Some(threads) = args.value_of("threads") {
u32::from_str(threads).unwrap()
@ -280,7 +277,7 @@ fn main() {
println!("Rendering scene with {} threads...", thread_count);
}
let (mut image, rstats) = r.render(
max_samples_per_bucket,
bucket_size,
crop,
thread_count,
args.is_present("serialized_output"),
@ -288,30 +285,9 @@ fn main() {
// Print render stats
if !args.is_present("serialized_output") {
let rtime = t.tick();
let ntime = rtime as f64 / rstats.total_time;
println!("\tRendered scene in {:.3}s", rtime);
println!(
"\t\tTrace: {:.3}s",
ntime * rstats.trace_time
);
println!("\t\t\tRays traced: {}", rstats.ray_count);
println!(
"\t\t\tRays/sec: {}",
(rstats.ray_count as f64 / (ntime * rstats.trace_time) as f64) as u64
);
println!("\t\t\tRay/node tests: {}", rstats.accel_node_visits);
println!(
"\t\tInitial ray generation: {:.3}s",
ntime * rstats.initial_ray_generation_time
);
println!(
"\t\tRay generation: {:.3}s",
ntime * rstats.ray_generation_time
);
println!(
"\t\tSample writing: {:.3}s",
ntime * rstats.sample_writing_time
);
}
// Write to disk


@ -2,18 +2,78 @@
use std::f32;
pub use math3d::{cross, dot, CrossProduct, DotProduct, Normal, Point, Transform, Vector};
pub use rmath::{
cross, cross_fast, dot, dot_fast, wide4::Float4, AsXform, CrossProduct, DotProduct, Normal,
Point, Vector, Xform, XformFull,
};
/// Clamps a value between a min and max.
pub fn clamp<T: PartialOrd>(v: T, lower: T, upper: T) -> T {
if v < lower {
lower
} else if v > upper {
upper
} else {
v
}
}
// The stdlib min function is slower than a simple if statement for some reason.
pub fn fast_minf32(a: f32, b: f32) -> f32 {
if a < b {
a
} else {
b
}
}
// The stdlib max function is slower than a simple if statement for some reason.
pub fn fast_maxf32(a: f32, b: f32) -> f32 {
if a > b {
a
} else {
b
}
}
/// Rounds an integer up to the next power of two.
pub fn upper_power_of_two(mut v: u32) -> u32 {
v -= 1;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v + 1
}
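A quick sanity check of the bit-smear trick above (illustrative only, not part of this changeset): or-ing in each right shift smears the highest set bit into every lower position, so the final increment lands on the next power of two, and exact powers of two map to themselves. The initial `v -= 1` means the function expects `v > 0`.
#[test]
fn upper_power_of_two_examples() {
    assert_eq!(upper_power_of_two(1), 1);
    assert_eq!(upper_power_of_two(5), 8);
    assert_eq!(upper_power_of_two(8), 8);
    assert_eq!(upper_power_of_two(9), 16);
}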
/// Gets the log base 2 of the given integer
pub fn log2_64(n: u64) -> u64 {
// This works by finding the largest non-zero binary digit in the
// number. Its bit position is then the log2 of the integer.
pub fn log2_64(mut value: u64) -> u64 {
// This works by doing a binary search for the largest non-zero binary
// digit in the number. Its bit position is then the log2 of the integer.
if n == 0 {
0
} else {
(63 - n.leading_zeros()) as u64
let mut log = 0;
const POWERS: [(u64, u64); 6] = [
(32, (1 << 32) - 1),
(16, (1 << 16) - 1),
(8, (1 << 8) - 1),
(4, (1 << 4) - 1),
(2, (1 << 2) - 1),
(1, (1 << 1) - 1),
];
for &(i, j) in &POWERS {
let tmp = value >> i;
if tmp != 0 {
log += i;
value = tmp;
} else {
value &= j;
}
}
log
}
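For reference, the new binary-search formulation still computes floor(log2) and should agree with the old `leading_zeros` version for nonzero inputs. A quick check (mine, not in the diff):
#[test]
fn log2_64_matches_leading_zeros() {
    for &n in &[1u64, 2, 3, 1024, u64::MAX] {
        assert_eq!(log2_64(n), (63 - n.leading_zeros()) as u64);
    }
    // Both versions return 0 for an input of 0 by convention.
    assert_eq!(log2_64(0), 0);
}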
/// Creates a coordinate system from a single vector.


@ -6,14 +6,10 @@ use nom::{combinator::all_consuming, sequence::tuple, IResult};
use kioku::Arena;
use color::rec709_e_to_xyz;
use crate::{
camera::Camera,
color::{rec709_e_to_xyz, Color},
light::WorldLightSource,
math::Transform,
renderer::Renderer,
scene::Scene,
scene::World,
camera::Camera, color::Color, math::Xform, renderer::Renderer, scene::Scene, scene::World,
};
use super::{
@ -153,13 +149,10 @@ pub fn parse_scene<'a>(
)?;
// Parse camera
let camera = parse_camera(
arena,
tree.iter_children_with_type("Camera").nth(0).unwrap(),
)?;
let camera = parse_camera(tree.iter_children_with_type("Camera").nth(0).unwrap())?;
// Parse world
let world = parse_world(arena, tree.iter_children_with_type("World").nth(0).unwrap())?;
let world = parse_world(tree.iter_children_with_type("World").nth(0).unwrap())?;
// Parse root scene assembly
let assembly = parse_assembly(
@ -350,7 +343,7 @@ fn parse_render_settings(tree: &DataTree) -> Result<((u32, u32), u32, u32), PsyP
};
}
fn parse_camera<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<Camera<'a>, PsyParseError> {
fn parse_camera<'a>(tree: &'a DataTree) -> Result<Camera, PsyParseError> {
if let DataTree::Internal { ref children, .. } = *tree {
let mut mats = Vec::new();
let mut fovs = Vec::new();
@ -435,13 +428,7 @@ fn parse_camera<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<Camera<'a>,
}
}
return Ok(Camera::new(
arena,
&mats,
&fovs,
&aperture_radii,
&focus_distances,
));
return Ok(Camera::new(&mats, &fovs, &aperture_radii, &focus_distances));
} else {
return Err(PsyParseError::ExpectedInternalNode(
tree.byte_offset(),
@ -452,10 +439,10 @@ fn parse_camera<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<Camera<'a>,
}
}
fn parse_world<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<World<'a>, PsyParseError> {
fn parse_world(tree: &DataTree) -> Result<World, PsyParseError> {
if tree.is_internal() {
let background_color;
let mut lights: Vec<&dyn WorldLightSource> = Vec::new();
let mut lights: Vec<_> = Vec::new();
// Parse background shader
let bgs = {
@ -531,7 +518,7 @@ fn parse_world<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<World<'a>, Ps
for child in tree.iter_children() {
match *child {
DataTree::Internal { type_name, .. } if type_name == "DistantDiskLight" => {
lights.push(arena.alloc(parse_distant_disk_light(arena, child)?));
lights.push(parse_distant_disk_light(child)?);
}
_ => {}
@ -541,7 +528,7 @@ fn parse_world<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<World<'a>, Ps
// Build and return the world
return Ok(World {
background_color: background_color,
lights: arena.copy_slice(&lights),
lights: lights,
});
} else {
return Err(PsyParseError::ExpectedInternalNode(
@ -553,17 +540,17 @@ fn parse_world<'a>(arena: &'a Arena, tree: &'a DataTree) -> Result<World<'a>, Ps
}
}
pub fn parse_matrix(contents: &str) -> Result<Transform, PsyParseError> {
pub fn parse_matrix(contents: &str) -> Result<Xform, PsyParseError> {
if let IResult::Ok((leftover, ns)) = all_consuming(tuple((
ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32, ws_f32,
ws_f32, ws_f32, ws_f32, ws_f32, ws_f32,
)))(contents)
{
if leftover.is_empty() {
return Ok(Transform::new_from_values(
return Ok(Xform::new(
// We throw away the last row, since it's not necessarily affine.
// TODO: is there a more correct way to handle this?
ns.0, ns.4, ns.8, ns.12, ns.1, ns.5, ns.9, ns.13, ns.2, ns.6, ns.10, ns.14,
ns.0, ns.1, ns.2, ns.4, ns.5, ns.6, ns.8, ns.9, ns.10, ns.12, ns.13, ns.14,
));
}
}


@ -50,23 +50,6 @@ pub fn parse_assembly<'a>(
child.iter_leaf_children_with_type("Data").nth(0).unwrap().1
};
// Get surface shader binding, if any.
let surface_shader_name = if child
.iter_leaf_children_with_type("SurfaceShaderBind")
.count()
> 0
{
Some(
child
.iter_leaf_children_with_type("SurfaceShaderBind")
.nth(0)
.unwrap()
.1,
)
} else {
None
};
// Get xforms
let mut xforms = Vec::new();
for (_, contents, _) in child.iter_leaf_children_with_type("Transform") {
@ -75,7 +58,7 @@ pub fn parse_assembly<'a>(
// Add instance
if builder.name_exists(name) {
builder.add_instance(name, surface_shader_name, Some(&xforms));
builder.add_instance(name, Some(&xforms));
} else {
return Err(PsyParseError::InstancedMissingData(
child.iter_leaf_children_with_type("Data").nth(0).unwrap().2,
@ -113,7 +96,11 @@ pub fn parse_assembly<'a>(
{
builder.add_object(
ident,
Object::Surface(arena.alloc(parse_mesh_surface(arena, child)?)),
Object::Surface(arena.alloc(parse_mesh_surface(
arena,
child,
&builder.surface_shader_map,
)?)),
);
} else {
// TODO: error condition of some kind, because no ident


@ -17,10 +17,7 @@ use super::{
DataTree,
};
pub fn parse_distant_disk_light<'a>(
arena: &'a Arena,
tree: &'a DataTree,
) -> Result<DistantDiskLight<'a>, PsyParseError> {
pub fn parse_distant_disk_light<'a>(tree: &'a DataTree) -> Result<DistantDiskLight, PsyParseError> {
if let DataTree::Internal { ref children, .. } = *tree {
let mut radii = Vec::new();
let mut directions = Vec::new();
@ -77,7 +74,7 @@ pub fn parse_distant_disk_light<'a>(
}
}
return Ok(DistantDiskLight::new(arena, &radii, &directions, &colors));
return Ok(DistantDiskLight::new(&radii, &directions, &colors));
} else {
return Err(PsyParseError::UnknownError(tree.byte_offset()));
}


@ -1,6 +1,6 @@
#![allow(dead_code)]
use std::result::Result;
use std::{collections::HashMap, result::Result};
use nom::{sequence::tuple, IResult};
@ -27,7 +27,9 @@ use super::{
pub fn parse_mesh_surface<'a>(
arena: &'a Arena,
tree: &'a DataTree,
surface_shader_map: &HashMap<String, usize>,
) -> Result<TriangleMesh<'a>, PsyParseError> {
let mut shader_idx = None;
let mut verts = Vec::new(); // Vec of vecs, one for each time sample
let mut normals = Vec::new(); // Vec of vecs, one for each time sample
let mut face_vert_counts = Vec::new();
@ -36,6 +38,20 @@ pub fn parse_mesh_surface<'a>(
// TODO: make sure there are the right number of various children,
// and other validation.
// Get surface shader binding, if any.
if tree
.iter_leaf_children_with_type("SurfaceShaderBind")
.count()
> 0
{
let name = tree
.iter_leaf_children_with_type("SurfaceShaderBind")
.nth(0)
.unwrap()
.1;
shader_idx = surface_shader_map.get(name).map(|i| *i);
}
// Get verts
for (_, mut text, _) in tree.iter_leaf_children_with_type("Vertices") {
// Collect verts for this time sample
@ -116,6 +132,7 @@ pub fn parse_mesh_surface<'a>(
Ok(TriangleMesh::from_verts_and_indices(
arena,
shader_idx,
&verts,
&if normals.is_empty() {
None


@ -1,16 +1,11 @@
#![allow(dead_code)]
use glam::BVec4A;
use crate::math::{Point, Vector, XformFull};
use crate::math::{Point, Transform, Vector};
type RayIndexType = u16;
type FlagType = u8;
const OCCLUSION_FLAG: FlagType = 1;
const DONE_FLAG: FlagType = 1 << 1;
/// This is never used directly in ray tracing--it's only used as a convenience
/// for filling the RayBatch structure.
#[derive(Debug, Copy, Clone)]
pub struct Ray {
pub orig: Point,
@ -18,384 +13,85 @@ pub struct Ray {
pub time: f32,
pub wavelength: f32,
pub max_t: f32,
pub flags: FlagType,
}
/// The hot (frequently accessed) parts of ray data.
/// A specifically local-space ray, for passing to functions when we've
/// already calculated the local-space version of a ray for the object
/// in question.
///
/// Also includes `dir_inv`, which is generally useful to have as well.
#[derive(Debug, Copy, Clone)]
struct RayHot {
orig_local: Point, // Local-space ray origin
dir_inv_local: Vector, // Local-space 1.0/ray direction
max_t: f32,
time: f32,
flags: FlagType,
pub struct LocalRay {
pub orig: Point,
pub dir: Vector,
pub dir_inv: Vector,
}
/// The cold (infrequently accessed) parts of ray data.
#[derive(Debug, Copy, Clone)]
struct RayCold {
orig: Point, // World-space ray origin
dir: Vector, // World-space ray direction
wavelength: f32,
}
/// A batch of rays, separated into hot and cold parts.
#[derive(Debug)]
pub struct RayBatch {
hot: Vec<RayHot>,
cold: Vec<RayCold>,
}
impl RayBatch {
/// Creates a new empty ray batch.
pub fn new() -> RayBatch {
RayBatch {
hot: Vec::new(),
cold: Vec::new(),
}
}
/// Creates a new empty ray batch, with pre-allocated capacity for
/// `n` rays.
pub fn with_capacity(n: usize) -> RayBatch {
RayBatch {
hot: Vec::with_capacity(n),
cold: Vec::with_capacity(n),
}
}
pub fn push(&mut self, ray: Ray, is_occlusion: bool) {
self.hot.push(RayHot {
orig_local: ray.orig, // Bogus, to place-hold.
dir_inv_local: ray.dir, // Bogus, to place-hold.
max_t: ray.max_t,
time: ray.time,
impl Ray {
pub fn new(
orig: Point,
dir: Vector,
time: f32,
wavelength: f32,
max_t: f32,
is_occlusion: bool,
) -> Self {
Self {
orig: orig,
dir: dir,
time: time,
wavelength: wavelength,
max_t: max_t,
flags: if is_occlusion { OCCLUSION_FLAG } else { 0 },
});
self.cold.push(RayCold {
orig: ray.orig,
dir: ray.dir,
wavelength: ray.wavelength,
});
}
}
pub fn swap(&mut self, a: usize, b: usize) {
self.hot.swap(a, b);
self.cold.swap(a, b);
/// Creates a local ray from the given transform.
pub fn to_local_xform(&self, xform: &XformFull) -> LocalRay {
let orig = self.orig.xform_inv(xform);
let dir = self.dir.xform_inv(xform);
LocalRay {
orig: orig,
dir: dir,
dir_inv: dir.recip(),
}
}
pub fn set_from_ray(&mut self, ray: &Ray, is_occlusion: bool, idx: usize) {
self.hot[idx].orig_local = ray.orig;
self.hot[idx].dir_inv_local = Vector {
co: ray.dir.co.recip(),
};
self.hot[idx].max_t = ray.max_t;
self.hot[idx].time = ray.time;
self.hot[idx].flags = if is_occlusion { OCCLUSION_FLAG } else { 0 };
self.cold[idx].orig = ray.orig;
self.cold[idx].dir = ray.dir;
self.cold[idx].wavelength = ray.wavelength;
/// Creates a local ray with no transform applied.
pub fn to_local(&self) -> LocalRay {
LocalRay {
orig: self.orig,
dir: self.dir,
dir_inv: self.dir.recip(),
}
}
pub fn truncate(&mut self, len: usize) {
self.hot.truncate(len);
self.cold.truncate(len);
}
/// Clear all rays, settings the size of the batch back to zero.
///
/// Capacity is maintained.
pub fn clear(&mut self) {
self.hot.clear();
self.cold.clear();
}
pub fn len(&self) -> usize {
self.hot.len()
}
/// Updates the accel data of the given ray (at index `idx`) with the
/// given world-to-local-space transform matrix.
///
/// This should be called when entering (and exiting) traversal of a
/// new transform space.
pub fn update_local(&mut self, idx: usize, xform: &Transform) {
self.hot[idx].orig_local = self.cold[idx].orig * *xform;
self.hot[idx].dir_inv_local = Vector {
co: (self.cold[idx].dir * *xform).co.recip(),
};
}
//==========================================================
// Data access
//---------------------------------------------------------
// Flags.
/// Returns whether this is an occlusion ray.
#[inline(always)]
pub fn orig(&self, idx: usize) -> Point {
self.cold[idx].orig
pub fn is_occlusion(&self) -> bool {
(self.flags & OCCLUSION_FLAG) != 0
}
/// Returns whether this ray has finished traversal.
#[inline(always)]
pub fn dir(&self, idx: usize) -> Vector {
self.cold[idx].dir
pub fn is_done(&self) -> bool {
(self.flags & DONE_FLAG) != 0
}
/// Marks this as an occlusion ray.
#[inline(always)]
pub fn orig_local(&self, idx: usize) -> Point {
self.hot[idx].orig_local
pub fn mark_occlusion(&mut self) {
self.flags |= OCCLUSION_FLAG
}
/// Marks this as having finished traversal.
#[inline(always)]
pub fn dir_inv_local(&self, idx: usize) -> Vector {
self.hot[idx].dir_inv_local
}
#[inline(always)]
pub fn time(&self, idx: usize) -> f32 {
self.hot[idx].time
}
#[inline(always)]
pub fn max_t(&self, idx: usize) -> f32 {
self.hot[idx].max_t
}
#[inline(always)]
pub fn set_max_t(&mut self, idx: usize, new_max_t: f32) {
self.hot[idx].max_t = new_max_t;
}
#[inline(always)]
pub fn wavelength(&self, idx: usize) -> f32 {
self.cold[idx].wavelength
}
/// Returns whether the given ray (at index `idx`) is an occlusion ray.
#[inline(always)]
pub fn is_occlusion(&self, idx: usize) -> bool {
(self.hot[idx].flags & OCCLUSION_FLAG) != 0
}
/// Returns whether the given ray (at index `idx`) has finished traversal.
#[inline(always)]
pub fn is_done(&self, idx: usize) -> bool {
(self.hot[idx].flags & DONE_FLAG) != 0
}
/// Marks the given ray (at index `idx`) as an occlusion ray.
#[inline(always)]
pub fn mark_occlusion(&mut self, idx: usize) {
self.hot[idx].flags |= OCCLUSION_FLAG
}
/// Marks the given ray (at index `idx`) as having finished traversal.
#[inline(always)]
pub fn mark_done(&mut self, idx: usize) {
self.hot[idx].flags |= DONE_FLAG
pub fn mark_done(&mut self) {
self.flags |= DONE_FLAG
}
}
/// A structure used for tracking traversal of a ray batch through a scene.
#[derive(Debug)]
pub struct RayStack {
lanes: Vec<Lane>,
tasks: Vec<RayTask>,
}
impl RayStack {
pub fn new() -> RayStack {
RayStack {
lanes: Vec::new(),
tasks: Vec::new(),
}
}
/// Returns whether the stack is empty of tasks or not.
pub fn is_empty(&self) -> bool {
self.tasks.is_empty()
}
/// Makes sure there are at least `count` lanes.
pub fn ensure_lane_count(&mut self, count: usize) {
while self.lanes.len() < count {
self.lanes.push(Lane {
idxs: Vec::new(),
end_len: 0,
})
}
}
pub fn ray_count_in_next_task(&self) -> usize {
let task = self.tasks.last().unwrap();
let end = self.lanes[task.lane].end_len;
end - task.start_idx
}
pub fn next_task_ray_idx(&self, i: usize) -> usize {
let task = self.tasks.last().unwrap();
let i = i + task.start_idx;
debug_assert!(i < self.lanes[task.lane].end_len);
self.lanes[task.lane].idxs[i] as usize
}
/// Clears the lanes and tasks of the RayStack.
///
/// Note: this is (importantly) different than calling clear individually
/// on the `lanes` and `tasks` members. Specifically, we don't want to
/// clear `lanes` itself, as that would also free all the memory of the
/// individual lanes. Instead, we want to iterate over the individual
/// lanes and clear them, but leave `lanes` itself untouched.
pub fn clear(&mut self) {
for lane in self.lanes.iter_mut() {
lane.idxs.clear();
lane.end_len = 0;
}
self.tasks.clear();
}
/// Pushes the given ray index onto the end of the specified lane.
pub fn push_ray_index(&mut self, ray_idx: usize, lane: usize) {
assert!(self.lanes.len() > lane);
self.lanes[lane].idxs.push(ray_idx as RayIndexType);
}
/// Pushes any excess indices on the given lane to a new task on the
/// task stack.
///
/// Returns whether a task was pushed or not. No task will be pushed
/// if there are no excess indices on the end of the lane.
pub fn push_lane_to_task(&mut self, lane_idx: usize) -> bool {
if self.lanes[lane_idx].end_len < self.lanes[lane_idx].idxs.len() {
self.tasks.push(RayTask {
lane: lane_idx,
start_idx: self.lanes[lane_idx].end_len,
});
self.lanes[lane_idx].end_len = self.lanes[lane_idx].idxs.len();
true
} else {
false
}
}
/// Takes the given list of lane indices, and pushes any excess indices on
/// the end of each into a new task, in the order provided.
pub fn push_lanes_to_tasks(&mut self, lane_idxs: &[usize]) {
for &l in lane_idxs {
self.push_lane_to_task(l);
}
}
pub fn duplicate_next_task(&mut self) {
let task = self.tasks.last().unwrap();
let l = task.lane;
let start = task.start_idx;
let end = self.lanes[l].end_len;
// Extend the indices vector
self.lanes[l].idxs.reserve(end - start);
let old_len = self.lanes[l].idxs.len();
let new_len = old_len + end - start;
unsafe {
self.lanes[l].idxs.set_len(new_len);
}
// Copy elements
copy_in_place::copy_in_place(&mut self.lanes[l].idxs, start..end, end);
// Push the new task onto the stack
self.tasks.push(RayTask {
lane: l,
start_idx: end,
});
self.lanes[l].end_len = self.lanes[l].idxs.len();
}
// Pops the next task off the stack.
pub fn pop_task(&mut self) {
let task = self.tasks.pop().unwrap();
self.lanes[task.lane].end_len = task.start_idx;
self.lanes[task.lane].idxs.truncate(task.start_idx);
}
// Executes a task without popping it from the task stack.
pub fn do_next_task<F>(&mut self, mut handle_ray: F)
where
F: FnMut(usize),
{
let task = self.tasks.last().unwrap();
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
// Execute task.
for i in task_range.0..task_range.1 {
let ray_idx = self.lanes[task.lane].idxs[i];
handle_ray(ray_idx as usize);
}
}
/// Pops the next task off the stack, and executes the provided closure for
/// each ray index in the task.
#[inline(always)]
pub fn pop_do_next_task<F>(&mut self, handle_ray: F)
where
F: FnMut(usize),
{
self.do_next_task(handle_ray);
self.pop_task();
}
/// Pops the next task off the stack, executes the provided closure for
/// each ray index in the task, and pushes the ray indices back onto the
/// indicated lanes.
pub fn pop_do_next_task_and_push_rays<F>(&mut self, output_lane_count: usize, mut handle_ray: F)
where
F: FnMut(usize) -> BVec4A,
{
// Pop the task and do necessary bookkeeping.
let task = self.tasks.pop().unwrap();
let task_range = (task.start_idx, self.lanes[task.lane].end_len);
self.lanes[task.lane].end_len = task.start_idx;
// SAFETY: this is probably evil, and depends on behavior of Vec that
// is not actually promised. We're essentially truncating the lane
// to the start of our task range but will continue to access its
// elements beyond that range via `get_unchecked()` below. Because the
// memory is not freed nor altered, this is safe. However, again, the
// Vec apis don't promise this behavior. So:
//
// TODO: build a slightly different lane abstraction to get this same
// efficiency without depending on implicit Vec behavior.
unsafe {
self.lanes[task.lane].idxs.set_len(task.start_idx);
}
// Execute task.
for i in task_range.0..task_range.1 {
let ray_idx = *unsafe { self.lanes[task.lane].idxs.get_unchecked(i) };
let push_mask = handle_ray(ray_idx as usize).bitmask();
for l in 0..output_lane_count {
if (push_mask & (1 << l)) != 0 {
self.lanes[l as usize].idxs.push(ray_idx);
}
}
}
}
}
/// A lane within a RayStack.
#[derive(Debug)]
struct Lane {
idxs: Vec<RayIndexType>,
end_len: usize,
}
/// A task within a RayStack.
//
// Specifies the lane that the relevant ray pointers are in, and the
// starting index within that lane. The relevant pointers are always
// `&[start_idx..]` within the given lane.
#[derive(Debug)]
struct RayTask {
lane: usize,
start_idx: usize,
}

File diff suppressed because it is too large


@ -2,7 +2,7 @@
use std::{f32::consts::FRAC_PI_4 as QPI_32, f32::consts::PI as PI_32, f64::consts::PI as PI_64};
use crate::math::{cross, dot, Point, Vector};
use crate::math::{cross_fast, dot_fast, Point, Vector};
/// Maps the unit square to the unit circle.
/// NOTE: x and y should be distributed within [-1, 1],
@ -90,7 +90,7 @@ pub fn uniform_sample_triangle(va: Vector, vb: Vector, vc: Vector, i: f32, j: f3
/// Calculates the surface area of a triangle.
pub fn triangle_surface_area(p0: Point, p1: Point, p2: Point) -> f32 {
0.5 * cross(p1 - p0, p2 - p0).length()
0.5 * cross_fast(p1 - p0, p2 - p0).length()
}
/// Calculates the projected solid angle of a spherical triangle.
@ -98,9 +98,9 @@ pub fn triangle_surface_area(p0: Point, p1: Point, p2: Point) -> f32 {
/// A, B, and C are the points of the triangle on a unit sphere.
pub fn spherical_triangle_solid_angle(va: Vector, vb: Vector, vc: Vector) -> f32 {
// Calculate sines and cosines of the spherical triangle's edge lengths
let cos_a: f64 = dot(vb, vc).max(-1.0).min(1.0) as f64;
let cos_b: f64 = dot(vc, va).max(-1.0).min(1.0) as f64;
let cos_c: f64 = dot(va, vb).max(-1.0).min(1.0) as f64;
let cos_a: f64 = dot_fast(vb, vc).max(-1.0).min(1.0) as f64;
let cos_b: f64 = dot_fast(vc, va).max(-1.0).min(1.0) as f64;
let cos_c: f64 = dot_fast(va, vb).max(-1.0).min(1.0) as f64;
let sin_a: f64 = (1.0 - (cos_a * cos_a)).sqrt();
let sin_b: f64 = (1.0 - (cos_b * cos_b)).sqrt();
let sin_c: f64 = (1.0 - (cos_c * cos_c)).sqrt();
@ -141,9 +141,9 @@ pub fn uniform_sample_spherical_triangle(
j: f32,
) -> Vector {
// Calculate sines and cosines of the spherical triangle's edge lengths
let cos_a: f64 = dot(vb, vc).max(-1.0).min(1.0) as f64;
let cos_b: f64 = dot(vc, va).max(-1.0).min(1.0) as f64;
let cos_c: f64 = dot(va, vb).max(-1.0).min(1.0) as f64;
let cos_a: f64 = dot_fast(vb, vc).max(-1.0).min(1.0) as f64;
let cos_b: f64 = dot_fast(vc, va).max(-1.0).min(1.0) as f64;
let cos_c: f64 = dot_fast(va, vb).max(-1.0).min(1.0) as f64;
let sin_a: f64 = (1.0 - (cos_a * cos_a)).sqrt();
let sin_b: f64 = (1.0 - (cos_b * cos_b)).sqrt();
let sin_c: f64 = (1.0 - (cos_c * cos_c)).sqrt();
@ -191,10 +191,10 @@ pub fn uniform_sample_spherical_triangle(
let q_bottom = ((v * s) + (u * t)) * sin_va;
let q = q_top / q_bottom;
let vc_2 =
(va * q as f32) + ((vc - (va * dot(vc, va))).normalized() * (1.0 - (q * q)).sqrt() as f32);
let vc_2 = (va * q as f32)
+ ((vc - (va * dot_fast(vc, va))).normalized() * (1.0 - (q * q)).sqrt() as f32);
let z = 1.0 - (j * (1.0 - dot(vc_2, vb)));
let z = 1.0 - (j * (1.0 - dot_fast(vc_2, vb)));
(vb * z) + ((vc_2 - (vb * dot(vc_2, vb))).normalized() * (1.0 - (z * z)).sqrt())
(vb * z) + ((vc_2 - (vb * dot_fast(vc_2, vb))).normalized() * (1.0 - (z * z)).sqrt())
}


@ -10,10 +10,9 @@ use crate::{
color::SpectralSample,
lerp::lerp_slice,
light::SurfaceLight,
math::{Normal, Point, Transform},
math::{Normal, Point, Xform, XformFull},
shading::SurfaceShader,
surface::{Surface, SurfaceIntersection},
transform_stack::TransformStack,
};
#[derive(Copy, Clone, Debug)]
@ -21,7 +20,7 @@ pub struct Assembly<'a> {
// Instance list
pub instances: &'a [Instance],
pub light_instances: &'a [Instance],
pub xforms: &'a [Transform],
pub xforms: &'a [Xform],
// Surface shader list
pub surface_shaders: &'a [&'a dyn SurfaceShader],
@ -45,11 +44,11 @@ impl<'a> Assembly<'a> {
// Returns (light_color, (sample_point, normal, point_err), pdf, selection_pdf)
pub fn sample_lights(
&self,
xform_stack: &mut TransformStack,
n: f32,
uvw: (f32, f32, f32),
wavelength: f32,
time: f32,
space: &XformFull,
intr: &SurfaceIntersection,
) -> Option<(SpectralSample, (Point, Normal, f32), f32, f32)> {
if let SurfaceIntersection::Hit {
@ -57,46 +56,44 @@ impl<'a> Assembly<'a> {
closure,
} = *intr
{
let sel_xform = if !xform_stack.top().is_empty() {
lerp_slice(xform_stack.top(), time)
} else {
Transform::new()
};
if let Some((light_i, sel_pdf, whittled_n)) = self.light_accel.select(
idata.incoming * sel_xform,
idata.pos * sel_xform,
idata.nor * sel_xform,
idata.nor_g * sel_xform,
idata.incoming.xform_inv(space),
idata.pos.xform_inv(space),
idata.nor.xform_inv_fast(space),
idata.nor_g.xform_inv_fast(space),
&closure,
time,
n,
) {
let inst = self.light_instances[light_i];
// Handle transforms.
let local_space = if let Some((a, b)) = inst.transform_indices {
if let Some(new_space) = lerp_slice(&self.xforms[a..b], time)
.compose(&space.fwd)
.to_full()
{
new_space
} else {
// Invalid transform. Give up.
return None;
}
} else {
*space
};
match inst.instance_type {
InstanceType::Object => {
match self.objects[inst.data_index] {
Object::SurfaceLight(light) => {
// Get the world-to-object space transform of the light
let xform = if let Some((a, b)) = inst.transform_indices {
let pxforms = xform_stack.top();
let xform = lerp_slice(&self.xforms[a..b], time);
if !pxforms.is_empty() {
lerp_slice(pxforms, time) * xform
} else {
xform
}
} else {
let pxforms = xform_stack.top();
if !pxforms.is_empty() {
lerp_slice(pxforms, time)
} else {
Transform::new()
}
};
// Sample the light
let (color, sample_geo, pdf) = light.sample_from_point(
&xform, idata.pos, uvw.0, uvw.1, wavelength, time,
&local_space,
idata.pos,
uvw.0,
uvw.1,
wavelength,
time,
);
return Some((color, sample_geo, pdf, sel_pdf));
}
@ -106,27 +103,16 @@ impl<'a> Assembly<'a> {
}
InstanceType::Assembly => {
// Push the world-to-object space transforms of the assembly onto
// the transform stack.
if let Some((a, b)) = inst.transform_indices {
xform_stack.push(&self.xforms[a..b]);
}
// Sample sub-assembly lights
let sample = self.assemblies[inst.data_index].sample_lights(
xform_stack,
whittled_n,
uvw,
wavelength,
time,
&local_space,
intr,
);
// Pop the assembly's transforms off the transform stack.
if inst.transform_indices.is_some() {
xform_stack.pop();
}
// Return sample
return sample.map(|(ss, v, pdf, spdf)| (ss, v, pdf, spdf * sel_pdf));
}
@ -152,11 +138,11 @@ pub struct AssemblyBuilder<'a> {
// Instance list
instances: Vec<Instance>,
xforms: Vec<Transform>,
xforms: Vec<Xform>,
// Shader list
surface_shaders: Vec<&'a dyn SurfaceShader>,
surface_shader_map: HashMap<String, usize>, // map Name -> Index
pub surface_shader_map: HashMap<String, usize>, // map Name -> Index
// Object list
objects: Vec<Object<'a>>,
@ -220,12 +206,7 @@ impl<'a> AssemblyBuilder<'a> {
self.assemblies.push(asmb);
}
pub fn add_instance(
&mut self,
name: &str,
surface_shader_name: Option<&str>,
xforms: Option<&[Transform]>,
) {
pub fn add_instance(&mut self, name: &str, xforms: Option<&[Xform]>) {
// Make sure name exists
if !self.name_exists(name) {
panic!("Attempted to add instance with a name that doesn't exist.");
@ -247,12 +228,6 @@ impl<'a> AssemblyBuilder<'a> {
Instance {
instance_type: InstanceType::Object,
data_index: self.object_map[name],
surface_shader_index: surface_shader_name.map(|name| {
*self
.surface_shader_map
.get(name)
.unwrap_or_else(|| panic!("Unknown surface shader '{}'.", name))
}),
id: self.instances.len(),
transform_indices: xforms
.map(|xf| (self.xforms.len(), self.xforms.len() + xf.len())),
@ -261,12 +236,6 @@ impl<'a> AssemblyBuilder<'a> {
Instance {
instance_type: InstanceType::Assembly,
data_index: self.assembly_map[name],
surface_shader_index: surface_shader_name.map(|name| {
*self
.surface_shader_map
.get(name)
.unwrap_or_else(|| panic!("Unknown surface shader '{}'.", name))
}),
id: self.instances.len(),
transform_indices: xforms
.map(|xf| (self.xforms.len(), self.xforms.len() + xf.len())),
@ -405,7 +374,6 @@ pub enum Object<'a> {
pub struct Instance {
pub instance_type: InstanceType,
pub data_index: usize,
pub surface_shader_index: Option<usize>,
pub id: usize,
pub transform_indices: Option<(usize, usize)>,
}


@ -6,9 +6,9 @@ use crate::{
algorithm::weighted_choice,
camera::Camera,
color::SpectralSample,
math::{Normal, Point, Vector},
light::WorldLightSource,
math::{Normal, Point, Vector, XformFull},
surface::SurfaceIntersection,
transform_stack::TransformStack,
};
pub use self::{
@ -19,19 +19,19 @@ pub use self::{
#[derive(Debug)]
pub struct Scene<'a> {
pub name: Option<String>,
pub camera: Camera<'a>,
pub world: World<'a>,
pub camera: Camera,
pub world: World,
pub root: Assembly<'a>,
}
impl<'a> Scene<'a> {
pub fn sample_lights(
&self,
xform_stack: &mut TransformStack,
n: f32,
uvw: (f32, f32, f32),
wavelength: f32,
time: f32,
space: &XformFull,
intr: &SurfaceIntersection,
) -> SceneLightSample {
// TODO: this just selects between world lights and local lights
@ -68,7 +68,7 @@ impl<'a> Scene<'a> {
if n < wl_prob {
// World lights
let n = n / wl_prob;
let (i, p) = weighted_choice(self.world.lights, n, |l| l.approximate_energy());
let (i, p) = weighted_choice(&self.world.lights, n, |l| l.approximate_energy());
let (ss, sv, pdf) =
self.world.lights[i].sample_from_point(uvw.0, uvw.1, wavelength, time);
return SceneLightSample::Distant {
@ -81,9 +81,9 @@ impl<'a> Scene<'a> {
// Local lights
let n = (n - wl_prob) / (1.0 - wl_prob);
if let Some((ss, sgeo, pdf, spdf)) =
self.root
.sample_lights(xform_stack, n, uvw, wavelength, time, intr)
if let Some((ss, sgeo, pdf, spdf)) = self
.root
.sample_lights(n, uvw, wavelength, time, space, intr)
{
return SceneLightSample::Surface {
color: ss,


@ -1,7 +1,7 @@
use crate::{color::Color, light::WorldLightSource};
use crate::{color::Color, light::DistantDiskLight};
#[derive(Debug)]
pub struct World<'a> {
pub struct World {
pub background_color: Color,
pub lights: &'a [&'a dyn WorldLightSource],
pub lights: Vec<DistantDiskLight>,
}

src/scramble.rs (new file, 101 lines)

@ -0,0 +1,101 @@
#![allow(dead_code)]
/// Performs a base-2 Owen scramble on an integer.
pub fn owen2(n: u32, seed: u32) -> u32 {
// Multiply by a large random prime and xor by a random number.
// This is to ensure that the seed doesn't behave poorly with
// e.g. incrementing parameters, and also that zero doesn't
// map to zero in the hash function.
let seed = seed.wrapping_mul(0x68318d2f) ^ 0x5adbc2a7;
let mut result = n;
for i in 0..32 {
result ^= hash((n & (!1 << i)) ^ seed) & (1 << i);
}
result
}
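Worth noting (my sketch, not part of the commit): because bit i is flipped using a hash of only the bits above it, inputs that share their high bits keep those high bits in common after scrambling, which is the nesting property Owen scrambling relies on. For example:
#[test]
fn owen2_keeps_shared_high_bits() {
    // Same top 4 bits in...
    let (a, b) = (0xb000_0000u32, 0xb7c3_1a52u32);
    // ...same top 4 bits out, for any seed.
    assert_eq!(owen2(a, 42) >> 28, owen2(b, 42) >> 28);
}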
#[inline(always)]
pub fn owen4_fast(mut n: u32, seed: u32) -> u32 {
let scramble = hash(seed);
n = n.reverse_bits();
n ^= n.wrapping_mul(0x3d20adea);
n ^= (n >> 1) & (n << 1) & 0x55555555;
n = n.wrapping_add(scramble);
n = n.wrapping_mul((scramble >> 16) | 1);
n ^= (n >> 1) & (n << 1) & 0x55555555;
n ^= n.wrapping_mul(0x05526c56);
n ^= n.wrapping_mul(0x53a22864);
n.reverse_bits()
}
pub fn owen4(n: u32, seed: u32) -> u32 {
// Bit-packed permutation table.
const PERMUTATION_TABLE: [u8; 24] = [
0 | (1 << 2) | (2 << 4) | (3 << 6), // [0, 1, 2, 3],
0 | (1 << 2) | (3 << 4) | (2 << 6), // [0, 1, 3, 2],
0 | (2 << 2) | (1 << 4) | (3 << 6), // [0, 2, 1, 3],
0 | (2 << 2) | (3 << 4) | (1 << 6), // [0, 2, 3, 1],
0 | (3 << 2) | (1 << 4) | (2 << 6), // [0, 3, 1, 2],
0 | (3 << 2) | (2 << 4) | (1 << 6), // [0, 3, 2, 1],
1 | (0 << 2) | (2 << 4) | (3 << 6), // [1, 0, 2, 3],
1 | (0 << 2) | (3 << 4) | (2 << 6), // [1, 0, 3, 2],
1 | (2 << 2) | (0 << 4) | (3 << 6), // [1, 2, 0, 3],
1 | (2 << 2) | (3 << 4) | (0 << 6), // [1, 2, 3, 0],
1 | (3 << 2) | (0 << 4) | (2 << 6), // [1, 3, 0, 2],
1 | (3 << 2) | (2 << 4) | (0 << 6), // [1, 3, 2, 0],
2 | (0 << 2) | (1 << 4) | (3 << 6), // [2, 0, 1, 3],
2 | (0 << 2) | (3 << 4) | (1 << 6), // [2, 0, 3, 1],
2 | (1 << 2) | (0 << 4) | (3 << 6), // [2, 1, 0, 3],
2 | (1 << 2) | (3 << 4) | (0 << 6), // [2, 1, 3, 0],
2 | (3 << 2) | (0 << 4) | (1 << 6), // [2, 3, 0, 1],
2 | (3 << 2) | (1 << 4) | (0 << 6), // [2, 3, 1, 0],
3 | (0 << 2) | (1 << 4) | (2 << 6), // [3, 0, 1, 2],
3 | (0 << 2) | (2 << 4) | (1 << 6), // [3, 0, 2, 1],
3 | (1 << 2) | (0 << 4) | (2 << 6), // [3, 1, 0, 2],
3 | (1 << 2) | (2 << 4) | (0 << 6), // [3, 1, 2, 0],
3 | (2 << 2) | (0 << 4) | (1 << 6), // [3, 2, 0, 1],
3 | (2 << 2) | (1 << 4) | (0 << 6), // [3, 2, 1, 0],
];
// Multiply by a large random prime and xor by a random number.
// This is to ensure that the seed doesn't behave poorly with
// e.g. incrementing parameters, and also that zero doesn't
// map to zero in the hash function.
let seed = seed.wrapping_mul(0xe8559dcb) ^ 0x372fcdb9;
let mut result = 0;
for i in 0..16 {
let mask = !0b11 << (i * 2);
let perm_entry = PERMUTATION_TABLE[
// The xor with `i` is to ensure runs of zeros in `n` still
// result in different shuffles on each iteration. `i` is
// shifted to avoid interacting poorly with an incrementing
// `n`.
(hash((n & mask) ^ seed ^ (i << 16)) % 24) as usize
];
let perm_cell_idx = ((n >> (i * 2)) & 0b11) as usize;
result |= (((perm_entry >> (perm_cell_idx * 2)) & 0b11) as u32) << (i * 2);
}
result
}
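The base-4 variant has the analogous property on base-4 digits (bit pairs), since each pair's permutation is selected from a hash of the pairs above it. An illustrative check (not in the diff):
#[test]
fn owen4_keeps_shared_high_digits() {
    // Same top two base-4 digits (top 4 bits) in, same out.
    let (a, b) = (0xb000_0000u32, 0xb7c3_1a52u32);
    assert_eq!(owen4(a, 42) >> 28, owen4(b, 42) >> 28);
}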
//-------------------------------------------------------------
/// Fast bit-mixing hash for use in the functions above.
#[inline(always)]
pub fn hash(mut n: u32) -> u32 {
// From https://github.com/skeeto/hash-prospector
n ^= n >> 16;
n = n.wrapping_mul(0x21f0aaad);
n ^= n >> 15;
n = n.wrapping_mul(0xd35a2d97);
n ^= n >> 15;
n
}


@ -2,12 +2,10 @@
use std::f32::consts::PI as PI_32;
use glam::Vec4;
use crate::{
color::{Color, SpectralSample},
lerp::{lerp, Lerp},
math::{dot, zup_to_vec, Normal, Vector},
math::{clamp, dot_fast, zup_to_vec, Float4, Normal, Vector},
sampling::cosine_sample_hemisphere,
};
@ -289,7 +287,7 @@ mod lambert_closure {
uv: (f32, f32),
wavelength: f32,
) -> (Vector, SpectralSample, f32) {
let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 {
let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 {
(nor.normalized().into_vector(), nor_g.into_vector())
} else {
(-nor.normalized().into_vector(), -nor_g.into_vector())
@ -302,7 +300,7 @@ mod lambert_closure {
let out = zup_to_vec(dir, nn);
// Make sure it's not on the wrong side of the geometric normal.
if dot(flipped_nor_g, out) >= 0.0 {
if dot_fast(flipped_nor_g, out) >= 0.0 {
(out, color.to_spectral_sample(wavelength) * pdf, pdf)
} else {
(out, SpectralSample::new(0.0), 0.0)
@ -317,14 +315,14 @@ mod lambert_closure {
nor_g: Normal,
wavelength: f32,
) -> (SpectralSample, f32) {
let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 {
let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 {
(nor.normalized().into_vector(), nor_g.into_vector())
} else {
(-nor.normalized().into_vector(), -nor_g.into_vector())
};
if dot(flipped_nor_g, out) >= 0.0 {
let fac = dot(nn, out.normalized()).max(0.0) * INV_PI;
if dot_fast(flipped_nor_g, out) >= 0.0 {
let fac = dot_fast(nn, out.normalized()).max(0.0) * INV_PI;
(color.to_spectral_sample(wavelength) * fac, fac)
} else {
(SpectralSample::new(0.0), 0.0)
@ -383,14 +381,14 @@ mod lambert_closure {
let cos_theta_max = (1.0 - sin_theta_max2).sqrt();
let v = to_light_center.normalized();
let nn = if dot(nor_g.into_vector(), inc) <= 0.0 {
let nn = if dot_fast(nor_g.into_vector(), inc) <= 0.0 {
nor.normalized()
} else {
-nor.normalized()
}
.into_vector();
let cos_nv = dot(nn, v).max(-1.0).min(1.0);
let cos_nv = dot_fast(nn, v).max(-1.0).min(1.0);
// Alt implementation from the SPI paper.
// Worse sampling, but here for reference.
@ -428,7 +426,7 @@ mod ggx_closure {
wavelength: f32,
) -> (Vector, SpectralSample, f32) {
// Get normalized surface normal
let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 {
let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 {
(nor.normalized().into_vector(), nor_g.into_vector())
} else {
(-nor.normalized().into_vector(), -nor_g.into_vector())
@ -442,10 +440,10 @@ mod ggx_closure {
let mut half_dir = Vector::new(angle.cos() * theta_sin, angle.sin() * theta_sin, theta_cos);
half_dir = zup_to_vec(half_dir, nn).normalized();
let out = inc - (half_dir * 2.0 * dot(inc, half_dir));
let out = inc - (half_dir * 2.0 * dot_fast(inc, half_dir));
// Make sure it's not on the wrong side of the geometric normal.
if dot(flipped_nor_g, out) >= 0.0 {
if dot_fast(flipped_nor_g, out) >= 0.0 {
let (filter, pdf) = evaluate(col, roughness, fresnel, inc, out, nor, nor_g, wavelength);
(out, filter, pdf)
} else {
@ -469,23 +467,23 @@ mod ggx_closure {
let hh = (aa + bb).normalized(); // Half-way between aa and bb
// Surface normal
let (nn, flipped_nor_g) = if dot(nor_g.into_vector(), inc) <= 0.0 {
let (nn, flipped_nor_g) = if dot_fast(nor_g.into_vector(), inc) <= 0.0 {
(nor.normalized().into_vector(), nor_g.into_vector())
} else {
(-nor.normalized().into_vector(), -nor_g.into_vector())
};
// Make sure everything's on the correct side of the surface
if dot(nn, aa) < 0.0 || dot(nn, bb) < 0.0 || dot(flipped_nor_g, bb) < 0.0 {
if dot_fast(nn, aa) < 0.0 || dot_fast(nn, bb) < 0.0 || dot_fast(flipped_nor_g, bb) < 0.0 {
return (SpectralSample::new(0.0), 0.0);
}
// Calculate needed dot products
let na = dot(nn, aa).clamp(-1.0, 1.0);
let nb = dot(nn, bb).clamp(-1.0, 1.0);
let ha = dot(hh, aa).clamp(-1.0, 1.0);
let hb = dot(hh, bb).clamp(-1.0, 1.0);
let nh = dot(nn, hh).clamp(-1.0, 1.0);
let na = clamp(dot_fast(nn, aa), -1.0, 1.0);
let nb = clamp(dot_fast(nn, bb), -1.0, 1.0);
let ha = clamp(dot_fast(hh, aa), -1.0, 1.0);
let hb = clamp(dot_fast(hh, bb), -1.0, 1.0);
let nh = clamp(dot_fast(nn, hh), -1.0, 1.0);
// Calculate F - Fresnel
let col_f = {
@ -512,7 +510,7 @@ mod ggx_closure {
rev_fresnel,
);
SpectralSample::from_parts(Vec4::new(c0, c1, c2, c3), wavelength)
SpectralSample::from_parts(Float4::new(c0, c1, c2, c3), wavelength)
};
// Calculate everything else
@ -556,7 +554,7 @@ mod ggx_closure {
assert!(cos_theta_max <= 1.0);
// Surface normal
let nn = if dot(nor.into_vector(), inc) < 0.0 {
let nn = if dot_fast(nor.into_vector(), inc) < 0.0 {
nor.normalized()
} else {
-nor.normalized() // If back-facing, flip normal
@ -574,9 +572,9 @@ mod ggx_closure {
// let vv = Halton::sample(1, i);
// let mut samp = uniform_sample_cone(uu, vv, cos_theta_max);
// samp = zup_to_vec(samp, bb).normalized();
// if dot(nn, samp) > 0.0 {
// if dot_fast(nn, samp) > 0.0 {
// let hh = (aa+samp).normalized();
// fac += ggx_d(dot(nn, hh), roughness);
// fac += ggx_d(dot_fast(nn, hh), roughness);
// }
//}
//fac /= N * N;
@ -584,7 +582,7 @@ mod ggx_closure {
// Approximate method
let theta = cos_theta_max.acos();
let hh = (aa + bb).normalized();
let nh = dot(nn, hh).clamp(-1.0, 1.0);
let nh = clamp(dot_fast(nn, hh), -1.0, 1.0);
let fac = ggx_d(nh, (1.0f32).min(roughness.sqrt() + (2.0 * theta / PI_32)));
fac * (1.0f32).min(1.0 - cos_theta_max) * INV_PI

src/space_fill.rs (new file, 216 lines)

@ -0,0 +1,216 @@
//! Space-filling curves and other related functionality.
#![allow(dead_code)]
pub mod hilbert {
const N: u32 = 1 << 16;
/// Convert (x,y) to hilbert curve index.
///
/// x: The x coordinate. Must be no greater than 2^16-1.
/// y: The y coordinate. Must be no greater than 2^16-1.
/// n: Basically the "resolution" of the curve, on one side.
///
/// Returns the hilbert curve index corresponding to the (x,y) coordinates given.
pub fn encode(x: u32, y: u32, n: u32) -> u32 {
assert!(x < N);
assert!(y < N);
let (mut x, mut y) = (x, y);
let mut d = 0;
let mut s = n >> 1;
while s > 0 {
let rx = if (x & s) > 0 { 1 } else { 0 };
let ry = if (y & s) > 0 { 1 } else { 0 };
d += s * s * ((3 * rx) ^ ry);
(x, y) = hilbert_rotate(s, rx, ry, x, y);
s >>= 1
}
d
}
/// Convert hilbert curve index to (x,y).
///
/// d: The hilbert curve index.
/// n: Basically the "resolution" of the curve, on one side.
///
/// Returns the (x, y) coords at the given index.
pub fn decode(d: u32, n: u32) -> (u32, u32) {
let (mut x, mut y) = (0, 0);
let mut s = 1;
let mut t = d;
while s < n {
let rx = 1 & (t >> 1);
let ry = 1 & (t ^ rx);
(x, y) = hilbert_rotate(s, rx, ry, x, y);
x += s * rx;
y += s * ry;
t >>= 2;
s <<= 1;
}
(x, y)
}
//------------
// Utilities.
fn hilbert_rotate(n: u32, rx: u32, ry: u32, x: u32, y: u32) -> (u32, u32) {
if ry == 0 {
if rx == 1 {
((n - 1).wrapping_sub(y), (n - 1).wrapping_sub(x))
} else {
(y, x)
}
} else {
(x, y)
}
}
}
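As a concrete illustration (worked out from the code above, not part of this file): the smallest curve, n = 2, visits its four cells in a "U", which is what keeps consecutive indices spatially adjacent. A check that could be dropped into the tests module at the bottom of this file:
#[test]
fn hilbert_2x2_order() {
    assert_eq!(hilbert::decode(0, 2), (0, 0));
    assert_eq!(hilbert::decode(1, 2), (0, 1));
    assert_eq!(hilbert::decode(2, 2), (1, 1));
    assert_eq!(hilbert::decode(3, 2), (1, 0));
    assert_eq!(hilbert::encode(1, 0, 2), 3); // and encode() inverts it
}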
pub mod morton {
const N: u32 = 1 << 16;
/// Convert (x,y) to morton curve index.
///
/// x: The x coordinate. Should be no greater than 2^16-1.
/// y: The y coordinate. Should be no greater than 2^16-1.
///
/// Returns the morton curve index corresponding to the (x,y) coordinates given.
pub fn encode(x: u32, y: u32) -> u32 {
debug_assert!(x < N);
debug_assert!(y < N);
part_1_by_1(x) | (part_1_by_1(y) << 1)
}
/// Convert morton curve index to (x,y).
///
/// i: The morton curve index.
///
/// Returns the (x, y) coords at the given index.
pub fn decode(i: u32) -> (u32, u32) {
(compact_1_by_1(i), compact_1_by_1(i >> 1))
}
//------------
// Utilities.
#[inline(always)]
fn part_1_by_1(mut x: u32) -> u32 {
x &= 0x0000ffff;
x = (x ^ (x << 8)) & 0x00ff00ff;
x = (x ^ (x << 4)) & 0x0f0f0f0f;
x = (x ^ (x << 2)) & 0x33333333;
x = (x ^ (x << 1)) & 0x55555555;
x
}
#[inline(always)]
fn compact_1_by_1(mut x: u32) -> u32 {
x &= 0x55555555;
x = (x ^ (x >> 1)) & 0x33333333;
x = (x ^ (x >> 2)) & 0x0f0f0f0f;
x = (x ^ (x >> 4)) & 0x00ff00ff;
x = (x ^ (x >> 8)) & 0x0000ffff;
x
}
}
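For comparison, Morton order simply interleaves the bits of x and y, with x in the even bit positions and y in the odd ones. An illustrative check (mine, not in the diff):
#[test]
fn morton_interleaves_bits() {
    // x = 3 = 0b011, y = 5 = 0b101 -> interleaved 0b10_01_11 = 39
    assert_eq!(morton::encode(3, 5), 0b10_01_11);
    assert_eq!(morton::decode(0b10_01_11), (3, 5));
}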
/// Yields coordinates in outward spiral, but incorporating a Hilbert
/// curve at the smaller scales.
pub mod hilbert_spiral {
/// Convert from hilbert-spiral index to (x,y).
///
/// Note: this returns both negative and positive coordinates.
/// It starts at 0,0 and spirals outwards.
///
/// i: The hilbert-spiral index.
/// hilbert_size: the size of the hilbert blocks on a side. Will be
/// rounded down to the nearest power of two.
///
/// Returns the (x, y) coords at the given index.
pub fn decode(i: u32, hilbert_size: u32) -> (i32, i32) {
assert!(hilbert_size > 0);
let hilbert_size = 1 << (31 - u32::leading_zeros(hilbert_size));
let hilbert_cells = hilbert_size * hilbert_size;
let hilbert_i = i % hilbert_cells;
let spiral_i = i / hilbert_cells;
let (mut sx, mut sy, section) = decode_spiral(spiral_i);
sx = (sx * hilbert_size as i32) - (hilbert_size / 2) as i32;
sy = (sy * hilbert_size as i32) - (hilbert_size / 2) as i32;
let (hx, hy) = {
let (hx, hy) = super::hilbert::decode(hilbert_i, hilbert_size);
let a = hilbert_size - 1;
match section {
0 => (hy, hx),
1 => (a - hx, a - hy),
2 => (a - hy, a - hx),
3 => (hx, hy),
_ => unreachable!(),
}
};
(sx + hx as i32, sy + hy as i32)
}
pub fn decode_spiral(i: u32) -> (i32, i32, u32) {
if i == 0 {
return (0, 0, 3);
}
// 0 = first ring outside of center, 1 = second, and so on.
let ring = (((i as f64).sqrt() - 1.0) / 2.0) as u32;
// The size of the ring along one side.
let size = 1 + ((ring + 1) * 2);
let n = i - ((size - 2) * (size - 2)); // The zero-indexed cell of the ring.
let arm = n / (size - 1); // The arm of the ring.
let arm_n = n % (size - 1); // The index within the arm of the ring.
// The two coordinates. They just need to be flipped around depending on the arm.
let radius = ring as i32 + 1;
let d = -(size as i32 / 2) + 1 + arm_n as i32;
match arm {
0 => (radius, d, 0),
1 => (-d, radius, if arm_n == (size - 2) { 2 } else { 1 }),
2 => (-radius, -d, 2),
3 => (d, -radius, 3),
_ => unreachable!(),
}
}
}
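For orientation (hand-derived from the code above, so treat it as illustrative): decode_spiral walks each ring counter-clockwise starting on the +x side, so indices 1..=8 cover the first ring around the origin as (1,0), (1,1), (0,1), (-1,1), (-1,0), (-1,-1), (0,-1), (1,-1); decode() then expands every spiral cell into a hilbert_size x hilbert_size Hilbert block, mirroring or transposing it according to `section` so neighboring blocks chain end-to-end. E.g.:
#[test]
fn decode_spiral_first_ring() {
    assert_eq!(hilbert_spiral::decode_spiral(0), (0, 0, 3));
    assert_eq!(hilbert_spiral::decode_spiral(1), (1, 0, 0));
    assert_eq!(hilbert_spiral::decode_spiral(5), (-1, 0, 2));
}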
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn hilbert_reversible() {
let i = 0x4c8587a2;
let (x, y) = hilbert::decode(i, 1 << 16);
let i2 = hilbert::encode(x, y, 1 << 16);
assert_eq!(i, i2);
}
#[test]
fn morton_reversible() {
let i = 0x4c8587a2;
let (x, y) = morton::decode(i);
let i2 = morton::encode(x, y);
assert_eq!(i, i2);
}
}


@ -9,7 +9,7 @@ use crate::{
bbox::BBox,
boundable::Boundable,
lerp::lerp_slice,
math::{cross, dot, Normal, Point, Transform},
math::{cross, dot, Normal, Point, Xform},
ray::{RayBatch, RayStack},
shading::SurfaceClosure,
};
@@ -150,13 +150,13 @@ impl<'a> MicropolyBatch<'a> {
rays: &mut RayBatch,
ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection],
space: &[Transform],
space: &[Xform],
) {
// Precalculate transform for non-motion blur cases
let static_mat_space = if space.len() == 1 {
lerp_slice(space, 0.0).inverse()
space[0]
} else {
Transform::new()
Xform::identity()
};
self.accel
@@ -182,11 +182,11 @@ impl<'a> MicropolyBatch<'a> {
);
if !space.is_empty() {
(*tri_cache[i].as_mut_ptr()).0 =
(*tri_cache[i].as_mut_ptr()).0 * static_mat_space;
(*tri_cache[i].as_mut_ptr()).0.xform(&static_mat_space);
(*tri_cache[i].as_mut_ptr()).1 =
(*tri_cache[i].as_mut_ptr()).1 * static_mat_space;
(*tri_cache[i].as_mut_ptr()).1.xform(&static_mat_space);
(*tri_cache[i].as_mut_ptr()).2 =
(*tri_cache[i].as_mut_ptr()).2 * static_mat_space;
(*tri_cache[i].as_mut_ptr()).2.xform(&static_mat_space);
}
}
}
@@ -205,7 +205,7 @@ impl<'a> MicropolyBatch<'a> {
// Calculate the ray space, if necessary.
let mat_space = if space.len() > 1 {
// Per-ray transform, for motion blur
lerp_slice(space, ray_time).inverse()
lerp_slice(space, ray_time)
} else {
static_mat_space
};
@@ -251,9 +251,9 @@ impl<'a> MicropolyBatch<'a> {
};
if !space.is_empty() {
tri.0 = tri.0 * mat_space;
tri.1 = tri.1 * mat_space;
tri.2 = tri.2 * mat_space;
tri.0 = tri.0.xform(&mat_space);
tri.1 = tri.1.xform(&mat_space);
tri.2 = tri.2.xform(&mat_space);
}
tri
@@ -284,6 +284,13 @@ impl<'a> MicropolyBatch<'a> {
// Calculate intersection data if necessary.
if non_shadow_hit {
// Get the full space data.
let mat_space = if let Some(space) = mat_space.to_full() {
space
} else {
return;
};
let hit_tri = unsafe { hit_tri.assume_init() };
let hit_tri_indices = unsafe { hit_tri_indices.assume_init() };
let (t, b0, b1, b2) = unsafe { hit_tri_data.assume_init() };
@@ -311,7 +318,7 @@ impl<'a> MicropolyBatch<'a> {
let n1 = lerp_slice(n1_slice, ray_time).normalized();
let n2 = lerp_slice(n2_slice, ray_time).normalized();
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space;
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform_fast(&mat_space);
if dot(s_nor, geo_normal) >= 0.0 {
s_nor
} else {

View File

@@ -2,7 +2,6 @@
// pub mod micropoly_batch;
pub mod bilinear_patch;
pub mod micropoly_batch;
pub mod triangle;
pub mod triangle_mesh;
@@ -10,8 +9,8 @@ use std::fmt::Debug;
use crate::{
boundable::Boundable,
math::{Normal, Point, Transform, Vector},
ray::{RayBatch, RayStack},
math::{Normal, Point, Vector, XformFull},
ray::{LocalRay, Ray},
shading::surface_closure::SurfaceClosure,
shading::SurfaceShader,
};
@@ -19,13 +18,13 @@ use crate::{
const MAX_EDGE_DICE: u32 = 128;
pub trait Surface: Boundable + Debug + Sync {
fn intersect_rays(
fn intersect_ray(
&self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection],
shader: &dyn SurfaceShader,
space: &[Transform],
ray: &mut Ray,
local_ray: &LocalRay,
space: &XformFull,
isect: &mut SurfaceIntersection,
shaders: &[&dyn SurfaceShader],
);
}
@@ -80,13 +79,13 @@ pub enum SurfaceIntersection {
#[derive(Debug, Copy, Clone)]
pub struct SurfaceIntersectionData {
pub incoming: Vector, // Direction of the incoming ray
pub pos: Point, // Position of the intersection
pub incoming: Vector, // Direction of the incoming ray.
pub pos: Point, // Position of the intersection.
pub pos_err: f32, // Error magnitude of the intersection position. Imagine
// a cube centered around `pos` with dimensions of `2 * pos_err`.
pub nor: Normal, // Shading normal
pub nor_g: Normal, // True geometric normal
pub local_space: Transform, // Matrix from global space to local space
pub t: f32, // Ray t-value at the intersection point
pub sample_pdf: f32, // The PDF of getting this point by explicitly sampling the surface
pub nor: Normal, // Shading normal.
pub nor_g: Normal, // True geometric normal.
pub local_space: XformFull, // Matrix from local to world space.
pub t: f32, // Ray t-value at the intersection point.
pub sample_pdf: f32, // The PDF of getting this point by explicitly sampling the surface.
}

View File

@@ -162,7 +162,7 @@ pub fn surface_point(tri: (Point, Point, Point), bary: (f32, f32, f32)) -> (Poin
+ (tri.1.into_vector().abs() * bary.1)
+ (tri.2.into_vector().abs() * bary.2))
* fp_gamma(7))
.co
.0
.max_element();
(pos, pos_err)

View File

@@ -6,10 +6,11 @@ use crate::{
accel::BVH4,
bbox::BBox,
boundable::Boundable,
color::Color,
lerp::lerp_slice,
math::{cross, dot, Normal, Point, Transform},
ray::{RayBatch, RayStack},
shading::SurfaceShader,
math::{cross, dot, Normal, Point, XformFull},
ray::{LocalRay, Ray},
shading::{SimpleSurfaceShader, SurfaceShader},
};
use super::{triangle, Surface, SurfaceIntersection, SurfaceIntersectionData};
@@ -18,6 +19,7 @@ const MAX_LEAF_TRIANGLE_COUNT: usize = 3;
#[derive(Copy, Clone, Debug)]
pub struct TriangleMesh<'a> {
pub shader_idx: Option<usize>,
time_sample_count: usize,
vertices: &'a [Point], // Vertices, with the time samples for each vertex stored contiguously
normals: Option<&'a [Normal]>, // Vertex normals, organized the same as `vertices`
@@ -28,6 +30,7 @@ pub struct TriangleMesh<'a> {
impl<'a> TriangleMesh<'a> {
pub fn from_verts_and_indices<'b>(
arena: &'b Arena,
shader_idx: Option<usize>,
verts: &[Vec<Point>],
vert_normals: &Option<Vec<Vec<Normal>>>,
tri_indices: &[(usize, usize, usize)],
@@ -106,6 +109,7 @@ impl<'a> TriangleMesh<'a> {
});
TriangleMesh {
shader_idx: shader_idx,
time_sample_count: time_sample_count,
vertices: vertices,
normals: normals,
@@ -122,202 +126,135 @@ impl<'a> Boundable for TriangleMesh<'a> {
}
impl<'a> Surface for TriangleMesh<'a> {
fn intersect_rays(
fn intersect_ray(
&self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
isects: &mut [SurfaceIntersection],
shader: &dyn SurfaceShader,
space: &[Transform],
ray: &mut Ray,
local_ray: &LocalRay,
space: &XformFull,
isect: &mut SurfaceIntersection,
shaders: &[&dyn SurfaceShader],
) {
// Precalculate transform for non-motion blur cases
let static_mat_space = if space.len() == 1 {
lerp_slice(space, 0.0).inverse()
} else {
Transform::new()
let unassigned_shader = SimpleSurfaceShader::Emit {
color: Color::new_xyz(color::rec709_to_xyz((1.0, 0.0, 1.0))),
};
self.accel
.traverse(rays, ray_stack, |idx_range, rays, ray_stack| {
let tri_count = idx_range.end - idx_range.start;
let shader = if let Some(idx) = self.shader_idx {
shaders[idx]
} else {
&unassigned_shader
};
// Build the triangle cache if we can!
let is_cached = ray_stack.ray_count_in_next_task() >= tri_count
&& self.time_sample_count == 1
&& space.len() <= 1;
let mut tri_cache = [std::mem::MaybeUninit::uninit(); MAX_LEAF_TRIANGLE_COUNT];
if is_cached {
for tri_idx in idx_range.clone() {
let i = tri_idx - idx_range.start;
let tri_indices = self.indices[tri_idx];
self.accel.traverse(ray, local_ray, |idx_range, ray| {
// Iterate through the triangles and test the ray against them.
let mut non_shadow_hit = false;
let mut hit_tri = std::mem::MaybeUninit::uninit();
let mut hit_tri_indices = std::mem::MaybeUninit::uninit();
let mut hit_tri_data = std::mem::MaybeUninit::uninit();
let ray_pre = triangle::RayTriPrecompute::new(ray.dir);
for tri_idx in idx_range.clone() {
let tri_indices = self.indices[tri_idx];
// For static triangles with static transforms, cache them.
// Get triangle.
let mut tri = if self.time_sample_count == 1 {
// No deformation motion blur, so fast-path it.
(
self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize],
)
} else {
// Deformation motion blur, need to interpolate.
let p0_slice = &self.vertices[(tri_indices.0 as usize * self.time_sample_count)
..((tri_indices.0 as usize + 1) * self.time_sample_count)];
let p1_slice = &self.vertices[(tri_indices.1 as usize * self.time_sample_count)
..((tri_indices.1 as usize + 1) * self.time_sample_count)];
let p2_slice = &self.vertices[(tri_indices.2 as usize * self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)];
let p0 = lerp_slice(p0_slice, ray.time);
let p1 = lerp_slice(p1_slice, ray.time);
let p2 = lerp_slice(p2_slice, ray.time);
(p0, p1, p2)
};
// Transform triangle into world space.
tri.0 = tri.0.xform(space);
tri.1 = tri.1.xform(space);
tri.2 = tri.2.xform(space);
// Test ray against triangle
if let Some((t, b0, b1, b2)) =
triangle::intersect_ray(ray.orig, ray_pre, ray.max_t, tri)
{
if ray.is_occlusion() {
*isect = SurfaceIntersection::Occlude;
ray.mark_done();
break;
} else {
non_shadow_hit = true;
ray.max_t = t;
unsafe {
*tri_cache[i].as_mut_ptr() = (
self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize],
);
if !space.is_empty() {
(*tri_cache[i].as_mut_ptr()).0 =
(*tri_cache[i].as_mut_ptr()).0 * static_mat_space;
(*tri_cache[i].as_mut_ptr()).1 =
(*tri_cache[i].as_mut_ptr()).1 * static_mat_space;
(*tri_cache[i].as_mut_ptr()).2 =
(*tri_cache[i].as_mut_ptr()).2 * static_mat_space;
}
*hit_tri.as_mut_ptr() = tri;
*hit_tri_indices.as_mut_ptr() = tri_indices;
*hit_tri_data.as_mut_ptr() = (t, b0, b1, b2);
}
}
}
}
// Test each ray against the triangles.
ray_stack.do_next_task(|ray_idx| {
let ray_idx = ray_idx as usize;
// Calculate intersection data if necessary.
if non_shadow_hit {
let hit_tri = unsafe { hit_tri.assume_init() };
let (t, b0, b1, b2) = unsafe { hit_tri_data.assume_init() };
if rays.is_done(ray_idx) {
return;
}
// Calculate intersection point and error magnitudes
let (pos, pos_err) = triangle::surface_point(hit_tri, (b0, b1, b2));
let ray_time = rays.time(ray_idx);
// Calculate geometric surface normal
let geo_normal = cross(hit_tri.0 - hit_tri.1, hit_tri.0 - hit_tri.2).into_normal();
// Calculate the ray space, if necessary.
let mat_space = if space.len() > 1 {
// Per-ray transform, for motion blur
lerp_slice(space, ray_time).inverse()
// Calculate interpolated surface normal, if any
let shading_normal = if let Some(normals) = self.normals {
let hit_tri_indices = unsafe { hit_tri_indices.assume_init() };
let n0_slice = &normals[(hit_tri_indices.0 as usize * self.time_sample_count)
..((hit_tri_indices.0 as usize + 1) * self.time_sample_count)];
let n1_slice = &normals[(hit_tri_indices.1 as usize * self.time_sample_count)
..((hit_tri_indices.1 as usize + 1) * self.time_sample_count)];
let n2_slice = &normals[(hit_tri_indices.2 as usize * self.time_sample_count)
..((hit_tri_indices.2 as usize + 1) * self.time_sample_count)];
let n0 = lerp_slice(n0_slice, ray.time).normalized();
let n1 = lerp_slice(n1_slice, ray.time).normalized();
let n2 = lerp_slice(n2_slice, ray.time).normalized();
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)).xform_fast(&space);
if dot(s_nor, geo_normal) >= 0.0 {
s_nor
} else {
static_mat_space
};
// Iterate through the triangles and test the ray against them.
let mut non_shadow_hit = false;
let mut hit_tri = std::mem::MaybeUninit::uninit();
let mut hit_tri_indices = std::mem::MaybeUninit::uninit();
let mut hit_tri_data = std::mem::MaybeUninit::uninit();
let ray_pre = triangle::RayTriPrecompute::new(rays.dir(ray_idx));
for tri_idx in idx_range.clone() {
let tri_indices = self.indices[tri_idx];
// Get triangle if necessary
let tri = if is_cached {
let i = tri_idx - idx_range.start;
unsafe { tri_cache[i].assume_init() }
} else {
let mut tri = if self.time_sample_count == 1 {
// No deformation motion blur, so fast-path it.
(
self.vertices[tri_indices.0 as usize],
self.vertices[tri_indices.1 as usize],
self.vertices[tri_indices.2 as usize],
)
} else {
// Deformation motion blur, need to interpolate.
let p0_slice = &self.vertices[(tri_indices.0 as usize
* self.time_sample_count)
..((tri_indices.0 as usize + 1) * self.time_sample_count)];
let p1_slice = &self.vertices[(tri_indices.1 as usize
* self.time_sample_count)
..((tri_indices.1 as usize + 1) * self.time_sample_count)];
let p2_slice = &self.vertices[(tri_indices.2 as usize
* self.time_sample_count)
..((tri_indices.2 as usize + 1) * self.time_sample_count)];
let p0 = lerp_slice(p0_slice, ray_time);
let p1 = lerp_slice(p1_slice, ray_time);
let p2 = lerp_slice(p2_slice, ray_time);
(p0, p1, p2)
};
if !space.is_empty() {
tri.0 = tri.0 * mat_space;
tri.1 = tri.1 * mat_space;
tri.2 = tri.2 * mat_space;
}
tri
};
// Test ray against triangle
if let Some((t, b0, b1, b2)) = triangle::intersect_ray(
rays.orig(ray_idx),
ray_pre,
rays.max_t(ray_idx),
tri,
) {
if rays.is_occlusion(ray_idx) {
isects[ray_idx] = SurfaceIntersection::Occlude;
rays.mark_done(ray_idx);
break;
} else {
non_shadow_hit = true;
rays.set_max_t(ray_idx, t);
unsafe {
*hit_tri.as_mut_ptr() = tri;
*hit_tri_indices.as_mut_ptr() = tri_indices;
*hit_tri_data.as_mut_ptr() = (t, b0, b1, b2);
}
}
}
-s_nor
}
} else {
geo_normal
};
// Calculate intersection data if necessary.
if non_shadow_hit {
let hit_tri = unsafe { hit_tri.assume_init() };
let (t, b0, b1, b2) = unsafe { hit_tri_data.assume_init() };
let intersection_data = SurfaceIntersectionData {
incoming: ray.dir,
t: t,
pos: pos,
pos_err: pos_err,
nor: shading_normal,
nor_g: geo_normal,
local_space: *space,
sample_pdf: 0.0,
};
// Calculate intersection point and error magnitudes
let (pos, pos_err) = triangle::surface_point(hit_tri, (b0, b1, b2));
// Calculate geometric surface normal
let geo_normal =
cross(hit_tri.0 - hit_tri.1, hit_tri.0 - hit_tri.2).into_normal();
// Calculate interpolated surface normal, if any
let shading_normal = if let Some(normals) = self.normals {
let hit_tri_indices = unsafe { hit_tri_indices.assume_init() };
let n0_slice = &normals[(hit_tri_indices.0 as usize
* self.time_sample_count)
..((hit_tri_indices.0 as usize + 1) * self.time_sample_count)];
let n1_slice = &normals[(hit_tri_indices.1 as usize
* self.time_sample_count)
..((hit_tri_indices.1 as usize + 1) * self.time_sample_count)];
let n2_slice = &normals[(hit_tri_indices.2 as usize
* self.time_sample_count)
..((hit_tri_indices.2 as usize + 1) * self.time_sample_count)];
let n0 = lerp_slice(n0_slice, ray_time).normalized();
let n1 = lerp_slice(n1_slice, ray_time).normalized();
let n2 = lerp_slice(n2_slice, ray_time).normalized();
let s_nor = ((n0 * b0) + (n1 * b1) + (n2 * b2)) * mat_space;
if dot(s_nor, geo_normal) >= 0.0 {
s_nor
} else {
-s_nor
}
} else {
geo_normal
};
let intersection_data = SurfaceIntersectionData {
incoming: rays.dir(ray_idx),
t: t,
pos: pos,
pos_err: pos_err,
nor: shading_normal,
nor_g: geo_normal,
local_space: mat_space,
sample_pdf: 0.0,
};
// Fill in intersection data
isects[ray_idx] = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: shader.shade(&intersection_data, ray_time),
};
}
});
ray_stack.pop_task();
});
// Fill in intersection data
*isect = SurfaceIntersection::Hit {
intersection_data: intersection_data,
closure: shader.shade(&intersection_data, ray.time),
};
}
});
}
}

View File

@@ -1,190 +1,119 @@
use std::iter;
use crate::{
accel::ray_code,
color::{rec709_to_xyz, Color},
lerp::lerp_slice,
math::Transform,
ray::{RayBatch, RayStack},
math::XformFull,
ray::{LocalRay, Ray},
scene::{Assembly, InstanceType, Object},
shading::{SimpleSurfaceShader, SurfaceShader},
shading::SurfaceShader,
surface::SurfaceIntersection,
transform_stack::TransformStack,
};
pub struct Tracer<'a> {
root: &'a Assembly<'a>,
ray_trace_count: u64,
ray_stack: RayStack,
inner: TracerInner<'a>,
}
impl<'a> Tracer<'a> {
pub fn from_assembly(assembly: &'a Assembly) -> Tracer<'a> {
Tracer {
root: assembly,
ray_trace_count: 0,
ray_stack: RayStack::new(),
inner: TracerInner {
root: assembly,
xform_stack: TransformStack::new(),
isects: Vec::new(),
},
}
}
pub fn trace<'b>(&'b mut self, rays: &mut RayBatch) -> &'b [SurfaceIntersection] {
self.ray_trace_count += rays.len() as u64;
self.inner.trace(rays, &mut self.ray_stack)
}
pub fn rays_traced(&self) -> u64 {
self.ray_trace_count
}
}
struct TracerInner<'a> {
root: &'a Assembly<'a>,
xform_stack: TransformStack,
isects: Vec<SurfaceIntersection>,
}
pub fn trace(&mut self, mut ray: Ray) -> SurfaceIntersection {
self.ray_trace_count += 1;
impl<'a> TracerInner<'a> {
fn trace<'b>(
&'b mut self,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
) -> &'b [SurfaceIntersection] {
ray_stack.clear();
let local_ray = ray.to_local();
let space = XformFull::identity();
let mut isect = SurfaceIntersection::Miss;
// Ready the isects
self.isects.clear();
self.isects.reserve(rays.len());
self.isects
.extend(iter::repeat(SurfaceIntersection::Miss).take(rays.len()));
self.trace_assembly(self.root, &mut ray, &local_ray, &space, &mut isect);
// Prep the accel part of the rays.
{
let ident = Transform::new();
for i in 0..rays.len() {
rays.update_local(i, &ident);
}
}
// Divide the rays into 8 different lanes by direction.
ray_stack.ensure_lane_count(8);
for i in 0..rays.len() {
ray_stack.push_ray_index(i, ray_code(rays.dir(i)));
}
ray_stack.push_lanes_to_tasks(&[0, 1, 2, 3, 4, 5, 6, 7]);
// Trace each of the 8 lanes separately.
while !ray_stack.is_empty() {
self.trace_assembly(self.root, rays, ray_stack);
}
&self.isects
isect
}
fn trace_assembly<'b>(
&'b mut self,
fn trace_assembly(
&mut self,
assembly: &Assembly,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
ray: &mut Ray,
local_ray: &LocalRay,
space: &XformFull,
isect: &mut SurfaceIntersection,
) {
assembly
.object_accel
.traverse(rays, ray_stack, |idx_range, rays, ray_stack| {
let inst = &assembly.instances[idx_range.start];
.traverse(ray, local_ray, |idx_range, ray| {
for inst_idx in idx_range {
let inst = &assembly.instances[inst_idx];
// Transform rays if needed
if let Some((xstart, xend)) = inst.transform_indices {
// Push transforms to stack
self.xform_stack.push(&assembly.xforms[xstart..xend]);
// Handle transforms if needed.
let (local_space, local_ray) = if let Some((xstart, xend)) =
inst.transform_indices
{
let instance_xform = lerp_slice(&assembly.xforms[xstart..xend], ray.time);
let combined_xform = instance_xform.compose(&space.fwd);
// Do transforms
// TODO: re-divide rays based on direction (maybe?).
let xforms = self.xform_stack.top();
ray_stack.do_next_task(|ray_idx| {
let t = rays.time(ray_idx);
rays.update_local(ray_idx, &lerp_slice(xforms, t));
});
ray_stack.duplicate_next_task();
}
// Trace rays
match inst.instance_type {
InstanceType::Object => {
self.trace_object(
&assembly.objects[inst.data_index],
inst.surface_shader_index
.map(|i| assembly.surface_shaders[i]),
rays,
ray_stack,
);
}
InstanceType::Assembly => {
self.trace_assembly(&assembly.assemblies[inst.data_index], rays, ray_stack);
}
}
// Un-transform rays if needed
if inst.transform_indices.is_some() {
// Pop transforms off stack
self.xform_stack.pop();
// Undo transforms
let xforms = self.xform_stack.top();
if !xforms.is_empty() {
ray_stack.pop_do_next_task(|ray_idx| {
let t = rays.time(ray_idx);
rays.update_local(ray_idx, &lerp_slice(xforms, t));
});
if let Some(xform) = combined_xform.to_full() {
(xform, ray.to_local_xform(&xform))
} else {
// Invalid transform, so skip traversing into this instance.
continue;
}
} else {
let ident = Transform::new();
ray_stack.pop_do_next_task(|ray_idx| {
rays.update_local(ray_idx, &ident);
});
(*space, *local_ray)
};
// Trace ray.
match inst.instance_type {
InstanceType::Object => {
self.trace_object(
&assembly.objects[inst.data_index],
ray,
&local_ray,
&local_space,
isect,
assembly.surface_shaders,
);
}
InstanceType::Assembly => {
self.trace_assembly(
&assembly.assemblies[inst.data_index],
ray,
&local_ray,
&local_space,
isect,
);
}
}
if ray.is_done() {
return;
}
}
});
}
fn trace_object<'b>(
&'b mut self,
&mut self,
obj: &Object,
surface_shader: Option<&dyn SurfaceShader>,
rays: &mut RayBatch,
ray_stack: &mut RayStack,
ray: &mut Ray,
local_ray: &LocalRay,
space: &XformFull,
isect: &mut SurfaceIntersection,
shaders: &[&dyn SurfaceShader],
) {
match *obj {
Object::Surface(surface) => {
let unassigned_shader = SimpleSurfaceShader::Emit {
color: Color::new_xyz(rec709_to_xyz((1.0, 0.0, 1.0))),
};
let shader = surface_shader.unwrap_or(&unassigned_shader);
surface.intersect_rays(
rays,
ray_stack,
&mut self.isects,
shader,
self.xform_stack.top(),
);
surface.intersect_ray(ray, local_ray, space, isect, shaders);
}
Object::SurfaceLight(surface) => {
// Lights don't use shaders
let bogus_shader = SimpleSurfaceShader::Emit {
color: Color::new_xyz(rec709_to_xyz((1.0, 0.0, 1.0))),
};
surface.intersect_rays(
rays,
ray_stack,
&mut self.isects,
&bogus_shader,
self.xform_stack.top(),
);
surface.intersect_ray(ray, local_ray, space, isect, shaders);
}
}
}

View File

@@ -1,83 +1,30 @@
use std::{
cmp,
mem::{transmute, MaybeUninit},
};
use crate::{algorithm::merge_slices_to, math::Transform};
use crate::math::Xform;
pub struct TransformStack {
stack: Vec<MaybeUninit<Transform>>,
stack_indices: Vec<usize>,
stack: Vec<Xform>,
}
impl TransformStack {
pub fn new() -> TransformStack {
let mut ts = TransformStack {
stack: Vec::new(),
stack_indices: Vec::new(),
};
ts.stack_indices.push(0);
ts.stack_indices.push(0);
ts
TransformStack { stack: Vec::new() }
}
pub fn clear(&mut self) {
self.stack.clear();
self.stack_indices.clear();
self.stack_indices.push(0);
self.stack_indices.push(0);
}
pub fn push(&mut self, xforms: &[Transform]) {
assert!(!xforms.is_empty());
if self.stack.is_empty() {
let xforms: &[MaybeUninit<Transform>] = unsafe { transmute(xforms) };
self.stack.extend(xforms);
} else {
let sil = self.stack_indices.len();
let i1 = self.stack_indices[sil - 2];
let i2 = self.stack_indices[sil - 1];
// Reserve stack space for the new transforms.
// Note this leaves exposed uninitialized memory. The subsequent call to
// merge_slices_to() fills that memory in.
{
let maxlen = cmp::max(xforms.len(), i2 - i1);
self.stack.reserve(maxlen);
let l = self.stack.len();
unsafe { self.stack.set_len(l + maxlen) };
}
let (xfs1, xfs2) = self.stack.split_at_mut(i2);
merge_slices_to(
unsafe { transmute(&xfs1[i1..i2]) },
xforms,
xfs2,
|xf1, xf2| *xf1 * *xf2,
);
pub fn push(&mut self, xform: Xform) {
match self.stack.last() {
None => self.stack.push(xform),
Some(prev_xform) => self.stack.push(xform.compose(prev_xform)),
}
self.stack_indices.push(self.stack.len());
}
pub fn pop(&mut self) {
assert!(self.stack_indices.len() > 2);
let sl = self.stack.len();
let sil = self.stack_indices.len();
let i1 = self.stack_indices[sil - 2];
let i2 = self.stack_indices[sil - 1];
self.stack.truncate(sl - (i2 - i1));
self.stack_indices.pop();
pub fn pop(&mut self) -> Option<Xform> {
self.stack.pop()
}
pub fn top(&self) -> &[Transform] {
let sil = self.stack_indices.len();
let i1 = self.stack_indices[sil - 2];
let i2 = self.stack_indices[sil - 1];
unsafe { transmute(&self.stack[i1..i2]) }
pub fn top(&self) -> Option<&Xform> {
self.stack.last()
}
}
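// A minimal usage sketch (illustrative, not part of the change above). It assumes
// `Xform::identity()` as used elsewhere in this changeset: each `push` stores the
// new transform pre-composed with the previous top, so `top()` always holds the
// fully accumulated transform and `pop` unwinds one level at a time.
#[cfg(test)]
mod usage_sketch {
    use super::TransformStack;
    use crate::math::Xform;

    #[test]
    fn push_top_pop() {
        let mut stack = TransformStack::new();
        assert!(stack.top().is_none());

        stack.push(Xform::identity());
        stack.push(Xform::identity());
        assert!(stack.top().is_some());

        assert!(stack.pop().is_some());
        assert!(stack.pop().is_some());
        assert!(stack.pop().is_none());
    }
}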

View File

@@ -6,6 +6,9 @@ edition = "2018"
license = "MIT, Apache 2.0"
build = "build.rs"
[build-dependencies]
colorbox = "0.3"
[lib]
name = "color"
path = "src/lib.rs"

View File

@@ -1,76 +1,46 @@
use std::{env, fs::File, io::Write, path::Path};
#[derive(Copy, Clone)]
struct Chromaticities {
r: (f64, f64),
g: (f64, f64),
b: (f64, f64),
w: (f64, f64),
}
use colorbox::{
chroma::{self, Chromaticities},
matrix::{invert, rgb_to_xyz_matrix, xyz_chromatic_adaptation_matrix, AdaptationMethod},
matrix_compose,
};
fn main() {
let out_dir = env::var("OUT_DIR").unwrap();
// Rec709
{
let chroma = Chromaticities {
r: (0.640, 0.330),
g: (0.300, 0.600),
b: (0.150, 0.060),
w: (0.3127, 0.3290),
};
let dest_path = Path::new(&out_dir).join("rec709_inc.rs");
let mut f = File::create(&dest_path).unwrap();
write_conversion_functions("rec709", chroma, &mut f);
write_conversion_functions("rec709", chroma::REC709, &mut f);
}
// Rec2020
{
let chroma = Chromaticities {
r: (0.708, 0.292),
g: (0.170, 0.797),
b: (0.131, 0.046),
w: (0.3127, 0.3290),
};
let dest_path = Path::new(&out_dir).join("rec2020_inc.rs");
let mut f = File::create(&dest_path).unwrap();
write_conversion_functions("rec2020", chroma, &mut f);
write_conversion_functions("rec2020", chroma::REC2020, &mut f);
}
// ACES AP0
{
let chroma = Chromaticities {
r: (0.73470, 0.26530),
g: (0.00000, 1.00000),
b: (0.00010, -0.07700),
w: (0.32168, 0.33767),
};
let dest_path = Path::new(&out_dir).join("aces_ap0_inc.rs");
let mut f = File::create(&dest_path).unwrap();
write_conversion_functions("aces_ap0", chroma, &mut f);
write_conversion_functions("aces_ap0", chroma::ACES_AP0, &mut f);
}
// ACES AP1
{
let chroma = Chromaticities {
r: (0.713, 0.293),
g: (0.165, 0.830),
b: (0.128, 0.044),
w: (0.32168, 0.33767),
};
let dest_path = Path::new(&out_dir).join("aces_ap1_inc.rs");
let mut f = File::create(&dest_path).unwrap();
write_conversion_functions("aces_ap1", chroma, &mut f);
write_conversion_functions("aces_ap1", chroma::ACES_AP1, &mut f);
}
}
/// Generates conversion functions for the given rgb to xyz transform matrix.
fn write_conversion_functions(space_name: &str, chroma: Chromaticities, f: &mut File) {
let to_xyz = rgb_to_xyz(chroma, 1.0);
let to_xyz = rgb_to_xyz_matrix(chroma);
f.write_all(
format!(
@@ -99,7 +69,7 @@ pub fn {}_to_xyz(rgb: (f32, f32, f32)) -> (f32, f32, f32) {{
)
.unwrap();
let inv = inverse(to_xyz);
let inv = invert(to_xyz).unwrap();
f.write_all(
format!(
r#"
@@ -127,12 +97,14 @@ pub fn xyz_to_{}(xyz: (f32, f32, f32)) -> (f32, f32, f32) {{
)
.unwrap();
let e_chroma = {
let mut e_chroma = chroma;
e_chroma.w = (1.0 / 3.0, 1.0 / 3.0);
e_chroma
};
let e_to_xyz = rgb_to_xyz(e_chroma, 1.0);
let e_to_xyz = matrix_compose!(
rgb_to_xyz_matrix(chroma),
xyz_chromatic_adaptation_matrix(
chroma.w,
(1.0 / 3.0, 1.0 / 3.0),
AdaptationMethod::Bradford,
),
);
f.write_all(
format!(
r#"
@@ -160,7 +132,7 @@ pub fn {}_e_to_xyz(rgb: (f32, f32, f32)) -> (f32, f32, f32) {{
)
.unwrap();
let inv_e = inverse(e_to_xyz);
let inv_e = invert(e_to_xyz).unwrap();
f.write_all(
format!(
r#"
@@ -188,135 +160,3 @@ pub fn xyz_to_{}_e(xyz: (f32, f32, f32)) -> (f32, f32, f32) {{
)
.unwrap();
}
/// Port of the RGBtoXYZ function from the ACES CTL reference implementation.
/// See lib/IlmCtlMath/CtlColorSpace.cpp in the CTL reference implementation.
///
/// This takes the chromaticities of an RGB colorspace and generates a
/// transform matrix from that space to XYZ.
///
/// * `chroma` is the chromaticities.
/// * `y` is the XYZ "Y" value that should map to RGB (1,1,1)
fn rgb_to_xyz(chroma: Chromaticities, y: f64) -> [[f64; 3]; 3] {
// X and Z values of RGB value (1, 1, 1), or "white"
let x = chroma.w.0 * y / chroma.w.1;
let z = (1.0 - chroma.w.0 - chroma.w.1) * y / chroma.w.1;
// Scale factors for matrix rows
let d = chroma.r.0 * (chroma.b.1 - chroma.g.1)
+ chroma.b.0 * (chroma.g.1 - chroma.r.1)
+ chroma.g.0 * (chroma.r.1 - chroma.b.1);
let sr = (x * (chroma.b.1 - chroma.g.1)
- chroma.g.0 * (y * (chroma.b.1 - 1.0) + chroma.b.1 * (x + z))
+ chroma.b.0 * (y * (chroma.g.1 - 1.0) + chroma.g.1 * (x + z)))
/ d;
let sg = (x * (chroma.r.1 - chroma.b.1)
+ chroma.r.0 * (y * (chroma.b.1 - 1.0) + chroma.b.1 * (x + z))
- chroma.b.0 * (y * (chroma.r.1 - 1.0) + chroma.r.1 * (x + z)))
/ d;
let sb = (x * (chroma.g.1 - chroma.r.1)
- chroma.r.0 * (y * (chroma.g.1 - 1.0) + chroma.g.1 * (x + z))
+ chroma.g.0 * (y * (chroma.r.1 - 1.0) + chroma.r.1 * (x + z)))
/ d;
// Assemble the matrix
let mut mat = [[0.0; 3]; 3];
mat[0][0] = sr * chroma.r.0;
mat[0][1] = sg * chroma.g.0;
mat[0][2] = sb * chroma.b.0;
mat[1][0] = sr * chroma.r.1;
mat[1][1] = sg * chroma.g.1;
mat[1][2] = sb * chroma.b.1;
mat[2][0] = sr * (1.0 - chroma.r.0 - chroma.r.1);
mat[2][1] = sg * (1.0 - chroma.g.0 - chroma.g.1);
mat[2][2] = sb * (1.0 - chroma.b.0 - chroma.b.1);
mat
}
/// Calculates the inverse of the given 3x3 matrix.
///
/// Ported to Rust from `gjInverse()` in IlmBase's Imath/ImathMatrix.h
fn inverse(m: [[f64; 3]; 3]) -> [[f64; 3]; 3] {
let mut s = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]];
let mut t = m;
// Forward elimination
for i in 0..2 {
let mut pivot = i;
let mut pivotsize = t[i][i];
if pivotsize < 0.0 {
pivotsize = -pivotsize;
}
for j in (i + 1)..3 {
let mut tmp = t[j][i];
if tmp < 0.0 {
tmp = -tmp;
}
if tmp > pivotsize {
pivot = j;
pivotsize = tmp;
}
}
if pivotsize == 0.0 {
panic!("Cannot invert singular matrix.");
}
if pivot != i {
for j in 0..3 {
let mut tmp = t[i][j];
t[i][j] = t[pivot][j];
t[pivot][j] = tmp;
tmp = s[i][j];
s[i][j] = s[pivot][j];
s[pivot][j] = tmp;
}
}
for j in (i + 1)..3 {
let f = t[j][i] / t[i][i];
for k in 0..3 {
t[j][k] -= f * t[i][k];
s[j][k] -= f * s[i][k];
}
}
}
// Backward substitution
for i in (0..3).rev() {
let f = t[i][i];
if t[i][i] == 0.0 {
panic!("Cannot invert singular matrix.");
}
for j in 0..3 {
t[i][j] /= f;
s[i][j] /= f;
}
for j in 0..i {
let f = t[j][i];
for k in 0..3 {
t[j][k] -= f * t[i][k];
s[j][k] -= f * s[i][k];
}
}
}
s
}

View File

@@ -0,0 +1,10 @@
[package]
name = "data_tree"
version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
edition = "2018"
license = "MIT"
[lib]
name = "data_tree"
path = "src/lib.rs"

View File

@@ -0,0 +1,199 @@
#![allow(clippy::redundant_field_names)]
#![allow(clippy::needless_lifetimes)]
mod parse;
use parse::{ParseError, ParseEvent, Parser};
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Event<'a> {
InnerOpen {
type_name: &'a str,
byte_offset: usize,
},
InnerClose {
byte_offset: usize,
},
Leaf {
type_name: &'a str,
contents: &'a str,
byte_offset: usize,
},
EOF,
}
//----------------------------------------------------------------------------
#[derive(Debug)]
pub enum Error {
ExpectedNameOrClose(usize),
ExpectedOpen(usize),
UnexpectedClose(usize),
UnexpectedEOF,
IO(std::io::Error),
}
impl std::error::Error for Error {}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
write!(f, "{:?}", self)
}
}
impl From<ParseError> for Error {
fn from(e: ParseError) -> Self {
match e {
ParseError::ExpectedNameOrClose(byte_offset) => Error::ExpectedNameOrClose(byte_offset),
ParseError::ExpectedOpen(byte_offset) => Error::ExpectedOpen(byte_offset),
ParseError::UnexpectedClose(byte_offset) => Error::UnexpectedClose(byte_offset),
}
}
}
impl From<std::io::Error> for Error {
fn from(e: std::io::Error) -> Self {
Error::IO(e)
}
}
//-------------------------------------------------------------
#[derive(Debug)]
pub struct DataTreeReader<R: std::io::BufRead> {
parser: Parser,
reader: R,
buf: String,
eof: bool,
}
impl<R: std::io::BufRead> DataTreeReader<R> {
pub fn new(reader: R) -> Self {
Self {
parser: Parser::new(),
reader: reader,
buf: String::new(),
eof: false,
}
}
pub fn next_event<'a>(&'a mut self) -> Result<Event<'a>, Error> {
loop {
let valid_end = match self.parser.next_event()? {
ParseEvent::ValidEnd => true,
ParseEvent::NeedMoreInput => false,
// The transmutes below are because the borrow checker is
// over-conservative about this.  It thinks the lifetime
// isn't valid, but since we aren't mutating self after
// returning (and in fact can't, because of the borrow)
// there's no way for the references here to become invalid.
ParseEvent::InnerOpen {
type_name,
byte_offset,
} => {
return Ok(unsafe {
std::mem::transmute::<Event, Event>(Event::InnerOpen {
type_name,
byte_offset,
})
});
}
ParseEvent::InnerClose { byte_offset } => {
return Ok(unsafe {
std::mem::transmute::<Event, Event>(Event::InnerClose { byte_offset })
});
}
ParseEvent::Leaf {
type_name,
contents,
byte_offset,
} => {
return Ok(unsafe {
std::mem::transmute::<Event, Event>(Event::Leaf {
type_name,
contents,
byte_offset,
})
});
}
};
if !self.eof {
self.buf.clear();
let read = self.reader.read_line(&mut self.buf)?;
self.parser.push_data(&self.buf);
if read == 0 {
self.eof = true;
}
} else if !valid_end {
return Err(Error::UnexpectedEOF);
} else {
return Ok(Event::EOF);
}
}
}
pub fn peek_event<'a>(&'a mut self) -> Result<Event<'a>, Error> {
loop {
let valid_end = match self.parser.peek_event()? {
ParseEvent::ValidEnd => true,
ParseEvent::NeedMoreInput => false,
// The transmutes below are because the borrow checker is
// over-conservative about this.  It thinks the lifetime
// isn't valid, but since we aren't mutating self after
// returning (and in fact can't, because of the borrow)
// there's no way for the references here to become invalid.
ParseEvent::InnerOpen {
type_name,
byte_offset,
} => {
return Ok(unsafe {
std::mem::transmute::<Event, Event>(Event::InnerOpen {
type_name,
byte_offset,
})
});
}
ParseEvent::InnerClose { byte_offset } => {
return Ok(unsafe {
std::mem::transmute::<Event, Event>(Event::InnerClose { byte_offset })
});
}
ParseEvent::Leaf {
type_name,
contents,
byte_offset,
} => {
return Ok(unsafe {
std::mem::transmute::<Event, Event>(Event::Leaf {
type_name,
contents,
byte_offset,
})
});
}
};
if !self.eof {
self.buf.clear();
let read = self.reader.read_line(&mut self.buf)?;
self.parser.push_data(&self.buf);
if read == 0 {
self.eof = true;
}
} else if !valid_end {
return Err(Error::UnexpectedEOF);
} else {
return Ok(Event::EOF);
}
}
}
pub fn byte_offset(&self) -> usize {
self.parser.byte_offset()
}
}
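// A minimal usage sketch (illustrative, not part of the new file above): feeding a
// small in-memory document through `DataTreeReader`. `std::io::Cursor` is only a
// stand-in here for any `BufRead` source.
#[cfg(test)]
mod reader_sketch {
    use super::{DataTreeReader, Event};

    #[test]
    fn read_small_document() {
        let text = "Scene { Name [hello] }";
        let mut reader = DataTreeReader::new(std::io::Cursor::new(text));

        // Expect: open "Scene", leaf "Name", close, then EOF.
        assert!(matches!(
            reader.next_event().unwrap(),
            Event::InnerOpen { type_name: "Scene", .. }
        ));
        assert!(matches!(
            reader.next_event().unwrap(),
            Event::Leaf { type_name: "Name", contents: "hello", .. }
        ));
        assert!(matches!(reader.next_event().unwrap(), Event::InnerClose { .. }));
        assert!(matches!(reader.next_event().unwrap(), Event::EOF));
    }
}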

View File

@@ -0,0 +1,762 @@
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ParseError {
ExpectedNameOrClose(usize),
ExpectedOpen(usize),
UnexpectedClose(usize),
}
impl std::error::Error for ParseError {}
impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
write!(f, "{:?}", self)
}
}
//---------------------------------------------------------------------
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ParseEvent<'a> {
InnerOpen {
type_name: &'a str,
byte_offset: usize,
},
InnerClose {
byte_offset: usize,
},
Leaf {
type_name: &'a str,
contents: &'a str,
byte_offset: usize,
},
NeedMoreInput,
ValidEnd, // All data so far is consumed, and this is a
// valid place to finish the parse.
}
impl<'a> ParseEvent<'a> {
fn add_to_byte_offset(&self, offset: usize) -> ParseEvent<'a> {
match *self {
ParseEvent::InnerOpen {
type_name,
byte_offset,
} => ParseEvent::InnerOpen {
type_name: type_name,
byte_offset: byte_offset + offset,
},
ParseEvent::InnerClose { byte_offset } => ParseEvent::InnerClose {
byte_offset: byte_offset + offset,
},
ParseEvent::Leaf {
type_name,
contents,
byte_offset,
} => ParseEvent::Leaf {
type_name: type_name,
contents: contents,
byte_offset: byte_offset + offset,
},
ParseEvent::NeedMoreInput => *self,
ParseEvent::ValidEnd => *self,
}
}
}
//---------------------------------------------------------------------
#[derive(Debug)]
pub struct Parser {
buffer: String,
buf_consumed_idx: usize,
total_bytes_processed: usize,
inner_opens: usize,
}
impl Parser {
pub fn new() -> Parser {
Parser {
buffer: String::with_capacity(1024),
buf_consumed_idx: 0,
total_bytes_processed: 0,
inner_opens: 0,
}
}
pub fn push_data(&mut self, text: &str) {
// Remove any consumed data.
if self.buf_consumed_idx > 0 {
self.buffer.replace_range(..self.buf_consumed_idx, "");
self.buf_consumed_idx = 0;
}
// Add the new data.
self.buffer.push_str(text);
}
pub fn next_event<'a>(&'a mut self) -> Result<ParseEvent<'a>, ParseError> {
// Remove any consumed data.
if self.buf_consumed_idx > 0 {
self.buffer.replace_range(..self.buf_consumed_idx, "");
self.buf_consumed_idx = 0;
}
// Try to parse an event from the valid prefix.
match try_parse_event(&self.buffer) {
ParseEventParse::Ok(event, bytes_consumed) => {
// Update internal state.
if let ParseEvent::InnerOpen { .. } = event {
self.inner_opens += 1;
} else if let ParseEvent::InnerClose { byte_offset, .. } = event {
if self.inner_opens == 0 {
return Err(ParseError::UnexpectedClose(
byte_offset + self.total_bytes_processed,
));
} else {
self.inner_opens -= 1;
}
}
self.buf_consumed_idx += bytes_consumed;
self.total_bytes_processed += bytes_consumed;
Ok(event.add_to_byte_offset(self.total_bytes_processed - self.buf_consumed_idx))
}
ParseEventParse::ReachedEnd => {
// All data so far has been consumed; if all nodes are
// properly closed we're done, otherwise we need more input.
if self.inner_opens == 0 {
Ok(ParseEvent::ValidEnd)
} else {
Ok(ParseEvent::NeedMoreInput)
}
}
ParseEventParse::IncompleteData => Ok(ParseEvent::NeedMoreInput),
// Hard errors.
ParseEventParse::ExpectedNameOrInnerClose(byte_offset) => Err(
ParseError::ExpectedNameOrClose(byte_offset + self.total_bytes_processed),
),
ParseEventParse::ExpectedOpen(byte_offset) => Err(ParseError::ExpectedOpen(
byte_offset + self.total_bytes_processed,
)),
}
}
pub fn peek_event<'a>(&'a mut self) -> Result<ParseEvent<'a>, ParseError> {
// Remove any consumed data.
if self.buf_consumed_idx > 0 {
self.buffer.replace_range(..self.buf_consumed_idx, "");
self.buf_consumed_idx = 0;
}
// Try to parse an event from the valid prefix.
match try_parse_event(&self.buffer) {
ParseEventParse::Ok(event, _bytes_consumed) => {
if let ParseEvent::InnerClose { byte_offset, .. } = event {
if self.inner_opens == 0 {
return Err(ParseError::UnexpectedClose(
byte_offset + self.total_bytes_processed,
));
}
}
Ok(event.add_to_byte_offset(self.total_bytes_processed))
}
ParseEventParse::ReachedEnd => {
// All data so far has been consumed; if all nodes are
// properly closed we're done, otherwise we need more input.
if self.inner_opens == 0 {
Ok(ParseEvent::ValidEnd)
} else {
Ok(ParseEvent::NeedMoreInput)
}
}
ParseEventParse::IncompleteData => Ok(ParseEvent::NeedMoreInput),
// Hard errors.
ParseEventParse::ExpectedNameOrInnerClose(byte_offset) => Err(
ParseError::ExpectedNameOrClose(byte_offset + self.total_bytes_processed),
),
ParseEventParse::ExpectedOpen(byte_offset) => Err(ParseError::ExpectedOpen(
byte_offset + self.total_bytes_processed,
)),
}
}
pub fn byte_offset(&self) -> usize {
self.total_bytes_processed + self.buf_consumed_idx
}
}
//--------------------------------------------------------------------------
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum ParseEventParse<'a> {
Ok(ParseEvent<'a>, usize), // (event, bytes consumed)
ReachedEnd, // Reached the end of the buffer in a valid state, with no event.
IncompleteData, // Need more data to parse.
// ParseErrors.
ExpectedNameOrInnerClose(usize),
ExpectedOpen(usize),
}
fn try_parse_event<'a>(text: &'a str) -> ParseEventParse<'a> {
// Remove leading whitespace and comments.
let mut source_text = skip_ws_and_comments((0, text));
let start_idx = source_text.0;
// First token.
let type_name = match next_token(source_text) {
// Type name, record and continue.
(Token::Name(tn), tail) => {
source_text = tail;
tn
}
// Closing tag for inner node. Return.
(Token::CloseInner, tail) => {
return ParseEventParse::Ok(
ParseEvent::InnerClose {
byte_offset: start_idx,
},
tail.0,
);
}
// We consumed everything as whitespace and/or
// comments. Return.
(Token::End, _) => {
return ParseEventParse::ReachedEnd;
}
// Invalid.
_ => return ParseEventParse::ExpectedNameOrInnerClose(start_idx),
};
// Skip whitespace and comments to get the start of
// where there should be an open tag, for use later in error.
source_text = skip_ws_and_comments(source_text);
let open_start_idx = source_text.0;
// Last part of the event.
match next_token(source_text) {
// Beginning of an inner node.
(Token::OpenInner, tail) => ParseEventParse::Ok(
ParseEvent::InnerOpen {
type_name: type_name,
byte_offset: start_idx,
},
tail.0,
),
// Try to parse entire leaf node.
(Token::OpenLeaf, tail) => {
// Get contents.
let (contents, tail2) = parse_leaf_content(tail);
source_text = tail2;
// Try to get closing tag.
match next_token(source_text) {
// If it's a leaf closing tag, we're done!
// Return the leaf event.
(Token::CloseLeaf, tail) => ParseEventParse::Ok(
ParseEvent::Leaf {
type_name: type_name,
contents: contents,
byte_offset: start_idx,
},
tail.0,
),
// Otherwise...
_ => {
if source_text.1.is_empty() {
// If there's no text left, we're just incomplete.
ParseEventParse::IncompleteData
} else {
// Otherwise, this would be a parse error...
// except that this shouldn't be reachable,
// since everything should be consumable for
// leaf content up until a close tag.
unreachable!("Expected leaf close tag.")
}
}
}
}
// We consumed everything else as whitespace
// and/or comments, so we're incomplete. Return.
(Token::End, _) => ParseEventParse::IncompleteData,
// Invalid.
_ => ParseEventParse::ExpectedOpen(open_start_idx),
}
}
fn parse_leaf_content(source_text: (usize, &str)) -> (&str, (usize, &str)) {
let mut si = 1;
let mut escaped = false;
let mut reached_end = true;
for (i, c) in source_text.1.char_indices() {
si = i;
if escaped {
escaped = false;
} else if c == '\\' {
escaped = true;
} else if c == ']' {
reached_end = false;
break;
}
}
if reached_end {
si = source_text.1.len();
}
(
&source_text.1[0..si],
(source_text.0 + si, &source_text.1[si..]),
)
}
//--------------------------------------------------------------------------
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum Token<'a> {
OpenInner,
CloseInner,
OpenLeaf,
CloseLeaf,
Name(&'a str),
End,
Unknown,
}
fn next_token<'a>(source_text: (usize, &'a str)) -> (Token<'a>, (usize, &'a str)) {
let text1 = skip_ws_and_comments(source_text);
if let Some(c) = text1.1.chars().next() {
let text2 = (text1.0 + c.len_utf8(), &text1.1[c.len_utf8()..]);
match c {
'{' => (Token::OpenInner, text2),
'}' => (Token::CloseInner, text2),
'[' => (Token::OpenLeaf, text2),
']' => (Token::CloseLeaf, text2),
_ => {
if is_ident_char(c) {
// Parse type
let mut si = 0;
let mut reached_end = true;
for (i, c) in text1.1.char_indices() {
si = i;
if !is_ident_char(c) {
reached_end = false;
break;
}
}
if reached_end {
si = text1.1.len();
}
(Token::Name(&text1.1[0..si]), (text1.0 + si, &text1.1[si..]))
} else {
(Token::Unknown, text1)
}
}
}
} else {
(Token::End, text1)
}
}
fn is_ws(c: char) -> bool {
matches!(c, '\n' | '\r' | '\t' | ' ')
}
fn is_nl(c: char) -> bool {
c == '\n'
}
fn is_ident_char(c: char) -> bool {
c.is_alphanumeric() || c == '-' || c == '_'
}
fn skip_ws(text: &str) -> &str {
let mut si = 0;
let mut reached_end = true;
for (i, c) in text.char_indices() {
si = i;
if !is_ws(c) {
reached_end = false;
break;
}
}
if reached_end {
si = text.len();
}
&text[si..]
}
fn skip_comment(text: &str) -> &str {
let mut si = 0;
if text.starts_with('#') {
let mut reached_end = true;
for (i, c) in text.char_indices() {
si = i;
if is_nl(c) {
reached_end = false;
break;
}
}
if reached_end {
si = text.len();
}
}
&text[si..]
}
fn skip_ws_and_comments(text: (usize, &str)) -> (usize, &str) {
let mut remaining_text = text.1;
loop {
let tmp = skip_comment(skip_ws(remaining_text));
if tmp.len() == remaining_text.len() {
break;
} else {
remaining_text = tmp;
}
}
let offset = text.0 + text.1.len() - remaining_text.len();
(offset, remaining_text)
}
//--------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use super::{next_token, Token};
#[test]
fn tokenize_01() {
let input = (0, "Thing");
assert_eq!(next_token(input), (Token::Name("Thing"), (5, "")));
}
#[test]
fn tokenize_02() {
let input = (0, " \n# gdfgdf gfdg dggdf\\sg dfgsd \n Thing");
assert_eq!(next_token(input), (Token::Name("Thing"), (41, "")));
}
#[test]
fn tokenize_03() {
let input1 = (0, " Thing { }");
let (token1, input2) = next_token(input1);
let (token2, input3) = next_token(input2);
let (token3, input4) = next_token(input3);
assert_eq!((token1, input2.1), (Token::Name("Thing"), " { }"));
assert_eq!((token2, input3.1), (Token::OpenInner, " }"));
assert_eq!((token3, input4.1), (Token::CloseInner, ""));
}
#[test]
fn tokenize_04() {
let input1 = (0, " hi the[re");
let (token1, input2) = next_token(input1);
let (token2, input3) = next_token(input2);
let (token3, input4) = next_token(input3);
let (token4, input5) = next_token(input4);
let (token5, input6) = next_token(input5);
assert_eq!((token1, input2), (Token::Name("hi"), (3, " the[re")));
assert_eq!((token2, input3), (Token::Name("the"), (7, "[re")));
assert_eq!((token3, input4), (Token::OpenLeaf, (8, "re")));
assert_eq!((token4, input5), (Token::Name("re"), (10, "")));
assert_eq!((token5, input6), (Token::End, (10, "")));
}
#[test]
fn tokenize_05() {
let input1 = (0, "Thing { # A comment\n\tThing2 []\n}");
let (token1, input2) = next_token(input1);
let (token2, input3) = next_token(input2);
let (token3, input4) = next_token(input3);
let (token4, input5) = next_token(input4);
let (token5, input6) = next_token(input5);
let (token6, input7) = next_token(input6);
let (token7, input8) = next_token(input7);
assert_eq!(
(token1, input2),
(Token::Name("Thing"), (5, " { # A comment\n\tThing2 []\n}",))
);
assert_eq!(
(token2, input3),
(Token::OpenInner, (7, " # A comment\n\tThing2 []\n}",))
);
assert_eq!((token3, input4), (Token::Name("Thing2"), (27, " []\n}")));
assert_eq!((token4, input5), (Token::OpenLeaf, (29, "]\n}")));
assert_eq!((token5, input6), (Token::CloseLeaf, (30, "\n}")));
assert_eq!((token6, input7), (Token::CloseInner, (32, "")));
assert_eq!((token7, input8), (Token::End, (32, "")));
}
#[test]
fn try_parse_event_01() {
assert_eq!(try_parse_event("H"), ParseEventParse::IncompleteData,);
}
#[test]
fn try_parse_event_02() {
assert_eq!(try_parse_event("Hello "), ParseEventParse::IncompleteData,);
}
#[test]
fn try_parse_event_03() {
assert_eq!(
try_parse_event("Hello {"),
ParseEventParse::Ok(
ParseEvent::InnerOpen {
type_name: "Hello",
byte_offset: 0,
},
7
),
);
}
#[test]
fn try_parse_event_04() {
assert_eq!(
try_parse_event(" Hello {"),
ParseEventParse::Ok(
ParseEvent::InnerOpen {
type_name: "Hello",
byte_offset: 2,
},
9
),
);
}
#[test]
fn try_parse_event_05() {
assert_eq!(
try_parse_event("Hello { "),
ParseEventParse::Ok(
ParseEvent::InnerOpen {
type_name: "Hello",
byte_offset: 0,
},
7
),
);
}
#[test]
fn try_parse_event_06() {
assert_eq!(try_parse_event("Hello ["), ParseEventParse::IncompleteData,);
}
#[test]
fn try_parse_event_07() {
assert_eq!(
try_parse_event("Hello [some contents"),
ParseEventParse::IncompleteData,
);
}
#[test]
fn try_parse_event_08() {
assert_eq!(
try_parse_event("Hello [some contents]"),
ParseEventParse::Ok(
ParseEvent::Leaf {
type_name: "Hello",
contents: "some contents",
byte_offset: 0,
},
21
),
);
}
#[test]
fn try_parse_event_09() {
assert_eq!(
try_parse_event("Hello [some contents] "),
ParseEventParse::Ok(
ParseEvent::Leaf {
type_name: "Hello",
contents: "some contents",
byte_offset: 0,
},
21
),
);
}
#[test]
fn try_parse_event_10() {
assert_eq!(
try_parse_event(r#"Hello [some \co\]ntents]"#),
ParseEventParse::Ok(
ParseEvent::Leaf {
type_name: "Hello",
contents: r#"some \co\]ntents"#,
byte_offset: 0,
},
24
),
);
}
#[test]
fn try_parse_event_11() {
assert_eq!(
try_parse_event(" # A comment\n\n "),
ParseEventParse::ReachedEnd,
);
}
#[test]
fn parser_01() {
let mut parser = Parser::new();
parser.push_data("Hello");
assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
parser.push_data("{");
assert_eq!(
parser.next_event(),
Ok(ParseEvent::InnerOpen {
type_name: "Hello",
byte_offset: 0,
})
);
assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
parser.push_data("}");
assert_eq!(
parser.next_event(),
Ok(ParseEvent::InnerClose { byte_offset: 6 })
);
assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
}
#[test]
fn parser_02() {
let mut parser = Parser::new();
parser.push_data("Hello");
assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
parser.push_data("[");
assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
parser.push_data("1.0 2.0 3.");
assert_eq!(parser.next_event(), Ok(ParseEvent::NeedMoreInput));
parser.push_data("0]");
assert_eq!(
parser.next_event(),
Ok(ParseEvent::Leaf {
type_name: "Hello",
contents: "1.0 2.0 3.0",
byte_offset: 0,
})
);
assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
}
#[test]
fn parser_03() {
let mut parser = Parser::new();
parser.push_data("Hello { World [1.0 2.0 3.0] }");
assert_eq!(
parser.next_event(),
Ok(ParseEvent::InnerOpen {
type_name: "Hello",
byte_offset: 0,
})
);
assert_eq!(
parser.next_event(),
Ok(ParseEvent::Leaf {
type_name: "World",
contents: "1.0 2.0 3.0",
byte_offset: 8,
})
);
assert_eq!(
parser.next_event(),
Ok(ParseEvent::InnerClose { byte_offset: 28 })
);
// Make sure repeated calls are stable.
assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
assert_eq!(parser.next_event(), Ok(ParseEvent::ValidEnd));
}
#[test]
fn parser_04() {
let mut parser = Parser::new();
parser.push_data("$%^&");
assert_eq!(parser.next_event(), Err(ParseError::ExpectedNameOrClose(0)));
}
#[test]
fn parser_05() {
let mut parser = Parser::new();
parser.push_data("Hello]");
assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpen(5)));
}
#[test]
fn parser_06() {
let mut parser = Parser::new();
parser.push_data("Hello}");
assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpen(5)));
}
#[test]
fn parser_07() {
let mut parser = Parser::new();
parser.push_data("Hello $*@^ [");
assert_eq!(parser.next_event(), Err(ParseError::ExpectedOpen(6)));
}
#[test]
fn parser_08() {
let mut parser = Parser::new();
parser.push_data("}");
assert_eq!(parser.next_event(), Err(ParseError::UnexpectedClose(0)));
}
}

View File

@@ -1,28 +0,0 @@
#![allow(dead_code)]
mod normal;
mod point;
mod transform;
mod vector;
pub use self::{normal::Normal, point::Point, transform::Transform, vector::Vector};
/// Trait for calculating dot products.
pub trait DotProduct {
fn dot(self, other: Self) -> f32;
}
#[inline]
pub fn dot<T: DotProduct>(a: T, b: T) -> f32 {
a.dot(b)
}
/// Trait for calculating cross products.
pub trait CrossProduct {
fn cross(self, other: Self) -> Self;
}
#[inline]
pub fn cross<T: CrossProduct>(a: T, b: T) -> T {
a.cross(b)
}

View File

@@ -1,270 +0,0 @@
#![allow(dead_code)]
use std::{
cmp::PartialEq,
ops::{Add, Div, Mul, Neg, Sub},
};
use glam::Vec3A;
use super::{CrossProduct, DotProduct, Transform, Vector};
/// A surface normal in 3d homogeneous space.
#[derive(Debug, Copy, Clone)]
pub struct Normal {
pub co: Vec3A,
}
impl Normal {
#[inline(always)]
pub fn new(x: f32, y: f32, z: f32) -> Normal {
Normal {
co: Vec3A::new(x, y, z),
}
}
#[inline(always)]
pub fn length(&self) -> f32 {
self.co.length()
}
#[inline(always)]
pub fn length2(&self) -> f32 {
self.co.length_squared()
}
#[inline(always)]
pub fn normalized(&self) -> Normal {
Normal {
co: self.co.normalize(),
}
}
#[inline(always)]
pub fn into_vector(self) -> Vector {
Vector { co: self.co }
}
#[inline(always)]
pub fn get_n(&self, n: usize) -> f32 {
match n {
0 => self.x(),
1 => self.y(),
2 => self.z(),
_ => panic!("Attempt to access dimension beyond z."),
}
}
#[inline(always)]
pub fn x(&self) -> f32 {
self.co[0]
}
#[inline(always)]
pub fn y(&self) -> f32 {
self.co[1]
}
#[inline(always)]
pub fn z(&self) -> f32 {
self.co[2]
}
#[inline(always)]
pub fn set_x(&mut self, x: f32) {
self.co[0] = x;
}
#[inline(always)]
pub fn set_y(&mut self, y: f32) {
self.co[1] = y;
}
#[inline(always)]
pub fn set_z(&mut self, z: f32) {
self.co[2] = z;
}
}
impl PartialEq for Normal {
#[inline(always)]
fn eq(&self, other: &Normal) -> bool {
self.co == other.co
}
}
impl Add for Normal {
type Output = Normal;
#[inline(always)]
fn add(self, other: Normal) -> Normal {
Normal {
co: self.co + other.co,
}
}
}
impl Sub for Normal {
type Output = Normal;
#[inline(always)]
fn sub(self, other: Normal) -> Normal {
Normal {
co: self.co - other.co,
}
}
}
impl Mul<f32> for Normal {
type Output = Normal;
#[inline(always)]
fn mul(self, other: f32) -> Normal {
Normal {
co: self.co * other,
}
}
}
impl Mul<Transform> for Normal {
type Output = Normal;
#[inline]
fn mul(self, other: Transform) -> Normal {
Normal {
co: other.0.matrix3.inverse().transpose().mul_vec3a(self.co),
}
}
}
impl Div<f32> for Normal {
type Output = Normal;
#[inline(always)]
fn div(self, other: f32) -> Normal {
Normal {
co: self.co / other,
}
}
}
impl Neg for Normal {
type Output = Normal;
#[inline(always)]
fn neg(self) -> Normal {
Normal { co: self.co * -1.0 }
}
}
impl DotProduct for Normal {
#[inline(always)]
fn dot(self, other: Normal) -> f32 {
self.co.dot(other.co)
}
}
impl CrossProduct for Normal {
#[inline]
fn cross(self, other: Normal) -> Normal {
Normal {
co: self.co.cross(other.co),
}
}
}
#[cfg(test)]
mod tests {
use super::super::{CrossProduct, DotProduct, Transform};
use super::*;
use approx::assert_ulps_eq;
#[test]
fn add() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
let v3 = Normal::new(2.5, 6.5, 5.5);
assert_eq!(v3, v1 + v2);
}
#[test]
fn sub() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
let v3 = Normal::new(-0.5, -2.5, 0.5);
assert_eq!(v3, v1 - v2);
}
#[test]
fn mul_scalar() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Normal::new(2.0, 4.0, 6.0);
assert_eq!(v3, v1 * v2);
}
#[test]
fn mul_matrix_1() {
let n = Normal::new(1.0, 2.5, 4.0);
let m = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
let nm = n * m;
let nm2 = Normal::new(-4.0625, 1.78125, -0.03125);
for i in 0..3 {
assert_ulps_eq!(nm.co[i], nm2.co[i], max_ulps = 4);
}
}
#[test]
fn div() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Normal::new(0.5, 1.0, 1.5);
assert_eq!(v3, v1 / v2);
}
#[test]
fn length() {
let n = Normal::new(1.0, 2.0, 3.0);
assert!((n.length() - 3.7416573867739413).abs() < 0.000001);
}
#[test]
fn length2() {
let n = Normal::new(1.0, 2.0, 3.0);
assert_eq!(n.length2(), 14.0);
}
#[test]
fn normalized() {
let n1 = Normal::new(1.0, 2.0, 3.0);
let n2 = Normal::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
let n3 = n1.normalized();
assert!((n3.x() - n2.x()).abs() < 0.000001);
assert!((n3.y() - n2.y()).abs() < 0.000001);
assert!((n3.z() - n2.z()).abs() < 0.000001);
}
#[test]
fn dot_test() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
let v3 = 18.0f32;
assert_eq!(v3, v1.dot(v2));
}
#[test]
fn cross_test() {
let v1 = Normal::new(1.0, 0.0, 0.0);
let v2 = Normal::new(0.0, 1.0, 0.0);
let v3 = Normal::new(0.0, 0.0, 1.0);
assert_eq!(v3, v1.cross(v2));
}
}

View File

@@ -1,202 +0,0 @@
#![allow(dead_code)]
use std::{
cmp::PartialEq,
ops::{Add, Mul, Sub},
};
use glam::Vec3A;
use super::{Transform, Vector};
/// A position in 3d homogeneous space.
#[derive(Debug, Copy, Clone)]
pub struct Point {
pub co: Vec3A,
}
impl Point {
#[inline(always)]
pub fn new(x: f32, y: f32, z: f32) -> Point {
Point {
co: Vec3A::new(x, y, z),
}
}
#[inline(always)]
pub fn min(&self, other: Point) -> Point {
let n1 = self;
let n2 = other;
Point {
co: n1.co.min(n2.co),
}
}
#[inline(always)]
pub fn max(&self, other: Point) -> Point {
let n1 = self;
let n2 = other;
Point {
co: n1.co.max(n2.co),
}
}
#[inline(always)]
pub fn into_vector(self) -> Vector {
Vector { co: self.co }
}
#[inline(always)]
pub fn get_n(&self, n: usize) -> f32 {
match n {
0 => self.x(),
1 => self.y(),
2 => self.z(),
_ => panic!("Attempt to access dimension beyond z."),
}
}
#[inline(always)]
pub fn x(&self) -> f32 {
self.co[0]
}
#[inline(always)]
pub fn y(&self) -> f32 {
self.co[1]
}
#[inline(always)]
pub fn z(&self) -> f32 {
self.co[2]
}
#[inline(always)]
pub fn set_x(&mut self, x: f32) {
self.co[0] = x;
}
#[inline(always)]
pub fn set_y(&mut self, y: f32) {
self.co[1] = y;
}
#[inline(always)]
pub fn set_z(&mut self, z: f32) {
self.co[2] = z;
}
}
impl PartialEq for Point {
#[inline(always)]
fn eq(&self, other: &Point) -> bool {
self.co == other.co
}
}
impl Add<Vector> for Point {
type Output = Point;
#[inline(always)]
fn add(self, other: Vector) -> Point {
Point {
co: self.co + other.co,
}
}
}
impl Sub for Point {
type Output = Vector;
#[inline(always)]
fn sub(self, other: Point) -> Vector {
Vector {
co: self.co - other.co,
}
}
}
impl Sub<Vector> for Point {
type Output = Point;
#[inline(always)]
fn sub(self, other: Vector) -> Point {
Point {
co: self.co - other.co,
}
}
}
impl Mul<Transform> for Point {
type Output = Point;
#[inline]
fn mul(self, other: Transform) -> Point {
Point {
co: other.0.transform_point3a(self.co),
}
}
}
#[cfg(test)]
mod tests {
use super::super::{Transform, Vector};
use super::*;
#[test]
fn add() {
let p1 = Point::new(1.0, 2.0, 3.0);
let v1 = Vector::new(1.5, 4.5, 2.5);
let p2 = Point::new(2.5, 6.5, 5.5);
assert_eq!(p2, p1 + v1);
}
#[test]
fn sub() {
let p1 = Point::new(1.0, 2.0, 3.0);
let p2 = Point::new(1.5, 4.5, 2.5);
let v1 = Vector::new(-0.5, -2.5, 0.5);
assert_eq!(v1, p1 - p2);
}
#[test]
fn mul_matrix_1() {
let p = Point::new(1.0, 2.5, 4.0);
let m = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
let pm = Point::new(15.5, 54.0, 70.0);
assert_eq!(p * m, pm);
}
#[test]
fn mul_matrix_2() {
let p = Point::new(1.0, 2.5, 4.0);
let m = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
let pm = Point::new(15.5, 54.0, 70.0);
assert_eq!(p * m, pm);
}
#[test]
fn mul_matrix_3() {
// Make sure matrix multiplication composes the way one would expect
let p = Point::new(1.0, 2.5, 4.0);
let m1 = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
let m2 =
Transform::new_from_values(4.0, 1.0, 2.0, 3.5, 3.0, 6.0, 5.0, 2.0, 2.0, 2.0, 4.0, 12.0);
println!("{:?}", m1 * m2);
let pmm1 = p * (m1 * m2);
let pmm2 = (p * m1) * m2;
assert!((pmm1 - pmm2).length2() <= 0.00001); // Assert pmm1 and pmm2 are roughly equal
}
}


@@ -1,178 +0,0 @@
#![allow(dead_code)]
use std::ops::{Add, Mul};
use approx::relative_eq;
use glam::{Affine3A, Mat3, Mat4, Vec3};
use super::Point;
/// A 4x3 affine transform matrix, used for transforms.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Transform(pub Affine3A);
impl Transform {
/// Creates a new identity matrix
#[inline]
pub fn new() -> Transform {
Transform(Affine3A::IDENTITY)
}
/// Creates a new matrix with the specified values:
/// a b c d
/// e f g h
/// i j k l
/// (with an implicit bottom row of `0 0 0 1`)
#[inline]
#[allow(clippy::many_single_char_names)]
#[allow(clippy::too_many_arguments)]
pub fn new_from_values(
a: f32,
b: f32,
c: f32,
d: f32,
e: f32,
f: f32,
g: f32,
h: f32,
i: f32,
j: f32,
k: f32,
l: f32,
) -> Transform {
Transform(Affine3A::from_mat3_translation(
Mat3::from_cols(Vec3::new(a, e, i), Vec3::new(b, f, j), Vec3::new(c, g, k)),
Vec3::new(d, h, l),
))
}
#[inline]
pub fn from_location(loc: Point) -> Transform {
Transform(Affine3A::from_translation(loc.co.into()))
}
/// Returns whether the matrices are approximately equal to each other.
/// Each corresponding element in the matrices cannot have a relative
/// error exceeding epsilon.
#[inline]
pub fn aprx_eq(&self, other: Transform, epsilon: f32) -> bool {
let mut eq = true;
for c in 0..3 {
for r in 0..3 {
let a = self.0.matrix3.col(c)[r];
let b = other.0.matrix3.col(c)[r];
eq &= relative_eq!(a, b, epsilon = epsilon);
}
}
for i in 0..3 {
let a = self.0.translation[i];
let b = other.0.translation[i];
eq &= relative_eq!(a, b, epsilon = epsilon);
}
eq
}
/// Returns the inverse of the Matrix
#[inline]
pub fn inverse(&self) -> Transform {
Transform(self.0.inverse())
}
}
impl Default for Transform {
fn default() -> Self {
Self::new()
}
}
/// Multiply two matrices together
impl Mul for Transform {
type Output = Self;
#[inline]
fn mul(self, other: Self) -> Self {
Self(other.0 * self.0)
}
}
/// Multiply a matrix by a f32
impl Mul<f32> for Transform {
type Output = Self;
#[inline]
fn mul(self, other: f32) -> Self {
Self(Affine3A::from_mat4(Mat4::from(self.0) * other))
}
}
/// Add two matrices together
impl Add for Transform {
type Output = Self;
#[inline]
fn add(self, other: Self) -> Self {
Self(Affine3A::from_mat4(
Mat4::from(self.0) + Mat4::from(other.0),
))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn equality_test() {
let a = Transform::new();
let b = Transform::new();
let c =
Transform::new_from_values(1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0);
assert_eq!(a, b);
assert!(a != c);
}
#[test]
fn approximate_equality_test() {
let a = Transform::new();
let b = Transform::new_from_values(
1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0, 0.0, 1.000001, 0.0,
);
let c = Transform::new_from_values(
1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0, 0.0, 1.000003, 0.0,
);
let d = Transform::new_from_values(
-1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0, 0.0, 0.0, 0.0, -1.000001, 0.0,
);
assert!(a.aprx_eq(b, 0.000001));
assert!(!a.aprx_eq(c, 0.000001));
assert!(!a.aprx_eq(d, 0.000001));
}
#[test]
fn multiply_test() {
let a = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
let b = Transform::new_from_values(
1.0, 5.0, 9.0, 13.0, 2.0, 6.0, 10.0, 14.0, 3.0, 7.0, 11.0, 15.0,
);
let c = Transform::new_from_values(
97.0, 50.0, 136.0, 162.5, 110.0, 60.0, 156.0, 185.0, 123.0, 70.0, 176.0, 207.5,
);
assert_eq!(a * b, c);
}
#[test]
fn inverse_test() {
let a = Transform::new_from_values(
1.0, 0.33, 0.0, -2.0, 0.0, 1.0, 0.0, 0.0, 2.1, 0.7, 1.3, 0.0,
);
let b = a.inverse();
let c = Transform::new();
assert!((dbg!(a * b)).aprx_eq(dbg!(c), 0.0000001));
}
}


@@ -1,286 +0,0 @@
#![allow(dead_code)]
use std::{
cmp::PartialEq,
ops::{Add, Div, Mul, Neg, Sub},
};
use glam::Vec3A;
use super::{CrossProduct, DotProduct, Normal, Point, Transform};
/// A direction vector in 3d homogeneous space.
#[derive(Debug, Copy, Clone)]
pub struct Vector {
pub co: Vec3A,
}
impl Vector {
#[inline(always)]
pub fn new(x: f32, y: f32, z: f32) -> Vector {
Vector {
co: Vec3A::new(x, y, z),
}
}
#[inline(always)]
pub fn length(&self) -> f32 {
self.co.length()
}
#[inline(always)]
pub fn length2(&self) -> f32 {
self.co.length_squared()
}
#[inline(always)]
pub fn normalized(&self) -> Vector {
Vector {
co: self.co.normalize(),
}
}
#[inline(always)]
pub fn abs(&self) -> Vector {
Vector {
co: self.co * self.co.signum(),
}
}
#[inline(always)]
pub fn into_point(self) -> Point {
Point { co: self.co }
}
#[inline(always)]
pub fn into_normal(self) -> Normal {
Normal { co: self.co }
}
#[inline(always)]
pub fn get_n(&self, n: usize) -> f32 {
match n {
0 => self.x(),
1 => self.y(),
2 => self.z(),
_ => panic!("Attempt to access dimension beyond z."),
}
}
#[inline(always)]
pub fn x(&self) -> f32 {
self.co[0]
}
#[inline(always)]
pub fn y(&self) -> f32 {
self.co[1]
}
#[inline(always)]
pub fn z(&self) -> f32 {
self.co[2]
}
#[inline(always)]
pub fn set_x(&mut self, x: f32) {
self.co[0] = x;
}
#[inline(always)]
pub fn set_y(&mut self, y: f32) {
self.co[1] = y;
}
#[inline(always)]
pub fn set_z(&mut self, z: f32) {
self.co[2] = z;
}
}
impl PartialEq for Vector {
#[inline(always)]
fn eq(&self, other: &Vector) -> bool {
self.co == other.co
}
}
impl Add for Vector {
type Output = Vector;
#[inline(always)]
fn add(self, other: Vector) -> Vector {
Vector {
co: self.co + other.co,
}
}
}
impl Sub for Vector {
type Output = Vector;
#[inline(always)]
fn sub(self, other: Vector) -> Vector {
Vector {
co: self.co - other.co,
}
}
}
impl Mul<f32> for Vector {
type Output = Vector;
#[inline(always)]
fn mul(self, other: f32) -> Vector {
Vector {
co: self.co * other,
}
}
}
impl Mul<Transform> for Vector {
type Output = Vector;
#[inline]
fn mul(self, other: Transform) -> Vector {
Vector {
co: other.0.transform_vector3a(self.co),
}
}
}
impl Div<f32> for Vector {
type Output = Vector;
#[inline(always)]
fn div(self, other: f32) -> Vector {
Vector {
co: self.co / other,
}
}
}
impl Neg for Vector {
type Output = Vector;
#[inline(always)]
fn neg(self) -> Vector {
Vector { co: self.co * -1.0 }
}
}
impl DotProduct for Vector {
#[inline(always)]
fn dot(self, other: Vector) -> f32 {
self.co.dot(other.co)
}
}
impl CrossProduct for Vector {
#[inline]
fn cross(self, other: Vector) -> Vector {
Vector {
co: self.co.cross(other.co),
}
}
}
#[cfg(test)]
mod tests {
use super::super::{CrossProduct, DotProduct, Transform};
use super::*;
#[test]
fn add() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
let v3 = Vector::new(2.5, 6.5, 5.5);
assert_eq!(v3, v1 + v2);
}
#[test]
fn sub() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
let v3 = Vector::new(-0.5, -2.5, 0.5);
assert_eq!(v3, v1 - v2);
}
#[test]
fn mul_scalar() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Vector::new(2.0, 4.0, 6.0);
assert_eq!(v3, v1 * v2);
}
#[test]
fn mul_matrix_1() {
let v = Vector::new(1.0, 2.5, 4.0);
let m = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
assert_eq!(v * m, Vector::new(14.0, 46.0, 58.0));
}
#[test]
fn mul_matrix_2() {
let v = Vector::new(1.0, 2.5, 4.0);
let m = Transform::new_from_values(
1.0, 2.0, 2.0, 1.5, 3.0, 6.0, 7.0, 8.0, 9.0, 2.0, 11.0, 12.0,
);
assert_eq!(v * m, Vector::new(14.0, 46.0, 58.0));
}
#[test]
fn div() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Vector::new(0.5, 1.0, 1.5);
assert_eq!(v3, v1 / v2);
}
#[test]
fn length() {
let v = Vector::new(1.0, 2.0, 3.0);
assert!((v.length() - 3.7416573867739413).abs() < 0.000001);
}
#[test]
fn length2() {
let v = Vector::new(1.0, 2.0, 3.0);
assert_eq!(v.length2(), 14.0);
}
#[test]
fn normalized() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
let v3 = v1.normalized();
assert!((v3.x() - v2.x()).abs() < 0.000001);
assert!((v3.y() - v2.y()).abs() < 0.000001);
assert!((v3.z() - v2.z()).abs() < 0.000001);
}
#[test]
fn dot_test() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
let v3 = 18.0f32;
assert_eq!(v3, v1.dot(v2));
}
#[test]
fn cross_test() {
let v1 = Vector::new(1.0, 0.0, 0.0);
let v2 = Vector::new(0.0, 1.0, 0.0);
let v3 = Vector::new(0.0, 0.0, 1.0);
assert_eq!(v3, v1.cross(v2));
}
}


@@ -1,15 +1,18 @@
[package]
name = "math3d"
name = "rmath"
version = "0.1.0"
authors = ["Nathan Vegdahl <cessen@cessen.com>"]
edition = "2018"
license = "MIT, Apache 2.0"
[lib]
name = "math3d"
name = "rmath"
path = "src/lib.rs"
# Local crate dependencies
[dependencies]
glam = "0.15"
approx = "0.4"
[dev-dependencies]
bencher = "0.1.5"
rand = "0.6"
[[bench]]
name = "bench"
harness = false


@@ -0,0 +1,202 @@
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
use rand::{rngs::SmallRng, FromEntropy, Rng};
use rmath::{CrossProduct, DotProduct, Normal, Point, Vector, Xform, XformFull};
//----
fn vector_cross_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let v1 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let v2 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
for _ in 0..10000 {
black_box(black_box(v1).cross(black_box(v2)));
}
});
}
fn vector_dot_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let v1 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let v2 = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
for _ in 0..10000 {
black_box(black_box(v1).dot(black_box(v2)));
}
});
}
fn xform_vector_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let v = Vector::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(v).xform(black_box(&x)));
}
});
}
fn xform_point_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let p = Point::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(p).xform(black_box(&x)));
}
});
}
fn xform_point_mul_inv_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let p = Point::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
)
.to_full()
.unwrap();
for _ in 0..10000 {
black_box(black_box(p).xform_inv(black_box(&x)));
}
});
}
fn xform_normal_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let n = Normal::new(rng.gen::<f32>(), rng.gen::<f32>(), rng.gen::<f32>());
let x = Xform::new(
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
1.0,
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
)
.to_full()
.unwrap();
for _ in 0..10000 {
black_box(black_box(n).xform(black_box(&x)));
}
});
}
fn xform_xform_mul_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let x1 = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
let x2 = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(x1).compose(black_box(&x2)));
}
});
}
fn xform_to_xformfull_10000(bench: &mut Bencher) {
let mut rng = SmallRng::from_entropy();
bench.iter(|| {
let x = Xform::new(
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
rng.gen::<f32>(),
);
for _ in 0..10000 {
black_box(black_box(x).to_full());
}
});
}
//----
benchmark_group!(
benches,
vector_cross_10000,
vector_dot_10000,
xform_vector_mul_10000,
xform_point_mul_10000,
xform_point_mul_inv_10000,
xform_normal_mul_10000,
xform_xform_mul_10000,
xform_to_xformfull_10000,
);
benchmark_main!(benches);


@@ -0,0 +1,268 @@
use rand::{rngs::SmallRng, FromEntropy, Rng};
use rmath::{utils::ulp_diff, wide4::Float4};
type D4 = [f64; 4];
fn main() {
let mut rng = SmallRng::from_entropy();
// Convenience functions for generating random Float4's.
let mut rf4 = || {
let mut rf = || {
let range = 268435456.0;
let n = rng.gen::<f64>();
((n * range * 2.0) - range) as f32
};
Float4::new(rf(), rf(), rf(), rf())
};
// Dot product test.
println!("Dot product:");
{
let mut max_ulp_diff = 0u32;
for _ in 0..10000000 {
let v1 = rf4();
let v2 = rf4();
let dpa = Float4::dot_3(v1, v2);
let dpb = dot_3(f4_to_d4(v1), f4_to_d4(v2));
let ud = ulp_diff(dpa, dpb as f32);
max_ulp_diff = max_ulp_diff.max(ud);
}
println!(" Max error (ulps):\n {:?}\n", max_ulp_diff);
}
// Cross product test.
println!("Cross product:");
{
let mut max_ulp_diff = [0u32; 4];
for _ in 0..10000000 {
let v1 = rf4();
let v2 = rf4();
let v3a = Float4::cross_3(v1, v2);
let v3b = cross_3(f4_to_d4(v1), f4_to_d4(v2));
let ud = ulp_diff_f4d4(v3a, v3b);
for i in 0..4 {
max_ulp_diff[i] = max_ulp_diff[i].max(ud[i]);
}
}
println!(" Max error (ulps):\n {:?}\n", max_ulp_diff);
}
// Matrix inversion test.
println!("Matrix inversion:");
{
let mut max_ulp_diff = [[0u32; 4]; 3];
let mut det_ulp_hist = [0u32; 9];
for _ in 0..2000000 {
let m = [rf4(), rf4(), rf4()];
let ima = Float4::invert_3x3_w_det(&m);
let imb = invert_3x3([f4_to_d4(m[0]), f4_to_d4(m[1]), f4_to_d4(m[2])]);
if let (Some((ima, deta)), Some((imb, detb))) = (ima, imb) {
let det_ulp_diff = ulp_diff(deta, detb as f32);
let mut hist_upper = 0;
for i in 0..det_ulp_hist.len() {
if det_ulp_diff <= hist_upper {
det_ulp_hist[i] += 1;
break;
}
if hist_upper == 0 {
hist_upper += 1;
} else {
hist_upper *= 10;
}
}
if det_ulp_diff == 0 {
for i in 0..3 {
let ud = ulp_diff_f4d4(ima[i], imb[i]);
for j in 0..4 {
max_ulp_diff[i][j] = max_ulp_diff[i][j].max(ud[j]);
}
}
}
}
}
println!(
" Max error when determinant has 0-ulp error (ulps):\n {:?}",
max_ulp_diff
);
let total: u32 = det_ulp_hist.iter().sum();
let mut ulp = 0;
let mut sum = 0;
println!(" Determinant error distribution:");
for h in det_ulp_hist.iter() {
sum += *h;
println!(
" {:.8}% <= {} ulps",
sum as f64 / total as f64 * 100.0,
ulp
);
if ulp == 0 {
ulp += 1;
} else {
ulp *= 10;
}
}
println!();
}
}
//-------------------------------------------------------------
fn f4_to_d4(v: Float4) -> D4 {
[v.a() as f64, v.b() as f64, v.c() as f64, v.d() as f64]
}
fn ulp_diff_f4d4(a: Float4, b: D4) -> [u32; 4] {
[
ulp_diff(a.a(), b[0] as f32),
ulp_diff(a.b(), b[1] as f32),
ulp_diff(a.c(), b[2] as f32),
ulp_diff(a.d(), b[3] as f32),
]
}
//-------------------------------------------------------------
fn dot_3(a: D4, b: D4) -> f64 {
// Products.
let (x, x_err) = two_prod(a[0], b[0]);
let (y, y_err) = two_prod(a[1], b[1]);
let (z, z_err) = two_prod(a[2], b[2]);
// Sums.
let (s1, s1_err) = two_sum(x, y);
let err1 = x_err + (y_err + s1_err);
let (s2, s2_err) = two_sum(s1, z);
let err2 = z_err + (err1 + s2_err);
// Final result with rounding error compensation.
s2 + err2
}
fn cross_3(a: D4, b: D4) -> D4 {
[
difference_of_products(a[1], b[2], a[2], b[1]),
difference_of_products(a[2], b[0], a[0], b[2]),
difference_of_products(a[0], b[1], a[1], b[0]),
difference_of_products(a[3], b[3], a[3], b[3]),
]
}
fn invert_3x3(m: [D4; 3]) -> Option<([D4; 3], f64)> {
let m0_bca = [m[0][1], m[0][2], m[0][0], m[0][3]];
let m1_bca = [m[1][1], m[1][2], m[1][0], m[1][3]];
let m2_bca = [m[2][1], m[2][2], m[2][0], m[2][3]];
let m0_cab = [m[0][2], m[0][0], m[0][1], m[0][3]];
let m1_cab = [m[1][2], m[1][0], m[1][1], m[1][3]];
let m2_cab = [m[2][2], m[2][0], m[2][1], m[2][3]];
let abc = [
difference_of_products(m1_bca[0], m2_cab[0], m1_cab[0], m2_bca[0]),
difference_of_products(m1_bca[1], m2_cab[1], m1_cab[1], m2_bca[1]),
difference_of_products(m1_bca[2], m2_cab[2], m1_cab[2], m2_bca[2]),
difference_of_products(m1_bca[3], m2_cab[3], m1_cab[3], m2_bca[3]),
];
let def = [
difference_of_products(m2_bca[0], m0_cab[0], m2_cab[0], m0_bca[0]),
difference_of_products(m2_bca[1], m0_cab[1], m2_cab[1], m0_bca[1]),
difference_of_products(m2_bca[2], m0_cab[2], m2_cab[2], m0_bca[2]),
difference_of_products(m2_bca[3], m0_cab[3], m2_cab[3], m0_bca[3]),
];
let ghi = [
difference_of_products(m0_bca[0], m1_cab[0], m0_cab[0], m1_bca[0]),
difference_of_products(m0_bca[1], m1_cab[1], m0_cab[1], m1_bca[1]),
difference_of_products(m0_bca[2], m1_cab[2], m0_cab[2], m1_bca[2]),
difference_of_products(m0_bca[3], m1_cab[3], m0_cab[3], m1_bca[3]),
];
let det = dot_3(
[abc[0], def[0], ghi[0], 0.0],
[m[0][0], m[1][0], m[2][0], 0.0],
);
if det == 0.0 {
None
} else {
Some((
[
[abc[0] / det, def[0] / det, ghi[0] / det, 0.0],
[abc[1] / det, def[1] / det, ghi[1] / det, 0.0],
[abc[2] / det, def[2] / det, ghi[2] / det, 0.0],
],
// [
// [abc[0], def[0], ghi[0], 0.0],
// [abc[1], def[1], ghi[1], 0.0],
// [abc[2], def[2], ghi[2], 0.0],
// ],
det,
))
}
}
fn rel_diff(a: f64, b: f64) -> f64 {
(a - b).abs() / a.abs().max(b.abs())
}
//-------------------------------------------------------------
/// `(a * b) - (c * d)` but done with high precision via floating point tricks.
///
/// See https://pharr.org/matt/blog/2019/11/03/difference-of-floats
#[inline(always)]
fn difference_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 {
let cd = c * d;
let dop = a.mul_add(b, -cd);
let err = (-c).mul_add(d, cd);
dop + err
}
/// `(a * b) + (c * d)` but done with high precision via floating point tricks.
#[inline(always)]
fn sum_of_products(a: f64, b: f64, c: f64, d: f64) -> f64 {
let cd = c * d;
let sop = a.mul_add(b, cd);
let err = c.mul_add(d, -cd);
sop + err
}
/// `a * b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_prod(a: f64, b: f64) -> (f64, f64)
// (product, rounding_err)
{
let ab = a * b;
(ab, a.mul_add(b, -ab))
}
/// `a + b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_sum(a: f64, b: f64) -> (f64, f64)
// (sum, rounding_err)
{
let sum = a + b;
let delta = sum - a;
(sum, (a - (sum - delta)) + (b - delta))
}
#[inline(always)]
fn two_diff(a: f64, b: f64) -> (f64, f64)
// (diff, rounding_err)
{
let diff = a - b;
let delta = diff - a;
(diff, (a - (diff - delta)) - (b + delta))
}

sub_crates/rmath/src/lib.rs

@@ -0,0 +1,129 @@
//! RMath: a math library for building CPU-based renderers.
#![allow(dead_code)]
mod normal;
mod point;
mod sealed;
pub mod utils;
mod vector;
pub mod wide4;
mod xform;
use std::ops::{Add, Mul, Neg, Sub};
pub use self::{
normal::Normal, point::Point, vector::Vector, xform::AsXform, xform::Xform, xform::XformFull,
};
/// Trait for calculating dot products.
pub trait DotProduct {
fn dot(self, other: Self) -> f32;
fn dot_fast(self, other: Self) -> f32;
}
#[inline(always)]
pub fn dot<T: DotProduct>(a: T, b: T) -> f32 {
a.dot(b)
}
#[inline(always)]
pub fn dot_fast<T: DotProduct>(a: T, b: T) -> f32 {
a.dot_fast(b)
}
/// Trait for calculating cross products.
pub trait CrossProduct {
fn cross(self, other: Self) -> Self;
fn cross_fast(self, other: Self) -> Self;
}
#[inline(always)]
pub fn cross<T: CrossProduct>(a: T, b: T) -> T {
a.cross(b)
}
#[inline(always)]
pub fn cross_fast<T: CrossProduct>(a: T, b: T) -> T {
a.cross_fast(b)
}
//-------------------------------------------------------------
/// Trait representing types that can do fused multiply-add.
trait FMulAdd {
/// `(self * b) + c` with only one floating point rounding error.
fn fma(self, b: Self, c: Self) -> Self;
}
impl FMulAdd for f32 {
fn fma(self, b: Self, c: Self) -> Self {
self.mul_add(b, c)
}
}
/// `(a * b) - (c * d)` but done with high precision via floating point tricks.
///
/// See https://pharr.org/matt/blog/2019/11/03/difference-of-floats
#[inline(always)]
fn difference_of_products<T>(a: T, b: T, c: T, d: T) -> T
where
T: Copy + FMulAdd + Add<Output = T> + Mul<Output = T> + Neg<Output = T>,
{
let cd = c * d;
let dop = a.fma(b, -cd);
let err = (-c).fma(d, cd);
dop + err
}
/// `(a * b) + (c * d)` but done with high precision via floating point tricks.
#[inline(always)]
fn sum_of_products<T>(a: T, b: T, c: T, d: T) -> T
where
T: Copy + FMulAdd + Add<Output = T> + Mul<Output = T> + Neg<Output = T>,
{
let cd = c * d;
let sop = a.fma(b, cd);
let err = c.fma(d, -cd);
sop + err
}
/// `a * b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_prod<T>(a: T, b: T) -> (T, T)
// (product, rounding_err)
where
T: Copy + FMulAdd + Mul<Output = T> + Neg<Output = T>,
{
let ab = a * b;
(ab, a.fma(b, -ab))
}
/// `a + b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_sum<T>(a: T, b: T) -> (T, T)
// (sum, rounding_err)
where
T: Copy + Add<Output = T> + Sub<Output = T>,
{
let sum = a + b;
let delta = sum - a;
(sum, (a - (sum - delta)) + (b - delta))
}
/// `a - b` but also returns a rounding error for precise composition
/// with other operations.
#[inline(always)]
fn two_diff<T>(a: T, b: T) -> (T, T)
// (diff, rounding_err)
where
T: Copy + Add<Output = T> + Sub<Output = T>,
{
let diff = a - b;
let delta = diff - a;
(diff, (a - (diff - delta)) - (b + delta))
}
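//-------------------------------------------------------------
// A small illustrative check of the helpers above (a sketch; the constants
// are chosen purely for demonstration): the error-free transforms return
// the rounded result plus its exact rounding error, and
// `difference_of_products` survives cancellation that zeroes out the
// naive expression.
#[cfg(test)]
mod eft_examples {
use super::*;
#[test]
fn two_sum_recovers_rounding_error() {
// 1e-10 is far below one ulp of 1.0f32, so the plain sum rounds it
// away, but the error term hands it back exactly: s + e == a + b.
let (s, e) = two_sum(1.0f32, 1e-10f32);
assert_eq!(s, 1.0);
assert_eq!(e, 1e-10);
}
#[test]
fn two_prod_recovers_rounding_error() {
// (1 + 2^-15)^2 = 1 + 2^-14 + 2^-30 needs more than 23 mantissa bits,
// so the f32 product rounds; the FMA-based error term is exactly 2^-30.
let a = 1.0f32 + 1.0 / 32768.0;
let (p, e) = two_prod(a, a);
assert_eq!(p, 1.0 + 1.0 / 16384.0);
assert_eq!(e, 1.0 / 1073741824.0);
}
#[test]
fn difference_of_products_survives_cancellation() {
// Exactly, 10001 * 9999 - 10000 * 10000 = -1, but in f32 the first
// product rounds up to 1.0e8 and the naive difference collapses to 0.
let (a, b, c, d) = (10001.0f32, 9999.0f32, 10000.0f32, 10000.0f32);
assert_eq!(a * b - c * d, 0.0);
assert_eq!(difference_of_products(a, b, c, d), -1.0);
}
}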


@@ -0,0 +1,318 @@
#![allow(dead_code)]
use std::cmp::PartialEq;
use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::wide4::Float4;
use crate::xform::{AsXform, XformFull};
use crate::Vector;
use crate::{CrossProduct, DotProduct};
/// A surface normal in 3D space.
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
pub struct Normal(pub Float4);
impl Normal {
#[inline(always)]
pub fn new(x: f32, y: f32, z: f32) -> Self {
Self(Float4::new(x, y, z, 0.0))
}
#[inline(always)]
pub fn length(self) -> f32 {
self.length2().sqrt()
}
#[inline(always)]
pub fn length2(self) -> f32 {
let sqr = self.0 * self.0;
sqr.a() + sqr.b() + sqr.c()
}
#[inline(always)]
#[must_use]
pub fn normalized(self) -> Self {
Self(self.0 / self.length())
}
#[inline(always)]
pub fn into_vector(self) -> Vector {
Vector(self.0)
}
#[inline(always)]
pub fn x(self) -> f32 {
self.0.a()
}
#[inline(always)]
pub fn y(self) -> f32 {
self.0.b()
}
#[inline(always)]
pub fn z(self) -> f32 {
self.0.c()
}
#[inline(always)]
pub fn get_n(self, i: usize) -> f32 {
match i {
0 => self.x(),
1 => self.y(),
2 => self.z(),
_ => panic!("Out of bounds index into 3D vector."),
}
}
#[inline(always)]
#[must_use]
pub fn set_x(self, x: f32) -> Self {
Self(self.0.set_a(x))
}
#[inline(always)]
#[must_use]
pub fn set_y(self, y: f32) -> Self {
Self(self.0.set_b(y))
}
#[inline(always)]
#[must_use]
pub fn set_z(self, z: f32) -> Self {
Self(self.0.set_c(z))
}
//-------------
// Transforms.
/// Forward-transform the normal.
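///
/// Normals transform with the inverse transpose of the 3x3 part (so they
/// stay perpendicular to surfaces under shear and non-uniform scale),
/// which is why this multiplies by the transpose of `xform.inv_m`.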
#[inline(always)]
pub fn xform(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3(&Float4::transpose_3x3(&xform.inv_m)))
}
/// Inverse-transform the normal.
#[inline(always)]
pub fn xform_inv<T: AsXform>(self, xform: &T) -> Self {
Self(
self.0
.vec_mul_3x3(&Float4::transpose_3x3(&xform.as_xform().m)),
)
}
/// Faster but less precise version of `xform()`.
#[inline(always)]
pub fn xform_fast(self, xform: &XformFull) -> Self {
Self(
self.0
.vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.inv_m)),
)
}
/// Faster but less precise version of `xform_inv()`.
#[inline(always)]
pub fn xform_inv_fast<T: AsXform>(self, xform: &T) -> Self {
Self(
self.0
.vec_mul_3x3_fast(&Float4::transpose_3x3(&xform.as_xform().m)),
)
}
}
impl Add for Normal {
type Output = Self;
#[inline(always)]
fn add(self, other: Self) -> Self {
Self(self.0 + other.0)
}
}
impl Sub for Normal {
type Output = Self;
#[inline(always)]
fn sub(self, other: Self) -> Self {
Self(self.0 - other.0)
}
}
impl Mul<f32> for Normal {
type Output = Self;
#[inline(always)]
fn mul(self, other: f32) -> Self {
Self(self.0 * other)
}
}
impl Div<f32> for Normal {
type Output = Self;
#[inline(always)]
fn div(self, other: f32) -> Self {
Self(self.0 / other)
}
}
impl Neg for Normal {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
Self(-self.0)
}
}
impl PartialEq for Normal {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
self.0.a() == rhs.0.a() && self.0.b() == rhs.0.b() && self.0.c() == rhs.0.c()
}
}
impl DotProduct for Normal {
#[inline(always)]
fn dot(self, other: Self) -> f32 {
Float4::dot_3(self.0, other.0)
}
#[inline(always)]
fn dot_fast(self, other: Self) -> f32 {
Float4::dot_3_fast(self.0, other.0)
}
}
impl CrossProduct for Normal {
#[inline(always)]
fn cross(self, other: Self) -> Self {
Self(Float4::cross_3(self.0, other.0))
}
#[inline(always)]
fn cross_fast(self, other: Self) -> Self {
Self(Float4::cross_3_fast(self.0, other.0))
}
}
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::{CrossProduct, DotProduct, Xform};
#[test]
fn add() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
let v3 = Normal::new(2.5, 6.5, 5.5);
assert_eq!(v3, v1 + v2);
}
#[test]
fn sub() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
let v3 = Normal::new(-0.5, -2.5, 0.5);
assert_eq!(v3, v1 - v2);
}
#[test]
fn mul_scalar() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Normal::new(2.0, 4.0, 6.0);
assert_eq!(v3, v1 * v2);
}
#[test]
fn xform() {
let n = Normal::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.to_full()
.unwrap();
assert_eq!(n.xform(&m), Normal::new(-4.0625, 1.78125, -0.03125));
assert_eq!(n.xform(&m).xform_inv(&m), n);
}
#[test]
fn xform_fast() {
let n = Normal::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.to_full()
.unwrap();
assert_eq!(n.xform_fast(&m), Normal::new(-4.0625, 1.78125, -0.03125));
assert_eq!(n.xform_fast(&m).xform_inv_fast(&m), n);
}
#[test]
fn div() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Normal::new(0.5, 1.0, 1.5);
assert_eq!(v3, v1 / v2);
}
#[test]
fn length() {
let n = Normal::new(1.0, 2.0, 3.0);
assert!((n.length() - 3.7416573867739413).abs() < 0.000001);
}
#[test]
fn length2() {
let n = Normal::new(1.0, 2.0, 3.0);
assert_eq!(n.length2(), 14.0);
}
#[test]
fn normalized() {
let n1 = Normal::new(1.0, 2.0, 3.0);
let n2 = Normal::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
let n3 = n1.normalized();
assert!((n3.x() - n2.x()).abs() < 0.000001);
assert!((n3.y() - n2.y()).abs() < 0.000001);
assert!((n3.z() - n2.z()).abs() < 0.000001);
}
#[test]
fn dot() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
assert_eq!(v1.dot(v2), 18.0);
}
#[test]
fn dot_fast() {
let v1 = Normal::new(1.0, 2.0, 3.0);
let v2 = Normal::new(1.5, 4.5, 2.5);
assert_eq!(v1.dot_fast(v2), 18.0);
}
#[test]
fn cross() {
let v1 = Normal::new(1.0, 0.0, 0.0);
let v2 = Normal::new(0.0, 1.0, 0.0);
assert_eq!(v1.cross(v2), Normal::new(0.0, 0.0, 1.0));
}
#[test]
fn cross_fast() {
let v1 = Normal::new(1.0, 0.0, 0.0);
let v2 = Normal::new(0.0, 1.0, 0.0);
assert_eq!(v1.cross_fast(v2), Normal::new(0.0, 0.0, 1.0));
}
}


@@ -0,0 +1,186 @@
#![allow(dead_code)]
use std::cmp::PartialEq;
use std::ops::{Add, Sub};
use crate::vector::Vector;
use crate::wide4::Float4;
use crate::xform::{AsXform, XformFull};
/// A position in 3D space.
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
pub struct Point(pub Float4);
impl Point {
#[inline(always)]
pub fn new(x: f32, y: f32, z: f32) -> Self {
Self(Float4::new(x, y, z, 0.0))
}
#[inline(always)]
pub fn min(self, other: Self) -> Self {
Self(self.0.min(other.0))
}
#[inline(always)]
pub fn max(self, other: Self) -> Self {
Self(self.0.max(other.0))
}
#[inline(always)]
pub fn into_vector(self) -> Vector {
Vector(self.0)
}
#[inline(always)]
pub fn x(self) -> f32 {
self.0.a()
}
#[inline(always)]
pub fn y(self) -> f32 {
self.0.b()
}
#[inline(always)]
pub fn z(self) -> f32 {
self.0.c()
}
#[inline(always)]
pub fn get_n(self, i: usize) -> f32 {
match i {
0 => self.x(),
1 => self.y(),
2 => self.z(),
_ => panic!("Out of bounds index into 3D vector."),
}
}
#[inline(always)]
#[must_use]
pub fn set_x(self, x: f32) -> Self {
Self(self.0.set_a(x))
}
#[inline(always)]
#[must_use]
pub fn set_y(self, y: f32) -> Self {
Self(self.0.set_b(y))
}
#[inline(always)]
#[must_use]
pub fn set_z(self, z: f32) -> Self {
Self(self.0.set_c(z))
}
//-------------
// Transforms.
/// Forward-transform the point.
#[inline(always)]
pub fn xform<T: AsXform>(self, xform: &T) -> Self {
let xform = xform.as_xform();
Self(self.0.vec_mul_affine(&xform.m, xform.t))
}
/// Inverse-transform the point.
#[inline(always)]
pub fn xform_inv(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_affine_rev(&xform.inv_m, xform.fwd.t))
}
/// Faster but less precise version of `xform()`.
#[inline(always)]
pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
let xform = xform.as_xform();
Self(self.0.vec_mul_affine_fast(&xform.m, xform.t))
}
/// Faster but less precise version of `xform_inv()`.
#[inline(always)]
pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_affine_rev_fast(&xform.inv_m, xform.fwd.t))
}
}
impl Add<Vector> for Point {
type Output = Self;
#[inline(always)]
fn add(self, other: Vector) -> Self {
Self(self.0 + other.0)
}
}
impl Sub for Point {
type Output = Vector;
#[inline(always)]
fn sub(self, other: Self) -> Vector {
Vector(self.0 - other.0)
}
}
impl Sub<Vector> for Point {
type Output = Self;
#[inline(always)]
fn sub(self, other: Vector) -> Self {
Self(self.0 - other.0)
}
}
impl PartialEq for Point {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
self.0.a() == rhs.0.a() && self.0.b() == rhs.0.b() && self.0.c() == rhs.0.c()
}
}
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::{Vector, Xform};
#[test]
fn add() {
let p1 = Point::new(1.0, 2.0, 3.0);
let v1 = Vector::new(1.5, 4.5, 2.5);
let p2 = Point::new(2.5, 6.5, 5.5);
assert_eq!(p2, p1 + v1);
}
#[test]
fn sub() {
let p1 = Point::new(1.0, 2.0, 3.0);
let p2 = Point::new(1.5, 4.5, 2.5);
let v1 = Vector::new(-0.5, -2.5, 0.5);
assert_eq!(v1, p1 - p2);
}
#[test]
fn xform() {
let p = Point::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.to_full()
.unwrap();
assert_eq!(p.xform(&m), Point::new(15.5, 54.0, 70.0));
assert_eq!(p.xform(&m).xform_inv(&m), p);
}
#[test]
fn xform_fast() {
let p = Point::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.to_full()
.unwrap();
assert_eq!(p.xform_fast(&m), Point::new(15.5, 54.0, 70.0));
assert_eq!(p.xform_fast(&m).xform_inv_fast(&m), p);
}
}


@@ -0,0 +1,5 @@
/// For sealing other traits.
///
/// Even though this trait is marked as public, the module containing it
/// isn't, so the trait cannot be named (and therefore cannot be
/// implemented) outside this crate.
pub trait Sealed {}
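// The usual sealing pattern this supports (a sketch with hypothetical names;
// the crate's real traits may be wired differently): a public trait takes
// `Sealed` as a supertrait, so downstream code can use it but not
// implement it.
#[cfg(test)]
#[allow(dead_code)]
mod sealing_example {
/// A hypothetical public trait that only this crate can implement.
pub trait Frobnicate: super::Sealed {}
struct Local;
impl super::Sealed for Local {}
impl Frobnicate for Local {}
}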


@@ -0,0 +1,196 @@
/// Compute how different two floats are in ulps.
///
/// Notes:
/// - Treats 0.0 and -0.0 as zero ulps apart, and extends the
/// implications of that to the rest of the numbers. E.g. the numbers
/// just before and after 0.0/-0.0 are only two ulps apart, not three.
/// - Infinity is one ulp past float max, and converse for -infinity.
/// - If either number is NaN, returns `u32::MAX`.
#[inline(always)]
pub fn ulp_diff(a: f32, b: f32) -> u32 {
const SIGN_BIT: u32 = 1 << 31;
const INFINITY: u32 = 0x7f800000;
let a = a.to_bits();
let b = b.to_bits();
let a_sign = a & SIGN_BIT;
let b_sign = b & SIGN_BIT;
let a_abs = a & !SIGN_BIT;
let b_abs = b & !SIGN_BIT;
if a_abs > INFINITY || b_abs > INFINITY {
// NaNs always return maximum ulps apart.
u32::MAX
} else if a_sign == b_sign {
a_abs.max(b_abs) - a_abs.min(b_abs)
} else {
a_abs + b_abs
}
}
/// Checks if two floats are approximately equal, within `max_ulps`.
#[inline(always)]
pub fn ulps_eq(a: f32, b: f32, max_ulps: u32) -> bool {
// The minimum ensures that NaNs never return true.
ulp_diff(a, b) <= max_ulps.min(u32::MAX - 1)
}
/// Increments to the next representable floating point number.
///
/// Notes:
/// - 0.0 and -0.0 are treated as the same value. E.g. starting from the
/// number just before -0.0, it only takes two increments to get to the
/// number just after 0.0.
/// - Infinity, NaN, and their negative counterparts are returned
/// unchanged.
/// - Incrementing `f32::MAX` results in infinity.
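///
/// This works because, for finite floats of the same sign, IEEE-754 bit
/// patterns are ordered the same way as the values they represent, so
/// stepping the bits by one steps the value by one ulp.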
#[inline(always)]
pub fn increment_ulp(v: f32) -> f32 {
if v.is_finite() {
if v > 0.0 {
f32::from_bits(v.to_bits() + 1)
} else if v < -0.0 {
f32::from_bits(v.to_bits() - 1)
} else {
f32::from_bits(1)
}
} else {
// Infinity or NaN.
v
}
}
/// Decrements to the previous representable floating point number.
///
/// Notes:
/// - 0.0 and -0.0 are treated as the same value. E.g. starting from the
/// number just after 0.0, it only takes two decrements to get to the
/// number just before -0.0.
/// - Infinity, NaN, and their negative counterparts are returned
/// unchanged.
/// - Decrementing `-f32::MAX` results in -infinity.
#[inline(always)]
pub fn decrement_ulp(v: f32) -> f32 {
if v.is_finite() {
if v > 0.0 {
f32::from_bits(v.to_bits() - 1)
} else if v < -0.0 {
f32::from_bits(v.to_bits() + 1)
} else {
f32::from_bits(0x80000001)
}
} else {
// Infinity or NaN.
v
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn ulp_diff_test() {
assert_eq!(ulp_diff(1.0, 1.0), 0);
assert_eq!(ulp_diff(0.0, 0.0), 0);
assert_eq!(ulp_diff(0.0, -0.0), 0);
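// There are 1 << 23 representable f32 values in [1.0, 2.0), one per
// mantissa pattern, so 1.0 and 2.0 are exactly 1 << 23 ulps apart.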
assert_eq!(ulp_diff(1.0, 2.0), 1 << 23);
assert_eq!(ulp_diff(2.0, 4.0), 1 << 23);
assert_eq!(ulp_diff(-1.0, -2.0), 1 << 23);
assert_eq!(ulp_diff(-2.0, -4.0), 1 << 23);
assert_eq!(ulp_diff(-1.0, 1.0), 0x7f000000);
assert_eq!(ulp_diff(0.0, 1.0), 0x3f800000);
assert_eq!(ulp_diff(-0.0, 1.0), 0x3f800000);
assert_eq!(ulp_diff(0.0, -1.0), 0x3f800000);
assert_eq!(ulp_diff(-0.0, -1.0), 0x3f800000);
assert_eq!(ulp_diff(f32::INFINITY, -f32::INFINITY), 0xff000000);
assert_eq!(ulp_diff(f32::NAN, f32::NAN), 0xffffffff);
assert_eq!(ulp_diff(f32::NAN, 1.0), 0xffffffff);
assert_eq!(ulp_diff(1.0, f32::NAN), 0xffffffff);
assert_eq!(ulp_diff(-f32::NAN, 1.0), 0xffffffff);
assert_eq!(ulp_diff(1.0, -f32::NAN), 0xffffffff);
assert_eq!(ulp_diff(0.0, f32::from_bits(0.0f32.to_bits() + 1)), 1);
assert_eq!(ulp_diff(-0.0, f32::from_bits(0.0f32.to_bits() + 1)), 1);
}
#[test]
fn ulps_eq_test() {
assert!(ulps_eq(1.0, 1.0, 0));
assert!(ulps_eq(1.0, 1.0, 1));
assert!(ulps_eq(0.0, 0.0, 0));
assert!(ulps_eq(0.0, -0.0, 0));
assert!(ulps_eq(1.0, 2.0, 1 << 23));
assert!(!ulps_eq(1.0, 2.0, (1 << 23) - 1));
assert!(ulps_eq(0.0, f32::from_bits(0.0f32.to_bits() + 1), 1));
assert!(!ulps_eq(0.0, f32::from_bits(0.0f32.to_bits() + 1), 0));
assert!(ulps_eq(-0.0, f32::from_bits(0.0f32.to_bits() + 1), 1));
assert!(!ulps_eq(-0.0, f32::from_bits(0.0f32.to_bits() + 1), 0));
assert!(ulps_eq(std::f32::INFINITY, -std::f32::INFINITY, 0xff000000));
assert!(!ulps_eq(
std::f32::INFINITY,
-std::f32::INFINITY,
0xff000000 - 1
));
assert!(!ulps_eq(std::f32::NAN, std::f32::NAN, 0));
assert!(!ulps_eq(-std::f32::NAN, -std::f32::NAN, 0));
assert!(!ulps_eq(std::f32::NAN, std::f32::NAN, u32::MAX));
assert!(!ulps_eq(std::f32::NAN, std::f32::INFINITY, 1 << 31));
assert!(!ulps_eq(std::f32::INFINITY, std::f32::NAN, 1 << 31));
}
#[test]
fn inc_ulp() {
assert!(increment_ulp(1.0) > 1.0);
assert!(increment_ulp(-1.0) > -1.0);
assert!(increment_ulp(0.0) > 0.0);
assert!(increment_ulp(0.0) > -0.0);
assert!(increment_ulp(-0.0) > 0.0);
assert!(increment_ulp(-0.0) > -0.0);
assert!(increment_ulp(f32::MAX) == f32::INFINITY);
assert!(increment_ulp(f32::INFINITY) == f32::INFINITY);
assert!(increment_ulp(-f32::INFINITY) == -f32::INFINITY);
assert!(increment_ulp(f32::NAN).is_nan());
assert!(increment_ulp(-f32::NAN).is_nan());
}
#[test]
fn dec_ulp() {
assert!(decrement_ulp(1.0) < 1.0);
assert!(decrement_ulp(-1.0) < -1.0);
assert!(decrement_ulp(0.0) < 0.0);
assert!(decrement_ulp(0.0) < -0.0);
assert!(decrement_ulp(-0.0) < 0.0);
assert!(decrement_ulp(-0.0) < -0.0);
assert!(decrement_ulp(f32::MIN) == -f32::INFINITY);
assert!(decrement_ulp(f32::INFINITY) == f32::INFINITY);
assert!(decrement_ulp(-f32::INFINITY) == -f32::INFINITY);
assert!(decrement_ulp(f32::NAN).is_nan());
assert!(decrement_ulp(-f32::NAN).is_nan());
}
#[test]
fn inc_dec_ulp() {
assert_eq!(decrement_ulp(increment_ulp(0.0)), 0.0);
assert_eq!(decrement_ulp(increment_ulp(-0.0)), 0.0);
assert_eq!(decrement_ulp(increment_ulp(1.0)), 1.0);
assert_eq!(decrement_ulp(increment_ulp(-1.0)), -1.0);
assert_eq!(decrement_ulp(increment_ulp(1.2)), 1.2);
assert_eq!(decrement_ulp(increment_ulp(-1.2)), -1.2);
}
#[test]
fn dec_inc_ulp() {
assert_eq!(increment_ulp(decrement_ulp(0.0)), 0.0);
assert_eq!(increment_ulp(decrement_ulp(-0.0)), 0.0);
assert_eq!(increment_ulp(decrement_ulp(1.0)), 1.0);
assert_eq!(increment_ulp(decrement_ulp(-1.0)), -1.0);
assert_eq!(increment_ulp(decrement_ulp(1.2)), 1.2);
assert_eq!(increment_ulp(decrement_ulp(-1.2)), -1.2);
}
}


@@ -0,0 +1,325 @@
#![allow(dead_code)]
use std::cmp::PartialEq;
use std::ops::{Add, Div, Mul, Neg, Sub};
use crate::normal::Normal;
use crate::point::Point;
use crate::wide4::Float4;
use crate::xform::{AsXform, XformFull};
use crate::{CrossProduct, DotProduct};
/// A direction vector in 3D space.
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
pub struct Vector(pub Float4);
impl Vector {
#[inline(always)]
pub fn new(x: f32, y: f32, z: f32) -> Self {
Self(Float4::new(x, y, z, 0.0))
}
#[inline(always)]
pub fn length(self) -> f32 {
self.length2().sqrt()
}
#[inline(always)]
pub fn length2(self) -> f32 {
let sqr = self.0 * self.0;
sqr.a() + sqr.b() + sqr.c()
}
#[inline(always)]
#[must_use]
pub fn normalized(self) -> Self {
Self(self.0 / self.length())
}
#[inline(always)]
pub fn abs(self) -> Self {
Self(self.0.abs())
}
#[inline(always)]
pub fn recip(self) -> Self {
Self(self.0.recip())
}
#[inline(always)]
pub fn into_point(self) -> Point {
Point(self.0)
}
#[inline(always)]
pub fn into_normal(self) -> Normal {
Normal(self.0)
}
#[inline(always)]
pub fn x(self) -> f32 {
self.0.a()
}
#[inline(always)]
pub fn y(self) -> f32 {
self.0.b()
}
#[inline(always)]
pub fn z(self) -> f32 {
self.0.c()
}
#[inline(always)]
pub fn get_n(self, i: usize) -> f32 {
match i {
0 => self.x(),
1 => self.y(),
2 => self.z(),
_ => panic!("Out of bounds index into 3D vector."),
}
}
#[inline(always)]
#[must_use]
pub fn set_x(self, x: f32) -> Self {
Self(self.0.set_a(x))
}
#[inline(always)]
#[must_use]
pub fn set_y(self, y: f32) -> Self {
Self(self.0.set_b(y))
}
#[inline(always)]
#[must_use]
pub fn set_z(self, z: f32) -> Self {
Self(self.0.set_c(z))
}
//-------------
// Transforms.
/// Forward-transform the vector.
#[inline(always)]
pub fn xform<T: AsXform>(self, xform: &T) -> Self {
Self(self.0.vec_mul_3x3(&xform.as_xform().m))
}
/// Inverse-transform the vector.
#[inline(always)]
pub fn xform_inv(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3(&xform.inv_m))
}
/// Faster but less precise version of `xform()`.
#[inline(always)]
pub fn xform_fast<T: AsXform>(self, xform: &T) -> Self {
Self(self.0.vec_mul_3x3_fast(&xform.as_xform().m))
}
/// Faster but less precise version of `xform_inv()`.
#[inline(always)]
pub fn xform_inv_fast(self, xform: &XformFull) -> Self {
Self(self.0.vec_mul_3x3_fast(&xform.inv_m))
}
}
impl Add for Vector {
type Output = Self;
#[inline(always)]
fn add(self, other: Self) -> Self {
Self(self.0 + other.0)
}
}
impl Sub for Vector {
type Output = Self;
#[inline(always)]
fn sub(self, other: Self) -> Self {
Self(self.0 - other.0)
}
}
impl Mul<f32> for Vector {
type Output = Self;
#[inline(always)]
fn mul(self, other: f32) -> Self {
Self(self.0 * other)
}
}
impl Div<f32> for Vector {
type Output = Self;
#[inline(always)]
fn div(self, other: f32) -> Self {
Self(self.0 / other)
}
}
impl Neg for Vector {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
Self(-self.0)
}
}
impl PartialEq for Vector {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
self.0.a() == rhs.0.a() && self.0.b() == rhs.0.b() && self.0.c() == rhs.0.c()
}
}
impl DotProduct for Vector {
#[inline(always)]
fn dot(self, other: Self) -> f32 {
Float4::dot_3(self.0, other.0)
}
#[inline(always)]
fn dot_fast(self, other: Self) -> f32 {
Float4::dot_3_fast(self.0, other.0)
}
}
impl CrossProduct for Vector {
#[inline(always)]
fn cross(self, other: Self) -> Self {
Self(Float4::cross_3(self.0, other.0))
}
#[inline(always)]
fn cross_fast(self, other: Self) -> Self {
Self(Float4::cross_3_fast(self.0, other.0))
}
}
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::{CrossProduct, DotProduct, Xform};
#[test]
fn add() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
let v3 = Vector::new(2.5, 6.5, 5.5);
assert_eq!(v3, v1 + v2);
}
#[test]
fn sub() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
let v3 = Vector::new(-0.5, -2.5, 0.5);
assert_eq!(v3, v1 - v2);
}
#[test]
fn mul_scalar() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Vector::new(2.0, 4.0, 6.0);
assert_eq!(v3, v1 * v2);
}
#[test]
fn xform() {
let v = Vector::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.to_full()
.unwrap();
assert_eq!(v.xform(&m), Vector::new(14.0, 46.0, 58.0));
assert_eq!(v.xform(&m).xform_inv(&m), v);
}
#[test]
fn xform_fast() {
let v = Vector::new(1.0, 2.5, 4.0);
let m = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0)
.to_full()
.unwrap();
assert_eq!(v.xform_fast(&m), Vector::new(14.0, 46.0, 58.0));
assert_eq!(v.xform_fast(&m).xform_inv_fast(&m), v);
}
#[test]
fn div() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = 2.0;
let v3 = Vector::new(0.5, 1.0, 1.5);
assert_eq!(v3, v1 / v2);
}
#[test]
fn length() {
let v = Vector::new(1.0, 2.0, 3.0);
assert!((v.length() - 3.7416573867739413).abs() < 0.000001);
}
#[test]
fn length2() {
let v = Vector::new(1.0, 2.0, 3.0);
assert_eq!(v.length2(), 14.0);
}
#[test]
fn normalized() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(0.2672612419124244, 0.5345224838248488, 0.8017837257372732);
let v3 = v1.normalized();
assert!((v3.x() - v2.x()).abs() < 0.000001);
assert!((v3.y() - v2.y()).abs() < 0.000001);
assert!((v3.z() - v2.z()).abs() < 0.000001);
}
#[test]
fn dot() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
assert_eq!(v1.dot(v2), 18.0);
}
#[test]
fn dot_fast() {
let v1 = Vector::new(1.0, 2.0, 3.0);
let v2 = Vector::new(1.5, 4.5, 2.5);
assert_eq!(v1.dot_fast(v2), 18.0);
}
#[test]
fn cross() {
let v1 = Vector::new(1.0, 0.0, 0.0);
let v2 = Vector::new(0.0, 1.0, 0.0);
assert_eq!(v1.cross(v2), Vector::new(0.0, 0.0, 1.0));
}
#[test]
fn cross_fast() {
let v1 = Vector::new(1.0, 0.0, 0.0);
let v2 = Vector::new(0.0, 1.0, 0.0);
assert_eq!(v1.cross_fast(v2), Vector::new(0.0, 0.0, 1.0));
}
}


@@ -0,0 +1,462 @@
use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
use crate::FMulAdd;
//=============================================================
// Float4
#[derive(Debug, Copy, Clone)]
#[repr(C, align(16))]
pub struct Float4([f32; 4]);
impl Float4 {
/// Create a new `Float4` with the given components.
#[inline(always)]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Self {
Self([a, b, c, d])
}
/// Create a new `Float4` with all elements set to `n`.
#[inline(always)]
pub fn splat(n: f32) -> Self {
Self([n, n, n, n])
}
/// Component-wise fused multiply-add.
///
/// `(self * a) + b` with only one rounding error.
#[inline(always)]
pub fn mul_add(self, a: Self, b: Self) -> Self {
Self([
self.0[0].mul_add(a.0[0], b.0[0]),
self.0[1].mul_add(a.0[1], b.0[1]),
self.0[2].mul_add(a.0[2], b.0[2]),
self.0[3].mul_add(a.0[3], b.0[3]),
])
}
/// Vertical minimum.
#[inline(always)]
pub fn min(self, a: Self) -> Self {
// Custom min to match behavior of SSE.
#[inline(always)]
pub fn minf(a: f32, b: f32) -> f32 {
if a < b {
a
} else {
b
}
}
Self([
minf(self.0[0], a.0[0]),
minf(self.0[1], a.0[1]),
minf(self.0[2], a.0[2]),
minf(self.0[3], a.0[3]),
])
}
/// Vertical maximum.
#[inline(always)]
pub fn max(self, a: Self) -> Self {
// Custom max to match behavior of SSE.
#[inline(always)]
pub fn maxf(a: f32, b: f32) -> f32 {
if a > b {
a
} else {
b
}
}
Self([
maxf(self.0[0], a.0[0]),
maxf(self.0[1], a.0[1]),
maxf(self.0[2], a.0[2]),
maxf(self.0[3], a.0[3]),
])
}
/// Horizontal minimum.
#[inline(always)]
pub fn min_element(self) -> f32 {
let a = self.0[0].min(self.0[1]);
let b = self.0[2].min(self.0[3]);
a.min(b)
}
/// Horizontal maximum.
#[inline(always)]
pub fn max_element(self) -> f32 {
let a = self.0[0].max(self.0[1]);
let b = self.0[2].max(self.0[3]);
a.max(b)
}
/// 1.0 / self
#[inline(always)]
pub fn recip(self) -> Self {
Float4::splat(1.0) / self
}
#[inline(always)]
pub fn abs(self) -> Self {
Float4::new(
self.a().abs(),
self.b().abs(),
self.c().abs(),
self.d().abs(),
)
}
//-----------------------------------------------------
// Comparisons.
/// Less than.
#[inline(always)]
pub fn cmplt(self, rhs: Self) -> Bool4 {
Bool4([
self.0[0] < rhs.0[0],
self.0[1] < rhs.0[1],
self.0[2] < rhs.0[2],
self.0[3] < rhs.0[3],
])
}
/// Less than or equal.
#[inline(always)]
pub fn cmplte(self, rhs: Self) -> Bool4 {
Bool4([
self.0[0] <= rhs.0[0],
self.0[1] <= rhs.0[1],
self.0[2] <= rhs.0[2],
self.0[3] <= rhs.0[3],
])
}
/// Greater than.
#[inline(always)]
pub fn cmpgt(self, rhs: Self) -> Bool4 {
Bool4([
self.0[0] > rhs.0[0],
self.0[1] > rhs.0[1],
self.0[2] > rhs.0[2],
self.0[3] > rhs.0[3],
])
}
/// Greater than or equal.
#[inline(always)]
pub fn cmpgte(self, rhs: Self) -> Bool4 {
Bool4([
self.0[0] >= rhs.0[0],
self.0[1] >= rhs.0[1],
self.0[2] >= rhs.0[2],
self.0[3] >= rhs.0[3],
])
}
/// Equal.
#[inline(always)]
pub fn cmpeq(self, rhs: Self) -> Bool4 {
Bool4([
self.0[0] == rhs.0[0],
self.0[1] == rhs.0[1],
self.0[2] == rhs.0[2],
self.0[3] == rhs.0[3],
])
}
//-----------------------------------------------------
// Individual components.
#[inline(always)]
pub fn a(self) -> f32 {
self.0[0]
}
#[inline(always)]
pub fn b(self) -> f32 {
self.0[1]
}
#[inline(always)]
pub fn c(self) -> f32 {
self.0[2]
}
#[inline(always)]
pub fn d(self) -> f32 {
self.0[3]
}
#[inline(always)]
#[must_use]
pub fn set_a(self, n: f32) -> Self {
Self([n, self.0[1], self.0[2], self.0[3]])
}
#[inline(always)]
#[must_use]
pub fn set_b(self, n: f32) -> Self {
Self([self.0[0], n, self.0[2], self.0[3]])
}
#[inline(always)]
#[must_use]
pub fn set_c(self, n: f32) -> Self {
Self([self.0[0], self.0[1], n, self.0[3]])
}
#[inline(always)]
#[must_use]
pub fn set_d(self, n: f32) -> Self {
Self([self.0[0], self.0[1], self.0[2], n])
}
//-----------------------------------------------------
// Shuffles.
#[inline(always)]
pub fn aaaa(self) -> Self {
let a = self.0[0];
Self([a, a, a, a])
}
#[inline(always)]
pub fn bbbb(self) -> Self {
let b = self.0[1];
Self([b, b, b, b])
}
#[inline(always)]
pub fn cccc(self) -> Self {
let c = self.0[2];
Self([c, c, c, c])
}
#[inline(always)]
pub fn dddd(self) -> Self {
let d = self.0[3];
Self([d, d, d, d])
}
#[inline(always)]
pub fn bcad(self) -> Self {
let a = self.0[0];
let b = self.0[1];
let c = self.0[2];
let d = self.0[3];
Self([b, c, a, d])
}
#[inline(always)]
pub fn cabd(self) -> Self {
let a = self.0[0];
let b = self.0[1];
let c = self.0[2];
let d = self.0[3];
Self([c, a, b, d])
}
}
impl Index<usize> for Float4 {
type Output = f32;
#[inline(always)]
fn index(&self, idx: usize) -> &f32 {
&self.0[idx]
}
}
impl Add for Float4 {
type Output = Self;
#[inline(always)]
fn add(self, rhs: Self) -> Self {
Self([
self.0[0] + rhs.0[0],
self.0[1] + rhs.0[1],
self.0[2] + rhs.0[2],
self.0[3] + rhs.0[3],
])
}
}
impl Sub for Float4 {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: Self) -> Self {
Self([
self.0[0] - rhs.0[0],
self.0[1] - rhs.0[1],
self.0[2] - rhs.0[2],
self.0[3] - rhs.0[3],
])
}
}
impl Mul for Float4 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: Self) -> Self {
Self([
self.0[0] * rhs.0[0],
self.0[1] * rhs.0[1],
self.0[2] * rhs.0[2],
self.0[3] * rhs.0[3],
])
}
}
impl Mul<f32> for Float4 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: f32) -> Self {
Self([
self.0[0] * rhs,
self.0[1] * rhs,
self.0[2] * rhs,
self.0[3] * rhs,
])
}
}
impl Div for Float4 {
type Output = Self;
#[inline(always)]
fn div(self, rhs: Self) -> Self {
Self([
self.0[0] / rhs.0[0],
self.0[1] / rhs.0[1],
self.0[2] / rhs.0[2],
self.0[3] / rhs.0[3],
])
}
}
impl Div<f32> for Float4 {
type Output = Self;
#[inline(always)]
fn div(self, rhs: f32) -> Self {
Self([
self.0[0] / rhs,
self.0[1] / rhs,
self.0[2] / rhs,
self.0[3] / rhs,
])
}
}
impl Neg for Float4 {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
Self([-self.0[0], -self.0[1], -self.0[2], -self.0[3]])
}
}
impl FMulAdd for Float4 {
#[inline(always)]
fn fma(self, b: Self, c: Self) -> Self {
self.mul_add(b, c)
}
}
//=============================================================
// Bool4
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Bool4([bool; 4]);
impl Bool4 {
#[inline(always)]
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
Bool4([a, b, c, d])
}
#[inline(always)]
pub fn new_false() -> Self {
Self([false, false, false, false])
}
#[inline(always)]
pub fn to_bools(self) -> [bool; 4] {
self.0
}
/// Note: `a` goes to the least significant bit.
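/// E.g. `Bool4::new(true, false, false, true).bitmask() == 0b1001`.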
#[inline(always)]
pub fn bitmask(self) -> u8 {
self.0[0] as u8
| ((self.0[1] as u8) << 1)
| ((self.0[2] as u8) << 2)
| ((self.0[3] as u8) << 3)
}
#[inline(always)]
pub fn any(self) -> bool {
self.0[0] | self.0[1] | self.0[2] | self.0[3]
}
#[inline(always)]
pub fn all(self) -> bool {
self.0[0] & self.0[1] & self.0[2] & self.0[3]
}
}
impl BitAnd for Bool4 {
type Output = Self;
#[inline(always)]
fn bitand(self, rhs: Self) -> Self {
Self([
self.0[0] & rhs.0[0],
self.0[1] & rhs.0[1],
self.0[2] & rhs.0[2],
self.0[3] & rhs.0[3],
])
}
}
impl BitOr for Bool4 {
type Output = Self;
#[inline(always)]
fn bitor(self, rhs: Self) -> Self {
Self([
self.0[0] | rhs.0[0],
self.0[1] | rhs.0[1],
self.0[2] | rhs.0[2],
self.0[3] | rhs.0[3],
])
}
}
impl BitXor for Bool4 {
type Output = Self;
#[inline(always)]
fn bitxor(self, rhs: Self) -> Self {
Self([
self.0[0] ^ rhs.0[0],
self.0[1] ^ rhs.0[1],
self.0[2] ^ rhs.0[2],
self.0[3] ^ rhs.0[3],
])
}
}
impl Not for Bool4 {
type Output = Self;
#[inline(always)]
fn not(self) -> Self {
Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
}
}


@@ -0,0 +1,812 @@
use std::{
cmp::{Eq, PartialEq},
ops::{AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, SubAssign},
};
use crate::utils::ulps_eq;
use crate::{difference_of_products, two_diff, two_prod, two_sum};
//-------------------------------------------------------------
// Which implementation to use.
mod fallback;
#[cfg(not(any(target_arch = "x86_64")))]
pub use fallback::{Bool4, Float4};
#[cfg(target_arch = "x86_64")]
mod sse;
#[cfg(target_arch = "x86_64")]
pub use sse::{Bool4, Float4};
//-------------------------------------------------------------
impl Float4 {
/// 3D dot product (only uses the first 3 components).
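///
/// Uses error-free transforms (`two_prod`/`two_sum`) so rounding error
/// from the intermediate products and sums is compensated rather than
/// accumulated.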
#[inline(always)]
pub fn dot_3(a: Self, b: Self) -> f32 {
let (p, p_err) = two_prod(a, b);
// Products.
let (x, x_err) = (p.a(), p_err.a());
let (y, y_err) = (p.b(), p_err.b());
let (z, z_err) = (p.c(), p_err.c());
// Sums.
let (s1, s1_err) = two_sum(x, y);
let err1 = x_err + (y_err + s1_err);
let (s2, s2_err) = two_sum(s1, z);
let err2 = z_err + (err1 + s2_err);
// Final result with rounding error compensation.
s2 + err2
}
/// Faster but less precise version of `dot_3()`.
#[inline(always)]
pub fn dot_3_fast(a: Self, b: Self) -> f32 {
let c = a * b;
c.a() + c.b() + c.c()
}
/// 3D cross product (only uses the first 3 components).
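///
/// Uses the shuffle identity `a × b = a.yzx * b.zxy - a.zxy * b.yzx`
/// (spelled here with the `bcad`/`cabd` swizzles), with the subtraction
/// done via `difference_of_products` to limit cancellation error.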
#[inline(always)]
pub fn cross_3(a: Self, b: Self) -> Self {
difference_of_products(a.bcad(), b.cabd(), a.cabd(), b.bcad())
}
/// Faster but less precise version of `cross_3()`.
#[inline(always)]
pub fn cross_3_fast(a: Self, b: Self) -> Self {
(a.bcad() * b.cabd()) - (a.cabd() * b.bcad())
}
#[inline(always)]
pub fn transpose_3x3(m: &[Self; 3]) -> [Self; 3] {
[
// The fourth component in each row below is arbitrary,
// but in this case chosen so that it matches the
// behavior of the SSE version of transpose_3x3.
Self::new(m[0].a(), m[1].a(), m[2].a(), m[2].d()),
Self::new(m[0].b(), m[1].b(), m[2].b(), m[2].d()),
Self::new(m[0].c(), m[1].c(), m[2].c(), m[2].d()),
]
}
/// Invert a 3x3 matrix.
///
/// Returns `None` if not invertible.
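///
/// Computed in the adjugate (cross-product) form: `abc`, `def`, and `ghi`
/// below are cross products of pairs of rows of `m`, the determinant is a
/// cofactor expansion along the first column, and the inverse is the
/// transposed cofactor rows divided by the determinant.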
#[inline]
pub fn invert_3x3(m: &[Self; 3]) -> Option<[Self; 3]> {
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
let m0_cab = m[0].cabd();
let m1_cab = m[1].cabd();
let m2_cab = m[2].cabd();
let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca);
let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca);
let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca);
let det = Self::dot_3(
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
);
if det == 0.0 {
None
} else {
Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
}
}
/// Invert a 3x3 matrix, and also return the computed determinant.
///
/// Returns `None` if not invertible.
#[inline]
pub fn invert_3x3_w_det(m: &[Self; 3]) -> Option<([Self; 3], f32)> {
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
let m0_cab = m[0].cabd();
let m1_cab = m[1].cabd();
let m2_cab = m[2].cabd();
let abc = difference_of_products(m1_bca, m2_cab, m1_cab, m2_bca);
let def = difference_of_products(m2_bca, m0_cab, m2_cab, m0_bca);
let ghi = difference_of_products(m0_bca, m1_cab, m0_cab, m1_bca);
let det = Self::dot_3(
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
);
if det == 0.0 {
None
} else {
Some((Self::transpose_3x3(&[abc / det, def / det, ghi / det]), det))
}
}
/// Faster but less precise version of `invert_3x3()`.
#[inline]
pub fn invert_3x3_fast(m: &[Self; 3]) -> Option<[Self; 3]> {
let m0_bca = m[0].bcad();
let m1_bca = m[1].bcad();
let m2_bca = m[2].bcad();
let m0_cab = m[0].cabd();
let m1_cab = m[1].cabd();
let m2_cab = m[2].cabd();
let abc = (m1_bca * m2_cab) - (m1_cab * m2_bca);
let def = (m2_bca * m0_cab) - (m2_cab * m0_bca);
let ghi = (m0_bca * m1_cab) - (m0_cab * m1_bca);
let det = Self::dot_3_fast(
Self::new(abc.a(), def.a(), ghi.a(), 0.0),
Self::new(m[0].a(), m[1].a(), m[2].a(), 0.0),
);
if det == 0.0 {
None
} else {
Some(Self::transpose_3x3(&[abc / det, def / det, ghi / det]))
}
}
/// Multiplies a 3D vector with a 3x3 matrix.
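///
/// Treats `self` as a row vector, so the result is
/// `x * m[0] + y * m[1] + z * m[2]`, accumulated with error-free
/// transforms to compensate intermediate rounding.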
#[inline]
pub fn vec_mul_3x3(self, m: &[Self; 3]) -> Self {
let x = self.aaaa();
let y = self.bbbb();
let z = self.cccc();
// Products.
let (a, a_err) = two_prod(x, m[0]);
let (b, b_err) = two_prod(y, m[1]);
let (c, c_err) = two_prod(z, m[2]);
// Sums.
let (s1, s1_err) = two_sum(a, b);
let err1 = a_err + (b_err + s1_err);
let (s2, s2_err) = two_sum(c, s1);
let err2 = c_err + (err1 + s2_err);
s2 + err2
}
/// Faster but less precise version of `vec_mul_3x3()`.
#[inline]
pub fn vec_mul_3x3_fast(self, m: &[Self; 3]) -> Self {
let x = self.aaaa();
let y = self.bbbb();
let z = self.cccc();
(x * m[0]) + (y * m[1]) + (z * m[2])
}
/// Transforms a 3d point by an affine transform.
///
/// `m` is the 3x3 part of the affine transform, `t` is the translation part.
#[inline]
pub fn vec_mul_affine(self, m: &[Self; 3], t: Self) -> Self {
let x = self.aaaa();
let y = self.bbbb();
let z = self.cccc();
// Products.
let (a, a_err) = two_prod(x, m[0]);
let (b, b_err) = two_prod(y, m[1]);
let (c, c_err) = two_prod(z, m[2]);
// Sums.
let (s1, s1_err) = two_sum(a, b);
let err1 = a_err + (b_err + s1_err);
let (s2, s2_err) = two_sum(c, s1);
let err2 = c_err + (err1 + s2_err);
let (s3, s3_err) = two_sum(t, s2);
let err3 = err2 + s3_err;
s3 + err3
}
/// Faster but less precise version of `vec_mul_affine()`.
#[inline]
pub fn vec_mul_affine_fast(self, m: &[Self; 3], t: Self) -> Self {
let x = self.aaaa();
let y = self.bbbb();
let z = self.cccc();
(x * m[0]) + (y * m[1]) + (z * m[2]) + t
}
/// Transforms a 3d point by an affine transform, except it does
/// `(vec - t) * inv_m` instead of `vec * m + t`.
///
/// This is useful for performing efficient inverse transforms while
/// only having to invert the 3x3 part of the transform itself.
///
/// `inv_m` is the inverse 3x3 part of the affine transform, `t` is
/// the forward translation part.
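///
/// A sketch of the intended pattern (assuming `m` is an invertible 3x3
/// matrix and `t` the matching translation):
///
/// ```ignore
/// // Forward: p_world = p * m + t.  Inverse: p = (p_world - t) * inv_m.
/// let inv_m = Float4::invert_3x3(&m).unwrap();
/// let p_world = p.vec_mul_affine(&m, t);
/// let p_back = p_world.vec_mul_affine_rev(&inv_m, t);
/// // `p_back` matches `p` up to floating point rounding.
/// ```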
#[inline]
pub fn vec_mul_affine_rev(self, inv_m: &[Self; 3], t: Self) -> Self {
let (v, v_err) = two_diff(self, t);
let (x, x_err) = (v.aaaa(), v_err.aaaa());
let (y, y_err) = (v.bbbb(), v_err.bbbb());
let (z, z_err) = (v.cccc(), v_err.cccc());
// Products.
let ((a, a_err1), a_err2) = (two_prod(x, inv_m[0]), x_err * inv_m[0]);
let ((b, b_err1), b_err2) = (two_prod(y, inv_m[1]), y_err * inv_m[1]);
let ((c, c_err1), c_err2) = (two_prod(z, inv_m[2]), z_err * inv_m[2]);
let a_err = a_err1 + a_err2;
let b_err = b_err1 + b_err2;
let c_err = c_err1 + c_err2;
// Sums.
let (s1, s1_err) = two_sum(a, b);
let err1 = a_err + (b_err + s1_err);
let (s2, s2_err) = two_sum(c, s1);
let err2 = c_err + (err1 + s2_err);
s2 + err2
}
/// Faster but less precise version of `vec_mul_affine_rev()`.
#[inline]
pub fn vec_mul_affine_rev_fast(self, inv_m: &[Self; 3], t: Self) -> Self {
let v = self - t;
let x = v.aaaa();
let y = v.bbbb();
let z = v.cccc();
(x * inv_m[0]) + (y * inv_m[1]) + (z * inv_m[2])
}
/// Returns whether the `Float4`s are approximately equal to each
/// other.
///
/// Each pair of corresponding elements must be within `max_ulps` ulps
/// of each other.
pub(crate) fn aprx_eq(a: Self, b: Self, max_ulps: u32) -> bool {
let mut eq = true;
eq &= ulps_eq(a.a(), b.a(), max_ulps);
eq &= ulps_eq(a.b(), b.b(), max_ulps);
eq &= ulps_eq(a.c(), b.c(), max_ulps);
eq &= ulps_eq(a.d(), b.d(), max_ulps);
eq
}
/// Composes one affine transform with another.
///
/// The result is an affine transform equivalent to applying the first
/// followed by the second.
///
/// `m#` is the 3x3 part of the affine transform, `t#` is the translation part.
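///
/// For example, composing `(m1, t1)` with `(m2, t2)` and applying the
/// result to a point is equivalent (up to rounding) to applying the two
/// transforms in sequence:
///
/// ```ignore
/// let (m, t) = Float4::affine_mul_affine(&m1, t1, &m2, t2);
/// let a = p.vec_mul_affine(&m, t);
/// let b = p.vec_mul_affine(&m1, t1).vec_mul_affine(&m2, t2);
/// // `a` and `b` agree up to floating point rounding.
/// ```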
#[inline]
pub fn affine_mul_affine(
m1: &[Self; 3],
t1: Self,
m2: &[Self; 3],
t2: Self,
) -> ([Self; 3], Self) {
(
[
m1[0].vec_mul_3x3(m2),
m1[1].vec_mul_3x3(m2),
m1[2].vec_mul_3x3(m2),
],
t1.vec_mul_affine(m2, t2),
)
}
/// Faster but less precise version of `affine_mul_affine()`.
#[inline]
pub fn affine_mul_affine_fast(
m1: &[Self; 3],
t1: Self,
m2: &[Self; 3],
t2: Self,
) -> ([Self; 3], Self) {
(
[
m1[0].vec_mul_3x3_fast(m2),
m1[1].vec_mul_3x3_fast(m2),
m1[2].vec_mul_3x3_fast(m2),
],
t1.vec_mul_affine_fast(m2, t2),
)
}
}
impl From<Float4> for (f32, f32, f32, f32) {
fn from(v: Float4) -> (f32, f32, f32, f32) {
(v.a(), v.b(), v.c(), v.d())
}
}
impl AddAssign for Float4 {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
*self = *self + rhs;
}
}
impl SubAssign for Float4 {
#[inline(always)]
fn sub_assign(&mut self, rhs: Self) {
*self = *self - rhs;
}
}
impl MulAssign for Float4 {
#[inline(always)]
fn mul_assign(&mut self, rhs: Self) {
*self = *self * rhs;
}
}
impl MulAssign<f32> for Float4 {
#[inline(always)]
fn mul_assign(&mut self, rhs: f32) {
*self = *self * rhs;
}
}
impl DivAssign for Float4 {
#[inline(always)]
fn div_assign(&mut self, rhs: Self) {
*self = *self / rhs;
}
}
impl DivAssign<f32> for Float4 {
#[inline(always)]
fn div_assign(&mut self, rhs: f32) {
*self = *self / rhs;
}
}
impl PartialEq for Float4 {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
self.cmpeq(*rhs).bitmask() == 0b1111
}
}
//--------
impl BitAndAssign for Bool4 {
#[inline(always)]
fn bitand_assign(&mut self, rhs: Self) {
*self = *self & rhs;
}
}
impl BitOrAssign for Bool4 {
#[inline(always)]
fn bitor_assign(&mut self, rhs: Self) {
*self = *self | rhs;
}
}
impl BitXorAssign for Bool4 {
#[inline(always)]
fn bitxor_assign(&mut self, rhs: Self) {
*self = *self ^ rhs;
}
}
impl PartialEq for Bool4 {
#[inline(always)]
fn eq(&self, rhs: &Self) -> bool {
self.bitmask() == rhs.bitmask()
}
}
impl Eq for Bool4 {}
impl std::fmt::Debug for Bool4 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
f.write_str("Bool4(")?;
f.debug_list().entries(self.to_bools().iter()).finish()?;
f.write_str(")")?;
Ok(())
}
}
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
//------------
// Float4
#[test]
fn approximate_equality_test() {
let a = Float4::new(1.0, 2.0, 3.0, 4.0);
let b = Float4::new(1.00001, 2.00002, 3.00003, 4.00004);
let c = Float4::new(1.0e-43, 2.0e-43, 3.0e-43, 4.0e-43);
let d = Float4::new(-1.0e-43, -2.0e-43, -3.0e-43, -4.0e-43);
assert!(Float4::aprx_eq(a, a, 0));
assert!(Float4::aprx_eq(a, b, 130));
assert!(!Float4::aprx_eq(a, b, 120));
assert!(Float4::aprx_eq(c, d, 575));
assert!(!Float4::aprx_eq(c, d, 565));
}
#[test]
fn index() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
assert_eq!(v[0], 0.0);
assert_eq!(v[1], 1.0);
assert_eq!(v[2], 2.0);
assert_eq!(v[3], 3.0);
}
#[test]
fn get() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
assert_eq!(v.a(), 0.0);
assert_eq!(v.b(), 1.0);
assert_eq!(v.c(), 2.0);
assert_eq!(v.d(), 3.0);
}
#[test]
fn set() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
assert_eq!(v.set_a(9.0), Float4::new(9.0, 1.0, 2.0, 3.0));
assert_eq!(v.set_b(9.0), Float4::new(0.0, 9.0, 2.0, 3.0));
assert_eq!(v.set_c(9.0), Float4::new(0.0, 1.0, 9.0, 3.0));
assert_eq!(v.set_d(9.0), Float4::new(0.0, 1.0, 2.0, 9.0));
}
#[test]
fn shuffle() {
let v = Float4::new(0.0, 1.0, 2.0, 3.0);
assert_eq!(v.aaaa(), Float4::splat(0.0));
assert_eq!(v.bbbb(), Float4::splat(1.0));
assert_eq!(v.cccc(), Float4::splat(2.0));
assert_eq!(v.dddd(), Float4::splat(3.0));
assert_eq!(v.bcad(), Float4::new(1.0, 2.0, 0.0, 3.0));
assert_eq!(v.cabd(), Float4::new(2.0, 0.0, 1.0, 3.0));
}
#[test]
fn abs() {
let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
let r = Float4::new(1.0, 2.0, 3.0, 4.0);
assert_eq!(v1.abs(), r);
assert_eq!(v2.abs(), r);
}
#[test]
fn neg() {
let v1 = Float4::new(-1.0, 2.0, -3.0, 4.0);
let v2 = Float4::new(1.0, -2.0, 3.0, -4.0);
assert_eq!(-v1, v2);
assert_eq!(-v2, v1);
}
#[test]
fn cmp_ops() {
let a = Float4::new(1.0, 2.0, -2.0, 0.0);
let b = Float4::new(1.0, -2.0, 2.0, -0.0);
assert_eq!(a.cmplt(b), Bool4::new(false, false, true, false));
assert_eq!(a.cmplte(b), Bool4::new(true, false, true, true));
assert_eq!(a.cmpgt(b), Bool4::new(false, true, false, false));
assert_eq!(a.cmpgte(b), Bool4::new(true, true, false, true));
assert_eq!(a.cmpeq(b), Bool4::new(true, false, false, true));
}
#[test]
fn min_max() {
let a = Float4::new(1.0, 2.0, -2.0, 4.0);
let b = Float4::new(1.0, -2.0, 2.0, 5.0);
assert_eq!(a.min(b), Float4::new(1.0, -2.0, -2.0, 4.0));
assert_eq!(a.max(b), Float4::new(1.0, 2.0, 2.0, 5.0));
let c = Float4::new(std::f32::INFINITY, 2.0, std::f32::NAN, 4.0);
let d = Float4::new(1.0, -std::f32::INFINITY, 2.0, std::f32::NAN);
let r_min = c.min(d);
let r_max = c.max(d);
assert_eq!(r_min.a(), 1.0);
assert_eq!(r_min.b(), -std::f32::INFINITY);
assert_eq!(r_min.c(), 2.0);
assert!(r_min.d().is_nan());
assert_eq!(r_max.a(), std::f32::INFINITY);
assert_eq!(r_max.b(), 2.0);
assert_eq!(r_max.c(), 2.0);
assert!(r_max.d().is_nan());
}
#[test]
fn dot_3() {
let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
assert_eq!(Float4::dot_3(v1, v2), -24.0);
assert_eq!(Float4::dot_3_fast(v1, v2), -24.0);
}
#[test]
fn cross_3() {
let v1 = Float4::new(1.0, 2.0, -3.0, 0.0);
let v2 = Float4::new(4.0, -5.0, 6.0, 0.0);
let r = Float4::new(-3.0, -18.0, -13.0, 0.0);
assert_eq!(Float4::cross_3(v1, v2), r);
assert_eq!(Float4::cross_3(v2, v1), -r);
assert_eq!(Float4::cross_3_fast(v1, v2), r);
assert_eq!(Float4::cross_3_fast(v2, v1), -r);
}
#[test]
fn transpose_3x3() {
let m1 = [
Float4::new(1.0, 4.0, 7.0, 0.0),
Float4::new(2.0, 5.0, 8.0, 0.0),
Float4::new(3.0, 6.0, 9.0, 0.0),
];
let m2 = [
Float4::new(1.0, 2.0, 3.0, 0.0),
Float4::new(4.0, 5.0, 6.0, 0.0),
Float4::new(7.0, 8.0, 9.0, 0.0),
];
assert_eq!(Float4::transpose_3x3(&m1), m2);
assert_eq!(Float4::transpose_3x3(&m2), m1);
}
#[test]
fn invert_3x3() {
let m = [
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
];
let inv_m = [
Float4::new(3.25, 1.875, -3.0, 0.0),
Float4::new(-1.125, -0.4375, 1.0, 0.0),
Float4::new(0.125, -0.0625, 0.0, 0.0),
];
assert_eq!(Float4::invert_3x3(&m).unwrap(), inv_m);
assert_eq!(Float4::invert_3x3(&inv_m).unwrap(), m);
assert_eq!(Float4::invert_3x3_fast(&m).unwrap(), inv_m);
assert_eq!(Float4::invert_3x3_fast(&inv_m).unwrap(), m);
}
#[test]
fn vec_mul_3x3() {
let v = Float4::new(1.0, 2.5, 4.0, 0.0);
let m = [
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
];
let r = Float4::new(14.0, 46.0, 58.0, 0.0);
assert_eq!(v.vec_mul_3x3(&m), r);
assert_eq!(v.vec_mul_3x3_fast(&m), r);
}
#[test]
fn vec_mul_affine() {
let p = Float4::new(1.0, 2.5, 4.0, 0.0);
let xform = (
[
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
],
Float4::new(1.5, 8.0, 12.0, 0.0),
);
let r = Float4::new(15.5, 54.0, 70.0, 0.0);
assert_eq!(p.vec_mul_affine(&xform.0, xform.1), r);
}
#[test]
fn vec_mul_affine_rev() {
let p = Float4::new(15.5, 54.0, 70.0, 0.0);
let inv_m = [
Float4::new(3.25, 1.875, -3.0, 0.0),
Float4::new(-1.125, -0.4375, 1.0, 0.0),
Float4::new(0.125, -0.0625, 0.0, 0.0),
];
let t = Float4::new(1.5, 8.0, 12.0, 0.0);
let r = Float4::new(1.0, 2.5, 4.0, 0.0);
assert_eq!(p.vec_mul_affine_rev(&inv_m, t), r);
assert_eq!(p.vec_mul_affine_rev_fast(&inv_m, t), r);
}
#[test]
fn affine_mul_affine() {
let a = (
[
Float4::new(1.0, 3.0, 9.0, 0.0),
Float4::new(2.0, 6.0, 2.0, 0.0),
Float4::new(2.0, 7.0, 11.0, 0.0),
],
Float4::new(1.5, 8.0, 12.0, 0.0),
);
let b = (
[
Float4::new(1.0, 2.0, 3.0, 0.0),
Float4::new(5.0, 6.0, 7.0, 0.0),
Float4::new(9.0, 10.0, 11.0, 0.0),
],
Float4::new(13.0, 14.0, 15.0, 0.0),
);
let r = (
[
Float4::new(97.0, 110.0, 123.0, 0.0),
Float4::new(50.0, 60.0, 70.0, 0.0),
Float4::new(136.0, 156.0, 176.0, 0.0),
],
Float4::new(162.5, 185.0, 207.5, 0.0),
);
assert_eq!(Float4::affine_mul_affine(&a.0, a.1, &b.0, b.1), r);
assert_eq!(Float4::affine_mul_affine_fast(&a.0, a.1, &b.0, b.1), r);
}
//------------
// Bool4
#[test]
fn bitmask() {
assert_eq!(Bool4::new(true, false, false, false).bitmask(), 0b0001);
assert_eq!(Bool4::new(false, true, false, false).bitmask(), 0b0010);
assert_eq!(Bool4::new(false, false, true, false).bitmask(), 0b0100);
assert_eq!(Bool4::new(false, false, false, true).bitmask(), 0b1000);
assert_eq!(Bool4::new(false, true, false, true).bitmask(), 0b1010);
assert_eq!(Bool4::new(true, false, true, false).bitmask(), 0b0101);
}
#[test]
fn to_bools() {
assert_eq!(
Bool4::new(true, false, false, false).to_bools(),
[true, false, false, false]
);
assert_eq!(
Bool4::new(false, true, false, false).to_bools(),
[false, true, false, false]
);
assert_eq!(
Bool4::new(false, false, true, false).to_bools(),
[false, false, true, false]
);
assert_eq!(
Bool4::new(false, false, false, true).to_bools(),
[false, false, false, true]
);
assert_eq!(
Bool4::new(false, true, false, true).to_bools(),
[false, true, false, true]
);
assert_eq!(
Bool4::new(true, false, true, false).to_bools(),
[true, false, true, false]
);
}
#[test]
fn any() {
assert_eq!(Bool4::new(true, false, false, false).any(), true);
assert_eq!(Bool4::new(false, true, false, false).any(), true);
assert_eq!(Bool4::new(false, false, true, false).any(), true);
assert_eq!(Bool4::new(false, false, false, true).any(), true);
assert_eq!(Bool4::new(false, false, false, false).any(), false);
}
#[test]
fn all() {
assert_eq!(Bool4::new(false, true, true, true).all(), false);
assert_eq!(Bool4::new(true, false, true, true).all(), false);
assert_eq!(Bool4::new(true, true, false, true).all(), false);
assert_eq!(Bool4::new(true, true, true, false).all(), false);
assert_eq!(Bool4::new(true, true, true, true).all(), true);
}
#[test]
fn boolean_ops() {
let all = Bool4::new(true, true, true, true);
let none = Bool4::new(false, false, false, false);
let a = Bool4::new(true, false, true, false);
let b = Bool4::new(false, true, false, true);
// Not.
assert_eq!(!a, b);
assert_eq!(!b, a);
assert_eq!(!all, none);
assert_eq!(!none, all);
// And.
assert_eq!(a & b, none);
assert_eq!(all & none, none);
assert_eq!(all & all, all);
assert_eq!(none & none, none);
// Or.
assert_eq!(a | b, all);
assert_eq!(all | none, all);
assert_eq!(all | all, all);
assert_eq!(none | none, none);
// Xor.
assert_eq!(a ^ b, all);
assert_eq!(all ^ none, all);
assert_eq!(all ^ all, none);
assert_eq!(none ^ none, none);
}
#[test]
fn matches_fallback() {
fn tf1(n: Float4) -> [f32; 4] {
[n.a(), n.b(), n.c(), n.d()]
}
fn tf2(n: fallback::Float4) -> [f32; 4] {
[n.a(), n.b(), n.c(), n.d()]
}
let a1 = Float4::new(1.53245, 5.4234523, -424.432, 0.0004231);
let b1 = Float4::new(74.63, -9.65436, 3.0, -1003.3);
let c1 = Float4::new(-0.4216, -132.52, 8.9452, 42.0);
let a2 = fallback::Float4::new(1.53245, 5.4234523, -424.432, 0.0004231);
let b2 = fallback::Float4::new(74.63, -9.65436, 3.0, -1003.3);
let c2 = fallback::Float4::new(-0.4216, -132.52, 8.9452, 42.0);
assert_eq!(tf1(a1), tf2(a2));
assert_eq!(tf1(b1), tf2(b2));
assert_eq!(tf1(c1), tf2(c2));
assert_eq!(tf1(a1 + b1), tf2(a2 + b2));
assert_eq!(tf1(a1 - b1), tf2(a2 - b2));
assert_eq!(tf1(a1 * b1), tf2(a2 * b2));
assert_eq!(tf1(a1 / b1), tf2(a2 / b2));
assert_eq!(tf1(a1.mul_add(b1, c1)), tf2(a2.mul_add(b2, c2)));
assert_eq!(tf1(a1.min(b1)), tf2(a2.min(b2)));
assert_eq!(tf1(a1.max(b1)), tf2(a2.max(b2)));
assert_eq!(a1.min_element(), a2.min_element());
assert_eq!(a1.max_element(), a2.max_element());
assert_eq!(tf1(a1.recip()), tf2(a2.recip()));
assert_eq!(tf1(a1.abs()), tf2(a2.abs()));
}
}

@ -0,0 +1,391 @@
use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Index, Mul, Neg, Not, Sub};
use std::arch::x86_64::{
__m128, _mm_add_ps, _mm_and_ps, _mm_castsi128_ps, _mm_cmpeq_ps, _mm_cmpge_ps, _mm_cmpgt_ps,
_mm_cmple_ps, _mm_cmplt_ps, _mm_div_ps, _mm_fmadd_ps, _mm_max_ps, _mm_min_ps, _mm_movemask_ps,
_mm_mul_ps, _mm_or_ps, _mm_set1_epi32, _mm_set1_ps, _mm_set_epi32, _mm_set_ps, _mm_setzero_ps,
_mm_shuffle_ps, _mm_storeu_ps, _mm_sub_ps, _mm_xor_ps,
};
use crate::FMulAdd;
//=============================================================
// Float4
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
pub struct Float4(__m128);
impl Float4 {
/// Create a new `Float4` with the given components.
#[inline(always)]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> Self {
Self(unsafe { _mm_set_ps(d, c, b, a) })
}
/// Create a new `Float4` with all elements set to `n`.
#[inline(always)]
pub fn splat(n: f32) -> Self {
Self(unsafe { _mm_set1_ps(n) })
}
/// Component-wise fused multiply-add.
///
/// `(self * a) + b` with only one rounding error.
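///
/// For example (illustrative):
///
/// ```ignore
/// let r = Float4::splat(2.0).mul_add(Float4::splat(3.0), Float4::splat(1.0));
/// assert_eq!(r, Float4::splat(7.0)); // (2 * 3) + 1 in each lane
/// ```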
#[inline(always)]
pub fn mul_add(self, a: Self, b: Self) -> Self {
if is_x86_feature_detected!("fma") {
Self(unsafe { _mm_fmadd_ps(self.0, a.0, b.0) })
} else {
Self::new(
self.a().mul_add(a.a(), b.a()),
self.b().mul_add(a.b(), b.b()),
self.c().mul_add(a.c(), b.c()),
self.d().mul_add(a.d(), b.d()),
)
}
}
/// Vertical minimum.
#[inline(always)]
pub fn min(self, rhs: Self) -> Self {
Self(unsafe { _mm_min_ps(self.0, rhs.0) })
}
/// Vertical maximum.
#[inline(always)]
pub fn max(self, rhs: Self) -> Self {
Self(unsafe { _mm_max_ps(self.0, rhs.0) })
}
/// Horizontal minimum.
#[inline(always)]
pub fn min_element(self) -> f32 {
let a = self.a().min(self.b());
let b = self.c().min(self.d());
a.min(b)
}
/// Horizontal maximum.
#[inline(always)]
pub fn max_element(self) -> f32 {
let a = self.a().max(self.b());
let b = self.c().max(self.d());
a.max(b)
}
/// 1.0 / self
#[inline(always)]
pub fn recip(self) -> Self {
// The reciprocal intrinsic is not precise enough.
// Self(unsafe { std::arch::x86_64::_mm_rcp_ps(self.0) })
Self::splat(1.0) / self
}
#[inline(always)]
pub fn abs(self) -> Self {
Self(unsafe {
let abs_mask = _mm_castsi128_ps(_mm_set1_epi32(!(1 << 31)));
_mm_and_ps(self.0, abs_mask)
})
}
//-----------------------------------------------------
// Comparisons.
/// Less than.
#[inline(always)]
pub fn cmplt(self, rhs: Self) -> Bool4 {
Bool4(unsafe { _mm_cmplt_ps(self.0, rhs.0) })
}
/// Less than or equal.
#[inline(always)]
pub fn cmplte(self, rhs: Self) -> Bool4 {
Bool4(unsafe { _mm_cmple_ps(self.0, rhs.0) })
}
/// Greater than.
#[inline(always)]
pub fn cmpgt(self, rhs: Self) -> Bool4 {
Bool4(unsafe { _mm_cmpgt_ps(self.0, rhs.0) })
}
/// Greater than or equal.
#[inline(always)]
pub fn cmpgte(self, rhs: Self) -> Bool4 {
Bool4(unsafe { _mm_cmpge_ps(self.0, rhs.0) })
}
/// Equal.
#[inline(always)]
pub fn cmpeq(self, rhs: Self) -> Bool4 {
Bool4(unsafe { _mm_cmpeq_ps(self.0, rhs.0) })
}
//-----------------------------------------------------
// Individual components.
#[inline(always)]
pub fn a(self) -> f32 {
self[0]
}
#[inline(always)]
pub fn b(self) -> f32 {
self[1]
}
#[inline(always)]
pub fn c(self) -> f32 {
self[2]
}
#[inline(always)]
pub fn d(self) -> f32 {
self[3]
}
#[inline(always)]
#[must_use]
pub fn set_a(self, n: f32) -> Self {
Self::new(n, self.b(), self.c(), self.d())
}
#[inline(always)]
#[must_use]
pub fn set_b(self, n: f32) -> Self {
Self::new(self.a(), n, self.c(), self.d())
}
#[inline(always)]
#[must_use]
pub fn set_c(self, n: f32) -> Self {
Self::new(self.a(), self.b(), n, self.d())
}
#[inline(always)]
#[must_use]
pub fn set_d(self, n: f32) -> Self {
Self::new(self.a(), self.b(), self.c(), n)
}
//-----------------------------------------------------
// Shuffles.
#[inline(always)]
pub fn aaaa(self) -> Self {
Self(unsafe { _mm_shuffle_ps(self.0, self.0, 0b00_00_00_00) })
}
#[inline(always)]
pub fn bbbb(self) -> Self {
Self(unsafe { _mm_shuffle_ps(self.0, self.0, 0b01_01_01_01) })
}
#[inline(always)]
pub fn cccc(self) -> Self {
Self(unsafe { _mm_shuffle_ps(self.0, self.0, 0b10_10_10_10) })
}
#[inline(always)]
pub fn dddd(self) -> Self {
Self(unsafe { _mm_shuffle_ps(self.0, self.0, 0b11_11_11_11) })
}
#[inline(always)]
pub fn bcad(self) -> Self {
Self(unsafe { _mm_shuffle_ps(self.0, self.0, 0b11_00_10_01) })
}
#[inline(always)]
pub fn cabd(self) -> Self {
Self(unsafe { _mm_shuffle_ps(self.0, self.0, 0b11_01_00_10) })
}
}
impl Index<usize> for Float4 {
type Output = f32;
#[inline(always)]
fn index(&self, idx: usize) -> &f32 {
let elements: &[f32; 4] = unsafe { std::mem::transmute(&self.0) };
match idx {
0 => &elements[0],
1 => &elements[1],
2 => &elements[2],
3 => &elements[3],
_ => panic!("Out of bounds access of Float4 elements."),
}
}
}
impl Add for Float4 {
type Output = Self;
#[inline(always)]
fn add(self, rhs: Self) -> Self {
Self(unsafe { _mm_add_ps(self.0, rhs.0) })
}
}
impl Sub for Float4 {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: Self) -> Self {
Self(unsafe { _mm_sub_ps(self.0, rhs.0) })
}
}
impl Mul for Float4 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: Self) -> Self {
Self(unsafe { _mm_mul_ps(self.0, rhs.0) })
}
}
impl Mul<f32> for Float4 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: f32) -> Self {
Self(unsafe { _mm_mul_ps(self.0, _mm_set1_ps(rhs)) })
}
}
impl Div for Float4 {
type Output = Self;
#[inline(always)]
fn div(self, rhs: Self) -> Self {
Self(unsafe { _mm_div_ps(self.0, rhs.0) })
}
}
impl Div<f32> for Float4 {
type Output = Self;
#[inline(always)]
fn div(self, rhs: f32) -> Self {
Self(unsafe { _mm_div_ps(self.0, _mm_set1_ps(rhs)) })
}
}
impl Neg for Float4 {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self {
Self(unsafe {
let sign_mask = _mm_castsi128_ps(_mm_set1_epi32(1 << 31));
_mm_xor_ps(self.0, sign_mask)
})
}
}
impl FMulAdd for Float4 {
#[inline(always)]
fn fma(self, b: Self, c: Self) -> Self {
self.mul_add(b, c)
}
}
//=============================================================
// Bool4
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct Bool4(__m128);
impl Bool4 {
#[inline(always)]
pub fn new(a: bool, b: bool, c: bool, d: bool) -> Self {
const ONES: i32 = unsafe { std::mem::transmute(0xffffffffu32) };
unsafe {
let ints = _mm_set_epi32(
d as i32 * ONES,
c as i32 * ONES,
b as i32 * ONES,
a as i32 * ONES,
);
Bool4(_mm_castsi128_ps(ints))
}
}
#[inline(always)]
pub fn new_false() -> Self {
Self(unsafe { _mm_setzero_ps() })
}
#[inline(always)]
pub fn to_bools(self) -> [bool; 4] {
let mut v = [0.0f32; 4];
unsafe { _mm_storeu_ps((&mut v[..]).as_mut_ptr(), self.0) }
[
v[0].to_bits() != 0,
v[1].to_bits() != 0,
v[2].to_bits() != 0,
v[3].to_bits() != 0,
]
}
/// Note: `a` goes to the least significant bit.
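///
/// For example, `Bool4::new(true, false, false, true).bitmask()` is `0b1001`.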
#[inline(always)]
pub fn bitmask(self) -> u8 {
unsafe { _mm_movemask_ps(self.0) as u8 }
}
#[inline(always)]
pub fn any(self) -> bool {
self.bitmask() != 0
}
#[inline(always)]
pub fn all(self) -> bool {
self.bitmask() == 0b1111
}
}
impl BitAnd for Bool4 {
type Output = Self;
#[inline(always)]
fn bitand(self, rhs: Self) -> Self {
Self(unsafe { _mm_and_ps(self.0, rhs.0) })
}
}
impl BitOr for Bool4 {
type Output = Self;
#[inline(always)]
fn bitor(self, rhs: Self) -> Self {
Self(unsafe { _mm_or_ps(self.0, rhs.0) })
}
}
impl BitXor for Bool4 {
type Output = Self;
#[inline(always)]
fn bitxor(self, rhs: Self) -> Self {
Self(unsafe { _mm_xor_ps(self.0, rhs.0) })
}
}
impl Not for Bool4 {
type Output = Self;
#[inline(always)]
fn not(self) -> Self {
Self(unsafe {
let ones = _mm_castsi128_ps(_mm_set1_epi32(!0));
_mm_xor_ps(self.0, ones)
})
}
}

@ -0,0 +1,305 @@
#![allow(dead_code)]
use std::ops::{Add, Mul};
use crate::point::Point;
use crate::sealed::Sealed;
use crate::wide4::Float4;
/// A forward affine transform.
///
/// Use this for working with transforms that still need to be
/// manipulated or composed with other transforms, or for storing
/// transforms more compactly.
///
/// Note: slightly counter-intuitively, even though this can perform
/// forward (but not inverse) transforms on points and vectors, it is
/// capable of *inverse* (but not forward) transforms on surface normals.
/// This is because forward transforms on surface normals require the
/// inverse transform matrix.
///
/// Convert to an [`XformFull`] for a larger-format type capable of
/// efficiently performing both forward and inverse transforms on all
/// types, but which is effectively "frozen" in terms of further
/// manipulation of the transform itself.
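///
/// A minimal usage sketch (names here are illustrative):
///
/// ```ignore
/// // Compose while still in `Xform` form, then "freeze" into an
/// // `XformFull` once both forward and inverse transforms are needed.
/// let xform = Xform::from_location(loc).compose(&other_xform);
/// let full = xform.to_full().unwrap();
/// ```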
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct Xform {
/// Rotation/scale/shear matrix.
pub m: [Float4; 3],
/// Translation.
pub t: Float4,
}
impl Xform {
/// Creates a new affine transform with the specified values:
///
/// ```text
/// a d g j
/// b e h k
/// c f i l
/// ```
///
/// Where j, k, and l are the xyz translation components.
#[inline]
#[allow(clippy::many_single_char_names)]
#[allow(clippy::too_many_arguments)]
pub fn new(
a: f32,
b: f32,
c: f32,
d: f32,
e: f32,
f: f32,
g: f32,
h: f32,
i: f32,
j: f32,
k: f32,
l: f32,
) -> Self {
Self {
m: [
Float4::new(a, b, c, 0.0),
Float4::new(d, e, f, 0.0),
Float4::new(g, h, i, 0.0),
],
t: Float4::new(j, k, l, 0.0),
}
}
/// Creates a new identity transform.
#[inline]
pub fn identity() -> Self {
Self {
m: [
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
],
t: Float4::splat(0.0),
}
}
#[inline]
pub fn from_location(loc: Point) -> Xform {
Self {
m: [
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
],
t: loc.0,
}
}
/// Returns whether the transforms are approximately equal to each other.
/// Each pair of corresponding elements must be within `max_ulps` ulps
/// of each other.
pub(crate) fn aprx_eq(&self, other: Xform, max_ulps: u32) -> bool {
let mut eq = true;
eq &= Float4::aprx_eq(self.m[0], other.m[0], max_ulps);
eq &= Float4::aprx_eq(self.m[1], other.m[1], max_ulps);
eq &= Float4::aprx_eq(self.m[2], other.m[2], max_ulps);
eq &= Float4::aprx_eq(self.t, other.t, max_ulps);
eq
}
/// Compute the "full" version of the transform.
#[inline]
pub fn to_full(&self) -> Option<XformFull> {
if let Some(inv_m) = Float4::invert_3x3(&self.m) {
Some(XformFull {
fwd: *self,
inv_m,
})
} else {
None
}
}
/// Faster but less precise version of `to_full()`.
#[inline]
pub fn to_full_fast(&self) -> Option<XformFull> {
if let Some(inv_m) = Float4::invert_3x3_fast(&self.m) {
Some(XformFull {
fwd: *self,
inv_m,
})
} else {
None
}
}
/// Composes two transforms together.
///
/// The resulting transform is the same as doing `self` and then
/// `rhs` in sequence.
#[inline]
pub fn compose(&self, rhs: &Self) -> Self {
let (m, t) = Float4::affine_mul_affine(&self.m, self.t, &rhs.m, rhs.t);
Self { m, t }
}
/// Composes two transforms together.
///
/// Faster but less precise version.
#[inline]
pub fn compose_fast(&self, rhs: &Self) -> Self {
let (m, t) = Float4::affine_mul_affine_fast(&self.m, self.t, &rhs.m, rhs.t);
Self { m, t }
}
}
impl Default for Xform {
fn default() -> Self {
Self::identity()
}
}
/// Multiply a transform by an `f32`, element-wise
impl Mul<f32> for Xform {
type Output = Self;
#[inline]
fn mul(self, rhs: f32) -> Self {
Self {
m: [self.m[0] * rhs, self.m[1] * rhs, self.m[2] * rhs],
t: self.t * rhs,
}
}
}
/// Add two transforms together, element-wise
impl Add for Xform {
type Output = Self;
#[inline]
fn add(self, rhs: Self) -> Self {
Self {
m: [
self.m[0] + rhs.m[0],
self.m[1] + rhs.m[1],
self.m[2] + rhs.m[2],
],
t: self.t + rhs.t,
}
}
}
impl AsXform for Xform {
#[inline(always)]
fn as_xform(&self) -> &Xform {
self
}
}
impl Sealed for Xform {}
//-------------------------------------------------------------
/// A combined forward/inverse affine transform.
///
/// Unlike [`Xform`], this can perform both forward and inverse
/// transforms on all types. However, it also takes up more space and
/// is effectively "frozen" in terms of further manipulation. Prefer
/// [`Xform`] when manipulating or composing transforms, and also
/// when storing transforms if space is a consideration.
///
/// Note: only the 3x3 part of the transform is stored inverted. This
/// is because it's both trivial and more numerically stable to reuse
/// the forward translation vector to do inverse transforms, as
/// `(point - fwd.t) * inv_m`.
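///
/// For example, an inverse point transform can be written directly from
/// the stored pieces (an illustrative sketch):
///
/// ```ignore
/// let p_local = p_world.vec_mul_affine_rev(&full.inv_m, full.fwd.t);
/// ```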
#[derive(Debug, Copy, Clone)]
#[repr(C)]
pub struct XformFull {
/// Forward transform.
pub fwd: Xform,
/// Inverse rotation/scale/shear matrix.
pub inv_m: [Float4; 3],
}
impl XformFull {
pub fn identity() -> Self {
Self {
fwd: Xform {
m: [
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
],
t: Float4::splat(0.0),
},
inv_m: [
Float4::new(1.0, 0.0, 0.0, 0.0),
Float4::new(0.0, 1.0, 0.0, 0.0),
Float4::new(0.0, 0.0, 1.0, 0.0),
],
}
}
}
impl AsXform for XformFull {
#[inline(always)]
fn as_xform(&self) -> &Xform {
&self.fwd
}
}
impl Sealed for XformFull {}
//-------------------------------------------------------------
pub trait AsXform: Sealed {
fn as_xform(&self) -> &Xform;
}
//-------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn equality() {
let a = Xform::identity();
let b = Xform::identity();
let c = Xform::new(1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0, 0.0, 0.0, 0.0, 1.1, 0.0);
assert_eq!(a, b);
assert!(a != c);
}
#[test]
fn approximate_equality() {
let a = Xform::identity();
let b = Xform::new(
1.000001, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0, 1.000001, 0.0, 0.0, 0.0,
);
let c = Xform::new(
1.000003, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0, 1.000003, 0.0, 0.0, 0.0,
);
assert!(a.aprx_eq(b, 10));
assert!(!a.aprx_eq(b, 6));
assert!(a.aprx_eq(c, 27));
assert!(!a.aprx_eq(c, 23));
}
#[test]
fn compose() {
let a = Xform::new(1.0, 3.0, 9.0, 2.0, 6.0, 2.0, 2.0, 7.0, 11.0, 1.5, 8.0, 12.0);
let b = Xform::new(
1.0, 2.0, 3.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 13.0, 14.0, 15.0,
);
let c = Xform::new(
97.0, 110.0, 123.0, 50.0, 60.0, 70.0, 136.0, 156.0, 176.0, 162.5, 185.0, 207.5,
);
assert_eq!(a.compose(&b), c);
assert_eq!(a.compose_fast(&b), c);
}
}

@ -0,0 +1,12 @@
[package]
name = "rrand"
version = "0.1.0"
edition = "2021"
[dev-dependencies]
bencher = "0.1.5"
rand = "0.6"
[[bench]]
name = "bench"
harness = false

@ -0,0 +1,67 @@
use bencher::{benchmark_group, benchmark_main, black_box, Bencher};
use rrand::{mix32, mix32_seed, mix64, mix64_seed, Rng};
//----
fn rng_u32_100000(bench: &mut Bencher) {
bench.iter(|| {
let mut rng = Rng::new(black_box(0));
for _ in 0..100000 {
black_box(rng.u32());
}
});
}
fn rng_u64_100000(bench: &mut Bencher) {
bench.iter(|| {
let mut rng = Rng::new(black_box(0));
for _ in 0..100000 {
black_box(rng.u64());
}
});
}
fn mix32_100000(bench: &mut Bencher) {
bench.iter(|| {
for i in 0..100000 {
black_box(mix32(black_box(i)));
}
});
}
fn mix64_100000(bench: &mut Bencher) {
bench.iter(|| {
for i in 0..100000 {
black_box(mix64(black_box(i)));
}
});
}
fn mix32_seed_100000(bench: &mut Bencher) {
bench.iter(|| {
for i in 0..100000 {
black_box(mix32_seed(black_box(i), black_box(0)));
}
});
}
fn mix64_seed_100000(bench: &mut Bencher) {
bench.iter(|| {
for i in 0..100000 {
black_box(mix64_seed(black_box(i), black_box(0)));
}
});
}
//----
benchmark_group!(
benches,
rng_u32_100000,
rng_u64_100000,
mix32_100000,
mix64_100000,
mix32_seed_100000,
mix64_seed_100000,
);
benchmark_main!(benches);

sub_crates/rrand/src/lib.rs

@ -0,0 +1,127 @@
//! Sources of deterministic "randomness" for rendering applications.
/// Convert a `u32` to a float in [0.0, 1.0).
///
/// Use for getting f32 values from random u32 sources.
///
/// Note: this is a linear mapping from [0, u32::MAX] to [0.0, 1.0).
#[inline(always)]
pub fn u32_to_f32_norm(n: u32) -> f32 {
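// Set the exponent/sign bits to those of 1.0 (0x3f800000) and fill the
// mantissa with the top 23 bits of `n`, giving a float in [1.0, 2.0),
// then subtract 1.0 to map that to [0.0, 1.0).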
f32::from_bits((n >> 9) | 0x3f800000) - 1.0
}
//-------------------------------------------------------------
/// A fast RNG.
///
#[derive(Debug, Copy, Clone)]
pub struct Rng {
state: u64,
}
impl Rng {
/// Creates a new Rng from a seed.
///
/// A seed of zero is perfectly fine, and does not affect the quality
/// of the generator.
#[inline]
pub fn new(seed: u64) -> Self {
Self { state: seed }
}
/// Gets the nth relative RNG stream from this one.
///
/// The returned stream will be at the same point in its sequence as
/// this one.
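///
/// For example (illustrative):
///
/// ```ignore
/// let base = Rng::new(42);
/// let thread_rng = base.nth_stream(7); // stream 7, same position as `base`
/// ```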
#[inline]
pub fn nth_stream(&self, n: u64) -> Self {
Self {
// We just jump forward 2^40*n states. This gives us 2^24
// unique streams, each of which is 2^40 numbers long.
state: self
.state
.wrapping_add(0xa0761d6478bd642f_u64.wrapping_mul(1 << 40).wrapping_mul(n)),
}
}
/// Returns a random u32 in [0, u32::MAX].
#[inline(always)]
pub fn u32(&mut self) -> u32 {
self.u64() as u32
}
/// Returns a random u64 in [0, u64::MAX].
#[inline(always)]
pub fn u64(&mut self) -> u64 {
// The wyrand RNG.
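// Advance the state by an odd constant, multiply the state by a
// xor-tweaked copy of itself in 128 bits, then fold the high and low
// 64-bit halves of the product together with xor.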
self.state = self.state.wrapping_add(0xa0761d6478bd642f);
let t = (self.state as u128).wrapping_mul(self.state as u128 ^ 0xe7037ed1a0b428db);
((t >> 64) ^ t) as u64
}
/// Returns a random f32 in [0.0, 1.0).
#[inline(always)]
pub fn f32(&mut self) -> f32 {
u32_to_f32_norm(self.u32())
}
}
//-------------------------------------------------------------
/// A fast 32-bit mixing function.
///
/// Scrambles the input number to produce a different deterministic
/// "random" number.
#[inline(always)]
pub fn mix32(mut n: u32) -> u32 {
// From https://github.com/skeeto/hash-prospector
n ^= n >> 16;
n = n.wrapping_mul(0x21f0aaad);
n ^= n >> 15;
n = n.wrapping_mul(0xd35a2d97);
n ^= n >> 15;
// Xor by a random number so input zero doesn't map to output zero.
// The particular number used here isn't special.
n ^ 0xe6fe3beb
}
/// A fast seedable 32-bit mixing function.
///
/// Same as `mix32()` but takes a seed.
#[inline(always)]
pub fn mix32_seed(n: u32, seed: u32) -> u32 {
// We rotate the bits of `seed` so it's unlikely to interact with `n`
// in bad ways if they're both e.g. incrementing. The particular
// rotation constant used here isn't special.
mix32(n ^ seed.rotate_left(23))
}
/// A fast 64-bit mixing function.
///
/// Scrambles the input number to produce a different deterministic
/// "random" number.
#[inline(always)]
pub fn mix64(mut n: u64) -> u64 {
// From https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html
n ^= n >> 30;
n = n.wrapping_mul(0xbf58476d1ce4e5b9);
n ^= n >> 27;
n = n.wrapping_mul(0x94d049bb133111eb);
n ^= n >> 31;
// Xor by a random number so input zero doesn't map to output zero.
// The particular number used here isn't special.
n ^ 0x4acc3f27cc712c9d
}
/// A fast seedable 64-bit mixing function.
///
/// Same as `mix64()` but takes a seed.
#[inline(always)]
pub fn mix64_seed(n: u64, seed: u64) -> u64 {
// We rotate the bits of `seed` so it's unlikely to interact with `n`
// in bad ways if they're both e.g. incrementing. The particular
// rotation constant used here isn't special.
mix64(n ^ seed.rotate_left(47))
}

@ -10,4 +10,4 @@ name = "spectral_upsampling"
path = "src/lib.rs"
[dependencies]
glam = "0.15"
rmath = { path = "../rmath" }

@ -6,7 +6,9 @@
/// This provides similar color matching to full Jakob, at the expense of
/// somewhat lower quality spectra, and the inability to precalculate
/// the coefficients for even more efficient evaluation later on.
use glam::Vec4;
use rmath::wide4::Float4;
pub const EQUAL_ENERGY_REFLECTANCE: f32 = 1.0;
/// How many polynomial coefficients?
const RGB2SPEC_N_COEFFS: usize = 3;
@ -15,7 +17,7 @@ const RGB2SPEC_N_COEFFS: usize = 3;
include!(concat!(env!("OUT_DIR"), "/jakob_table_inc.rs"));
#[inline]
pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
pub fn rec709_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 {
small_rgb_to_spectrum_p4(
REC709_TABLE,
REC709_TABLE_RES,
@ -26,7 +28,7 @@ pub fn rec709_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
}
#[inline]
pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
pub fn rec2020_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 {
small_rgb_to_spectrum_p4(
REC2020_TABLE,
REC2020_TABLE_RES,
@ -37,7 +39,7 @@ pub fn rec2020_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
}
#[inline]
pub fn aces_to_spectrum_p4(lambdas: Vec4, rgb: (f32, f32, f32)) -> Vec4 {
pub fn aces_to_spectrum_p4(lambdas: Float4, rgb: (f32, f32, f32)) -> Float4 {
small_rgb_to_spectrum_p4(
ACES_TABLE,
ACES_TABLE_RES,
@ -56,9 +58,9 @@ fn small_rgb_to_spectrum_p4(
table: &[[(f32, f32, f32); 2]],
table_res: usize,
table_mid_value: f32,
lambdas: Vec4,
lambdas: Float4,
rgb: (f32, f32, f32),
) -> Vec4 {
) -> Float4 {
// Determine largest RGB component, and calculate the other two
// components scaled for lookups.
let (i, max_val, x, y) = if rgb.0 > rgb.1 && rgb.0 > rgb.2 {
@ -71,7 +73,7 @@ fn small_rgb_to_spectrum_p4(
if max_val == 0.0 {
// If max_val is zero, just return zero. This avoids NaN's from
// divide by zero. This is also correct, since it's black.
return Vec4::splat(0.0);
return Float4::splat(0.0);
}
let x = x * 63.0 / max_val;
let y = y * 63.0 / max_val;
@ -91,20 +93,20 @@ fn small_rgb_to_spectrum_p4(
// Convert to SIMD format for faster interpolation.
let a0 = [
Vec4::new(a0[0].0, a0[0].1, a0[0].2, 0.0),
Vec4::new(a0[1].0, a0[1].1, a0[1].2, 0.0),
Float4::new(a0[0].0, a0[0].1, a0[0].2, 0.0),
Float4::new(a0[1].0, a0[1].1, a0[1].2, 0.0),
];
let a1 = [
Vec4::new(a1[0].0, a1[0].1, a1[0].2, 0.0),
Vec4::new(a1[1].0, a1[1].1, a1[1].2, 0.0),
Float4::new(a1[0].0, a1[0].1, a1[0].2, 0.0),
Float4::new(a1[1].0, a1[1].1, a1[1].2, 0.0),
];
let a2 = [
Vec4::new(a2[0].0, a2[0].1, a2[0].2, 0.0),
Vec4::new(a2[1].0, a2[1].1, a2[1].2, 0.0),
Float4::new(a2[0].0, a2[0].1, a2[0].2, 0.0),
Float4::new(a2[1].0, a2[1].1, a2[1].2, 0.0),
];
let a3 = [
Vec4::new(a3[0].0, a3[0].1, a3[0].2, 0.0),
Vec4::new(a3[1].0, a3[1].1, a3[1].2, 0.0),
Float4::new(a3[0].0, a3[0].1, a3[0].2, 0.0),
Float4::new(a3[1].0, a3[1].1, a3[1].2, 0.0),
];
// Do interpolation.
@ -133,22 +135,22 @@ fn small_rgb_to_spectrum_p4(
// Coefficient -> eval functions
#[inline(always)]
fn rgb2spec_fma_4(a: Vec4, b: Vec4, c: Vec4) -> Vec4 {
(a * b) + c
fn rgb2spec_fma_4(a: Float4, b: Float4, c: Float4) -> Float4 {
a.mul_add(b, c)
}
fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Vec4) -> Vec4 {
let co0 = Vec4::splat(coeff[0]);
let co1 = Vec4::splat(coeff[1]);
let co2 = Vec4::splat(coeff[2]);
fn rgb2spec_eval_4(coeff: [f32; RGB2SPEC_N_COEFFS], lambda: Float4) -> Float4 {
let co0 = Float4::splat(coeff[0]);
let co1 = Float4::splat(coeff[1]);
let co2 = Float4::splat(coeff[2]);
let x = rgb2spec_fma_4(rgb2spec_fma_4(co0, lambda, co1), lambda, co2);
let y = {
// TODO: replace this with a SIMD sqrt op.
let (x, y, z, w) = rgb2spec_fma_4(x, x, Vec4::splat(1.0)).into();
Vec4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).recip()
let (x, y, z, w) = rgb2spec_fma_4(x, x, Float4::splat(1.0)).into();
Float4::new(x.sqrt(), y.sqrt(), z.sqrt(), w.sqrt()).recip()
};
rgb2spec_fma_4(Vec4::splat(0.5) * x, y, Vec4::splat(0.5))
rgb2spec_fma_4(Float4::splat(0.5) * x, y, Float4::splat(0.5))
}

@ -6,7 +6,7 @@
use std::f32;
use glam::Vec4;
use rmath::wide4::Float4;
mod meng_spectra_tables;
@ -174,7 +174,7 @@ pub fn spectrum_xyz_to_p(lambda: f32, xyz: (f32, f32, f32)) -> f32 {
///
/// Works on 4 wavelengths at once via SIMD.
#[inline]
pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
pub fn spectrum_xyz_to_p_4(lambdas: Float4, xyz: (f32, f32, f32)) -> Float4 {
assert!(lambdas.min_element() >= SPECTRUM_SAMPLE_MIN);
assert!(lambdas.max_element() <= SPECTRUM_SAMPLE_MAX);
@ -184,7 +184,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
if norm < f32::MAX {
norm
} else {
return Vec4::splat(0.0);
return Float4::splat(0.0);
}
};
@ -197,7 +197,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
|| uv.1 < 0.0
|| uv.1 >= SPECTRUM_GRID_HEIGHT as f32
{
return Vec4::splat(0.0);
return Float4::splat(0.0);
}
let uvi = (uv.0 as i32, uv.1 as i32);
@ -214,11 +214,11 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
// If the cell has no points, nothing we can do, so return 0.0
if num == 0 {
return Vec4::splat(0.0);
return Float4::splat(0.0);
}
// Normalize lambda to spectrum table index range.
let sb: Vec4 = (lambdas - Vec4::splat(SPECTRUM_SAMPLE_MIN))
let sb: Float4 = (lambdas - Float4::splat(SPECTRUM_SAMPLE_MIN))
/ (SPECTRUM_SAMPLE_MAX - SPECTRUM_SAMPLE_MIN)
* (SPECTRUM_NUM_SAMPLES as f32 - 1.0);
debug_assert!(sb.min_element() >= 0.0);
@ -226,7 +226,7 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
// Get the spectral values for the vertices of the grid cell.
// TODO: use integer SIMD intrinsics to make this part faster.
let mut p = [Vec4::splat(0.0); 6];
let mut p = [Float4::splat(0.0); 6];
let sb0: [i32; 4] = [sb[0] as i32, sb[1] as i32, sb[2] as i32, sb[3] as i32];
assert!(sb0[0].max(sb0[1]).max(sb0[2].max(sb0[3])) < SPECTRUM_NUM_SAMPLES);
let sb1: [i32; 4] = [
@ -235,27 +235,27 @@ pub fn spectrum_xyz_to_p_4(lambdas: Vec4, xyz: (f32, f32, f32)) -> Vec4 {
(sb[2] as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1),
(sb[3] as i32 + 1).min(SPECTRUM_NUM_SAMPLES - 1),
];
let sbf = sb - Vec4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32);
let sbf = sb - Float4::new(sb0[0] as f32, sb0[1] as f32, sb0[2] as f32, sb0[3] as f32);
for i in 0..(num as usize) {
debug_assert!(idx[i] >= 0);
let spectrum = &SPECTRUM_DATA_POINTS[idx[i] as usize].spectrum;
let p0 = Vec4::new(
let p0 = Float4::new(
spectrum[sb0[0] as usize],
spectrum[sb0[1] as usize],
spectrum[sb0[2] as usize],
spectrum[sb0[3] as usize],
);
let p1 = Vec4::new(
let p1 = Float4::new(
spectrum[sb1[0] as usize],
spectrum[sb1[1] as usize],
spectrum[sb1[2] as usize],
spectrum[sb1[3] as usize],
);
p[i] = p0 * (Vec4::splat(1.0) - sbf) + p1 * sbf;
p[i] = p0 * (Float4::splat(1.0) - sbf) + p1 * sbf;
}
// Linearly interpolate the spectral power of the cell vertices.
let mut interpolated_p = Vec4::splat(0.0);
let mut interpolated_p = Float4::splat(0.0);
if inside {
// Fast path for normal inner quads:
let uv2 = (uv.0 - uvi.0 as f32, uv.1 - uvi.1 as f32);