-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathBitangentNoise.hlsl
170 lines (146 loc) · 6.2 KB
/
BitangentNoise.hlsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// --------------------------------------------------------------------
// Optimized implementation of 3D/4D bitangent noise.
// Based on stegu's simplex noise: https://github.com/stegu/webgl-noise.
// Contact : atyuwen@gmail.com
// Author : Yuwen Wu (https://atyuwen.github.io/)
// License : Distributed under the MIT License.
// --------------------------------------------------------------------
// Permuted congruential generator: hashes a 3D integer coordinate to two
// 32-bit values (only the top 16 bits of each are well shuffled).
// References: 1. Mark Jarzynski and Marc Olano, "Hash Functions for GPU Rendering".
//             2. UnrealEngine/Random.ush. https://github.com/EpicGames/UnrealEngine
uint2 _pcg3d16(uint3 p)
{
    // Linear congruential step, applied to every component.
    uint3 state = p * 1664525u + 1013904223u;

    // First mixing round. The statements are order dependent: each one reads
    // components that the previous statements have already updated.
    state.x += state.y * state.z;
    state.y += state.z * state.x;
    state.z += state.x * state.y;

    // Second round. Only x and y are returned, so z is not updated again.
    state.x += state.y * state.z;
    state.y += state.z * state.x;

    return state.xy;
}
// 4D counterpart of the hash above: maps a 4D integer coordinate to two
// 32-bit values using the same multiply-add step followed by cross-component mixing.
uint2 _pcg4d16(uint4 p)
{
    // Linear congruential step, applied to every component.
    uint4 state = p * 1664525u + 1013904223u;

    // First mixing round. The statements are order dependent: each one reads
    // components that the previous statements have already updated.
    state.x += state.y * state.w;
    state.y += state.z * state.x;
    state.z += state.x * state.y;
    state.w += state.y * state.z;

    // Second round. Only x and y are returned, so z and w are not updated again.
    state.x += state.y * state.w;
    state.y += state.z * state.x;

    return state.xy;
}
// Builds a gradient vector from a hash value.
// Bits 19, 18 and 17 of the hash (masks 0x80000, 0x40000, 0x20000) decide
// whether x, y and z are +1 (bit set) or -1 (bit clear).
float3 _gradient3d(uint hash)
{
    const uint3 bitMask = uint3(0x80000, 0x40000, 0x20000);
    // Each scale is 2 / mask, so a set bit becomes 2.0 and a clear bit stays 0.0.
    const float3 bitScale = float3(1.0 / 0x40000, 1.0 / 0x20000, 1.0 / 0x10000);

    float3 picked = float3(hash.xxx & bitMask);
    return picked * bitScale - 1.0;
}
// Builds a 4D gradient vector from a hash value.
// Bits 19, 18, 17 and 16 of the hash (masks 0x80000 .. 0x10000) decide
// whether x, y, z and w are +1 (bit set) or -1 (bit clear).
float4 _gradient4d(uint hash)
{
    const uint4 bitMask = uint4(0x80000, 0x40000, 0x20000, 0x10000);
    // Each scale is 2 / mask, so a set bit becomes 2.0 and a clear bit stays 0.0.
    const float4 bitScale = float4(1.0 / 0x40000, 1.0 / 0x20000, 1.0 / 0x10000, 1.0 / 0x8000);

    float4 picked = float4(hash.xxxx & bitMask);
    return picked * bitScale - 1.0;
}
// Optimized 3D bitangent noise. Approximately 113 instruction slots used.
// Assume p is in the range [-32768, 32767].
// Returns the cross product of two noise gradients evaluated at p, scaled by 3918.76.
float3 BitangentNoise3D(float3 p)
{
    // skew.x = 1/6, skew.y = 1/3
    const float2 skew = float2(1.0 / 6.0, 1.0 / 3.0);

    // First corner: lattice cell of p and the offset of p from that corner.
    float3 cell = floor(p + dot(p, skew.yyy));
    float3 d0 = p - cell + dot(cell, skew.xxx);

    // Other corners: compare each component of d0 with the next one (rotated
    // swizzle) to pick the integer steps towards the second and third corner.
    float3 ge = step(d0.yzx, d0.xyz);
    float3 lt = 1.0 - ge;
    float3 corner1 = min(ge.xyz, lt.zxy);
    float3 corner2 = max(ge.xyz, lt.zxy);

    // Offsets from the remaining corners:
    //   d1 = d0 - corner1 + 1.0 * skew.x
    //   d2 = d0 - corner2 + 2.0 * skew.x   (2.0 * skew.x = 1/3 = skew.y)
    //   d3 = d0 - 1.0     + 3.0 * skew.x   (-1.0 + 3.0 * skew.x = -0.5)
    float3 d1 = d0 - corner1 + skew.xxx;
    float3 d2 = d0 - corner2 + skew.yyy;
    float3 d3 = d0 - 0.5;

    // Shift the lattice coordinates by 32768.5 before the uint casts.
    // NOTE(review): presumably this keeps them non-negative for the assumed
    // input range and guards the truncation against float error - confirm.
    float3 biased = cell + 32768.5;
    uint2 h0 = _pcg3d16((uint3)biased);
    uint2 h1 = _pcg3d16((uint3)(biased + corner1));
    uint2 h2 = _pcg3d16((uint3)(biased + corner2));
    uint2 h3 = _pcg3d16((uint3)(biased + 1));

    // Two independent gradient vectors per corner (set A from hash.x, set B from hash.y).
    float3 a0 = _gradient3d(h0.x);
    float3 a1 = _gradient3d(h1.x);
    float3 a2 = _gradient3d(h2.x);
    float3 a3 = _gradient3d(h3.x);
    float3 b0 = _gradient3d(h0.y);
    float3 b1 = _gradient3d(h1.y);
    float3 b2 = _gradient3d(h2.y);
    float3 b3 = _gradient3d(h3.y);

    // Calculate noise gradients.
    // Per-corner falloff m = saturate(0.5 - |d|^2) and its powers.
    float4 m = saturate(0.5 - float4(dot(d0, d0), dot(d1, d1), dot(d2, d2), dot(d3, d3)));
    float4 m2 = m * m;
    float4 m4 = m2 * m2;
    float4 m3 = m2 * m;

    // Gradient of sum(m^4 * dot(g, d)) is sum(-8 * m^3 * dot(g, d) * d + m^4 * g).
    float4 weightA = m3 * float4(dot(a0, d0), dot(a1, d1), dot(a2, d2), dot(a3, d3));
    float3 gradA = -8.0 * (weightA.x * d0 + weightA.y * d1 + weightA.z * d2 + weightA.w * d3);
    gradA += m4.x * a0 + m4.y * a1 + m4.z * a2 + m4.w * a3;

    float4 weightB = m3 * float4(dot(b0, d0), dot(b1, d1), dot(b2, d2), dot(b3, d3));
    float3 gradB = -8.0 * (weightB.x * d0 + weightB.y * d1 + weightB.z * d2 + weightB.w * d3);
    gradB += m4.x * b0 + m4.y * b1 + m4.z * b2 + m4.w * b3;

    // The cross product of two gradients is divergence free.
    return cross(gradA, gradB) * 3918.76;
}
// 4D Bitangent noise. Approximately 163 instruction slots used.
// Assume p is in the range [-32768, 32767].
// Returns the cross product of the xyz parts of two 4D noise gradients
// evaluated at p, scaled by 81.0.
float3 BitangentNoise4D(float4 p)
{
// Skew factor, numerically (sqrt(5) - 1) / 4, broadcast to all four components.
const float4 F4 = 0.309016994374947451;
// Unskew constants: multiples of G4 used for the corner offsets below.
const float4 C = float4( 0.138196601125011, // (5 - sqrt(5))/20 G4
0.276393202250021, // 2 * G4
0.414589803375032, // 3 * G4
-0.447213595499958); // -1 + 4 * G4
// First corner: lattice cell of p and the offset of p from that corner.
float4 i = floor(p + dot(p, F4) );
float4 x0 = p - i + dot(i, C.xxxx);
// Other corners
// Rank sorting originally contributed by Bill Licea-Kane, AMD (formerly ATI)
// i0 accumulates, per component of x0, the number of pairwise comparisons
// that component wins. The statements below are order dependent: i0.yzw is
// assigned first and then incremented piecewise.
float4 i0;
// isX: x compared against y, z, w. isYZ: y against z, y against w, z against w.
float3 isX = step( x0.yzw, x0.xxx );
float3 isYZ = step( x0.zww, x0.yyz );
// i0.x = dot( isX, float3( 1.0 ) );
i0.x = isX.x + isX.y + isX.z;
// The other components win each comparison that x lost.
i0.yzw = 1.0 - isX;
// i0.y += dot( isYZ.xy, float2( 1.0 ) );
i0.y += isYZ.x + isYZ.y;
i0.zw += 1.0 - isYZ.xy;
i0.z += isYZ.z;
i0.w += 1.0 - isYZ.z;
// i0 now contains the unique values 0,1,2,3 in each channel
// Thresholding the ranks gives 0/1 step vectors: i3 is 1 where rank >= 1,
// i2 where rank >= 2, and i1 where rank >= 3.
float4 i3 = saturate( i0 );
float4 i2 = saturate( i0 - 1.0 );
float4 i1 = saturate( i0 - 2.0 );
// x0 = x0 - 0.0 + 0.0 * C.xxxx
// x1 = x0 - i1 + 1.0 * C.xxxx
// x2 = x0 - i2 + 2.0 * C.xxxx
// x3 = x0 - i3 + 3.0 * C.xxxx
// x4 = x0 - 1.0 + 4.0 * C.xxxx
float4 x1 = x0 - i1 + C.xxxx;
float4 x2 = x0 - i2 + C.yyyy;
float4 x3 = x0 - i3 + C.zzzz;
float4 x4 = x0 + C.wwww;
// Shift the lattice coordinates by 32768.5 before the uint casts.
// NOTE(review): presumably this keeps them non-negative for the assumed
// input range and guards the truncation against float error - confirm.
i = i + 32768.5;
// One hash per corner; each hash yields two values (x and y).
uint2 hash0 = _pcg4d16((uint4)i);
uint2 hash1 = _pcg4d16((uint4)(i + i1));
uint2 hash2 = _pcg4d16((uint4)(i + i2));
uint2 hash3 = _pcg4d16((uint4)(i + i3));
uint2 hash4 = _pcg4d16((uint4)(i + 1 ));
// Two gradient vectors per corner: pN0 from hashN.x, pN1 from hashN.y.
float4 p00 = _gradient4d(hash0.x); float4 p01 = _gradient4d(hash0.y);
float4 p10 = _gradient4d(hash1.x); float4 p11 = _gradient4d(hash1.y);
float4 p20 = _gradient4d(hash2.x); float4 p21 = _gradient4d(hash2.y);
float4 p30 = _gradient4d(hash3.x); float4 p31 = _gradient4d(hash3.y);
float4 p40 = _gradient4d(hash4.x); float4 p41 = _gradient4d(hash4.y);
// Calculate noise gradients.
// Per-corner falloff m = saturate(0.6 - |x|^2); the five corners are split
// into a float3 (corners 0-2) and a float2 (corners 3-4).
float3 m0 = saturate(0.6 - float3(dot(x0, x0), dot(x1, x1), dot(x2, x2)));
float2 m1 = saturate(0.6 - float2(dot(x3, x3), dot(x4, x4) ));
// Squares and cubes of the falloff.
float3 m02 = m0 * m0; float3 m03 = m02 * m0;
float2 m12 = m1 * m1; float2 m13 = m12 * m1;
// Gradient of sum(m^3 * dot(p, x)) is sum(-6 * m^2 * dot(p, x) * x + m^3 * p).
// First gradient, built from the pN0 vectors.
float3 temp0 = m02 * float3(dot(p00, x0), dot(p10, x1), dot(p20, x2));
float2 temp1 = m12 * float2(dot(p30, x3), dot(p40, x4));
float4 grad0 = -6.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4);
grad0 += m03.x * p00 + m03.y * p10 + m03.z * p20 + m13.x * p30 + m13.y * p40;
// Second gradient, built from the pN1 vectors (temp0/temp1 are reused).
temp0 = m02 * float3(dot(p01, x0), dot(p11, x1), dot(p21, x2));
temp1 = m12 * float2(dot(p31, x3), dot(p41, x4));
float4 grad1 = -6.0 * (temp0.x * x0 + temp0.y * x1 + temp0.z * x2 + temp1.x * x3 + temp1.y * x4);
grad1 += m03.x * p01 + m03.y * p11 + m03.z * p21 + m13.x * p31 + m13.y * p41;
// The cross product of two gradients is divergence free.
// Only the xyz parts of the 4D gradients are used; the w parts are discarded.
return cross(grad0.xyz, grad1.xyz) * 81.0;
}