You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

307 lines
8.1 KiB

#version 450 core
#extension GL_AMD_gpu_shader_half_float: enable
#extension GL_ARB_gpu_shader_int64: enable
// Shader entry point. Its body is empty, so it calls none of the other
// functions declared in this shader.
void main() {}
// Half-float literals, written with both the lowercase `hf` and the
// uppercase `HF` suffix.
void literal()
{
    const float16_t tiny = 0.000001hf;
    const f16vec2 pairConst = f16vec2(-0.25HF, 0.03HF);

    f16vec2 acc;
    acc.x = tiny;      // scalar constant stored into one component
    acc += pairConst;  // component-wise add of the vector constant
}
// Block memory layout
// Half-float struct with one scalar, one 2-component and one 3-component
// member. It is used as a member type (single and array) in blocks B1 and B2.
// The offsets below are relative to the start of the struct.
struct S
{
float16_t x; // rule 1: align = 2, takes offsets 0-1
f16vec2 y; // rule 2: align = 4, takes offsets 4-7
f16vec3 z; // rule 3: align = 8, takes offsets 8-13
};
// Uniform block laid out with std140, column-major matrices.
// Each member comment names the layout rule that applies and the byte
// offsets the member occupies; a float16_t component is 2 bytes, so an
// f16vec3 occupies 6 bytes even though it is aligned to 8.
layout(column_major, std140) uniform B1
{
float16_t a; // rule 1: align = 2, takes offsets 0-1
f16vec2 b; // rule 2: align = 4, takes offsets 4-7
f16vec3 c; // rule 3: align = 8, takes offsets 8-13
float16_t d[2]; // rule 4: align = 16, array stride = 16,
// takes offsets 16-47
f16mat2x3 e; // rule 5: align = 16, matrix stride = 16,
// takes offsets 48-79
f16mat2x3 f[2]; // rule 6: align = 16, matrix stride = 16,
// array stride = 32, f[0] takes
// offsets 80-111, f[1] takes offsets
// 112-143
S g; // rule 9: align = 16, g.x takes offsets
// 144-145, g.y takes offsets 148-151,
// g.z takes offsets 152-157 (g is padded
// through offset 159)
S h[2]; // rule 10: align = 16, array stride = 16, h[0]
// takes offsets 160-175, h[1] takes
// offsets 176-191
};
// Shader storage block laid out with std430, row-major matrices.
// Each member comment names the layout rule that applies and the byte
// offsets the member occupies. As the comments show, arrays and structs
// here keep alignments of 2, 4 or 8 bytes rather than 16.
layout(row_major, std430) buffer B2
{
float16_t o; // rule 1: align = 2, takes offsets 0-1
f16vec2 p; // rule 2: align = 4, takes offsets 4-7
f16vec3 q; // rule 3: align = 8, takes offsets 8-13
float16_t r[2]; // rule 4: align = 2, array stride = 2, takes
// offsets 14-17
f16mat2x3 s; // rule 7: align = 4, matrix stride = 4, takes
// offsets 20-31
f16mat2x3 t[2]; // rule 8: align = 4, matrix stride = 4, array
// stride = 12, t[0] takes offsets
// 32-43, t[1] takes offsets 44-55
S u; // rule 9: align = 8, u.x takes offsets
// 56-57, u.y takes offsets 60-63, u.z
// takes offsets 64-69
S v[2]; // rule 10: align = 8, array stride = 16, v[0]
// takes offsets 72-87, v[1] takes
// offsets 88-103
};
// Specialization constants: one each of float16, float and double.
layout(constant_id = 100) const float16_t sf16 = 0.125hf;
layout(constant_id = 101) const float sf = 0.25;
layout(constant_id = 102) const double sd = 0.5lf;

// Constants derived from the specialization constants above, one per
// conversion direction between float16 and float/double. Each uses the
// constructor of its own destination type so that the conversion named by
// the constant is the one actually performed.
const float f16_to_f = float(sf16);
const double f16_to_d = double(sf16);
const float16_t f_to_f16 = float16_t(sf);
const float16_t d_to_f16 = float16_t(sd);
// Applies arithmetic, relational and vector/matrix operators to half-float
// scalar, vector and matrix operands. The locals are never initialized and
// no result is read outside this function.
void operators()
{
    float16_t half1;
    f16vec2 halfVec;
    f16mat2x2 halfMat;
    bool cmp;

    // Arithmetic: compound assignment
    halfVec += halfVec;
    halfVec -= halfVec;
    halfVec *= halfVec;
    halfVec /= halfVec;

    // Arithmetic: postfix on the vector, prefix on the matrix
    halfVec++;
    halfVec--;
    ++halfMat;
    --halfMat;

    // Arithmetic: unary negation
    halfVec = -halfVec;
    halfMat = -halfMat;

    // Arithmetic: binary operators on scalar components
    half1 = halfVec.x + halfVec.y;
    half1 = halfVec.x - halfVec.y;
    half1 = halfVec.x * halfVec.y;
    half1 = halfVec.x / halfVec.y;

    // Relational
    cmp = halfVec.x != half1;
    cmp = halfVec.y == half1;
    cmp = halfVec.x > half1;
    cmp = halfVec.y < half1;
    cmp = halfVec.x >= half1;
    cmp = halfVec.y <= half1;

    // Vector/matrix operations
    halfVec = halfVec * half1;    // vector * scalar
    halfMat = halfMat * half1;    // matrix * scalar
    halfVec = halfMat * halfVec;  // matrix * vector
    halfVec = halfVec * halfMat;  // vector * matrix
    halfMat = halfMat * halfMat;  // matrix * matrix
}
// Converts a half-float vector to and from every other 3-component vector
// type declared below (bool, float, double, int, uint, int64, uint64), using
// the destination type's constructor. Each source type gets one conversion
// in each direction.
void typeCast()
{
    bvec3 bv;
    vec3 fv;
    dvec3 dv;
    ivec3 iv;
    uvec3 uv;
    i64vec3 i64v;
    u64vec3 u64v;
    f16vec3 f16v;

    f16v = f16vec3(bv);    // bool -> float16
    bv = bvec3(f16v);      // float16 -> bool

    f16v = f16vec3(fv);    // float -> float16
    fv = vec3(f16v);       // float16 -> float

    f16v = f16vec3(dv);    // double -> float16
    dv = dvec3(f16v);      // float16 -> double (source must be f16v, not dv)

    f16v = f16vec3(iv);    // int -> float16
    iv = ivec3(f16v);      // float16 -> int

    f16v = f16vec3(uv);    // uint -> float16
    uv = uvec3(f16v);      // float16 -> uint

    f16v = f16vec3(i64v);  // int64 -> float16
    i64v = i64vec3(f16v);  // float16 -> int64

    f16v = f16vec3(u64v);  // uint64 -> float16
    u64v = u64vec3(f16v);  // float16 -> uint64
}
// Calls the angle and trigonometry built-ins with f16vec4 arguments.
// `src` is only ever read; `dst` receives every result.
void builtinAngleTrigFuncs()
{
    f16vec4 src, dst;

    // Angle conversion
    dst = radians(src);
    dst = degrees(src);

    // Circular functions and their inverses
    dst = sin(src);
    dst = cos(src);
    dst = tan(src);
    dst = asin(src);
    dst = acos(src);
    dst = atan(src, dst);  // two-argument form
    dst = atan(src);       // one-argument form

    // Hyperbolic functions and their inverses
    dst = sinh(src);
    dst = cosh(src);
    dst = tanh(src);
    dst = asinh(src);
    dst = acosh(src);
    dst = atanh(src);
}
// Calls the exponential built-ins with f16vec2 arguments.
// `src` is only ever read; `dst` receives every result.
void builtinExpFuncs()
{
    f16vec2 src, dst;

    dst = pow(src, dst);
    dst = exp(src);
    dst = log(src);
    dst = exp2(src);
    dst = log2(src);
    dst = sqrt(src);
    dst = inversesqrt(src);
}
// Calls the "common" built-ins with half-float arguments, covering the
// vector/vector, vector/scalar and boolean-selector overloads that appear
// below.
void builtinCommonFuncs()
{
    f16vec3 va, vb, vc;
    float16_t s;
    bool flag;
    bvec3 flags;
    ivec3 exps;

    // Sign and rounding
    vb = abs(va);
    vb = sign(va);
    vb = floor(va);
    vb = trunc(va);
    vb = round(va);
    vb = roundEven(va);
    vb = ceil(va);
    vb = fract(va);

    // Modulus
    vb = mod(va, vb);
    vb = mod(va, s);
    vc = modf(va, vb);  // vb is the out parameter

    // min / max / clamp
    vc = min(va, vb);
    vc = min(va, s);
    vc = max(va, vb);
    vc = max(va, s);
    vc = clamp(va, s, vb.x);
    vc = clamp(va, vb, f16vec3(s));

    // Interpolation and thresholds
    vc = mix(va, vb, s);
    vc = mix(va, vb, vc);
    vc = mix(va, vb, flags);  // boolean selector overload
    vc = step(va, vb);
    vc = step(s, vc);
    vc = smoothstep(va, vb, vc);
    vc = smoothstep(s, va.x, vb);

    // Classification
    flag = isnan(s);
    flags = isinf(va);

    // Fused multiply-add and exponent manipulation
    vc = fma(va, vb, vc);
    vb = frexp(va, exps);  // exps is the out parameter
    vb = ldexp(va, exps);
}
// Calls packFloat2x16 on an f16vec2 and unpackFloat2x16 on the resulting
// uint.
void builtinPackUnpackFuncs()
{
    uint packed;
    f16vec2 pair;

    packed = packFloat2x16(pair);
    pair = unpackFloat2x16(packed);
}
// Calls the geometric built-ins with f16vec3 arguments.
void builtinGeometryFuncs()
{
    float16_t s;
    f16vec3 va, vb, vc;

    // Scalar results
    s = length(va);
    s = distance(va, vb);
    s = dot(va, vb);

    // Vector results
    vc = cross(va, vb);
    vb = normalize(va);
    vc = faceforward(va, vb, vc);
    vc = reflect(va, vb);
    vc = refract(va, vb, s);
}
// Calls the matrix built-ins with half-float matrices of the shapes each
// call needs.
void builtinMatrixFuncs()
{
    f16mat2x3 m23a, m23b, m23prod;
    f16mat3x2 m32;
    f16mat3 m3;
    f16mat4 m4inv, m4src;
    f16vec3 col;
    f16vec2 row;
    float16_t det;

    m23prod = matrixCompMult(m23a, m23b);
    m23a = outerProduct(col, row);  // f16vec3 x f16vec2 -> f16mat2x3
    m32 = transpose(m23a);          // f16mat2x3 -> f16mat3x2
    det = determinant(m3);
    m4inv = inverse(m4src);
}
// Calls the component-wise vector relational built-ins on two f16vec3
// operands, storing each bvec3 result.
void builtinVecRelFuncs()
{
    f16vec3 lhs, rhs;
    bvec3 result;

    result = lessThan(lhs, rhs);
    result = lessThanEqual(lhs, rhs);
    result = greaterThan(lhs, rhs);
    result = greaterThanEqual(lhs, rhs);
    result = equal(lhs, rhs);
    result = notEqual(lhs, rhs);
}
// Half-float fragment input consumed by builtinFragProcFuncs().
in f16vec3 if16v;

// Calls the fragment-processing built-ins (derivatives and interpolation)
// on the half-float input `if16v`, at scalar, 2-component and 3-component
// widths.
void builtinFragProcFuncs()
{
    f16vec3 f16v;

    // Derivative: each of the default, Fine and Coarse variants is called
    // once for x and once for y.
    f16v.x = dFdx(if16v.x);
    f16v.y = dFdy(if16v.y);
    f16v.xy = dFdxFine(if16v.xy);
    f16v.xy = dFdyFine(if16v.xy);
    f16v = dFdxCoarse(if16v);
    f16v = dFdyCoarse(if16v);

    f16v.x = fwidth(if16v.x);
    f16v.xy = fwidthFine(if16v.xy);
    f16v = fwidthCoarse(if16v);

    // Interpolation
    f16v.x = interpolateAtCentroid(if16v.x);
    f16v.xy = interpolateAtSample(if16v.xy, 1);
    f16v = interpolateAtOffset(if16v, f16vec2(0.5hf));
}