2024-05-07 20:05:50 +04:00

873 lines
28 KiB
Metal

#include <metal_stdlib>
using namespace metal;
// Per-vertex input of quad_vertex_shader: a 2D position and its texture coordinate,
// both stored as tightly packed float pairs.
struct QuadVertex {
    packed_float2 position;
    packed_float2 texCoord;
};
// Per-vertex input of fill_vertex_shader: a single packed 2D position.
struct Vertex {
    packed_float2 position;
};
// Output of quad_vertex_shader, consumed as [[stage_in]] by the quad fragment shaders.
struct QuadOut {
    // Transformed vertex position.
    float4 position [[position]];
    // Texture coordinate copied from the input vertex.
    float2 texCoord;
    // xy of the transformed position; the gradient shaders read it as the pixel position.
    float2 transformedPosition;
};
// Output of fill_vertex_shader.
struct FillVertexOut {
    // Transformed vertex position.
    float4 position [[position]];
    // sign() of the triangle's 2D cross product: -1, 0 or +1.
    float direction;
};
// Returns the 2D cross product (b - a) x (c - a), i.e. twice the signed area of the
// triangle a, b, c. The sign tells which way the triangle winds; the result is 0 when
// the three points are collinear.
float calculateNormalDirection(float2 a, float2 b, float2 c) {
    const float2 edgeAB = b - a;
    const float2 edgeAC = c - a;
    return edgeAB.x * edgeAC.y - edgeAB.y * edgeAC.x;
}
// Vertex shader for textured / gradient quads.
//   vertices  - quad vertices (position + texture coordinate), indexed by vertex id
//   transform - 4x4 matrix applied to (position.x, position.y, 0, 1)
// Writes the transformed position, the untouched texture coordinate, and the xy of the
// transformed position as `transformedPosition`.
vertex QuadOut quad_vertex_shader(
    device QuadVertex const *vertices [[buffer(0)]],
    uint vertexId [[vertex_id]],
    device matrix<float, 4> const &transform [[buffer(1)]]
) {
    QuadVertex quadVertex = vertices[vertexId];

    // The same product feeds both the rasterizer position and transformedPosition.
    float4 transformed = transform * float4(float2(quadVertex.position), 0.0, 1.0);

    QuadOut result;
    result.position = transformed;
    result.texCoord = quadVertex.texCoord;
    result.transformedPosition = transformed.xy;
    return result;
}
// Vertex shader that expands a list of path points into a triangle fan around `baseVertex`.
// Every group of three vertex ids forms one triangle:
//   corner 0 -> baseVertex
//   corner 1 -> vertices[triangleIndex]
//   corner 2 -> vertices[triangleIndex + 1]
// so triangle k spans the point pairs [0, 1], [1, 2], [2, 3], ...
// `direction` carries the sign of the triangle's 2D cross product and is the same for
// all three corners of a triangle.
vertex FillVertexOut fill_vertex_shader(
    device Vertex const *vertices [[buffer(0)]],
    uint vertexId [[vertex_id]],
    device matrix<float, 4> const &transform [[buffer(1)]],
    device packed_float2 const &baseVertex [[buffer(2)]]
) {
    const uint triangleIndex = vertexId / 3;
    const uint cornerIndex = vertexId % 3;

    const float2 fanOrigin = float2(baseVertex);
    const float2 edgeStart = float2(vertices[triangleIndex].position);
    const float2 edgeEnd = float2(vertices[triangleIndex + 1].position);

    float2 sourcePosition;
    switch (cornerIndex) {
        case 0:
            sourcePosition = fanOrigin;
            break;
        case 1:
            sourcePosition = edgeStart;
            break;
        default:
            sourcePosition = edgeEnd;
            break;
    }

    const float winding = calculateNormalDirection(fanOrigin, edgeStart, edgeEnd);

    FillVertexOut result;
    result.position = transform * float4(sourcePosition, 0.0, 1.0);
    result.direction = sign(winding);
    return result;
}
// Value stored in color attachment 1. The fill shaders write only its red channel, and
// the merge_* shaders read that channel to decide whether a pixel is covered.
struct ShapeOut {
half4 color [[color(1)]];
};
// Accumulates fill coverage into the red channel of color attachment 1.
//   in      - interpolated output of fill_vertex_shader (only `direction` is used)
//   current - value currently stored in the attachment for this pixel
//   mode    - 0: add direction * 3/255 to the stored value, treating an untouched pixel
//                (red == 0) as the bias value 127/255;
//             otherwise: toggle red between 0 and 1 each time the pixel is covered.
// NOTE(review): mode 0 / non-zero presumably correspond to the non-zero and even-odd
// fill rules — confirm against the host code.
fragment ShapeOut fragment_shader(
    FillVertexOut in [[stage_in]],
    ShapeOut current,
    device const int32_t &mode [[buffer(1)]]
) {
    ShapeOut out = current;

    if (mode != 0) {
        out.color.r = (out.color.r == 0.0) ? 1.0 : 0.0;
        return out;
    }

    half counter = (out.color.r == 0.0) ? half(127.0 / 255.0) : out.color.r;
    counter += half(in.direction) * 3.0 / 255.0;
    out.color.r = counter;
    return out;
}
// Resets color attachment 1 to all zeros for every pixel it is run on.
// The interpolated quad input is not used.
fragment ShapeOut clear_mask_fragment(
    QuadOut in [[stage_in]]
) {
    ShapeOut cleared;
    cleared.color = half4(0.0);
    return cleared;
}
// Final color written to color attachment 0 by the merge_* and stroke fragment shaders.
struct ColorOut {
half4 color [[color(0)]];
};
// Resolves the coverage stored in color attachment 1 into a solid color in attachment 0.
//   colorIn - coverage value written by fragment_shader (red channel)
//   color   - straight (non-premultiplied) RGBA fill color; premultiplied here
//   mode    - 0: a pixel is filled when red > 1/255 and differs from the bias 127/255
//                by more than 2/255;
//             otherwise: a pixel is filled when red > 1/255.
// Unfilled pixels output all zeros.
fragment ColorOut merge_color_fill_fragment_shader(
    ShapeOut colorIn,
    device const float4 &color [[buffer(0)]],
    device const int32_t &mode [[buffer(1)]]
) {
    half4 premultiplied = half4(color);
    premultiplied.rgb *= premultiplied.a;

    bool isFilled = colorIn.color.r > 1.0 / 255.0;
    if (mode == 0) {
        half deviation = abs(colorIn.color.r - 127.0 / 255.0);
        isFilled = isFilled && (deviation > (2.0 / 255.0));
    }
    float outColorFactor = isFilled ? 1.0 : 0.0;

    ColorOut out;
    out.color = premultiplied * outColorFactor;
    // Fully transparent results are still written; discard_fragment() is intentionally
    // left disabled here.
    return out;
}
// One gradient stop as laid out in the color-stop buffer (16 + 4 = 20 bytes).
struct GradientColorStop {
    // Straight (non-premultiplied) RGBA color of the stop.
    packed_float4 color;
    // Position of the stop along the gradient; compared against a value clamped to [0, 1].
    float location;
};
// Linear interpolation factor of x between edge0 and edge1, clamped to [0, 1].
//   edge0 - value at which the result is 0
//   edge1 - value at which the result is 1
//   x     - value to map
// When both edges coincide (two color stops at the same location) the ratio would be
// 0/0 or +-inf, so the function falls back to a hard step: 1 for x > edge0, else 0.
float linearGradientStep(float edge0, float edge1, float x) {
    const float span = edge1 - edge0;
    if (span == 0.0) {
        return x > edge0 ? 1.0 : 0.0;
    }
    return clamp((x - edge0) / span, float(0), float(1));
}
// Resolves the coverage stored in color attachment 1 into a linear-gradient color in
// attachment 0.
//   quadIn             - interpolated quad output; transformedPosition is the pixel position
//   colorIn            - coverage value written by fragment_shader (red channel)
//   colorStops         - gradient stops; at least one entry is read unconditionally
//   mode               - 0: filled when red > 1/255 and |red - 127/255| > 2/255;
//                        otherwise: filled when red > 1/255
//   numColorStops      - number of entries in colorStops
//   localStartPosition - gradient start point, in the same space as transformedPosition
//   localEndPosition   - gradient end point, in the same space as transformedPosition
// The gradient parameter is the projection of the pixel onto the start->end axis,
// normalized by the axis length and clamped to [0, 1]. The color is built by walking the
// stops in order and blending towards each next stop.
// A degenerate axis (start == end) would otherwise normalize a zero vector and divide by
// zero; in that case the parameter is fixed at 1.0, which yields the last stop's color.
// NOTE(review): the fallback value for a degenerate gradient is a choice made here —
// confirm it matches the reference renderer.
fragment ColorOut merge_linear_gradient_fill_fragment_shader(
    QuadOut quadIn [[stage_in]],
    ShapeOut colorIn,
    device const GradientColorStop *colorStops [[buffer(0)]],
    device const int32_t &mode [[buffer(1)]],
    device const uint &numColorStops [[buffer(2)]],
    device const packed_float2 &localStartPosition [[buffer(3)]],
    device const packed_float2 &localEndPosition [[buffer(4)]]
) {
    float4 sourceColor = float4(colorStops[0].color);

    if (numColorStops > 1) {
        const float2 gradientStart = float2(localStartPosition);
        const float2 gradientDelta = float2(localEndPosition) - gradientStart;
        const float gradientLengthSquared = dot(gradientDelta, gradientDelta);

        float pixelValue = 1.0;
        if (gradientLengthSquared > 0.0) {
            // dot(p, d) / |d|^2 equals (dot(p, d / |d|)) / |d| without the normalize/length.
            const float2 pointVector = quadIn.transformedPosition.xy - gradientStart;
            pixelValue = clamp(dot(pointVector, gradientDelta) / gradientLengthSquared, 0.0, 1.0);
        }

        // Blend stop by stop; the factor is 0 before a segment and 1 after it.
        for (uint i = 0; i + 1 < numColorStops; i++) {
            const float4 nextStopColor = float4(colorStops[i + 1].color);
            sourceColor = mix(sourceColor, nextStopColor, linearGradientStep(
                colorStops[i].location,
                colorStops[i + 1].location,
                pixelValue
            ));
        }
    }

    // Premultiply alpha.
    half4 sampledColor = half4(sourceColor);
    sampledColor.rgb *= sampledColor.a;

    bool isFilled = colorIn.color.r > 1.0 / 255.0;
    if (mode == 0) {
        half diff = abs(colorIn.color.r - 127.0 / 255.0);
        isFilled = isFilled && (diff > (2.0 / 255.0));
    }
    float outColorFactor = isFilled ? 1.0 : 0.0;

    ColorOut out;
    out.color = sampledColor * outColorFactor;
    // Fully transparent results are still written; discard_fragment() is intentionally
    // left disabled here.
    return out;
}
// Resolves the coverage stored in color attachment 1 into a radial-gradient color in
// attachment 0.
//   quadIn             - interpolated quad output; transformedPosition is the pixel position
//   colorIn            - coverage value written by fragment_shader (red channel)
//   colorStops         - gradient stops; at least one entry is read unconditionally
//   mode               - 0: filled when red > 1/255 and |red - 127/255| > 2/255;
//                        otherwise: filled when red > 1/255
//   numColorStops      - number of entries in colorStops
//   localStartPosition - gradient center, in the same space as transformedPosition
//   localEndPosition   - point whose distance from the center defines the radius
// The gradient parameter is distance(pixel, center) / radius, clamped to [0, 1].
// A zero radius (start == end) would divide by zero; the parameter is then fixed at 1.0,
// which is the limit of the ratio for every pixel away from the center and yields the
// last stop's color.
fragment ColorOut merge_radial_gradient_fill_fragment_shader(
    QuadOut quadIn [[stage_in]],
    ShapeOut colorIn,
    device const GradientColorStop *colorStops [[buffer(0)]],
    device const int32_t &mode [[buffer(1)]],
    device const uint &numColorStops [[buffer(2)]],
    device const packed_float2 &localStartPosition [[buffer(3)]],
    device const packed_float2 &localEndPosition [[buffer(4)]]
) {
    float4 sourceColor = float4(colorStops[0].color);

    if (numColorStops > 1) {
        const float2 gradientCenter = float2(localStartPosition);
        const float gradientLength = length(float2(localEndPosition) - gradientCenter);

        float pixelValue = 1.0;
        if (gradientLength > 0.0) {
            const float pixelDistance = distance(quadIn.transformedPosition.xy, gradientCenter);
            pixelValue = clamp(pixelDistance / gradientLength, 0.0, 1.0);
        }

        // Blend stop by stop; the factor is 0 before a segment and 1 after it.
        for (uint i = 0; i + 1 < numColorStops; i++) {
            const float4 nextStopColor = float4(colorStops[i + 1].color);
            sourceColor = mix(sourceColor, nextStopColor, linearGradientStep(
                colorStops[i].location,
                colorStops[i + 1].location,
                pixelValue
            ));
        }
    }

    // Premultiply alpha.
    half4 sampledColor = half4(sourceColor);
    sampledColor.rgb *= sampledColor.a;

    bool isFilled = colorIn.color.r > 1.0 / 255.0;
    if (mode == 0) {
        half diff = abs(colorIn.color.r - 127.0 / 255.0);
        isFilled = isFilled && (diff > (2.0 / 255.0));
    }
    float outColorFactor = isFilled ? 1.0 : 0.0;

    ColorOut out;
    out.color = sampledColor * outColorFactor;
    // Fully transparent results are still written; discard_fragment() is intentionally
    // left disabled here.
    return out;
}
// A packed 2D position. Not referenced by any shader in the visible part of this file.
struct StrokePositionIn {
    packed_float2 position;
};
// One point of a stroked polyline, as read by strokeTerminalVertex and strokeInnerVertex.
struct StrokePointIn {
    packed_float2 point;
};
// A single float id. Not referenced by any shader in the visible part of this file.
struct StrokeRoundJoinVertexIn {
    float id;
};
// A packed 4-component position. Not referenced by any shader in the visible part of this file.
struct StrokeMiterJoinVertexIn {
    packed_float4 position;
};
// A packed 3-component position. Not referenced by any shader in the visible part of this file.
struct StrokeBevelJoinVertexIn {
    packed_float3 position;
};
// A packed 2D position. Not referenced by any shader in the visible part of this file.
struct StrokeCapVertexIn {
    packed_float2 position;
};
// Output of the stroke vertex shaders: only the transformed position is passed on.
struct StrokeVertexOut {
    float4 position [[position]];
};
// Writes the stroke color to color attachment 0.
//   in      - interpolated stroke vertex output (unused)
//   colorIn - current value of color attachment 1 (unused)
//   color   - straight (non-premultiplied) RGBA stroke color; premultiplied here
fragment ColorOut stroke_fragment_shader(
    StrokeVertexOut in [[stage_in]],
    ShapeOut colorIn,
    device const float4 &color [[buffer(0)]]
) {
    half4 premultiplied = half4(color);
    premultiplied.rgb *= premultiplied.a;

    ColorOut out;
    out.color = premultiplied;
    return out;
}
// One cubic Bezier segment to be flattened by evaluateBezier.
// All members are 4-byte aligned, giving a 40-byte record with no padding.
struct BezierInputItem {
    int32_t bufferOffset;  // 4 bytes: index of the first output float in vertexData
    packed_float2 start;   // 8 bytes: curve start point
    packed_float2 end;     // 8 bytes: curve end point
    packed_float2 cp1;     // 8 bytes: first control point
    packed_float2 cp2;     // 8 bytes: second control point
    float offset;          // 4 bytes: not read by evaluateBezier
};
// Compute kernel that samples cubic Bezier curves into a flat float buffer.
//   inputItems - curves to evaluate
//   vertexData - output; sample j of an item is written as (x, y) at
//                bufferOffset + 2 * j and bufferOffset + 2 * j + 1
//   itemCount  - number of valid entries in inputItems
//   index      - x selects the curve, y selects the sample
// Sample j is taken at t = (j + 1) / 8, so j = 7 lands exactly on the curve's end point
// and the start point (t = 0) is never emitted.
// NOTE(review): only index.x is bounds-checked; the grid is presumably dispatched with
// exactly 8 threads in y — confirm against the host code.
kernel void evaluateBezier(
    device BezierInputItem const *inputItems [[buffer(0)]],
    device float *vertexData [[buffer(1)]],
    device uint const &itemCount [[buffer(2)]],
    uint2 index [[ thread_position_in_grid ]]
) {
    if (index.x >= itemCount) {
        return;
    }

    BezierInputItem curve = inputItems[index.x];
    float2 p0 = curve.start;
    float2 p1 = curve.cp1;
    float2 p2 = curve.cp2;
    float2 p3 = curve.end;

    float t = (((float)index.y) + 1.0) / (8.0);
    float u = 1.0 - t;

    // Cubic Bernstein weights.
    float w0 = u * u * u;
    float w1 = 3.0 * t * u * u;
    float w2 = 3.0 * t * t * u;
    float w3 = t * t * t;
    float2 sample = w0 * p0 + w1 * p1 + w2 * p2 + w3 * p3;

    vertexData[curve.bufferOffset + 2 * index.y] = sample.x;
    vertexData[curve.bufferOffset + 2 * index.y + 1] = sample.y;
}
// Samples an offscreen texture with a vertically flipped texture coordinate and scales
// all four channels by `opacity`.
//   in      - interpolated quad output (texCoord is used)
//   texture - source texture
//   opacity - multiplier applied to the sampled color
fragment half4 quad_offscreen_fragment(
    QuadOut in [[stage_in]],
    texture2d<half, access::sample> texture[[texture(0)]],
    device float const &opacity [[buffer(1)]]
) {
    constexpr sampler linearSampler(address::clamp_to_edge, filter::linear);
    float2 flippedCoord = float2(in.texCoord.x, 1.0 - in.texCoord.y);
    half4 sampled = texture.sample(linearSampler, flippedCoord);
    sampled *= half(opacity);
    return sampled;
}
// Samples an offscreen texture and a mask texture at the same vertically flipped
// coordinate, modulates the color by the mask alpha and then by `opacity`.
//   in          - interpolated quad output (texCoord is used)
//   texture     - source texture
//   maskTexture - mask; only its alpha channel is used
//   opacity     - multiplier applied after masking
//   maskMode    - 0: multiply by mask alpha; otherwise: multiply by (1 - mask alpha)
fragment half4 quad_offscreen_fragment_with_mask(
    QuadOut in [[stage_in]],
    texture2d<half, access::sample> texture[[texture(0)]],
    texture2d<half, access::sample> maskTexture[[texture(1)]],
    device float const &opacity [[buffer(1)]],
    device uint const &maskMode [[buffer(2)]]
) {
    constexpr sampler linearSampler(address::clamp_to_edge, filter::linear);
    float2 flippedCoord = float2(in.texCoord.x, 1.0 - in.texCoord.y);

    half4 sampled = texture.sample(linearSampler, flippedCoord);
    half4 maskSample = maskTexture.sample(linearSampler, flippedCoord);

    if (maskMode != 0) {
        // Inverted mask.
        sampled *= 1.0 - maskSample.a;
    } else {
        sampled *= maskSample.a;
    }

    sampled *= half(opacity);
    return sampled;
}
// Returns true when `val` is NaN, detected by the fact that a NaN is neither less than,
// greater than, nor equal to zero.
bool myIsNan(float val) {
    const bool isOrdered = (val < 0.0 || 0.0 < val || val == 0.0);
    return !isOrdered;
}
// A line point is treated as invalid when its w component is zero or its x component
// is NaN.
bool isLinePointInvalid(float4 p) {
    if (p.w == 0.0) {
        return true;
    }
    return myIsNan(p.x);
}
// Adapted from https://github.com/rreusser/regl-gpu-lines
//
// Vertex shader for the terminal instances of a stroke (the ones that carry a cap).
// Each instance reads three consecutive points B, C, D starting at points[instanceId * 3].
// There is no preceding point A: aInvalid starts out false and pA is initialised from pC.
// Vertices with index < _vertCnt2.x + 3 form the first half and are drawn as a cap at B;
// the remaining vertices form the mirrored half and are drawn as a join at C.
//   points       - stroke points, three per instance
//   transform    - matrix applied to the final position
//   _vertCnt2    - vertex counts of the (first, mirrored) halves, before the +3 added below
//   _capJoinRes2 - resolution of the (cap, join)
//   isJoinRound  - non-zero selects round joins
//   isCapRound   - non-zero keeps the round cap; zero applies the square-cap scale
//   miterLimit   - used in the bevel test `1.0 > miterLimit * m2`
//   width        - scale applied to the offset from the line point
// NOTE(review): lBC and lm are used as divisors without a zero check (unlike lAB / lCD);
// a zero-length B-C segment therefore produces non-finite positions.
vertex StrokeVertexOut strokeTerminalVertex(
uint instanceId [[instance_id]],
uint index [[vertex_id]],
device StrokePointIn const *points [[buffer(0)]],
device matrix<float, 4> const &transform [[buffer(1)]],
device packed_float2 const &_vertCnt2 [[buffer(2)]],
device packed_float2 const &_capJoinRes2 [[buffer(3)]],
device uint const &isJoinRound [[buffer(4)]],
device uint const &isCapRound [[buffer(5)]],
device float const &miterLimit [[buffer(6)]],
device float const &width [[buffer(7)]]
) {
const float2 ROUND_CAP_SCALE = float2(1.0, 1.0);
const float2 SQUARE_CAP_SCALE = float2(2.0, 2.0 / sqrt(3.0));
float2 _capScale = isCapRound ? ROUND_CAP_SCALE : SQUARE_CAP_SCALE;
const float pi = 3.141592653589793;
float2 xyB = points[instanceId * 3 + 0].point;
float2 xyC = points[instanceId * 3 + 1].point;
float2 xyD = points[instanceId * 3 + 2].point;
StrokeVertexOut out;
// Points are lifted to homogeneous coordinates with z = 0 and w = 1, so the w == 0 test in
// isLinePointInvalid and the |z| > 1 rejection below cannot trigger; only a NaN x marks a
// point as invalid.
float4 pB = float4(xyB, 0.0, 1.0);
float4 pC = float4(xyC, 0.0, 1.0);
float4 pD = float4(xyD, 0.0, 1.0);
// A sensible default for early returns
out.position = pB;
bool aInvalid = false;
bool bInvalid = isLinePointInvalid(pB);
bool cInvalid = isLinePointInvalid(pC);
bool dInvalid = isLinePointInvalid(pD);
// Vertex count for each part (first half of join, second (mirrored) half). Note that not all of
// these vertices may be used, for example if we have enough for a round cap but only draw a miter
// join.
float2 v = _vertCnt2 + 3.0;
// Total vertex count
float N = dot(v, float2(1));
// If we're past the first half-join and half of the segment, then we swap all vertices and start
// over from the opposite end.
bool mirror = index >= v.x;
// When rendering dedicated endpoints, this allows us to insert an end cap *alone* (without the attached
// segment and join)
if (dInvalid && mirror) {
return out;
}
// Convert to screen-pixel coordinates
// Save w so we can perspective re-multiply at the end to get varyings depth-correct
float pw = mirror ? pC.w : pB.w;
pB = float4(float3(pB.xy, pB.z) / pB.w, 1);
pC = float4(float3(pC.xy, pC.z) / pC.w, 1);
pD = float4(float3(pD.xy, pD.z) / pD.w, 1);
// If it's a cap, mirror A back onto C to accomplish a round
float4 pA = pC;
// Reject if invalid or if outside viewing planes
if (bInvalid || cInvalid || max(abs(pB.z), abs(pC.z)) > 1.0) {
return out;
}
// Swap everything computed so far if computing mirrored half
if (mirror) {
float4 vTmp = pC; pC = pB; pB = vTmp;
vTmp = pD; pD = pA; pA = vTmp;
bool bTmp = dInvalid; dInvalid = aInvalid; aInvalid = bTmp;
}
bool isCap = !mirror;
// Either flip A onto C (and D onto B) to produce a 180 degree-turn cap, or extrapolate to produce a
// degenerate (no turn) join, depending on whether we're inserting caps or just leaving ends hanging.
if (aInvalid) { pA = 2.0 * pB - pC; }
if (dInvalid) { pD = 2.0 * pC - pB; }
bool roundOrCap = isJoinRound || isCap;
// Tangent and normal vectors
float2 tBC = pC.xy - pB.xy;
float lBC = length(tBC);
tBC /= lBC;
float2 nBC = float2(-tBC.y, tBC.x);
float2 tAB = pB.xy - pA.xy;
float lAB = length(tAB);
if (lAB > 0.0) tAB /= lAB;
float2 nAB = float2(-tAB.y, tAB.x);
float2 tCD = pD.xy - pC.xy;
float lCD = length(tCD);
if (lCD > 0.0) tCD /= lCD;
float2 nCD = float2(-tCD.y, tCD.x);
// Clamp for safety, since we take the arccos
float cosB = clamp(dot(tAB, tBC), -1.0, 1.0);
// This section is somewhat fragile. When lines are collinear, signs flip randomly and break orientation
// of the middle segment. The fix appears straightforward, but this took a few hours to get right.
const float tol = 1e-4;
float mirrorSign = mirror ? -1.0 : 1.0;
float dirB = -dot(tBC, nAB);
// dirC (and the tCD / nCD / cCollinear values feeding it) is computed but never read afterwards.
float dirC = dot(tBC, nCD);
bool bCollinear = abs(dirB) < tol;
bool cCollinear = abs(dirC) < tol;
bool bIsHairpin = bCollinear && cosB < 0.0;
// bool cIsHairpin = cCollinear && dot(tBC, tCD) < 0.0;
dirB = bCollinear ? -mirrorSign : sign(dirB);
dirC = cCollinear ? -mirrorSign : sign(dirC);
float2 miter = bIsHairpin ? -tBC : 0.5 * (nAB + nBC) * dirB;
// Compute our primary "join index", that is, the index starting at the very first point of the join.
// The second half of the triangle strip instance is just the first, reversed, and with vertices swapped!
float i = mirror ? N - index : index;
// Decide the resolution of whichever feature we're drawing. n is twice the number of points used since
// that's the only form in which we use this number.
float res = (isCap ? _capJoinRes2.x : _capJoinRes2.y);
// Shift the index to send unused vertices to an index below zero, which will then just get clamped to
// zero and result in repeated points, i.e. degenerate triangles.
i -= max(0.0, (mirror ? _vertCnt2.y : _vertCnt2.x) - res);
// Use the direction to offset the index by one. This has the effect of flipping the winding number so
// that it's always consistent no matter which direction the join turns.
i += (dirB < 0.0 ? -1.0 : 0.0);
// Vertices of the second (mirrored) half of the join are offset by one to get it to connect correctly
// in the middle, where the mirrored and unmirrored halves meet.
i -= mirror ? 1.0 : 0.0;
// Clamp to zero and repeat unused excess vertices.
i = max(0.0, i);
// Start with a default basis pointing along the segment with normal vector outward
float2 xBasis = tBC;
float2 yBasis = nBC * dirB;
// Default point is 0 along the segment, 1 (width unit) normal to it
float2 xy = float2(0);
if (i == res + 1.0) {
// pick off this one specific index to be the interior miter point
// If not div-by-zero, then sinB / (1 + cosB)
float m = cosB > -0.9999 ? (tAB.x * tBC.y - tAB.y * tBC.x) / (1.0 + cosB) : 0.0;
xy = float2(min(abs(m), min(lBC, lAB) / width), -1);
} else {
// Draw half of a join
float m2 = dot(miter, miter);
float lm = sqrt(m2);
yBasis = miter / lm;
xBasis = dirB * float2(yBasis.y, -yBasis.x);
bool isBevel = 1.0 > miterLimit * m2;
if (((int)i) % 2 == 0) {
// Outer joint points
if (roundOrCap || i != 0.0) {
// Round joins
float theta = -0.5 * (acos(cosB) * (clamp(i, 0.0, res) / res) - pi) * (isCap ? 2.0 : 1.0);
xy = float2(cos(theta), sin(theta));
if (isCap) {
// A special multiplier factor for turning 3-point rounds into square caps (but leave the
// y == 0.0 point unaffected)
if (xy.y > 0.001) xy *= _capScale;
}
} else {
// Miter joins
yBasis = bIsHairpin ? float2(0) : miter;
xy.y = isBevel ? 1.0 : 1.0 / m2;
}
} else {
// Offset the center vertex position to get bevel SDF correct
if (isBevel && !roundOrCap) {
xy.y = -1.0 + sqrt((1.0 + cosB) * 0.5);
}
}
}
// Point offset from main vertex position
float2 dP = float2x2(xBasis, yBasis) * xy;
// Offset the (possibly swapped) B point by the scaled basis offset, restore w, then apply
// the transform.
out.position = pB;
out.position.xy += width * dP;
out.position *= pw;
out.position = transform * out.position;
return out;
}
// Vertex shader for the interior instances of a stroke (segments with a join on both ends).
// Each instance reads four consecutive points A, B, C, D starting at points[instanceId], so
// successive instances slide along the polyline by one point.
// Vertices with index < _vertCnt2.x + 3 form the first half (join at B); the remaining
// vertices form the mirrored half (join at C). isCap is always false here.
//   points       - stroke points
//   transform    - matrix applied to the final position
//   _vertCnt2    - vertex counts of the (first, mirrored) halves, before the +3 added below
//   _capJoinRes2 - resolution of the (cap, join); only the join component is selected
//   isJoinRound  - non-zero selects round joins
//   isCapRound   - only feeds _capScale, which is applied solely inside an `if (isCap)` branch
//   miterLimit   - used in the bevel test `1.0 > miterLimit * m2`
//   width        - scale applied to the offset from the line point
// NOTE(review): lBC and lm are used as divisors without a zero check (unlike lAB / lCD);
// a zero-length B-C segment therefore produces non-finite positions.
vertex StrokeVertexOut strokeInnerVertex(
uint instanceId [[instance_id]],
uint index [[vertex_id]],
device StrokePointIn const *points [[buffer(0)]],
device matrix<float, 4> const &transform [[buffer(1)]],
device packed_float2 const &_vertCnt2 [[buffer(2)]],
device packed_float2 const &_capJoinRes2 [[buffer(3)]],
device uint const &isJoinRound [[buffer(4)]],
device uint const &isCapRound [[buffer(5)]],
device float const &miterLimit [[buffer(6)]],
device float const &width [[buffer(7)]]
) {
const float2 ROUND_CAP_SCALE = float2(1.0, 1.0);
const float2 SQUARE_CAP_SCALE = float2(2.0, 2.0 / sqrt(3.0));
float2 _capScale = isCapRound ? ROUND_CAP_SCALE : SQUARE_CAP_SCALE;
const float pi = 3.141592653589793;
float2 xyA = points[instanceId + 0].point;
float2 xyB = points[instanceId + 1].point;
float2 xyC = points[instanceId + 2].point;
float2 xyD = points[instanceId + 3].point;
StrokeVertexOut out;
// Points are lifted to homogeneous coordinates with z = 0 and w = 1, so the w == 0 test in
// isLinePointInvalid and the |z| > 1 rejection below cannot trigger; only a NaN x marks a
// point as invalid.
float4 pA = float4(xyA, 0.0, 1.0);
float4 pB = float4(xyB, 0.0, 1.0);
float4 pC = float4(xyC, 0.0, 1.0);
float4 pD = float4(xyD, 0.0, 1.0);
// A sensible default for early returns
out.position = pB;
bool aInvalid = isLinePointInvalid(pA);
bool bInvalid = isLinePointInvalid(pB);
bool cInvalid = isLinePointInvalid(pC);
bool dInvalid = isLinePointInvalid(pD);
// Vertex count for each part (first half of join, second (mirrored) half). Note that not all of
// these vertices may be used, for example if we have enough for a round cap but only draw a miter
// join.
float2 v = _vertCnt2 + 3.0;
// Total vertex count
float N = dot(v, float2(1));
// If we're past the first half-join and half of the segment, then we swap all vertices and start
// over from the opposite end.
bool mirror = index >= v.x;
// When rendering dedicated endpoints, this allows us to insert an end cap *alone* (without the attached
// segment and join)
// (No statement follows the comment above in this function.)
// Convert to screen-pixel coordinates
// Save w so we can perspective re-multiply at the end to get varyings depth-correct
float pw = mirror ? pC.w : pB.w;
pA = float4(float3(pA.xy, pA.z) / pA.w, 1);
pB = float4(float3(pB.xy, pB.z) / pB.w, 1);
pC = float4(float3(pC.xy, pC.z) / pC.w, 1);
pD = float4(float3(pD.xy, pD.z) / pD.w, 1);
// If it's a cap, mirror A back onto C to accomplish a round
// (No statement follows the comment above in this function; A is read from the buffer.)
// Reject if invalid or if outside viewing planes
if (bInvalid || cInvalid || max(abs(pB.z), abs(pC.z)) > 1.0) {
return out;
}
// Swap everything computed so far if computing mirrored half
if (mirror) {
float4 vTmp = pC; pC = pB; pB = vTmp;
vTmp = pD; pD = pA; pA = vTmp;
bool bTmp = dInvalid; dInvalid = aInvalid; aInvalid = bTmp;
}
const bool isCap = false;
// Either flip A onto C (and D onto B) to produce a 180 degree-turn cap, or extrapolate to produce a
// degenerate (no turn) join, depending on whether we're inserting caps or just leaving ends hanging.
if (aInvalid) { pA = 2.0 * pB - pC; }
if (dInvalid) { pD = 2.0 * pC - pB; }
bool roundOrCap = isJoinRound || isCap;
// Tangent and normal vectors
float2 tBC = pC.xy - pB.xy;
float lBC = length(tBC);
tBC /= lBC;
float2 nBC = float2(-tBC.y, tBC.x);
float2 tAB = pB.xy - pA.xy;
float lAB = length(tAB);
if (lAB > 0.0) tAB /= lAB;
float2 nAB = float2(-tAB.y, tAB.x);
float2 tCD = pD.xy - pC.xy;
float lCD = length(tCD);
if (lCD > 0.0) tCD /= lCD;
float2 nCD = float2(-tCD.y, tCD.x);
// Clamp for safety, since we take the arccos
float cosB = clamp(dot(tAB, tBC), -1.0, 1.0);
// This section is somewhat fragile. When lines are collinear, signs flip randomly and break orientation
// of the middle segment. The fix appears straightforward, but this took a few hours to get right.
const float tol = 1e-4;
float mirrorSign = mirror ? -1.0 : 1.0;
float dirB = -dot(tBC, nAB);
// dirC (and the tCD / nCD / cCollinear values feeding it) is computed but never read afterwards.
float dirC = dot(tBC, nCD);
bool bCollinear = abs(dirB) < tol;
bool cCollinear = abs(dirC) < tol;
bool bIsHairpin = bCollinear && cosB < 0.0;
// bool cIsHairpin = cCollinear && dot(tBC, tCD) < 0.0;
dirB = bCollinear ? -mirrorSign : sign(dirB);
dirC = cCollinear ? -mirrorSign : sign(dirC);
float2 miter = bIsHairpin ? -tBC : 0.5 * (nAB + nBC) * dirB;
// Compute our primary "join index", that is, the index starting at the very first point of the join.
// The second half of the triangle strip instance is just the first, reversed, and with vertices swapped!
float i = mirror ? N - index : index;
// Decide the resolution of whichever feature we're drawing. n is twice the number of points used since
// that's the only form in which we use this number.
float res = (isCap ? _capJoinRes2.x : _capJoinRes2.y);
// Shift the index to send unused vertices to an index below zero, which will then just get clamped to
// zero and result in repeated points, i.e. degenerate triangles.
i -= max(0.0, (mirror ? _vertCnt2.y : _vertCnt2.x) - res);
// Use the direction to offset the index by one. This has the effect of flipping the winding number so
// that it's always consistent no matter which direction the join turns.
i += (dirB < 0.0 ? -1.0 : 0.0);
// Vertices of the second (mirrored) half of the join are offset by one to get it to connect correctly
// in the middle, where the mirrored and unmirrored halves meet.
i -= mirror ? 1.0 : 0.0;
// Clamp to zero and repeat unused excess vertices.
i = max(0.0, i);
// Start with a default basis pointing along the segment with normal vector outward
float2 xBasis = tBC;
float2 yBasis = nBC * dirB;
// Default point is 0 along the segment, 1 (width unit) normal to it
float2 xy = float2(0);
if (i == res + 1.0) {
// pick off this one specific index to be the interior miter point
// If not div-by-zero, then sinB / (1 + cosB)
float m = cosB > -0.9999 ? (tAB.x * tBC.y - tAB.y * tBC.x) / (1.0 + cosB) : 0.0;
xy = float2(min(abs(m), min(lBC, lAB) / width), -1);
} else {
// Draw half of a join
float m2 = dot(miter, miter);
float lm = sqrt(m2);
yBasis = miter / lm;
xBasis = dirB * float2(yBasis.y, -yBasis.x);
bool isBevel = 1.0 > miterLimit * m2;
if (((int)i) % 2 == 0) {
// Outer joint points
if (roundOrCap || i != 0.0) {
// Round joins
float theta = -0.5 * (acos(cosB) * (clamp(i, 0.0, res) / res) - pi) * (isCap ? 2.0 : 1.0);
xy = float2(cos(theta), sin(theta));
if (isCap) {
// A special multiplier factor for turning 3-point rounds into square caps (but leave the
// y == 0.0 point unaffected)
if (xy.y > 0.001) xy *= _capScale;
}
} else {
// Miter joins
yBasis = bIsHairpin ? float2(0) : miter;
xy.y = isBevel ? 1.0 : 1.0 / m2;
}
} else {
// Offset the center vertex position to get bevel SDF correct
if (isBevel && !roundOrCap) {
xy.y = -1.0 + sqrt((1.0 + cosB) * 0.5);
}
}
}
// Point offset from main vertex position
float2 dP = float2x2(xBasis, yBasis) * xy;
// The varying generation code handles clamping, if needed
// Offset the (possibly swapped) B point by the scaled basis offset, restore w, then apply
// the transform.
out.position = pB;
out.position.xy += width * dP;
out.position *= pw;
out.position = transform * out.position;
return out;
}
// Corners of the unit square as two triangles (six vertices), indexed by vertex id in
// blitVertex.
constant static float2 quadVertices[6] = {
// First triangle
float2(0.0, 0.0),
float2(1.0, 0.0),
float2(0.0, 1.0),
// Second triangle
float2(1.0, 0.0),
float2(0.0, 1.0),
float2(1.0, 1.0)
};
// Rectangle passed to blitVertex. blitVertex maps origin + unit * size from [0, 1] to
// [-1, 1], so both fields are expressed as fractions of the render target.
struct MetalEngineRectangle {
float2 origin;
float2 size;
};
// Output of blitVertex: the vertex position and the texture coordinate sampled by
// blitFragment.
struct MetalEngineQuadVertexOut {
float4 position [[position]];
float2 uv;
};
// Vertex shader for a textured blit of `rect`.
//   rect - destination rectangle; origin + unit * size is remapped from [0, 1] to [-1, 1]
//   vid  - vertex id, used to index the six-entry quadVertices table
// The texture coordinate is the unit-square corner with its y component flipped.
vertex MetalEngineQuadVertexOut blitVertex(
    const device MetalEngineRectangle &rect [[ buffer(0) ]],
    unsigned int vid [[ vertex_id ]]
) {
    float2 corner = quadVertices[vid];

    float2 placed = float2(
        rect.origin.x + corner.x * rect.size.x,
        rect.origin.y + corner.y * rect.size.y
    );

    MetalEngineQuadVertexOut result;
    result.position = float4(-1.0 + placed.x * 2.0, -1.0 + placed.y * 2.0, 0.0, 1.0);
    result.uv = float2(corner.x, 1.0 - corner.y);
    return result;
}
// Fragment shader for the blit pass: returns the texture sampled at the interpolated uv
// using normalized coordinates, repeat addressing and linear filtering.
fragment half4 blitFragment(
    MetalEngineQuadVertexOut in [[stage_in]],
    texture2d<half> texture [[ texture(0) ]]
) {
    constexpr sampler blitSampler(coord::normalized, address::repeat, filter::linear);
    half4 texel = texture.sample(blitSampler, in.uv);
    return texel;
}