PiMaker · December 8, 2022 18:03
diff --git a/EnergiaRaymarch.shader b/EnergiaRaymarch.shader
 Shader "Custom/Energia v2 Toon Cutout" // VRChat fallback is "Toon Cutout", fully transparent
 {
    Properties
    {
        // for fallback only
        _MainTex ("[Fallback Only] Texture", 2D) = "transparent" {}
        _Color ("[Fallback Only] Color", Color) = (0, 0, 0, 0)
        _Cutoff ("[Fallback Only] Alpha Cutoff", Range(0.0, 1.0)) = 1.0

        // animateable
        _EnergiaEnabled ("Enabled", Float) = 0
        _EnergiaObjType1 ("Object Type 1", Float) = 0
        _EnergiaObjType2 ("Object Type 2", Float) = 0
        _EnergiaObjTypeLerp ("Object Type Lerp", Range(0.0, 1.0)) = 0

        // Note for use: Every object using this shader has to have a *unique* scale!
        // Also, to ensure consistency, make sure x, y and z scale are always equal.
    }
    SubShader
    {
        Tags {
            "Queue" = "Transparent+613" // we have to be alone in our queue for instancing!
            "RenderType" = "Opaque"
            "ForceNoShadowCasting" = "True"
            "IgnoreProjector" = "True"
        }

        ZWrite Off
        ZTest LEqual
        Cull Front // render on backfaces of cube to avoid disappearing when inside it
        Blend SrcAlpha OneMinusSrcAlpha

        Pass
        {
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag

            #pragma target 5.0
            #pragma fragmentoption ARB_precision_hint_fastest

            #pragma multi_compile_instancing
            #pragma instancing_options nolightprobe
            #pragma instancing_options nolightmap
            #pragma instancing_options nolodfade
            // keep in sync with ELEMS:
            #pragma instancing_options maxcount:8
            #pragma instancing_options forcemaxcount:8

            /* #pragma enable_d3d11_debug_symbols */

            #include "UnityCG.cginc"
            #include "UnityInstancing.cginc"

            // From: https://gist.github.com/mattatz/86fff4b32d198d0928d0fa4ff32cf6fa
            #include "Matrix.cginc"
            // From: https://gist.github.com/mattatz/40a91588d5fb38240403f198a938a593
            #include "Quaternion.cginc"

            #define ELEMS 8
            #define MAX_STEPS 24
            #define EPSILON 0.0018f

            // these parameters are available to animate per instance
            UNITY_INSTANCING_BUFFER_START(Props)
                UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaObjType1)
                UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaObjType2)
                UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaObjTypeLerp)
                UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaEnabled)
            UNITY_INSTANCING_BUFFER_END(Props)

            /*
             * STRUCTS
             */
            // Per-vertex input consumed by vert().
            struct appdata {
                float4 vertex : POSITION; // object-space position (vert() multiplies it by unity_ObjectToWorld)
                float2 uv : TEXCOORD0; // declared, but not read by vert()

                // supplies the instanceID member that vert() reads and overwrites
                UNITY_VERTEX_INPUT_INSTANCE_ID
            };

            // Vertex-to-fragment payload. Besides the usual position/ray data it carries
            // the gathered state of up to ELEMS instances, so every fragment can evaluate
            // the whole scene on its own.
            struct v2f {
                float4 clipPos : SV_POSITION;
                float4 screenPos : TEXCOORD0; // from ComputeNonStereoScreenPos(clipPos)
                float4 ray : TEXCOORD1; // xyz = world-space vertex minus camera position, w = clipPos.w

                // index of this invocation's own instance inside the elemsN arrays
                uint localIndex : TEXCOORD2;

                // transfer unchanging data from vert to frag:
                // 0: world position (w=scale)
                // 1: rotation quaternion
                // 2: x=type1 y=type2 z=lerp w=disabled (when 0)
                // the starting semantic indices (3, 11, 19) are spaced ELEMS (8) apart,
                // so they have to be adjusted whenever ELEMS changes
                float4 elems0[ELEMS] : TEXCOORD3;
                half4 elems1[ELEMS] : TEXCOORD11;
                half4 elems2[ELEMS] : TEXCOORD19;

                UNITY_VERTEX_INPUT_INSTANCE_ID
                UNITY_VERTEX_OUTPUT_STEREO
            };

            // Fragment output: frag() writes an explicit depth value next to the color.
            struct f2s {
                float depth : SV_Depth; // set by frag() from the projected hit position
                float4 color : SV_Target; // rgb + alpha of the shaded hit
            };

            // these are used to access vertex output data in the fragment shader,
            // since we want to access the v2f struct directly, to avoid copying data (and
            // further array writes, since these are very poorly supported and heavy it seems)
            // 'input' is a v2f, 'idx' the element slot (0 .. ELEMS-1)

            // element center: elems0.xyz
            #define EL_CENTER(input, idx) input.elems0[idx].xyz
            // element scale: elems0.w
            #define EL_SCALE(input, idx) input.elems0[idx].w
            // element rotation quaternion (stored already inverted by set_o): elems1
            #define EL_QUATERNION(input, idx) input.elems1[idx]
            // first and second shape type and the blend factor between them: elems2.xyz
            #define EL_OBJ_TYPE_1(input, idx) input.elems2[idx].x
            #define EL_OBJ_TYPE_2(input, idx) input.elems2[idx].y
            #define EL_OBJ_TYPE_LERP(input, idx) input.elems2[idx].z
            // true for slots that were never filled: set_o writes 1 into .w, empty slots stay 0
            #define EL_DISABLED(input, idx) (input.elems2[idx].w < 0.5f)

            /*
             * STATICS
             */
            // Returns the camera position by reading the fourth column (rows 0..2)
            // of unity_CameraToWorld.
            float3 get_camera_pos() {
                return float3(
                    unity_CameraToWorld[0][3],
                    unity_CameraToWorld[1][3],
                    unity_CameraToWorld[2][3]
                );
            }
            // used instead of _WorldSpaceCameraPos, which is broken in VR (single pass stereo)
            static float3 camera_pos = get_camera_pos();
            // mirror-camera detection (VRChat): either of these projection entries is non-zero
            static bool isInMirror = UNITY_MATRIX_P._31 != 0 || UNITY_MATRIX_P._32 != 0;

            /*
             * VERTEX
             */
            // Fills element slot i of the vertex output with one instance's data.
            //   o        - vertex output being assembled
            //   i        - slot to write (callers pass a literal, see the switch in vert)
            //   pos      - instance center, stored in elems0.xyz
            //   scale    - instance scale, stored in elems0.w
            //   rotation - instance rotation; its inverse is stored in elems1
            // The type/lerp values come from the instanced properties of whichever
            // instance is currently set up. elems2.w = 1 marks the slot as in use.
            // (matrix parameters did not work for the original author, hence the
            // three flat elemsN arrays - see the EL_* defines for the layout)
            void set_o(inout v2f o, uint i, float3 pos, float scale, float4 rotation) {
                o.elems0[i] = float4(pos, scale);
                o.elems1[i] = q_inverse(rotation);
                o.elems2[i] = half4(
                    UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaObjType1),
                    UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaObjType2),
                    UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaObjTypeLerp),
                    1
                );
            }

            // Vertex entry point.
            // Besides projecting the vertex, every invocation walks over all ELEMS
            // instance slots of the current batch, collects the enabled ones into the
            // elemsN output arrays (packed at the front) and records in o.localIndex
            // where its own instance ended up. Without UNITY_INSTANCING_ENABLED only
            // the clip-space position is written.
            v2f vert(appdata v) {
                v2f o;
                // start out with everything at 0, including all arrays;
                // this is necessary because set_o might not be called for all indices
                // up to ELEMS, and thus we'd have uninitialized outputs
                UNITY_INITIALIZE_OUTPUT(v2f, o);

 #ifdef UNITY_INSTANCING_ENABLED

                UNITY_SETUP_INSTANCE_ID(v);
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);

                // remember our real instance id; v.instanceID gets overwritten in the loop below
                uint local_id = unity_InstanceID;

                if (local_id >= ELEMS) {
                    // discard
                    o.clipPos = float4(0, 0, 0, 0);
                } else {
                    o.clipPos = UnityObjectToClipPos(v.vertex);
                }
                o.screenPos = ComputeNonStereoScreenPos(o.clipPos);

                // un-normalized view ray through this vertex; frag() divides by w and normalizes
                float4 worldPos = mul(unity_ObjectToWorld, v.vertex);
                o.ray.xyz = worldPos.xyz - camera_pos.xyz;
                o.ray.w = o.clipPos.w;

                float4 local_worldCenter = mul(unity_ObjectToWorld, float4(0, 0, 0, 1));

                float4 rotation;
                float3 unused_position;
                float3 scale3;
                decompose(unity_ObjectToWorld, unused_position, rotation, scale3);
                // the smallest scale axis doubles as this instance's unique id
                float local_uuid = min(scale3.x, min(scale3.y, scale3.z));

                // number of slots filled so far == next free slot
                uint assigned = 0;

                // we use the bits of this uint as a sort of bloom filter to determine which
                // uuid's we've already encountered
                uint seen = 0;

                // the idea here is that Unity sets parameters for all instances at the same time,
                // so by "pretending" to be a different instance, we can access their data

                [unroll] // array access means we can't [loop]
                for (uint i = 0; i < ELEMS; i++) {
                    // start out disabled
                    o.elems0[i] = 0;
                    o.elems1[i] = 0;
                    o.elems2[i] = 0;

                    v.instanceID = i;
                    UNITY_SETUP_INSTANCE_ID(v);

                    // unity_ObjectToWorld is now the o2w matrix of instance i
                    decompose(unity_ObjectToWorld, unused_position, rotation, scale3);
                    float scale = min(scale3.x, min(scale3.y, scale3.z));
                    float4 position = mul(unity_ObjectToWorld, float4(0, 0, 0, 1));

                    float enabled = UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaEnabled) > 0.001f;
                    bool pos_is_diff = distance(position, local_worldCenter) > 0.001f;
                    bool is_not_000 = distance(position, float3(0, 0, 0)) > 0.001f;

                    bool uuid_is_diff = distance(scale, local_uuid) > 0.001f;
                    // raw float bits of the scale; 'stale' is true when all of those bits are
                    // already set in 'seen', which can also happen for a not-yet-seen scale
                    uint uuid_bits = asuint(scale);
                    bool stale = (seen & uuid_bits) == uuid_bits;
                    bool valid_uuid = (!stale && uuid_is_diff);

                    if (enabled && is_not_000 && // always ignore if its at (0, 0, 0) or disabled
                        (i == local_id || // otherwise always accept if this is the current local instance
                        (pos_is_diff && valid_uuid))) // or if we're sure it's a different one, and is valid
                    {
                        // needs a switch because of array write, compiler be dumb
                        #define C(x) case x: { set_o(o, x, position, scale, rotation); break; }
                        switch (assigned) {
                            C(0) C(1) C(2) C(3)
                            C(4) C(5) C(6) C(7)
                        }

                        // if our own instance is never accepted, localIndex keeps the 0
                        // it got from UNITY_INITIALIZE_OUTPUT
                        if (i == local_id) {
                            o.localIndex = assigned;
                        }

                        seen |= uuid_bits;
                        assigned++;
                    }
                }

                // just to be safe, set id back and only transfer now
                v.instanceID = local_id;
                UNITY_SETUP_INSTANCE_ID(v);
                UNITY_TRANSFER_INSTANCE_ID(v, o);

                return o;
 #else
                // non-instanced variant: only project the vertex, all other outputs stay 0
                o.clipPos = UnityObjectToClipPos(v.vertex);
                return o;
 #endif //UNITY_INSTANCING_ENABLED
            }

            /*
             * RAYMARCHING
             * all of these based on:
             * https://www.iquilezles.org/www/articles/distfunctions/distfunctions.htm
             */
            // Signed distance from pos to a sphere of the given radius around center.
            float sdf_sphere(float3 pos, float3 center, float radius) {
                float3 toCenter = center - pos;
                return length(toCenter) - radius;
            }

            // Signed distance from pos to an axis-aligned cube around center,
            // s being the half extent on every axis.
            float sdf_box(float3 pos, float3 center, float s)
            {
                float3 excess = abs(center - pos) - s;
                // distance when outside the cube
                float outer = length(max(excess, 0.0));
                // negative depth when inside the cube
                float inner = min(max(excess.x, max(excess.y, excess.z)), 0.0);
                return outer + inner;
            }

            // Distance estimate from pos to an octahedron of size s around center.
            float sdf_octahedron(float3 pos, float3 center, float s)
            {
                static const float scaleFactor = 0.57735027f;

                float3 offset = abs(pos - center);
                float planeDist = offset.x + offset.y + offset.z - s;
                return planeDist * scaleFactor;
            }

            // Signed distance from pos to the wireframe ("bounding box" frame) of a cube
            // around center with half extent s; the frame thickness is the constant below.
            float sdf_bounding_box(float3 pos, float3 center, float s)
            {
                static const float thickness = 0.004f;

                float3 p = abs(center - pos) - s;
                float3 q = abs(p + thickness) - thickness;

                // one term per axis along which the frame edges run
                float alongX = length(max(float3(p.x, q.y, q.z), 0.0)) + min(max(p.x, max(q.y, q.z)), 0.0);
                float alongY = length(max(float3(q.x, p.y, q.z), 0.0)) + min(max(q.x, max(p.y, q.z)), 0.0);
                float alongZ = length(max(float3(q.x, q.y, p.z), 0.0)) + min(max(q.x, max(q.y, p.z)), 0.0);

                return min(min(alongX, alongY), alongZ);
            }

            // Dispatches to the distance function for shape type eff:
            // 0 = sphere, 1 = octahedron, 2 = box, 3 = box frame.
            // scale is multiplied by a per-shape factor to get the shape size.
            float sdf_for_effect(uint eff, float3 pos, float3 center, float scale) {
                float roundSize = 0.26f * scale;
                float boxSize = 0.24f * scale;

                switch (eff) {
                    case 0:
                        return sdf_sphere(pos, center, roundSize);
                    case 1:
                        return sdf_octahedron(pos, center, roundSize);
                    case 2:
                        return sdf_box(pos, center, boxSize);
                    case 3:
                        return sdf_bounding_box(pos, center, boxSize);
                    // intentionally no default: per the original author the compiler
                    // figures out that 'eff' is always one of the values above
                    // default:
                    //     return 999999;
                }
            }

            // Smooth minimum of a and b with a cubic falloff.
            // k is the blend width: once |a - b| >= k the result is plain min(a, b).
            float sminCubic(float a, float b, float k) {
                float overlap = max(k - abs(a - b), 0.0f);
                float h = overlap / k;
                float correction = h * h * h * k * (1.0f / 6.0f);
                return min(a, b) - correction;
            }

            // Rotates pos around center by the quaternion rot.
            // The vertex shader stores the quaternion already inverted, because the
            // sample point is rotated here rather than the object itself.
            float3 rotate(float3 pos, float3 center, float4 rot) {
                float3 relative = pos - center;
                return rotate_vector(relative, rot) + center;
            }

            // Distance from pos to element idx, blending between its two configured
            // shape types by the element's lerp factor.
            float sdf_effect_selector(float3 pos, v2f i, uint idx) {
                const float blendEps = EPSILON*0.1f;

                float3 center = EL_CENTER(i, idx);
                float scale = EL_SCALE(i, idx);

                // four shape types exist, so the rounded type value is wrapped with 'mod 4'
                uint typeA = (uint)(EL_OBJ_TYPE_1(i, idx) + 0.1f) & 0x3;
                uint typeB = (uint)(EL_OBJ_TYPE_2(i, idx) + 0.1f) & 0x3;

                // identical types need no blending at all
                float t = EL_OBJ_TYPE_LERP(i, idx);
                if (typeA == typeB) {
                    t = 0;
                }

                // at either end of the blend range only one of the two shapes contributes
                float distA = t > 1 - blendEps ? 0 :
                    sdf_for_effect(typeA, pos, center, scale);
                float distB = t < blendEps ? distA :
                    sdf_for_effect(typeB, pos, center, scale);

                return lerp(distA, distB, t);
            }

            // Scene distance function - this defines what gets rendered.
            // Returns x = smooth-min blended distance over all enabled elements,
            //         y = index of the element with the smallest individual distance
            //             (-1 when no element is enabled).
            float2 sdf(float3 pos, v2f i) {
                float blended = 999999;
                float nearestDist = 999999;
                float nearestIdx = -1;

                for (uint n = 0; n < ELEMS; n++) {
                    // the vertex shader packs enabled elements at the front, so the first
                    // disabled slot ends the scene.
                    // Keep this an early return: per the original author a 'continue'
                    // here KILLS performance!
                    if (EL_DISABLED(i, n)) return float2(blended, nearestIdx);

                    float3 localPos = rotate(pos, EL_CENTER(i, n), EL_QUATERNION(i, n));
                    float elemDist = sdf_effect_selector(localPos, i, n);
                    blended = sminCubic(blended, elemDist, 0.1);

                    // the closest element is the one reported as hit, should the caller
                    // decide that this sample is a hit
                    if (elemDist < nearestDist) {
                        nearestDist = elemDist;
                        nearestIdx = n;
                    }
                }

                return float2(blended, nearestIdx);
            }

            // Marches a ray through the scene until it hits something or misses entirely;
            // this is where most performance is used.
            //   start      - ray origin
            //   dir        - ray direction (the caller passes a normalized vector)
            //   i          - vertex output carrying the element data
            //   screenDist - distance of the pixel from the screen center, drives the
            //                "foveated" hit threshold
            //   farPlane   - maximum distance to march
            // Returns float3(status, travelled, tag):
            //   status 0 = hit (travelled = distance along the ray, tag = element index from sdf),
            //   status 1 = miss, status 2 = ran out of steps.
            float3 raymarch(float3 start, float3 dir, v2f i, float screenDist, float farPlane) {
                float3 cur = start;
                // the first step skips ahead by _ProjectionParams.y
                float dist = _ProjectionParams.y;
                float lastDist = 999999;
                float travelled = 0;

                farPlane = min(farPlane, _ProjectionParams.z * 0.5f);

                // "foveated" rendering:
                // increase EPSILON as we get closer to the edge of the screen
                float epsMod = saturate((1 - screenDist) + 0.10f);
                float eps = EPSILON / epsMod;

                // the counter must start at 0 explicitly: it used to be declared without
                // an initializer, leaving the number of steps undefined
                for (uint j = 0; j < MAX_STEPS; j++) {
                    cur += dir * dist;
                    travelled += dist;

                    float2 res = sdf(cur, i);
                    dist = res.x;

                    if (dist < eps) {
                        // hit, return distance to hit and 'tag':
                        // res.y (tag) defines which object instance we hit, approximately anyway
                        return float3(0, travelled, res.y);

                    } else if (travelled > farPlane ||
                               (dist > 0.5f && dist > lastDist * 1.988)) // *
                    {
                        // miss
                        return float3(1, 0, 0);
                    }

                    // * this optimization is not mathematically sound for non-convex shapes,
                    //   it would only be if the second parameter were 2.0f, but alas, it
                    //   produces very minimal artifacts and increases performance ~2-fold

                    lastDist = dist;
                }

                // overrun, miss
                return float3(2, 0, 0);
            }

            // Estimates the surface normal at pos by forward differences of the scene
            // distance: one sdf call at pos plus one per axis (4 in total per hit).
            float3 estimateNormal(float3 pos, v2f i) {
                float base = sdf(pos, i).x;
                float3 shifted = float3(
                    sdf(pos + float3(EPSILON, 0, 0), i).x,
                    sdf(pos + float3(0, EPSILON, 0), i).x,
                    sdf(pos + float3(0, 0, EPSILON), i).x
                );
                return normalize(shifted - base);
            }

            /*
             * HELPERS
             */
            // Intersects the ray (rayStart, direction ray) with the plane that passes
            // through pos and has normal norm; returns the intersection point.
            // No guard against a ray parallel to the plane (dot(ray, norm) == 0).
            float3 planeIntersect(float3 rayStart, float3 ray, float3 pos, float3 norm) {
                float startOffset = dot(rayStart - pos, norm);
                float rayAlongNormal = dot(ray, norm);
                float t = startOffset / rayAlongNormal;
                return rayStart - ray * t;
            }

            // Converts a hue H to a fully saturated RGB color (frag() passes a value
            // in [0, 1)). In mirrors the hue is flipped (1 - H), because we can.
            float3 hue_to_rgb(float H)
            {
                float hue = isInMirror ? 1 - H : H;
                float h6 = hue * 6;
                return saturate(float3(
                    abs(h6 - 3) - 1,
                    2 - abs(h6 - 2),
                    2 - abs(h6 - 4)
                ));
            }

            // maybe for later use sometime?
            /* float rand(in float2 uv) */
            /* { */
            /*     float2 noise = (frac(sin(dot(uv ,float2(12.9898,78.233)*2.0)) * 43758.5453)); */
            /*     return abs(noise.x + noise.y) * 0.5; */
            /* } */

            /*
             * FRAGMENT
             */
            // Fragment entry point.
            // 1. Decides whether this instance "owns" the pixel: among all enabled
            //    elements, the one whose center lies closest to where the view ray
            //    crosses a camera-facing plane through that center wins; every other
            //    instance discards, so each pixel is shaded at most once.
            // 2. Raymarches the scene along the view ray and discards on a miss.
            // 3. Shades the hit (time/distance based hue plus a normal based edge light)
            //    and writes the hit's clip-space depth.
            // Without UNITY_INSTANCING_ENABLED it just outputs opaque red.
            f2s frag(v2f i)
            {
                f2s ret;
                ret.color = ret.depth = 1;

 #ifndef UNITY_INSTANCING_ENABLED
                ret.color = float4(1, 0, 0, 1);
                return ret;
 #endif

                UNITY_SETUP_INSTANCE_ID(i);
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);

                uint idx = i.localIndex;

                // calculate view ray from interpolated vertex ray
                float3 rayStart = camera_pos;
                float3 rayDir = normalize((i.ray.xyz / i.ray.w).xyz);

                float min_dist = 999999;
                uint min_dist_idx = idx;
                float far_plane = 0;

                // third row of the view matrix, used as the normal of the ownership planes.
                // This has to be a float3: it used to be declared as a scalar 'float',
                // which kept only the x component and then broadcast it to (x, x, x)
                // when passed to planeIntersect
                float3 camera_normal = normalize(UNITY_MATRIX_V[2].xyz);

                for (uint j = 0; j < ELEMS; j++) {
                    if (EL_DISABLED(i, j)) break;

                    // avoid double drawing if ray would go through multiple boxes
                    float3 center = EL_CENTER(i, j);
                    float3 intersect = planeIntersect(
                        rayStart,
                        rayDir * 100000,
                        center,
                        camera_normal
                    );
                    float inter_dist = distance(center, intersect);
                    if (inter_dist < min_dist)
                    {
                        min_dist = inter_dist;
                        min_dist_idx = j;
                    }
                    // the farthest element center bounds how far the raymarch has to go
                    float cam_dist = distance(center, rayStart);
                    if (cam_dist > far_plane) {
                        far_plane = cam_dist;
                    }
                }

                if (min_dist_idx != idx) {
                    // the important part here is that for each pixel on the screen
                    // always *zero or one* (but exactly *one* if hit) instances
                    // of this frag shader will go past this point
                    discard;
                }

                // used to debug the above:
                /* ret.color = float4( */
                /*     unity_InstanceID == 0 ? 1 : 0, */
                /*     unity_InstanceID == 1 ? 1 : 0, */
                /*     unity_InstanceID == 2 ? 1 : 0, */
                /*     0.4 */
                /* ); */
                /* return ret; */

                // now do the actual SDF raymarch, aka. the cool part
                far_plane += EL_SCALE(i, idx);
                float screenDist = distance(i.screenPos.xy/i.screenPos.w, float2(0.5f, 0.5f));
                float3 raymarchResult = raymarch(rayStart, rayDir, i, screenDist, far_plane);

                // x is non-zero for both "miss" (1) and "ran out of steps" (2)
                float miss = raymarchResult.x;
                if (miss) {
                    discard;
                }

                // calculate world-space position of our hit result
                float dist = raymarchResult.y;
                float3 hitPos = rayStart + rayDir * dist;

                // z carries the hit element's index as a float; nudge before truncating
                float3 hitObjCenter = EL_CENTER(i, (uint)(raymarchResult.z + EPSILON*2));
                float colorOffset = distance(hitPos, hitObjCenter) * 2.0f;

                // foveated normal calculation: fade out towards edges, save sdf() calls
                float normalIntensity = screenDist < 0.20f ? 1 :
                    (screenDist > 0.40f ? 0 : 1 - (screenDist - 0.20f) * 6);
                float3 normal = normalIntensity < EPSILON ? float3(0, 0, 0) :
                    lerp(float3(0, 0, 0), estimateNormal(hitPos, i), normalIntensity);
                float angle = acos(dot(normal, rayDir));

                // calculate shading/color based on _Time and object normals
                float edgeLightAdd = 1 - saturate(angle/PI + 0.25f);
                float3 rgb = hue_to_rgb(frac(_Time.x + colorOffset));
                ret.color = float4(rgb, 0.7);
                ret.color.rgb += edgeLightAdd;

                // make transparent - this works because of our double-draw-avoidance above
                ret.color.a = lerp(ret.color.a, 1, saturate(angle - PI + 0.70));

                // and finally, calculate the depth of our hit in clip-space, to make
                // object intersection work
                float4 depthPos = mul(UNITY_MATRIX_VP, float4(hitPos, 1));
                ret.depth = depthPos.z / depthPos.w;

                return ret;
            }
            ENDCG
        }
    }
 }
	Shader "Custom/Energia v2 Toon Cutout" // VRChat fallback is "Toon Cutout", fully transparent
	{
	Properties
	{
	// for fallback only
	_MainTex ("[Fallback Only] Texture", 2D) = "transparent" {}
	_Color ("[Fallback Only] Color", Color) = (0, 0, 0, 0)
	_Cutoff ("[Fallback Only] Alpha Cutoff", Range(0.0, 1.0)) = 1.0

	// animateable
	_EnergiaEnabled ("Enabled", Float) = 0
	_EnergiaObjType1 ("Object Type 1", Float) = 0
	_EnergiaObjType2 ("Object Type 2", Float) = 0
	_EnergiaObjTypeLerp ("Object Type Lerp", Range(0.0, 1.0)) = 0

	// Note for use: Every object using this shader has to have a unique scale!
	// Also, to ensure consistency, make sure x, y and z scale are always equal.
	}
	SubShader
	{
	Tags {
	"Queue" = "Transparent+613" // we have to be alone in our queue for instancing!
	"RenderType" = "Opaque"
	"ForceNoShadowCasting" = "True"
	"IgnoreProjector" = "True"
	}

	ZWrite Off
	ZTest LEqual
	Cull Front // render on backfaces of cube to avoid disappearing when inside it
	Blend SrcAlpha OneMinusSrcAlpha

	Pass
	{
	CGPROGRAM
	#pragma vertex vert
	#pragma fragment frag

	#pragma target 5.0
	#pragma fragmentoption ARB_precision_hint_fastest

	#pragma multi_compile_instancing
	#pragma instancing_options nolightprobe
	#pragma instancing_options nolightmap
	#pragma instancing_options nolodfade
	// keep in sync with ELEMS:
	#pragma instancing_options maxcount:8
	#pragma instancing_options forcemaxcount:8

	/* #pragma enable_d3d11_debug_symbols */

	#include "UnityCG.cginc"
	#include "UnityInstancing.cginc"

	// From: https://gist.github.com/mattatz/86fff4b32d198d0928d0fa4ff32cf6fa
	#include "Matrix.cginc"
	// From: https://gist.github.com/mattatz/40a91588d5fb38240403f198a938a593
	#include "Quaternion.cginc"

	#define ELEMS 8
	#define MAX_STEPS 24
	#define EPSILON 0.0018f

	// these parameters are available to animate per instance
	UNITY_INSTANCING_BUFFER_START(Props)
	UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaObjType1)
	UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaObjType2)
	UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaObjTypeLerp)
	UNITY_DEFINE_INSTANCED_PROP(float, _EnergiaEnabled)
	UNITY_INSTANCING_BUFFER_END(Props)

	/*
	* STRUCTS
	*/
	// Per-vertex input consumed by vert().
	struct appdata {
	float4 vertex : POSITION; // object-space position (vert() multiplies it by unity_ObjectToWorld)
	float2 uv : TEXCOORD0; // declared, but not read by vert()

	// supplies the instanceID member that vert() reads and overwrites
	UNITY_VERTEX_INPUT_INSTANCE_ID
	};

	// Vertex-to-fragment payload: position/ray data plus the gathered state of up
	// to ELEMS instances, so every fragment can evaluate the whole scene.
	struct v2f {
	float4 clipPos : SV_POSITION;
	float4 screenPos : TEXCOORD0; // from ComputeNonStereoScreenPos(clipPos)
	float4 ray : TEXCOORD1; // xyz = world-space vertex minus camera position, w = clipPos.w

	// index of this invocation's own instance inside the elemsN arrays
	uint localIndex : TEXCOORD2;

	// transfer unchanging data from vert to frag:
	// 0: world position (w=scale)
	// 1: rotation quaternion
	// 2: x=type1 y=type2 z=lerp w=disabled (when 0)
	// the starting semantic indices (3, 11, 19) are spaced ELEMS (8) apart,
	// so they have to be adjusted whenever ELEMS changes
	float4 elems0[ELEMS] : TEXCOORD3;
	half4 elems1[ELEMS] : TEXCOORD11;
	half4 elems2[ELEMS] : TEXCOORD19;

	UNITY_VERTEX_INPUT_INSTANCE_ID
	UNITY_VERTEX_OUTPUT_STEREO
	};

	// Fragment output: an explicit depth value next to the color.
	struct f2s {
	float depth : SV_Depth; // depth written by the fragment stage
	float4 color : SV_Target; // rgb + alpha
	};

	// these are used to access vertex output data in the fragment shader,
	// since we want to access the v2f struct directly, to avoid copying data (and
	// further array writes, since these are very poorly supported and heavy it seems)
	// 'input' is a v2f, 'idx' the element slot (0 .. ELEMS-1)

	// element center: elems0.xyz
	#define EL_CENTER(input, idx) input.elems0[idx].xyz
	// element scale: elems0.w
	#define EL_SCALE(input, idx) input.elems0[idx].w
	// element rotation quaternion (stored already inverted by set_o): elems1
	#define EL_QUATERNION(input, idx) input.elems1[idx]
	// first and second shape type and the blend factor between them: elems2.xyz
	#define EL_OBJ_TYPE_1(input, idx) input.elems2[idx].x
	#define EL_OBJ_TYPE_2(input, idx) input.elems2[idx].y
	#define EL_OBJ_TYPE_LERP(input, idx) input.elems2[idx].z
	// true for slots that were never filled: set_o writes 1 into .w, empty slots stay 0
	#define EL_DISABLED(input, idx) (input.elems2[idx].w < 0.5f)

	/*
	* STATICS
	*/
	// Returns the camera position by reading the fourth column (rows 0..2)
	// of unity_CameraToWorld.
	float3 get_camera_pos() {
		float3 worldCam;
		worldCam.x = unity_CameraToWorld[0][3];
		worldCam.y = unity_CameraToWorld[1][3];
		worldCam.z = unity_CameraToWorld[2][3];
		return worldCam;
	}
	// _WorldSpaceCameraPos is broken in VR (single pass stereo)
	static float3 camera_pos = get_camera_pos();
	// detects VRChat mirror cameras: either of these projection entries is non-zero
	// (the logical-or operator had been garbled into an escaped "\|\|" sequence)
	static bool isInMirror = UNITY_MATRIX_P._31 != 0 || UNITY_MATRIX_P._32 != 0;

	/*
	* VERTEX
	*/
	// Fills element slot i of the vertex output with one instance's data.
	//   pos      - instance center, stored in elems0.xyz
	//   scale    - instance scale, stored in elems0.w
	//   rotation - instance rotation; its inverse is stored in elems1
	// The type/lerp values come from the instanced properties of whichever
	// instance is currently set up. elems2.w = 1 marks the slot as in use.
	// (matrix parameters did not work for the original author, hence the
	// three flat elemsN arrays - see the EL_* defines for the layout)
	void set_o(inout v2f o, uint i, float3 pos, float scale, float4 rotation) {
		o.elems0[i] = float4(pos, scale);
		o.elems1[i] = q_inverse(rotation);
		o.elems2[i] = half4(
			UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaObjType1),
			UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaObjType2),
			UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaObjTypeLerp),
			1
		);
	}

	// Vertex entry point.
	// Besides projecting the vertex, every invocation walks over all ELEMS
	// instance slots of the current batch, collects the enabled ones into the
	// elemsN output arrays (packed at the front) and records in o.localIndex
	// where its own instance ended up. Without UNITY_INSTANCING_ENABLED only
	// the clip-space position is written.
	// Note: the "||" and "|=" operators below had been garbled into escaped
	// "\|" sequences, which is not valid shader code; they are restored here.
	v2f vert(appdata v) {
		v2f o;
		// start out with everything at 0, including all arrays;
		// this is necessary because set_o might not be called for all indices
		// up to ELEMS, and thus we'd have uninitialized outputs
		UNITY_INITIALIZE_OUTPUT(v2f, o);

	#ifdef UNITY_INSTANCING_ENABLED

		UNITY_SETUP_INSTANCE_ID(v);
		UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);

		// remember our real instance id; v.instanceID gets overwritten in the loop below
		uint local_id = unity_InstanceID;

		if (local_id >= ELEMS) {
			// discard
			o.clipPos = float4(0, 0, 0, 0);
		} else {
			o.clipPos = UnityObjectToClipPos(v.vertex);
		}
		o.screenPos = ComputeNonStereoScreenPos(o.clipPos);

		// un-normalized view ray through this vertex
		float4 worldPos = mul(unity_ObjectToWorld, v.vertex);
		o.ray.xyz = worldPos.xyz - camera_pos.xyz;
		o.ray.w = o.clipPos.w;

		float4 local_worldCenter = mul(unity_ObjectToWorld, float4(0, 0, 0, 1));

		float4 rotation;
		float3 unused_position;
		float3 scale3;
		decompose(unity_ObjectToWorld, unused_position, rotation, scale3);
		// the smallest scale axis doubles as this instance's unique id
		float local_uuid = min(scale3.x, min(scale3.y, scale3.z));

		// number of slots filled so far == next free slot
		uint assigned = 0;

		// we use the bits of this uint as a sort of bloom filter to determine which
		// uuid's we've already encountered
		uint seen = 0;

		// the idea here is that Unity sets parameters for all instances at the same time,
		// so by "pretending" to be a different instance, we can access their data

		[unroll] // array access means we can't [loop]
		for (uint i = 0; i < ELEMS; i++) {
			// start out disabled
			o.elems0[i] = 0;
			o.elems1[i] = 0;
			o.elems2[i] = 0;

			v.instanceID = i;
			UNITY_SETUP_INSTANCE_ID(v);

			// unity_ObjectToWorld is now the o2w matrix of instance i
			decompose(unity_ObjectToWorld, unused_position, rotation, scale3);
			float scale = min(scale3.x, min(scale3.y, scale3.z));
			float4 position = mul(unity_ObjectToWorld, float4(0, 0, 0, 1));

			float enabled = UNITY_ACCESS_INSTANCED_PROP(Props, _EnergiaEnabled) > 0.001f;
			bool pos_is_diff = distance(position, local_worldCenter) > 0.001f;
			bool is_not_000 = distance(position, float3(0, 0, 0)) > 0.001f;

			bool uuid_is_diff = distance(scale, local_uuid) > 0.001f;
			// raw float bits of the scale; 'stale' is true when all of those bits are
			// already set in 'seen', which can also happen for a not-yet-seen scale
			uint uuid_bits = asuint(scale);
			bool stale = (seen & uuid_bits) == uuid_bits;
			bool valid_uuid = (!stale && uuid_is_diff);

			if (enabled && is_not_000 && // always ignore if its at (0, 0, 0) or disabled
				(i == local_id || // otherwise always accept if this is the current local instance
				(pos_is_diff && valid_uuid))) // or if we're sure it's a different one, and is valid
			{
				// needs a switch because of array write, compiler be dumb
				#define C(x) case x: { set_o(o, x, position, scale, rotation); break; }
				switch (assigned) {
					C(0) C(1) C(2) C(3)
					C(4) C(5) C(6) C(7)
				}

				// if our own instance is never accepted, localIndex keeps the 0
				// it got from UNITY_INITIALIZE_OUTPUT
				if (i == local_id) {
					o.localIndex = assigned;
				}

				seen |= uuid_bits;
				assigned++;
			}
		}

		// just to be safe, set id back and only transfer now
		v.instanceID = local_id;
		UNITY_SETUP_INSTANCE_ID(v);
		UNITY_TRANSFER_INSTANCE_ID(v, o);

		return o;
	#else
		// non-instanced variant: only project the vertex, all other outputs stay 0
		o.clipPos = UnityObjectToClipPos(v.vertex);
		return o;
	#endif //UNITY_INSTANCING_ENABLED
	}

	/*
	* RAYMARCHING
	* all of these based on:
	* https://www.iquilezles.org/www/articles/distfunctions/distfunctions.htm
	*/
	// Signed distance from pos to a sphere of the given radius around center.
	float sdf_sphere(float3 pos, float3 center, float radius) {
		float3 toCenter = center - pos;
		return length(toCenter) - radius;
	}

	// Signed distance from pos to an axis-aligned cube around center,
	// s being the half extent on every axis.
	float sdf_box(float3 pos, float3 center, float s)
	{
		float3 excess = abs(center - pos) - s;
		// distance when outside the cube
		float outer = length(max(excess, 0.0));
		// negative depth when inside the cube
		float inner = min(max(excess.x, max(excess.y, excess.z)), 0.0);
		return outer + inner;
	}

	// Distance estimate from pos to an octahedron of size s around center.
	float sdf_octahedron(float3 pos, float3 center, float s)
	{
		static const float scaleFactor = 0.57735027f;

		float3 offset = abs(pos - center);
		float planeDist = offset.x + offset.y + offset.z - s;
		return planeDist * scaleFactor;
	}

	// Signed distance from pos to the wireframe ("bounding box" frame) of a cube
	// around center with half extent s; the frame thickness is the constant below.
	float sdf_bounding_box(float3 pos, float3 center, float s)
	{
		static const float thickness = 0.004f;

		float3 p = abs(center - pos) - s;
		float3 q = abs(p + thickness) - thickness;

		// one term per axis along which the frame edges run
		float alongX = length(max(float3(p.x, q.y, q.z), 0.0)) + min(max(p.x, max(q.y, q.z)), 0.0);
		float alongY = length(max(float3(q.x, p.y, q.z), 0.0)) + min(max(q.x, max(p.y, q.z)), 0.0);
		float alongZ = length(max(float3(q.x, q.y, p.z), 0.0)) + min(max(q.x, max(q.y, p.z)), 0.0);

		return min(min(alongX, alongY), alongZ);
	}

	// Dispatches to the distance function for shape type eff:
	// 0 = sphere, 1 = octahedron, 2 = box, 3 = box frame.
	// scale is multiplied by a per-shape factor to get the shape size.
	float sdf_for_effect(uint eff, float3 pos, float3 center, float scale) {
		float roundSize = 0.26f * scale;
		float boxSize = 0.24f * scale;

		switch (eff) {
			case 0:
				return sdf_sphere(pos, center, roundSize);
			case 1:
				return sdf_octahedron(pos, center, roundSize);
			case 2:
				return sdf_box(pos, center, boxSize);
			case 3:
				return sdf_bounding_box(pos, center, boxSize);
			// intentionally no default: per the original author the compiler
			// figures out that 'eff' is always one of the values above
			// default:
			// return 999999;
		}
	}

	// Smooth minimum of a and b with a cubic falloff.
	// k is the blend width: once |a - b| >= k the result is plain min(a, b).
	float sminCubic(float a, float b, float k) {
		float overlap = max(k - abs(a - b), 0.0f);
		float h = overlap / k;
		float correction = h * h * h * k * (1.0f / 6.0f);
		return min(a, b) - correction;
	}

	// Rotates pos around center by the quaternion rot.
	// The vertex shader stores the quaternion already inverted, because the
	// sample point is rotated here rather than the object itself.
	float3 rotate(float3 pos, float3 center, float4 rot) {
		float3 relative = pos - center;
		return rotate_vector(relative, rot) + center;
	}

	// Evaluates the SDF of element 'idx' at 'pos', blending between its two
	// configured object types according to the element's lerp factor.
	float sdf_effect_selector(float3 pos, v2f i, uint idx) {
		// there are 4 object types, so wrap the index with 'mod 4'
		uint typeA = (uint)(EL_OBJ_TYPE_1(i, idx) + 0.1f) & 0x3;
		uint typeB = (uint)(EL_OBJ_TYPE_2(i, idx) + 0.1f) & 0x3;

		// identical types need no blending at all
		float blend = typeA != typeB ? EL_OBJ_TYPE_LERP(i, idx) : 0;

		float scale = EL_SCALE(i, idx);
		float3 center = EL_CENTER(i, idx);

		// a side whose weight is (almost) zero gets a placeholder value
		// instead of its own sdf_for_effect() result
		float threshold = EPSILON*0.1f;
		float distA = blend > 1 - threshold ? 0 :
			sdf_for_effect(typeA, pos, center, scale);
		float distB = blend < threshold ? distA :
			sdf_for_effect(typeB, pos, center, scale);

		return lerp(distA, distB, blend);
	}

	// Main SDF entry point - this function defines the whole scene.
	// Returns float2(distance, tag): 'distance' is the smooth-min blend over
	// all enabled elements, 'tag' is the index of the element with the
	// smallest individual distance (-1 if no element was evaluated).
	float2 sdf(float3 pos, v2f i) {
		float blended = 999999;
		float nearestIdx = -1;
		float nearestDist = 999999;

		for (uint j = 0; j < ELEMS; j++) {
			// The vertex shader packs all enabled elements at the front, so
			// the first disabled one ends the scene.
			// THIS IS IMPORTANT! Using 'continue' here KILLS performance!
			if (EL_DISABLED(i, j)) return float2(blended, nearestIdx);

			float3 localPos = rotate(pos, EL_CENTER(i, j), EL_QUATERNION(i, j));
			float elemDist = sdf_effect_selector(localPos, i, j);

			// the element nearest to the sample point is the one we would
			// hit, should the caller decide this is a hit - report it as tag
			if (elemDist < nearestDist) {
				nearestDist = elemDist;
				nearestIdx = j;
			}

			blended = sminCubic(blended, elemDist, 0.1);
		}

		return float2(blended, nearestIdx);
	}

	// Marches a ray from 'start' along 'dir' through the scene defined by
	// sdf() until it hits something or misses entirely; this is where most
	// performance is used.
	//
	// screenDist: distance of the pixel from the screen center, used to relax
	//             the hit threshold towards the screen edges
	// farPlane:   maximum travel distance (additionally capped at half of
	//             _ProjectionParams.z)
	//
	// Returns float3(status, distance, tag):
	//   status 0: hit - y is the distance travelled, z the tag from sdf()
	//   status 1: miss - travelled too far or the scene is receding
	//   status 2: miss - ran out of MAX_STEPS
	float3 raymarch(float3 start, float3 dir, v2f i, float screenDist, float farPlane) {
		float3 cur = start;
		// the first step skips ahead by _ProjectionParams.y
		float dist = _ProjectionParams.y;
		float lastDist = 999999;
		float travelled = 0;

		farPlane = min(farPlane, _ProjectionParams.z * 0.5f);

		// "foveated" rendering:
		// increase EPSILON as we get closer to the edge of the screen
		float epsMod = saturate((1 - screenDist) + 0.10f);
		float eps = EPSILON / epsMod;

		// the counter has to start at an explicit 0: an uninitialized local
		// has an undefined value and would make the step count unpredictable
		for (uint j = 0; j < MAX_STEPS; j++) {
			cur += dir * dist;
			travelled += dist;

			float2 res = sdf(cur, i);
			dist = res.x;

			if (dist < eps) {
				// hit, return distance to hit and 'tag':
				// res.y (tag) defines which object instance we hit, approximately anyway
				return float3(0, travelled, res.y);

			} else if (travelled > farPlane ||
				(dist > 0.5f && dist > lastDist * 1.988)) // *
			{
				// miss
				return float3(1, 0, 0);
			}

			// * this optimization is not mathematically sound for non-convex shapes,
			// it would only be if the second parameter were 2.0f, but alas, it
			// produces very minimal artifacts and increases performance ~2-fold

			lastDist = dist;
		}

		// overrun, miss
		return float3(2, 0, 0);
	}

	// Normal estimation via forward differences of the scene SDF around
	// 'pos': 4 extra sdf() calls per hit - gives us nicer 'shading'.
	// Returns the normalized gradient direction.
	float3 estimateNormal(float3 pos, v2f i) {
		// sdf() returns float2(distance, tag); only the distance is needed,
		// so select .x explicitly instead of relying on implicit truncation
		float d0 = sdf(pos, i).x;
		return normalize(float3(
			sdf(pos + float3(EPSILON, 0, 0), i).x - d0,
			sdf(pos + float3(0, EPSILON, 0), i).x - d0,
			sdf(pos + float3(0, 0, EPSILON), i).x - d0
		));
	}

	/*
	* HELPERS
	*/
	// actual ray tracing, heck:
	// intersects the line through 'rayStart' with direction 'ray' with the
	// plane through 'pos' that has normal 'norm', returns the point of
	// intersection. No guard against 'ray' being parallel to the plane.
	float3 planeIntersect(float3 rayStart, float3 ray, float3 pos, float3 norm) {
		float startOffset = dot(rayStart - pos, norm);
		float rayAlongNorm = dot(ray, norm);
		return rayStart - ray * (startOffset / rayAlongNorm);
	}

	// Converts a hue 'H' into an RGB color, each channel clamped to 0..1.
	float3 hue_to_rgb(float H)
	{
		// inverted colors in mirror, because we can
		if (isInMirror) {
			H = 1 - H;
		}

		float h6 = H * 6;
		float3 rgb = float3(
			abs(h6 - 3) - 1,
			2 - abs(h6 - 2),
			2 - abs(h6 - 4)
		);
		return saturate(rgb);
	}

	// maybe for later use sometime?
	/* float rand(in float2 uv) */
	/* { */
	/* float2 noise = (frac(sin(dot(uv ,float2(12.9898,78.233)2.0)) 43758.5453)); */
	/* return abs(noise.x + noise.y) * 0.5; */
	/* } */

	/*
	* FRAGMENT
	*/
	// Fragment entry point.
	// 1. Picks, per pixel, the single instance that is allowed to draw
	//    (all others discard), to avoid double drawing.
	// 2. Raymarches the scene from the camera along the pixel's view ray.
	// 3. Shades the hit and writes its clip-space depth.
	// Without instancing support the output is solid red.
	f2s frag(v2f i)
	{
		f2s ret;
		ret.color = ret.depth = 1;

	#ifndef UNITY_INSTANCING_ENABLED
		ret.color = float4(1, 0, 0, 1);
		return ret;
	#endif

		UNITY_SETUP_INSTANCE_ID(i);
		UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);

		uint idx = i.localIndex;

		// calculate view ray from interpolated vertex ray
		float3 rayStart = camera_pos;
		float3 rayDir = normalize((i.ray.xyz / i.ray.w).xyz);

		float min_dist = 999999;
		uint min_dist_idx = idx;
		float far_plane = 0;

		// This has to be a full float3: a scalar would keep only the x
		// component and be broadcast to (x, x, x) in planeIntersect(), which
		// is not the camera axis and degenerates to 0/0 when x is 0.
		float3 camera_normal = normalize(UNITY_MATRIX_V[2].xyz);

		for (uint j = 0; j < ELEMS; j++) {
			if (EL_DISABLED(i, j)) break;

			// avoid double drawing if ray would go through multiple boxes:
			// find the element whose center is closest to where the view ray
			// crosses the camera-facing plane through that center
			float3 center = EL_CENTER(i, j);
			float3 intersect = planeIntersect(
				rayStart,
				rayDir * 100000,
				center,
				camera_normal
			);
			float inter_dist = distance(center, intersect);
			if (inter_dist < min_dist)
			{
				min_dist = inter_dist;
				min_dist_idx = j;
			}

			// the raymarch has to reach at least the farthest element
			float cam_dist = distance(center, rayStart);
			if (cam_dist > far_plane) {
				far_plane = cam_dist;
			}
		}

		if (min_dist_idx != idx) {
			// the important part here is that for each pixel on the screen
			// always zero or one (but exactly one if hit) instances
			// of this frag shader will go past this point
			discard;
		}

		// used to debug the above:
		/* ret.color = float4( */
		/* unity_InstanceID == 0 ? 1 : 0, */
		/* unity_InstanceID == 1 ? 1 : 0, */
		/* unity_InstanceID == 2 ? 1 : 0, */
		/* 0.4 */
		/* ); */
		/* return ret; */

		// now do the actual SDF raymarch, aka. the cool part
		far_plane += EL_SCALE(i, idx);
		float screenDist = distance(i.screenPos.xy/i.screenPos.w, float2(0.5f, 0.5f));
		float3 raymarchResult = raymarch(rayStart, rayDir, i, screenDist, far_plane);

		// raymarch() reports a non-zero status in .x for any kind of miss
		float miss = raymarchResult.x;
		if (miss) {
			discard;
		}

		// calculate world-space position of our hit result
		float dist = raymarchResult.y;
		float3 hitPos = rayStart + rayDir * dist;

		// the tag comes back as a float, nudge it up before truncating to uint
		float3 hitObjCenter = EL_CENTER(i, (uint)(raymarchResult.z + EPSILON*2));
		float colorOffset = distance(hitPos, hitObjCenter) * 2.0f;

		// foveated normal calculation: fade out towards edges, save sdf() calls
		float normalIntensity = screenDist < 0.20f ? 1 :
			(screenDist > 0.40f ? 0 : 1 - (screenDist - 0.20f) * 6);
		float3 normal = normalIntensity < EPSILON ? float3(0, 0, 0) :
			lerp(float3(0, 0, 0), estimateNormal(hitPos, i), normalIntensity);
		float angle = acos(dot(normal, rayDir));

		// calculate shading/color based on _Time and object normals
		float edgeLightAdd = 1 - saturate(angle/PI + 0.25f);
		float3 rgb = hue_to_rgb(frac(_Time.x + colorOffset));
		ret.color = float4(rgb, 0.7);
		ret.color.rgb += edgeLightAdd;

		// make transparent - this works because of our double-draw-avoidance above
		ret.color.a = lerp(ret.color.a, 1, saturate(angle - PI + 0.70));

		// and finally, calculate the depth of our hit in clip-space, to make
		// object intersection work
		float4 depthPos = mul(UNITY_MATRIX_VP, float4(hitPos, 1));
		ret.depth = depthPos.z / depthPos.w;

		return ret;
	}
	ENDCG
	}
	}
	}