topin89 · March 20, 2023 19:21
diff --git a/Readme.md b/Readme.md
diff --git a/CMakeLists.txt b/CMakeLists.txt
 # CMAKE_CXX_STANDARD only accepts the value 17 from CMake 3.8 onwards, so that
 # is the oldest CMake this project can really be configured with.
 cmake_minimum_required(VERSION 3.8)
 project(unclean VERSION 0.1.0 LANGUAGES CXX)

 # This is a benchmark: without an explicit build type a single-config
 # generator compiles with no optimisation flags at all, which makes the
 # timings meaningless. Default to Release unless the user chose otherwise.
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
     set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
 endif()

 # main.cpp needs C++17 (std::variant, `if constexpr`, fold expressions);
 # fail at configure time instead of silently decaying to an older standard.
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)

 add_executable(shape_bench main.cpp)
diff --git a/main.cpp b/main.cpp
 #define _USE_MATH_DEFINES
 #include <math.h>
 #include <stdint.h>

 #include <chrono>
 #include <iostream>
 #include <random>
 #include <stdexcept>
 #include <type_traits>
 #include <vector>
 #include <variant>


 // Short fixed-width aliases used throughout the benchmark.
 using f32 = float;
 using u32 = uint32_t;

 // Pi as f32, converted at compile time from the M_PI macro of <math.h>.
 static constexpr f32 Pi32 = M_PI;
 // Number of shapes generated for each timed run.
 static constexpr size_t num_of_shapes = 8192;
 // Number of passes over the shape array per timed run, chosen so that
 // num_of_shapes * num_of_outer_loops == 1024*1024*20 area evaluations.
 static constexpr size_t num_of_outer_loops = 1024*1024*20 / num_of_shapes;

 // static constexpr size_t num_of_shapes = 1024*1024*20;

 // utilities

 // Minimal stopwatch on top of std::chrono::steady_clock.
 //
 // Call begin() and end() around the region of interest, then read the
 // elapsed time with ms(). Both time points are value-initialised, so a
 // stopwatch that was never started reports 0 ms.
 class TimeMeasure {
 public:
    TimeMeasure() = default;

    // Marks the start of the measured interval.
    void begin() noexcept { start_point = clock_type::now(); }
    // Marks the end of the measured interval.
    void end() noexcept { end_point = clock_type::now(); }

    // Time between the last begin() and the last end(), truncated to whole
    // milliseconds. Const so it can be read through a const reference.
    int64_t ms() const {
        return std::chrono::duration_cast<std::chrono::milliseconds>(end_point - start_point).count();
    }

    // Average milliseconds per tick. The division is done in double, so
    // ticks == 0 produces inf/NaN rather than a trap.
    double ms_per_tick(int64_t ticks) const {
        return static_cast<double>(ms()) / static_cast<double>(ticks);
    }
 private:
    using clock_type = std::chrono::steady_clock;

    clock_type::time_point start_point{};
    clock_type::time_point end_point{};
 };

 // from https://stackoverflow.com/a/34111095
 //
 // Compile-time membership test: true when Kind is exactly the same type as
 // one of Kinds (std::is_same_v, so cv-qualifiers and references matter).
 // An empty Kinds list yields false.
 template <typename Kind, typename... Kinds>
 constexpr bool any_of_types(){
    // Fold over ||, i.e.
    //   std::is_same_v<Kind, Kind0> || std::is_same_v<Kind, Kind1> || ...
    constexpr bool found = (std::is_same_v<Kind, Kinds> || ...);
    return found;
 }

 // This should really be in the standard
 //
 // Deterministic uniform random number source for a single arithmetic type.
 // Floating-point T draws from std::uniform_real_distribution, every other T
 // from std::uniform_int_distribution. The engine is a std::mt19937 seeded
 // with `seed`, so two generators built with the same arguments produce the
 // same sequence.
 //
 //   min, max  bounds handed to the distribution (defaults 14 and 9001)
 //   seed      engine seed (default 42)
 template< typename T>
 class RandomNumberGen{
 public:
    RandomNumberGen(T min = 14, T max = 9001, size_t seed = 42):
        // The engine's seed type may be narrower than size_t; convert explicitly.
        rng(static_cast<std::mt19937::result_type>(seed)), uni(min, max) {}

    // Draws the next value; advances the engine, hence not const.
    T operator()(){ return uni(rng);}
 private:
    // The standard trait covers float, double and long double without a
    // hand-maintained type list.
    using uniform_dist = std::conditional_t<
        std::is_floating_point_v<T>,
            std::uniform_real_distribution<T>, std::uniform_int_distribution<T>
        >;
    std::mt19937 rng;
    uniform_dist uni;
 };

 // main dish


 // Tag identifying the kind of a shape. The numeric values are used as array
 // indices into the coefficient tables, so their order must stay as it is.
 enum shape_type : u32
 {
    Shape_Square    = 0,
    Shape_Rectangle = 1,
    Shape_Triangle  = 2,
    Shape_Circle    = 3,

    // Number of real shape kinds; not a shape itself.
    Shape_Count     = 4,
 };

 // Plain tagged record describing one shape: Type selects the area formula,
 // Width and Height carry the dimensions. generate_shape_union() fills both
 // fields with the same value for squares and circles.
 struct shape_union
 {
    shape_type Type;   // which shape this record describes
    f32 Width;         // side, width, base or radius, depending on Type
    f32 Height;        // height; equals Width for squares and circles
 };

 // Area coefficient per shape, indexed by shape_type:
 //   area = CTable[Type] * Width * Height
 const f32 CTable[Shape_Count] = {
    1.0f,   // Shape_Square
    1.0f,   // Shape_Rectangle
    0.5f,   // Shape_Triangle
    Pi32,   // Shape_Circle
 };

 // Abstract interface of the virtual-dispatch shape hierarchy.
 class shape_base
 {
 public:
    shape_base() = default;

    // Area of the concrete shape.
    virtual f32 Area() = 0;

    // Shapes are deleted through shape_base pointers, hence virtual.
    virtual ~shape_base() = default;
 };

 // Square of the virtual-dispatch hierarchy, defined by its side length.
 class square : public shape_base
 {
 public:
    square(f32 SideInit) : Side(SideInit) {}

    // Side squared. `override` makes the compiler check that this really
    // overrides shape_base::Area instead of declaring a new virtual.
    f32 Area() override {return Side*Side;}

 private:
    f32 Side;
 };

 // Rectangle of the virtual-dispatch hierarchy, defined by width and height.
 class rectangle : public shape_base
 {
 public:
    rectangle(f32 WidthInit, f32 HeightInit) : Width(WidthInit), Height(HeightInit) {}

    // Width times height. `override` makes the compiler check that this
    // really overrides shape_base::Area.
    f32 Area() override {return Width*Height;}

 private:
    f32 Width, Height;
 };

 // Triangle of the virtual-dispatch hierarchy, defined by base and height.
 class triangle : public shape_base
 {
 public:
    triangle(f32 BaseInit, f32 HeightInit) : Base(BaseInit), Height(HeightInit) {}

    // Half of base times height. `override` makes the compiler check that
    // this really overrides shape_base::Area.
    f32 Area() override {return 0.5f*Base*Height;}

 private:
    f32 Base, Height;
 };

 // Circle of the virtual-dispatch hierarchy, defined by its radius.
 class circle : public shape_base
 {
 public:
    circle(f32 RadiusInit) : Radius(RadiusInit) {}

    // Pi times radius squared. `override` makes the compiler check that this
    // really overrides shape_base::Area.
    f32 Area() override {return Pi32*Radius*Radius;}

 private:
    f32 Radius;
 };

 // Sums Area() over the ShapeCount shapes pointed to by Shapes, front to
 // back, with one virtual call per shape. Returns 0 for an empty array.
 f32 TotalAreaVTBL(u32 ShapeCount, shape_base **Shapes)
 {
    f32 Total = 0.0f;

    shape_base **const End = Shapes + ShapeCount;
    for(shape_base **Cursor = Shapes; Cursor != End; ++Cursor)
    {
        Total += (*Cursor)->Area();
    }

    return Total;
 }

 // Same sum as TotalAreaVTBL, manually unrolled four ways with one
 // independent accumulator per lane. Because the additions happen in a
 // different order, the result may differ from TotalAreaVTBL in the last bits.
 //
 // All ShapeCount shapes are summed: the 0-3 shapes left after the last full
 // group of four are added in a short tail loop.
 f32 TotalAreaVTBL4(u32 ShapeCount, shape_base **Shapes)
 {
    f32 Accum0 = 0.0f;
    f32 Accum1 = 0.0f;
    f32 Accum2 = 0.0f;
    f32 Accum3 = 0.0f;
    f32 AccumTail = 0.0f;

    u32 Count = ShapeCount/4;
    while(Count--)
    {
        Accum0 += Shapes[0]->Area();
        Accum1 += Shapes[1]->Area();
        Accum2 += Shapes[2]->Area();
        Accum3 += Shapes[3]->Area();

        Shapes += 4;
    }

    // Shapes now points just past the last full group.
    u32 const TailCount = ShapeCount%4;
    for(u32 TailIndex = 0; TailIndex < TailCount; ++TailIndex)
    {
        AccumTail += Shapes[TailIndex]->Area();
    }

    // With no tail AccumTail is 0, so a multiple-of-four input gives exactly
    // the four-lane sum.
    f32 Result = (Accum0 + Accum1 + Accum2 + Accum3) + AccumTail;
    return Result;
 }

 // Area of one tagged shape, chosen by a switch on Shape.Type. Squares and
 // circles use only Width; Shape_Count and any other tag value give 0.
 f32 GetAreaSwitch(shape_union Shape)
 {
    switch(Shape.Type)
    {
        case Shape_Square:    return Shape.Width*Shape.Width;
        case Shape_Rectangle: return Shape.Width*Shape.Height;
        case Shape_Triangle:  return 0.5f*Shape.Width*Shape.Height;
        case Shape_Circle:    return Pi32*Shape.Width*Shape.Width;

        case Shape_Count:     break;
    }

    // Not a real shape: contributes no area.
    return 0.0f;
 }

 // Sums GetAreaSwitch() over the first ShapeCount records of Shapes, front to
 // back. Returns 0 for an empty array.
 f32 TotalAreaSwitch(u32 ShapeCount, shape_union *Shapes)
 {
    f32 Total = 0.0f;

    shape_union *const End = Shapes + ShapeCount;
    for(shape_union *Cursor = Shapes; Cursor != End; ++Cursor)
    {
        Total += GetAreaSwitch(*Cursor);
    }

    return Total;
 }

 // Same sum as TotalAreaSwitch, manually unrolled four ways with one
 // independent accumulator per lane. Because the additions happen in a
 // different order, the result may differ from TotalAreaSwitch in the last
 // bits.
 //
 // All ShapeCount shapes are summed: the 0-3 records left after the last full
 // group of four are added in a short tail loop.
 f32 TotalAreaSwitch4(u32 ShapeCount, shape_union *Shapes)
 {
    f32 Accum0 = 0.0f;
    f32 Accum1 = 0.0f;
    f32 Accum2 = 0.0f;
    f32 Accum3 = 0.0f;
    f32 AccumTail = 0.0f;

    // Split the count up front; the group counter is consumed by the loop.
    u32 const TailCount = ShapeCount%4;
    u32 GroupCount = ShapeCount/4;
    while(GroupCount--)
    {
        Accum0 += GetAreaSwitch(Shapes[0]);
        Accum1 += GetAreaSwitch(Shapes[1]);
        Accum2 += GetAreaSwitch(Shapes[2]);
        Accum3 += GetAreaSwitch(Shapes[3]);

        Shapes += 4;
    }

    // Shapes now points just past the last full group.
    for(u32 TailIndex = 0; TailIndex < TailCount; ++TailIndex)
    {
        AccumTail += GetAreaSwitch(Shapes[TailIndex]);
    }

    // With no tail AccumTail is 0, so a multiple-of-four input gives exactly
    // the four-lane sum.
    f32 Result = (Accum0 + Accum1 + Accum2 + Accum3) + AccumTail;
    return Result;
 }


 // Area of one tagged shape via the coefficient table instead of a branch:
 // CTable[Type] * Width * Height. Shape.Type is used as an index without a
 // range check, so it must be one of the four real shape kinds.
 f32 GetAreaUnion(shape_union Shape)
 {
    return CTable[Shape.Type]*Shape.Width*Shape.Height;
 }

 // Sums GetAreaUnion() over the first ShapeCount records of Shapes, front to
 // back. Returns 0 for an empty array.
 f32 TotalAreaUnion(u32 ShapeCount, shape_union *Shapes)
 {
    f32 Total = 0.0f;

    shape_union *const End = Shapes + ShapeCount;
    for(shape_union *Cursor = Shapes; Cursor != End; ++Cursor)
    {
        Total += GetAreaUnion(*Cursor);
    }

    return Total;
 }

 // Same sum as TotalAreaUnion, manually unrolled four ways with one
 // independent accumulator per lane. Because the additions happen in a
 // different order, the result may differ from TotalAreaUnion in the last
 // bits.
 //
 // All ShapeCount shapes are summed: the 0-3 records left after the last full
 // group of four are added in a short tail loop.
 f32 TotalAreaUnion4(u32 ShapeCount, shape_union *Shapes)
 {
    f32 Accum0 = 0.0f;
    f32 Accum1 = 0.0f;
    f32 Accum2 = 0.0f;
    f32 Accum3 = 0.0f;
    f32 AccumTail = 0.0f;

    // Split the count up front; the group counter is consumed by the loop.
    u32 const TailCount = ShapeCount%4;
    u32 GroupCount = ShapeCount/4;
    while(GroupCount--)
    {
        Accum0 += GetAreaUnion(Shapes[0]);
        Accum1 += GetAreaUnion(Shapes[1]);
        Accum2 += GetAreaUnion(Shapes[2]);
        Accum3 += GetAreaUnion(Shapes[3]);

        Shapes += 4;
    }

    // Shapes now points just past the last full group.
    for(u32 TailIndex = 0; TailIndex < TailCount; ++TailIndex)
    {
        AccumTail += GetAreaUnion(Shapes[TailIndex]);
    }

    // With no tail AccumTail is 0, so a multiple-of-four input gives exactly
    // the four-lane sum.
    f32 Result = (Accum0 + Accum1 + Accum2 + Accum3) + AccumTail;
    return Result;
 }

 // Square without a vtable, used as a std::variant alternative.
 class square_no_vt
 {
 public:
    square_no_vt(f32 SideInit) : Side(SideInit) {}

    // Side squared. Const: computing the area does not modify the shape, so
    // it can also be called on const objects and through const references.
    f32 Area() const {return Side*Side;}

 private:
    f32 Side;
 };

 // Rectangle without a vtable, used as a std::variant alternative.
 class rectangle_no_vt
 {
 public:
    rectangle_no_vt(f32 WidthInit, f32 HeightInit) : Width(WidthInit), Height(HeightInit) {}

    // Width times height. Const: computing the area does not modify the
    // shape, so it can also be called on const objects.
    f32 Area() const {return Width*Height;}

 private:
    f32 Width, Height;
 };

 // Triangle without a vtable, used as a std::variant alternative.
 class triangle_no_vt
 {
 public:
    triangle_no_vt(f32 BaseInit, f32 HeightInit) : Base(BaseInit), Height(HeightInit) {}

    // Half of base times height. Const: computing the area does not modify
    // the shape, so it can also be called on const objects.
    f32 Area() const {return 0.5f*Base*Height;}

 private:
    f32 Base, Height;
 };

 // Circle without a vtable, used as a std::variant alternative.
 class circle_no_vt
 {
 public:
    circle_no_vt(f32 RadiusInit) : Radius(RadiusInit) {}

    // Pi times radius squared. Const: computing the area does not modify the
    // shape, so it can also be called on const objects.
    f32 Area() const {return Pi32*Radius*Radius;}

 private:
    f32 Radius;
 };

 // Closed set of vtable-free shapes; dispatch happens through std::visit.
 using shape_variant = std::variant<
    square_no_vt,
    rectangle_no_vt,
    triangle_no_vt,
    circle_no_vt
 >;

 // Common, vtable-free implementation behind the "optimized" variant shapes:
 // every shape is stored as a kind plus two sides, and the area comes from
 // one coefficient lookup instead of a per-class formula.
 class shape_base_no_vt_opt{
 public:
    shape_base_no_vt_opt(shape_type shape_index_, f32 side1_, f32 side2_) :
        shape_index(shape_index_), side1(side1_), side2(side2_)
    {}

    // c_table[kind] * side1 * side2. Const: the lookup does not modify the
    // shape, so it can also be called on const objects.
    f32 Area() const { return c_table[shape_index] * side1 * side2; }
 private:

    // One coefficient per real shape kind, in shape_type order
    // (square, rectangle, triangle, circle).
    static constexpr f32 c_table[4] = {1.0f, 1.0f, 0.5f, Pi32};

    shape_type shape_index;
    f32 side1;
    f32 side2;
 };

 // Square for the table-driven variant: a Shape_Square whose two sides are
 // both the given side length.
 class square_no_vt_opt : public shape_base_no_vt_opt
 {
 public:
    square_no_vt_opt(f32 SideInit)
        : shape_base_no_vt_opt(Shape_Square, SideInit, SideInit)
    {}
 };

 // Rectangle for the table-driven variant: a Shape_Rectangle with the given
 // width and height as its two sides.
 class rectangle_no_vt_opt: public shape_base_no_vt_opt
 {
 public:
    rectangle_no_vt_opt(f32 WidthInit, f32 HeightInit)
        : shape_base_no_vt_opt(Shape_Rectangle, WidthInit, HeightInit)
    {}
 };

 // Triangle for the table-driven variant: a Shape_Triangle with the given
 // base and height as its two sides.
 class triangle_no_vt_opt: public shape_base_no_vt_opt
 {
 public:
    triangle_no_vt_opt(f32 BaseInit, f32 HeightInit)
        : shape_base_no_vt_opt(Shape_Triangle, BaseInit, HeightInit)
    {}
 };

 // Circle for the table-driven variant: a Shape_Circle whose two sides are
 // both the given radius.
 class circle_no_vt_opt: public shape_base_no_vt_opt
 {
 public:
    circle_no_vt_opt(f32 RadiusInit)
        : shape_base_no_vt_opt(Shape_Circle, RadiusInit, RadiusInit)
    {}
 };

 // Closed set of the table-driven, vtable-free shapes.
 using shape_variant_opt = std::variant<
    square_no_vt_opt,
    rectangle_no_vt_opt,
    triangle_no_vt_opt,
    circle_no_vt_opt
 >;


 // Sums Area() over the first ShapeCount variants of Shapes, front to back,
 // dispatching each element through std::visit. Returns 0 for an empty array.
 template<typename ShapeVariant>
 f32 TotalAreaVariant(u32 ShapeCount, ShapeVariant *Shapes) noexcept
 {
    // The visitor takes the active alternative by value and asks it for its area.
    const auto AreaOf = [](auto shape){return shape.Area();};

    f32 Total = 0.0f;
    ShapeVariant *const End = Shapes + ShapeCount;
    for(ShapeVariant *Cursor = Shapes; Cursor != End; ++Cursor)
    {
        Total += std::visit(AreaOf, *Cursor);
    }

    return Total;
 }

 // Same sum as TotalAreaVariant, manually unrolled four ways with one
 // independent accumulator per lane. Because the additions happen in a
 // different order, the result may differ from TotalAreaVariant in the last
 // bits.
 //
 // All ShapeCount shapes are summed: the 0-3 variants left after the last
 // full group of four are added in a short tail loop.
 template<typename ShapeVariant>
 f32 TotalAreaVariant4(u32 ShapeCount, ShapeVariant *Shapes) noexcept
 {
    f32 Accum0 = 0.0f;
    f32 Accum1 = 0.0f;
    f32 Accum2 = 0.0f;
    f32 Accum3 = 0.0f;
    f32 AccumTail = 0.0f;

    u32 Count = ShapeCount/4;
    while(Count--)
    {
        Accum0 += std::visit([](auto shape){return shape.Area();}, Shapes[0]);
        Accum1 += std::visit([](auto shape){return shape.Area();}, Shapes[1]);
        Accum2 += std::visit([](auto shape){return shape.Area();}, Shapes[2]);
        Accum3 += std::visit([](auto shape){return shape.Area();}, Shapes[3]);

        Shapes += 4;
    }

    // Shapes now points just past the last full group.
    u32 const TailCount = ShapeCount%4;
    for(u32 TailIndex = 0; TailIndex < TailCount; ++TailIndex)
    {
        AccumTail += std::visit([](auto shape){return shape.Area();}, Shapes[TailIndex]);
    }

    // With no tail AccumTail is 0, so a multiple-of-four input gives exactly
    // the four-lane sum.
    f32 Result = (Accum0 + Accum1 + Accum2 + Accum3) + AccumTail;
    return Result;
 }


 // Table-driven implementation of the shape_base interface: every shape is
 // stored as a kind plus two sides, so all derived classes share one Area()
 // body that does a coefficient lookup.
 class shape_base_opt: public shape_base{
 public:
    shape_base_opt(shape_type shape_index_, f32 side1_, f32 side2_) :
        shape_index(shape_index_), side1(side1_), side2(side2_)
    {}

    // c_table[kind] * side1 * side2. `override` makes the compiler check
    // that this really overrides shape_base::Area.
    f32 Area() override { return c_table[shape_index] * side1 * side2; }
 private:

    // One coefficient per real shape kind, in shape_type order
    // (square, rectangle, triangle, circle).
    static constexpr f32 c_table[4] = {1.0f, 1.0f, 0.5f, Pi32};

    shape_type shape_index;
    f32 side1;
    f32 side2;
 };

 // Square for the table-driven virtual hierarchy: a Shape_Square whose two
 // sides are both the given side length.
 class square_opt : public shape_base_opt
 {
 public:
    square_opt(f32 SideInit)
        : shape_base_opt(Shape_Square, SideInit, SideInit)
    {}
 };

 // Rectangle for the table-driven virtual hierarchy: a Shape_Rectangle with
 // the given width and height as its two sides.
 class rectangle_opt: public shape_base_opt
 {
 public:
    rectangle_opt(f32 WidthInit, f32 HeightInit)
        : shape_base_opt(Shape_Rectangle, WidthInit, HeightInit)
    {}
 };

 // Triangle for the table-driven virtual hierarchy: a Shape_Triangle with the
 // given base and height as its two sides.
 class triangle_opt: public shape_base_opt
 {
 public:
    triangle_opt(f32 BaseInit, f32 HeightInit)
        : shape_base_opt(Shape_Triangle, BaseInit, HeightInit)
    {}
 };

 // Circle for the table-driven virtual hierarchy: a Shape_Circle whose two
 // sides are both the given radius.
 class circle_opt: public shape_base_opt
 {
 public:
    circle_opt(f32 RadiusInit)
        : shape_base_opt(Shape_Circle, RadiusInit, RadiusInit)
    {}
 };

 // Creates one random shape of the virtual-dispatch hierarchy on the heap.
 // The kind is drawn uniformly from {0..3}; dimensions come from a default
 // RandomNumberGen<f32>. Both generators are function-local statics, so
 // successive calls continue a single sequence.
 // The caller owns the returned pointer and must delete it.
 shape_base* generate_shape_base(){
    static RandomNumberGen<size_t> shape_picker{0, 3};
    static RandomNumberGen<f32> dimension{};

    // The kind is drawn first, then the dimensions in argument order.
    const size_t kind = shape_picker();
    if(kind == 0){
        return new square{dimension()};
    }
    if(kind == 1){
        const f32 width = dimension();
        const f32 height = dimension();
        return new rectangle{width, height};
    }
    if(kind == 2){
        const f32 base = dimension();
        const f32 height = dimension();
        return new triangle{base, height};
    }
    if(kind == 3){
        return new circle{dimension()};
    }

    abort(); // silencing no return warning
 }

 // Creates one random tagged shape record. The kind is drawn uniformly from
 // {0..3}; squares and circles get the same value in Width and Height.
 // Both generators are function-local statics, so successive calls continue
 // a single sequence.
 shape_union generate_shape_union(){
    static RandomNumberGen<size_t> shape_picker{0, 3};
    static RandomNumberGen<f32> dimension{};

    // The kind is drawn first, then the dimensions in field order.
    const size_t kind = shape_picker();
    if(kind == 0){
        const f32 edge = dimension();
        return shape_union{Shape_Square, edge, edge};
    }
    if(kind == 1){
        const f32 width = dimension();
        const f32 height = dimension();
        return shape_union{Shape_Rectangle, width, height};
    }
    if(kind == 2){
        const f32 base = dimension();
        const f32 height = dimension();
        return shape_union{Shape_Triangle, base, height};
    }
    if(kind == 3){
        const f32 radius = dimension();
        return shape_union{Shape_Circle, radius, radius};
    }

    abort(); // silencing no return warning
 }

 // Creates one random shape_variant. The kind is drawn uniformly from {0..3};
 // dimensions come from a default RandomNumberGen<f32>. Both generators are
 // function-local statics, so successive calls continue a single sequence.
 shape_variant generate_shape_variant(){
    static RandomNumberGen<size_t> shape_picker{0, 3};
    static RandomNumberGen<f32> dimension{};

    // The kind is drawn first, then the dimensions in argument order.
    const size_t kind = shape_picker();
    if(kind == 0){
        return square_no_vt{dimension()};
    }
    if(kind == 1){
        const f32 width = dimension();
        const f32 height = dimension();
        return rectangle_no_vt{width, height};
    }
    if(kind == 2){
        const f32 base = dimension();
        const f32 height = dimension();
        return triangle_no_vt{base, height};
    }
    if(kind == 3){
        return circle_no_vt{dimension()};
    }

    abort(); // silencing no return warning
 }

 // Creates one random shape_variant_opt. The kind is drawn uniformly from
 // {0..3}; dimensions come from a default RandomNumberGen<f32>. Both
 // generators are function-local statics, so successive calls continue a
 // single sequence.
 shape_variant_opt generate_shape_variant_opt(){
    static RandomNumberGen<size_t> shape_picker{0, 3};
    static RandomNumberGen<f32> dimension{};

    // The kind is drawn first, then the dimensions in argument order.
    const size_t kind = shape_picker();
    if(kind == 0){
        return square_no_vt_opt{dimension()};
    }
    if(kind == 1){
        const f32 width = dimension();
        const f32 height = dimension();
        return rectangle_no_vt_opt{width, height};
    }
    if(kind == 2){
        const f32 base = dimension();
        const f32 height = dimension();
        return triangle_no_vt_opt{base, height};
    }
    if(kind == 3){
        return circle_no_vt_opt{dimension()};
    }

    abort(); // silencing no return warning
 }

 // Creates one random shape of the table-driven virtual hierarchy on the
 // heap. The kind is drawn uniformly from {0..3}; dimensions come from a
 // default RandomNumberGen<f32>. Both generators are function-local statics,
 // so successive calls continue a single sequence.
 // The caller owns the returned pointer and must delete it.
 shape_base* generate_shape_base_opt(){
    static RandomNumberGen<size_t> shape_picker{0, 3};
    static RandomNumberGen<f32> dimension{};

    // The kind is drawn first, then the dimensions in argument order.
    const size_t kind = shape_picker();
    if(kind == 0){
        return new square_opt{dimension()};
    }
    if(kind == 1){
        const f32 width = dimension();
        const f32 height = dimension();
        return new rectangle_opt{width, height};
    }
    if(kind == 2){
        const f32 base = dimension();
        const f32 height = dimension();
        return new triangle_opt{base, height};
    }
    if(kind == 3){
        return new circle_opt{dimension()};
    }

    abort(); // silencing no return warning
 }

 // Builds the benchmark input: calls generate_shape exactly num_of_shapes
 // times and returns the results in call order. The element type is whatever
 // the generator returns (a value, or an owning raw pointer the caller must
 // free).
 template<typename ShapeGenerator>
 auto generate_shapes(ShapeGenerator generate_shape) -> std::vector<decltype(generate_shape())>{
    using shape_t = decltype(generate_shape());

    std::vector<shape_t> shapes;
    shapes.reserve(num_of_shapes);   // one allocation up front

    while(shapes.size() < num_of_shapes){
        shapes.push_back(generate_shape());
    }

    return shapes;
 }


 template<typename ShapeGenerator, typename AreaCount, typename AreaCountLoopUnrolled>
 void make_bench(ShapeGenerator gen_shape, AreaCount TotalArea, AreaCountLoopUnrolled TotalArea4, char const * const benchname){
    TimeMeasure tm_common;
    TimeMeasure tm_loop_unrolled;

    f32 area_common = 0;
    f32 area_unrolled = 0;
    {
        auto shapes = generate_shapes(gen_shape);

        tm_common.begin();
        for(size_t i = 0; i < num_of_outer_loops; ++i){
            area_common += TotalArea(shapes.size(), shapes.data());
        }
        tm_common.end();

        if constexpr (std::is_pointer_v<std::decay_t<decltype(shapes[0])>>){
            for(const auto shape: shapes ){
                delete shape;
            }
        }
    }

    {
        auto shapes = generate_shapes(gen_shape);
        tm_loop_unrolled.begin();
        for(size_t i = 0; i < num_of_outer_loops; ++i){
            area_unrolled += TotalArea4(shapes.size(), shapes.data());
        }
        tm_loop_unrolled.end();

        if constexpr (std::is_pointer_v<std::decay_t<decltype(shapes[0])>>){
            for(const auto shape: shapes ){
                delete shape;
            }
        }
    }
    std::cout << "Bench: " << benchname << '\n';
    std::cout << "\tRegular  loop, time: " << tm_common.ms() << " ms area: " << area_common << '\n';
    std::cout << "\tUnrolled loop, time: " << tm_loop_unrolled.ms() << " ms area: " << area_unrolled << "\n\n";
 }

 // Runs every benchmark in turn. Each make_bench call prints the time and the
 // accumulated area of the regular and of the 4x-unrolled summation routine.
 int main(int, char**) {
    // Class hierarchy with one virtual Area() per shape class.
    make_bench(generate_shape_base, TotalAreaVTBL, TotalAreaVTBL4, "virtual functions");
    // Tagged records, area chosen by a switch on the tag.
    make_bench(generate_shape_union, TotalAreaSwitch, TotalAreaSwitch4, "switch case");
    // Same tagged records, area via the coefficient table. This shares
    // generate_shape_union's static random state with the previous benchmark.
    make_bench(generate_shape_union, TotalAreaUnion, TotalAreaUnion4, "lookup table");
    // std::variant of vtable-free classes, dispatched with std::visit.
    make_bench(generate_shape_variant, TotalAreaVariant<shape_variant>, TotalAreaVariant4<shape_variant>, "std::variant");
    // std::variant whose alternatives all share the table-driven Area().
    make_bench(generate_shape_variant_opt, TotalAreaVariant<shape_variant_opt>, TotalAreaVariant4<shape_variant_opt>, "std::variant optimized");
    // Virtual hierarchy whose classes all share the table-driven Area().
    make_bench(generate_shape_base_opt, TotalAreaVTBL, TotalAreaVTBL4, "virtual functions optimized");
 }
	gcc 12	gcc 11	clang 15 Lin	clang 15 Win	MSVC
virtual functions	141	128	136	154	152
switch case	83	88	119	131	130
lookup table	21	19	19	18	21
std::variant	78	114	116	133	124
std::variant optimized	68	115	122	19	118
virtual functions optimized	45	43	67	41	45
	cmake_minimum_required(VERSION 3.0.0)
	project(unclean VERSION 0.1.0)

	set (CMAKE_CXX_STANDARD 17)

	add_executable(shape_bench main.cpp)
	#define _USE_MATH_DEFINES
	#include <math.h>
	#include <stdint.h>

	#include <chrono>
	#include <iostream>
	#include <random>
	#include <stdexcept>
	#include <type_traits>
	#include <vector>
	#include <variant>


	using f32 = float;
	using u32 = uint32_t;

	static constexpr f32 Pi32 = M_PI;
	static constexpr size_t num_of_shapes = 8192;
	static constexpr size_t num_of_outer_loops = 1024*1024*20 / num_of_shapes;

	// static constexpr size_t num_of_shapes = 1024*1024*20;

	// utilities

	class TimeMeasure {
	public:
	TimeMeasure() = default;
	void begin(){ start_point = std::chrono::steady_clock::now(); }
	void end(){ end_point = std::chrono::steady_clock::now(); }

	int64_t ms(){
	return std::chrono::duration_cast<std::chrono::milliseconds>(end_point - start_point).count();
	}

	double ms_per_tick(int64_t ticks) {
	return static_cast<double>(ms()) / ticks;
	}
	private:
	std::chrono::steady_clock::time_point start_point;
	std::chrono::steady_clock::time_point end_point;
	};

	// from https://stackoverflow.com/a/34111095
	template <typename Kind, typename... Kinds>
	constexpr bool any_of_types(){
	/* The following expands to :
	* std::is_same_v<Kind, Kind0> || std::is_same_v<Kind, Kind1> || ... */
	if constexpr ((std::is_same_v<Kind, Kinds> || ...)) {
	return true;
	}

	return false;
	};

	// This should really be in the standard
	template< typename T>
	class RandomNumberGen{
	public:
	RandomNumberGen(T min = 14, T max = 9001, size_t seed = 42):
	rng(seed), uni(min, max) {}

	T operator()(){ return uni(rng);}
	private:
	using uniform_dist = std::conditional_t<
	any_of_types<T, float, double, long double>(),
	std::uniform_real_distribution<T>, std::uniform_int_distribution<T>
	>;
	std::mt19937 rng;
	uniform_dist uni;
	};

	// main dish


	enum shape_type : u32
	{
	Shape_Square,
	Shape_Rectangle,
	Shape_Triangle,
	Shape_Circle,

	Shape_Count,
	};

	struct shape_union
	{
	shape_type Type;
	f32 Width;
	f32 Height;
	};

	f32 const CTable[Shape_Count] = {1.0f, 1.0f, 0.5f, Pi32};

	class shape_base
	{
	public:
	shape_base() {}
	virtual f32 Area() = 0;
	virtual ~shape_base() = default;
	};

	class square : public shape_base
	{
	public:
	square(f32 SideInit) : Side(SideInit) {}
	virtual f32 Area() {return Side*Side;}

	private:
	f32 Side;
	};

	class rectangle : public shape_base
	{
	public:
	rectangle(f32 WidthInit, f32 HeightInit) : Width(WidthInit), Height(HeightInit) {}
	virtual f32 Area() {return Width*Height;}

	private:
	f32 Width, Height;
	};

	class triangle : public shape_base
	{
	public:
	triangle(f32 BaseInit, f32 HeightInit) : Base(BaseInit), Height(HeightInit) {}
	virtual f32 Area() {return 0.5f*Base*Height;}

	private:
	f32 Base, Height;
	};

	class circle : public shape_base
	{
	public:
	circle(f32 RadiusInit) : Radius(RadiusInit) {}
	virtual f32 Area() {return Pi32*Radius*Radius;}

	private:
	f32 Radius;
	};

	f32 TotalAreaVTBL(u32 ShapeCount, shape_base **Shapes)
	{
	f32 Accum = 0.0f;
	for(u32 ShapeIndex = 0; ShapeIndex < ShapeCount; ++ShapeIndex)
	{
	Accum += Shapes[ShapeIndex]->Area();
	}

	return Accum;
	}

	f32 TotalAreaVTBL4(u32 ShapeCount, shape_base **Shapes)
	{
	f32 Accum0 = 0.0f;
	f32 Accum1 = 0.0f;
	f32 Accum2 = 0.0f;
	f32 Accum3 = 0.0f;

	u32 Count = ShapeCount/4;
	while(Count--)
	{
	Accum0 += Shapes[0]->Area();
	Accum1 += Shapes[1]->Area();
	Accum2 += Shapes[2]->Area();
	Accum3 += Shapes[3]->Area();

	Shapes += 4;
	}

	f32 Result = (Accum0 + Accum1 + Accum2 + Accum3);
	return Result;
	}

	f32 GetAreaSwitch(shape_union Shape)
	{
	f32 Result = 0.0f;

	switch(Shape.Type)
	{
	case Shape_Square: {Result = Shape.Width*Shape.Width;} break;
	case Shape_Rectangle: {Result = Shape.Width*Shape.Height;} break;
	case Shape_Triangle: {Result = 0.5f*Shape.Width*Shape.Height;} break;
	case Shape_Circle: {Result = Pi32*Shape.Width*Shape.Width;} break;

	case Shape_Count: {} break;
	}

	return Result;
	}

	f32 TotalAreaSwitch(u32 ShapeCount, shape_union *Shapes)
	{
	f32 Accum = 0.0f;

	for(u32 ShapeIndex = 0; ShapeIndex < ShapeCount; ++ShapeIndex)
	{
	Accum += GetAreaSwitch(Shapes[ShapeIndex]);
	}

	return Accum;
	}

	f32 TotalAreaSwitch4(u32 ShapeCount, shape_union *Shapes)
	{
	f32 Accum0 = 0.0f;
	f32 Accum1 = 0.0f;
	f32 Accum2 = 0.0f;
	f32 Accum3 = 0.0f;

	ShapeCount /= 4;
	while(ShapeCount--)
	{
	Accum0 += GetAreaSwitch(Shapes[0]);
	Accum1 += GetAreaSwitch(Shapes[1]);
	Accum2 += GetAreaSwitch(Shapes[2]);
	Accum3 += GetAreaSwitch(Shapes[3]);

	Shapes += 4;
	}

	f32 Result = (Accum0 + Accum1 + Accum2 + Accum3);
	return Result;
	}


	f32 GetAreaUnion(shape_union Shape)
	{
	f32 Result = CTable[Shape.Type]*Shape.Width*Shape.Height;
	return Result;
	}

	f32 TotalAreaUnion(u32 ShapeCount, shape_union *Shapes)
	{
	f32 Accum = 0.0f;

	for(u32 ShapeIndex = 0; ShapeIndex < ShapeCount; ++ShapeIndex)
	{
	Accum += GetAreaUnion(Shapes[ShapeIndex]);
	}

	return Accum;
	}

	f32 TotalAreaUnion4(u32 ShapeCount, shape_union *Shapes)
	{
	f32 Accum0 = 0.0f;
	f32 Accum1 = 0.0f;
	f32 Accum2 = 0.0f;
	f32 Accum3 = 0.0f;

	ShapeCount /= 4;
	while(ShapeCount--)
	{
	Accum0 += GetAreaUnion(Shapes[0]);
	Accum1 += GetAreaUnion(Shapes[1]);
	Accum2 += GetAreaUnion(Shapes[2]);
	Accum3 += GetAreaUnion(Shapes[3]);

	Shapes += 4;
	}

	f32 Result = (Accum0 + Accum1 + Accum2 + Accum3);
	return Result;
	}

	class square_no_vt
	{
	public:
	square_no_vt(f32 SideInit) : Side(SideInit) {}
	f32 Area() {return Side*Side;}

	private:
	f32 Side;
	};

	class rectangle_no_vt
	{
	public:
	rectangle_no_vt(f32 WidthInit, f32 HeightInit) : Width(WidthInit), Height(HeightInit) {}
	f32 Area() {return Width*Height;}

	private:
	f32 Width, Height;
	};

	class triangle_no_vt
	{
	public:
	triangle_no_vt(f32 BaseInit, f32 HeightInit) : Base(BaseInit), Height(HeightInit) {}
	f32 Area() {return 0.5f*Base*Height;}

	private:
	f32 Base, Height;
	};

	class circle_no_vt
	{
	public:
	circle_no_vt(f32 RadiusInit) : Radius(RadiusInit) {}
	f32 Area() {return Pi32*Radius*Radius;}

	private:
	f32 Radius;
	};

	using shape_variant = std::variant<square_no_vt, rectangle_no_vt, triangle_no_vt, circle_no_vt>;

	class shape_base_no_vt_opt{
	public:
	shape_base_no_vt_opt(shape_type shape_index_, f32 side1_, f32 side2_) :
	shape_index(shape_index_), side1(side1_), side2(side2_)
	{}
	f32 Area() { return c_table[shape_index] * side1 * side2; }
	private:

	static constexpr f32 c_table[4] = {1.0f, 1.0f, 0.5f, Pi32};

	shape_type shape_index;
	f32 side1;
	f32 side2;
	};

	class square_no_vt_opt : public shape_base_no_vt_opt
	{
	public:
	square_no_vt_opt(f32 SideInit) : shape_base_no_vt_opt(Shape_Square, SideInit, SideInit) {}

	};

	class rectangle_no_vt_opt: public shape_base_no_vt_opt
	{
	public:
	rectangle_no_vt_opt(f32 WidthInit, f32 HeightInit) : shape_base_no_vt_opt(Shape_Rectangle, WidthInit, HeightInit) {}
	};

	class triangle_no_vt_opt: public shape_base_no_vt_opt
	{
	public:
	triangle_no_vt_opt(f32 BaseInit, f32 HeightInit) : shape_base_no_vt_opt(Shape_Triangle, BaseInit, HeightInit) {}
	};

	class circle_no_vt_opt: public shape_base_no_vt_opt
	{
	public:
	circle_no_vt_opt(f32 RadiusInit) : shape_base_no_vt_opt(Shape_Circle, RadiusInit, RadiusInit) {}
	};

	using shape_variant_opt = std::variant<square_no_vt_opt, rectangle_no_vt_opt, triangle_no_vt_opt, circle_no_vt_opt>;


	template<typename ShapeVariant>
	f32 TotalAreaVariant(u32 ShapeCount, ShapeVariant *Shapes) noexcept
	{
	f32 Accum = 0.0f;
	for(u32 ShapeIndex = 0; ShapeIndex < ShapeCount; ++ShapeIndex)
	{
	Accum += std::visit([](auto shape){return shape.Area();}, Shapes[ShapeIndex]);
	}

	return Accum;
	}

	template<typename ShapeVariant>
	f32 TotalAreaVariant4(u32 ShapeCount, ShapeVariant *Shapes) noexcept
	{
	f32 Accum0 = 0.0f;
	f32 Accum1 = 0.0f;
	f32 Accum2 = 0.0f;
	f32 Accum3 = 0.0f;

	u32 Count = ShapeCount/4;
	while(Count--)
	{
	Accum0 += std::visit([](auto shape){return shape.Area();}, Shapes[0]);
	Accum1 += std::visit([](auto shape){return shape.Area();}, Shapes[1]);
	Accum2 += std::visit([](auto shape){return shape.Area();}, Shapes[2]);
	Accum3 += std::visit([](auto shape){return shape.Area();}, Shapes[3]);

	Shapes += 4;
	}

	f32 Result = (Accum0 + Accum1 + Accum2 + Accum3);
	return Result;
	}


	class shape_base_opt: public shape_base{
	public:
	shape_base_opt(shape_type shape_index_, f32 side1_, f32 side2_) :
	shape_index(shape_index_), side1(side1_), side2(side2_)
	{}
	virtual f32 Area() { return c_table[shape_index] * side1 * side2; }
	private:

	static constexpr f32 c_table[4] = {1.0f, 1.0f, 0.5f, Pi32};

	shape_type shape_index;
	f32 side1;
	f32 side2;
	};

	class square_opt : public shape_base_opt
	{
	public:
	square_opt(f32 SideInit) : shape_base_opt(Shape_Square, SideInit, SideInit) {}

	};

	class rectangle_opt: public shape_base_opt
	{
	public:
	rectangle_opt(f32 WidthInit, f32 HeightInit) : shape_base_opt(Shape_Rectangle, WidthInit, HeightInit) {}
	};

	class triangle_opt: public shape_base_opt
	{
	public:
	triangle_opt(f32 BaseInit, f32 HeightInit) : shape_base_opt(Shape_Triangle, BaseInit, HeightInit) {}
	};

	class circle_opt: public shape_base_opt
	{
	public:
	circle_opt(f32 RadiusInit) : shape_base_opt(Shape_Circle, RadiusInit, RadiusInit) {}
	};

	shape_base* generate_shape_base(){
	static RandomNumberGen<size_t> next_shape{0, 3};
	static RandomNumberGen<f32> next_argument{};

	switch(next_shape()){
	case 0: return new square{next_argument()};
	case 1: return new rectangle{next_argument(), next_argument()};
	case 2: return new triangle{next_argument(), next_argument()};
	case 3: return new circle{next_argument()};
	}

	abort(); // silencing no return warning
	}

	shape_union generate_shape_union(){
	static RandomNumberGen<size_t> next_shape{0, 3};
	static RandomNumberGen<f32> next_argument{};

	switch(next_shape()){
	case 0: {
	f32 edge = next_argument();
	return shape_union{Shape_Square, edge, edge};
	};
	case 1: return shape_union{Shape_Rectangle, next_argument(), next_argument()};
	case 2: return shape_union{Shape_Triangle, next_argument(), next_argument()};
	case 3: {
	f32 edge = next_argument();
	return shape_union{Shape_Circle, edge, edge};
	};
	}

	abort(); // silencing no return warning
	}

	shape_variant generate_shape_variant(){
	static RandomNumberGen<size_t> next_shape{0, 3};
	static RandomNumberGen<f32> next_argument{};

	switch(next_shape()){
	case 0: return square_no_vt{next_argument()};
	case 1: return rectangle_no_vt{next_argument(), next_argument()};
	case 2: return triangle_no_vt{next_argument(), next_argument()};
	case 3: return circle_no_vt{next_argument()};
	}

	abort(); // silencing no return warning
	}

	shape_variant_opt generate_shape_variant_opt(){
	static RandomNumberGen<size_t> next_shape{0, 3};
	static RandomNumberGen<f32> next_argument{};

	switch(next_shape()){
	case 0: return square_no_vt_opt{next_argument()};
	case 1: return rectangle_no_vt_opt{next_argument(), next_argument()};
	case 2: return triangle_no_vt_opt{next_argument(), next_argument()};
	case 3: return circle_no_vt_opt{next_argument()};
	}

	abort(); // silencing no return warning
	}

	shape_base* generate_shape_base_opt(){
	static RandomNumberGen<size_t> next_shape{0, 3};
	static RandomNumberGen<f32> next_argument{};

	switch(next_shape()){
	case 0: return new square_opt{next_argument()};
	case 1: return new rectangle_opt{next_argument(), next_argument()};
	case 2: return new triangle_opt{next_argument(), next_argument()};
	case 3: return new circle_opt{next_argument()};
	}

	abort(); // silencing no return warning
	}

	template<typename ShapeGenerator>
	auto generate_shapes(ShapeGenerator generate_shape) -> std::vector<decltype(generate_shape())>{
	std::vector<decltype(generate_shape())> result;
	result.reserve(num_of_shapes);

	for(size_t i=0; i < num_of_shapes; ++i){
	result.push_back(generate_shape());
	}

	return result;
	}


	template<typename ShapeGenerator, typename AreaCount, typename AreaCountLoopUnrolled>
	void make_bench(ShapeGenerator gen_shape, AreaCount TotalArea, AreaCountLoopUnrolled TotalArea4, char const * const benchname){
	TimeMeasure tm_common;
	TimeMeasure tm_loop_unrolled;

	f32 area_common = 0;
	f32 area_unrolled = 0;
	{
	auto shapes = generate_shapes(gen_shape);

	tm_common.begin();
	for(size_t i = 0; i < num_of_outer_loops; ++i){
	area_common += TotalArea(shapes.size(), shapes.data());
	}
	tm_common.end();

	if constexpr (std::is_pointer_v<std::decay_t<decltype(shapes[0])>>){
	for(const auto shape: shapes ){
	delete shape;
	}
	}
	}

	{
	auto shapes = generate_shapes(gen_shape);
	tm_loop_unrolled.begin();
	for(size_t i = 0; i < num_of_outer_loops; ++i){
	area_unrolled += TotalArea4(shapes.size(), shapes.data());
	}
	tm_loop_unrolled.end();

	if constexpr (std::is_pointer_v<std::decay_t<decltype(shapes[0])>>){
	for(const auto shape: shapes ){
	delete shape;
	}
	}
	}
	std::cout << "Bench: " << benchname << '\n';
	std::cout << "\tRegular  loop, time: " << tm_common.ms() << " ms area: " << area_common << '\n';
	std::cout << "\tUnrolled loop, time: " << tm_loop_unrolled.ms() << " ms area: " << area_unrolled << "\n\n";
	}

	int main(int, char**) {
	make_bench(generate_shape_base, TotalAreaVTBL, TotalAreaVTBL4, "virtual functions");
	make_bench(generate_shape_union, TotalAreaSwitch, TotalAreaSwitch4, "switch case");
	make_bench(generate_shape_union, TotalAreaUnion, TotalAreaUnion4, "lookup table");
	make_bench(generate_shape_variant, TotalAreaVariant<shape_variant>, TotalAreaVariant4<shape_variant>, "std::variant");
	make_bench(generate_shape_variant_opt, TotalAreaVariant<shape_variant_opt>, TotalAreaVariant4<shape_variant_opt>, "std::variant optimized");
	make_bench(generate_shape_base_opt, TotalAreaVTBL, TotalAreaVTBL4, "virtual functions optimized");
	}