Flix01 · August 19, 2024 11:37
diff --git a/mult_without_mult.c b/mult_without_mult.c
 /*
 ===============================================================================
 Public Domain (www.unlicense.org)
 ===============================================================================
 This is free and unencumbered software released into the public domain.
 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
 software, either in source code form or as a compiled binary, for any purpose,
 commercial or non-commercial, and by any means.
 In jurisdictions that recognize copyright laws, the author or authors of this
 software dedicate any and all copyright interest in the software to the public
 domain. We make this dedication for the benefit of the public at large and to
 the detriment of our heirs and successors. We intend this dedication to be an
 overt act of relinquishment in perpetuity of all present and future rights to
 this software under copyright law.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 For more information, please refer to <http://unlicense.org/>
 */

 /* WHAT'S THIS?

  It's just a test I've made after having seen this (old) video:
  https://www.youtube.com/watch?v=CsMrHzp850M by Inigo Quilez.
  It's a very basic implementation of unsigned multiplication
  using only addition/subtraction hardware (*)
  Please see also: "Quarter square multiplication" here: 
  https://en.wikipedia.org/wiki/Multiplication_algorithm

  (*) 8-bit multiplications require nothing else, but 16-bit and 32-bit multiplications require some 
      (minimal) bit shift and masking operations to work (but maybe code can be rewritter without them).

  Most people won't care about this. Only people working on old/custom hardware will 
  find it interesting. Also people that are thinking about programming a manual multiplicator 
  (i.e. displaying all the passages) might take inspiration from the code inside ui_multiply(...).
  Otherwise, just forget it.

  It's useless to say that multiplications with only addition/subtraction hardware are MUCH slower!

  COMPILATION:
  gcc -o mult_without_mult mult_without_mult.c

  USAGE:

  -> Call: void init_table(void).
  -> And then you can use:
      unsigned short uc_multiply(unsigned char a, unsigned char b);
      unsigned int us_multiply(unsigned short a, unsigned short b);
      unsigned long long ui_multiply(unsigned int a, unsigned int b);
  Note that by returning a bigger (unsigned) type, we can avoid overflow checks.
 */

 #include <stdio.h>
 #include <assert.h>


 static unsigned short table[511] = { 0 };
 void init_table(void) {
  if (table[510] == 0) {
    /*for (unsigned short i = 0; i < 511; i ++) table[i]=(int)((i*i)/4);  // Multiplications not allowed! */
    unsigned short carry = 0, prev = 0;
    for (unsigned short i = 0; i < 511; i += 2) {
      prev = prev + carry; table[i] = prev;
      prev = prev + carry; table[i + 1] = prev;
      ++carry;
    }
    assert(sizeof(unsigned char) == 1);
    assert(sizeof(unsigned short) == 2);
    assert(sizeof(unsigned int) == 4);      /* only used in us_multiply(...) and ui_multiply(...) */
    assert(sizeof(unsigned long long) == 8);     /* only used in ui_multiply(...) */
  }
 }
 unsigned short uc_multiply(unsigned char a, unsigned char b) {
  assert(table[510] != 0); /* You must call: init_table(); */
  return table[(unsigned short)a + (unsigned short)b] - table[a > b ? (a - b) : (b - a)];
 }
 unsigned short uc_multiply2(unsigned short a, unsigned short b) {
  /* Version without casting, but args must still be smaller than 256 */
  assert(table[510] != 0);  /* You must call: init_table(); */
  assert(a < 256 && b < 256); /* Please use: us_multiply(...) */
  return table[a + b] - table[a > b ? (a - b) : (b - a)];
 }

 /* Stuff derived from uc_multiply(...) above */
 unsigned us_multiply(unsigned short a, unsigned short b) {
  /*
      462 *
       35 =
      -----
      2310
     1386-
     ------
     16170

    The idea is to do the same using bytes (e.g. base 256 with 8-bit chunks as digits)
  */

  /* if (a<b) {const unsigned short tmp=a;a=b;b=tmp;} // swapping values so that a is bigger is a very small optional optimization */

 # ifdef BIG_ENDIAN  
  const unsigned char a_bytes[2] = { (a >> 8),a & 0xFF };
  const unsigned char b_bytes[2] = { (b >> 8),b & 0xFF };
 # else
  const unsigned char a_bytes[2] = { a,a >> 8 };
  const unsigned char b_bytes[2] = { b,b >> 8 };
  /* *((unsigned short*) a_bytes) = a;*((unsigned short*) b_bytes) = b; // should be the same, and faster, but less robust */
 # endif  

  unsigned char a_num_bytes = 2, b_num_bytes = 2;
  // Optional optimization: minimize a_num_bytes and b_num_bytes:
  unsigned char a_finished = 0, b_finished = 0;
  for (signed char i = 1;i >= 0;i--) {
    if (!a_finished) {
      if (a_bytes[i] == 0) --a_num_bytes;
      else { a_finished = 1;if (b_finished) break; }
    }
    if (!b_finished) {
      if (b_bytes[i] == 0) --b_num_bytes;
      else { b_finished = 1;if (a_finished) break; }
    }
  }
  // End Optional Optimization

  // real calculation
  unsigned  result = 0;
  for (unsigned char bi = 0;bi < b_num_bytes;bi++) {
    const unsigned char b_byte = b_bytes[bi];
    unsigned char carry = 0;
    for (unsigned char ai = 0;ai < a_num_bytes;ai++) {
      const unsigned char a_byte = a_bytes[ai];
      unsigned short part = uc_multiply(b_byte, a_byte) + carry;carry = 0;
      if (ai != a_num_bytes - 1 && (part & 0xFF00)) { carry = part >> 8;part &= 0xFF; }    /* (part&0xFF00) == (part>255) */
      result += ((unsigned)part << ((bi + ai) * 8U));
    }
  }

  return result;
 }
 unsigned long long ui_multiply(unsigned a, unsigned b) {
  /*
      462 *
       35 =
      -----
      2310
     1386-
     ------
     16170

    The idea is to do the same using bytes (e.g. base 256 with 8-bit chunks as digits)
    Note that we could have probably used ushorts (e.g. base 512 with 16-bit chunks as digits),
    but I'don't think it's a good idea.
  */

  /* if (a<b) {const unsigned tmp=a;a=b;b=tmp;} // swapping values so that a is bigger is a very small optional optimization */

 # ifdef BIG_ENDIAN  
  const unsigned char a_bytes[4] = { a >> 24,(a >> 16) & 0xFF,(a >> 8) & 0xFF,a };
  const unsigned char b_bytes[4] = { b >> 24,(b >> 16) & 0xFF,(b >> 8) & 0xFF,b };
 # else
  const unsigned char a_bytes[4] = { a,(a >> 8) & 0xFF,(a >> 16) & 0xFF,a >> 24 };
  const unsigned char b_bytes[4] = { b,(b >> 8) & 0xFF,(b >> 16) & 0xFF,b >> 24 };
  /* *((unsigned*) a_bytes) = a;*((unsigned*) b_bytes) = b; // should be the same, and faster, but less robust */
 # endif  

  unsigned char a_num_bytes = 4, b_num_bytes = 4;
  // Optional optimization: minimize a_num_bytes and b_num_bytes:
  unsigned char a_finished = 0, b_finished = 0;
  for (signed char i = 3;i >= 0;i--) {
    if (!a_finished) {
      if (a_bytes[i] == 0) --a_num_bytes;
      else { a_finished = 1;if (b_finished) break; }
    }
    if (!b_finished) {
      if (b_bytes[i] == 0) --b_num_bytes;
      else { b_finished = 1;if (a_finished) break; }
    }
  }
  // End Optional Optimization

  // real calculation
  unsigned long long result = 0;
  for (unsigned char bi = 0;bi < b_num_bytes;bi++) {
    const unsigned char b_byte = b_bytes[bi];
    unsigned char carry = 0;
    for (unsigned char ai = 0;ai < a_num_bytes;ai++) {
      const unsigned char a_byte = a_bytes[ai];
      unsigned short part = uc_multiply(b_byte, a_byte) + carry;carry = 0;
      if (ai != a_num_bytes - 1 && (part & 0xFF00)) { carry = part >> 8;part &= 0xFF; }        /* (part&0xFF00) == (part>255) */
      result += ((unsigned long long)part << ((bi + ai) * 8UL));
    }
  }

  return result;
 }

 void TestAllPossibleMultiplications_TakesAgesToRun_NeverCallThisPlease(void) {
  /* Test code for: unsigned short uc_multiply(unsigned char a,unsigned char b) */
  printf("Full test for function uc_multiply:\n");
  for (unsigned short a = 0;a < 256;a++) {
    for (unsigned short b = 0;b < 256;b++) {
      assert(uc_multiply((unsigned char)a, (unsigned char)b) == a * b);
    }
  }

  /* Test code for: unsigned us_multiply(unsigned short a,unsigned short b)  */
  printf("Full test for function us_multiply:\n");
  for (unsigned a = 0;a < 65536;a++) {
    if (a % 256 == 255) printf("%u ", a / 256);
    for (unsigned b = 0;b < 65536;b++) {
      assert(us_multiply((unsigned short)a, (unsigned short)b) == a * b);
    }
  }
  printf("Passed!\n");

  /* Test code for: unsigned us_multiply(unsigned short a,unsigned short b)  */
  printf("Full test for function ui_multiply:\n");
  for (unsigned long long a = 0;a < 4294967296;a++) {
    if (a % 256 == 255) printf("%llu ", a / 256);
    for (unsigned long long b = 0;b < 4294967296;b++) {
      assert(ui_multiply((unsigned)a, (unsigned)b) == a * b);
    }
  }
  printf("Passed!\n");
 }

 int main(int argc, char* argv[])
 {
  init_table(); /* Mandatory to init stuff */

  /* Important: asserts must be enabled for these to work (sometimes release builds disable them): */
  assert(uc_multiply(126, 75) == 126 * 75);  /* uc_multiply returns an unsigned short */
  assert(us_multiply(64257, 46211) == 64257U * 46211U);   /* us_multiply returns an unsigned int */
  assert(ui_multiply(45682341, 89043216) == 45682341ULL * 89043216ULL);  /* ui_multiply returns an unsigned long long */

  /*TestAllPossibleMultiplications_TakesAgesToRun_NeverCallThisPlease();*/

  printf("If asserts are enabled, it could mean: test passed!\n");

  return 0;
 }
	/*
	===============================================================================
	Public Domain (www.unlicense.org)
	===============================================================================
	This is free and unencumbered software released into the public domain.
	Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
	software, either in source code form or as a compiled binary, for any purpose,
	commercial or non-commercial, and by any means.
	In jurisdictions that recognize copyright laws, the author or authors of this
	software dedicate any and all copyright interest in the software to the public
	domain. We make this dedication for the benefit of the public at large and to
	the detriment of our heirs and successors. We intend this dedication to be an
	overt act of relinquishment in perpetuity of all present and future rights to
	this software under copyright law.
	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
	ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
	WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	For more information, please refer to <http://unlicense.org/>
	*/

	/* WHAT'S THIS?

	It's just a test I've made after having seen this (old) video:
	https://www.youtube.com/watch?v=CsMrHzp850M by Inigo Quilez.
	It's a very basic implementation of unsigned multiplication
	using only addition/subtraction hardware (*)
	Please see also: "Quarter square multiplication" here:
	https://en.wikipedia.org/wiki/Multiplication_algorithm

	(*) 8-bit multiplications require nothing else, but 16-bit and 32-bit multiplications require some
	(minimal) bit shift and masking operations to work (but maybe code can be rewritter without them).

	Most people won't care about this. Only people working on old/custom hardware will
	find it interesting. Also people that are thinking about programming a manual multiplicator
	(i.e. displaying all the passages) might take inspiration from the code inside ui_multiply(...).
	Otherwise, just forget it.

	It's useless to say that multiplications with only addition/subtraction hardware are MUCH slower!

	COMPILATION:
	gcc -o mult_without_mult mult_without_mult.c

	USAGE:

	-> Call: void init_table(void).
	-> And then you can use:
	unsigned short uc_multiply(unsigned char a, unsigned char b);
	unsigned int us_multiply(unsigned short a, unsigned short b);
	unsigned long long ui_multiply(unsigned int a, unsigned int b);
	Note that by returning a bigger (unsigned) type, we can avoid overflow checks.
	*/

	#include <stdio.h>
	#include <assert.h>


	static unsigned short table[511] = { 0 };
	void init_table(void) {
	if (table[510] == 0) {
	/for (unsigned short i = 0; i < 511; i ++) table[i]=(int)((ii)/4); // Multiplications not allowed! */
	unsigned short carry = 0, prev = 0;
	for (unsigned short i = 0; i < 511; i += 2) {
	prev = prev + carry; table[i] = prev;
	prev = prev + carry; table[i + 1] = prev;
	++carry;
	}
	assert(sizeof(unsigned char) == 1);
	assert(sizeof(unsigned short) == 2);
	assert(sizeof(unsigned int) == 4); /* only used in us_multiply(...) and ui_multiply(...) */
	assert(sizeof(unsigned long long) == 8); /* only used in ui_multiply(...) */
	}
	}
	unsigned short uc_multiply(unsigned char a, unsigned char b) {
	assert(table[510] != 0); /* You must call: init_table(); */
	return table[(unsigned short)a + (unsigned short)b] - table[a > b ? (a - b) : (b - a)];
	}
	unsigned short uc_multiply2(unsigned short a, unsigned short b) {
	/* Version without casting, but args must still be smaller than 256 */
	assert(table[510] != 0); /* You must call: init_table(); */
	assert(a < 256 && b < 256); /* Please use: us_multiply(...) */
	return table[a + b] - table[a > b ? (a - b) : (b - a)];
	}

	/* Stuff derived from uc_multiply(...) above */
	unsigned us_multiply(unsigned short a, unsigned short b) {
	/*
	462 *
	35 =
	-----
	2310
	1386-
	------
	16170

	The idea is to do the same using bytes (e.g. base 256 with 8-bit chunks as digits)
	*/

	/* if (a<b) {const unsigned short tmp=a;a=b;b=tmp;} // swapping values so that a is bigger is a very small optional optimization */

	# ifdef BIG_ENDIAN
	const unsigned char a_bytes[2] = { (a >> 8),a & 0xFF };
	const unsigned char b_bytes[2] = { (b >> 8),b & 0xFF };
	# else
	const unsigned char a_bytes[2] = { a,a >> 8 };
	const unsigned char b_bytes[2] = { b,b >> 8 };
	/* ((unsigned short) a_bytes) = a;((unsigned short) b_bytes) = b; // should be the same, and faster, but less robust */
	# endif

	unsigned char a_num_bytes = 2, b_num_bytes = 2;
	// Optional optimization: minimize a_num_bytes and b_num_bytes:
	unsigned char a_finished = 0, b_finished = 0;
	for (signed char i = 1;i >= 0;i--) {
	if (!a_finished) {
	if (a_bytes[i] == 0) --a_num_bytes;
	else { a_finished = 1;if (b_finished) break; }
	}
	if (!b_finished) {
	if (b_bytes[i] == 0) --b_num_bytes;
	else { b_finished = 1;if (a_finished) break; }
	}
	}
	// End Optional Optimization

	// real calculation
	unsigned result = 0;
	for (unsigned char bi = 0;bi < b_num_bytes;bi++) {
	const unsigned char b_byte = b_bytes[bi];
	unsigned char carry = 0;
	for (unsigned char ai = 0;ai < a_num_bytes;ai++) {
	const unsigned char a_byte = a_bytes[ai];
	unsigned short part = uc_multiply(b_byte, a_byte) + carry;carry = 0;
	if (ai != a_num_bytes - 1 && (part & 0xFF00)) { carry = part >> 8;part &= 0xFF; } /* (part&0xFF00) == (part>255) */
	result += ((unsigned)part << ((bi + ai) * 8U));
	}
	}

	return result;
	}
	unsigned long long ui_multiply(unsigned a, unsigned b) {
	/*
	462 *
	35 =
	-----
	2310
	1386-
	------
	16170

	The idea is to do the same using bytes (e.g. base 256 with 8-bit chunks as digits)
	Note that we could have probably used ushorts (e.g. base 512 with 16-bit chunks as digits),
	but I'don't think it's a good idea.
	*/

	/* if (a<b) {const unsigned tmp=a;a=b;b=tmp;} // swapping values so that a is bigger is a very small optional optimization */

	# ifdef BIG_ENDIAN
	const unsigned char a_bytes[4] = { a >> 24,(a >> 16) & 0xFF,(a >> 8) & 0xFF,a };
	const unsigned char b_bytes[4] = { b >> 24,(b >> 16) & 0xFF,(b >> 8) & 0xFF,b };
	# else
	const unsigned char a_bytes[4] = { a,(a >> 8) & 0xFF,(a >> 16) & 0xFF,a >> 24 };
	const unsigned char b_bytes[4] = { b,(b >> 8) & 0xFF,(b >> 16) & 0xFF,b >> 24 };
	/* ((unsigned) a_bytes) = a;((unsigned) b_bytes) = b; // should be the same, and faster, but less robust */
	# endif

	unsigned char a_num_bytes = 4, b_num_bytes = 4;
	// Optional optimization: minimize a_num_bytes and b_num_bytes:
	unsigned char a_finished = 0, b_finished = 0;
	for (signed char i = 3;i >= 0;i--) {
	if (!a_finished) {
	if (a_bytes[i] == 0) --a_num_bytes;
	else { a_finished = 1;if (b_finished) break; }
	}
	if (!b_finished) {
	if (b_bytes[i] == 0) --b_num_bytes;
	else { b_finished = 1;if (a_finished) break; }
	}
	}
	// End Optional Optimization

	// real calculation
	unsigned long long result = 0;
	for (unsigned char bi = 0;bi < b_num_bytes;bi++) {
	const unsigned char b_byte = b_bytes[bi];
	unsigned char carry = 0;
	for (unsigned char ai = 0;ai < a_num_bytes;ai++) {
	const unsigned char a_byte = a_bytes[ai];
	unsigned short part = uc_multiply(b_byte, a_byte) + carry;carry = 0;
	if (ai != a_num_bytes - 1 && (part & 0xFF00)) { carry = part >> 8;part &= 0xFF; } /* (part&0xFF00) == (part>255) */
	result += ((unsigned long long)part << ((bi + ai) * 8UL));
	}
	}

	return result;
	}

	void TestAllPossibleMultiplications_TakesAgesToRun_NeverCallThisPlease(void) {
	/* Test code for: unsigned short uc_multiply(unsigned char a,unsigned char b) */
	printf("Full test for function uc_multiply:\n");
	for (unsigned short a = 0;a < 256;a++) {
	for (unsigned short b = 0;b < 256;b++) {
	assert(uc_multiply((unsigned char)a, (unsigned char)b) == a * b);
	}
	}

	/* Test code for: unsigned us_multiply(unsigned short a,unsigned short b) */
	printf("Full test for function us_multiply:\n");
	for (unsigned a = 0;a < 65536;a++) {
	if (a % 256 == 255) printf("%u ", a / 256);
	for (unsigned b = 0;b < 65536;b++) {
	assert(us_multiply((unsigned short)a, (unsigned short)b) == a * b);
	}
	}
	printf("Passed!\n");

	/* Test code for: unsigned us_multiply(unsigned short a,unsigned short b) */
	printf("Full test for function ui_multiply:\n");
	for (unsigned long long a = 0;a < 4294967296;a++) {
	if (a % 256 == 255) printf("%llu ", a / 256);
	for (unsigned long long b = 0;b < 4294967296;b++) {
	assert(ui_multiply((unsigned)a, (unsigned)b) == a * b);
	}
	}
	printf("Passed!\n");
	}

	int main(int argc, char* argv[])
	{
	init_table(); /* Mandatory to init stuff */

	/* Important: asserts must be enabled for these to work (sometimes release builds disable them): */
	assert(uc_multiply(126, 75) == 126 * 75); /* uc_multiply returns an unsigned short */
	assert(us_multiply(64257, 46211) == 64257U * 46211U); /* us_multiply returns an unsigned int */
	assert(ui_multiply(45682341, 89043216) == 45682341ULL * 89043216ULL); /* ui_multiply returns an unsigned long long */

	/TestAllPossibleMultiplications_TakesAgesToRun_NeverCallThisPlease();/

	printf("If asserts are enabled, it could mean: test passed!\n");

	return 0;
	}