phargogh · December 12, 2022 22:47
diff --git a/README.md b/README.md
diff --git a/test-nodata-timings.py b/test-nodata-timings.py
 import contextlib
 import time
 import timeit

 import numpy
 import numpy.random


 # Taken from https://github.com/natcap/invest/blob/8e3bc6d3a0011c21f6de275aa76476ed25f5e95a/src/natcap/invest/utils.py
 def array_equals_nodata(array, nodata):
     """Return a boolean mask of the elements of ``array`` that are nodata.

     Both ``numpy.nan`` and an unset (``None``) nodata value are supported.

     Args:
         array (numpy array): the array to test against ``nodata``.
         nodata (number): the nodata value to look for, possibly
             ``numpy.nan``, or ``None`` when no nodata value is defined.

     Returns:
         A boolean numpy array that is ``True`` wherever ``array`` equals
         ``nodata`` and ``False`` everywhere else.
     """
     if nodata is not None:
         # An integer array can never hold NaN, so a plain ``==`` is correct
         # even for a NaN nodata and avoids the cost of numpy.isclose().
         if numpy.issubdtype(array.dtype, numpy.integer):
             return array == nodata
         return numpy.isclose(array, nodata, equal_nan=True)

     # With no nodata value defined, no element can match it.
     return numpy.zeros(array.shape, dtype=bool)


 def array_equals_nodata_allfalse(array, nodata):
     """Nodata check that skips building an all-False mask for unset nodata.

     Args:
         array (numpy array): the array to test against ``nodata``.
         nodata (number): the nodata value to look for, possibly
             ``numpy.nan``, or ``None`` when no nodata value is defined.

     Returns:
         The scalar ``False`` when ``nodata`` is ``None``; otherwise a boolean
         numpy array that is ``True`` wherever ``array`` equals ``nodata``.
     """
     nodata_is_unset = nodata is None
     if nodata_is_unset:
         # Nothing can match an undefined nodata value; return a bare False
         # rather than allocating a mask of the array's shape.
         return False

     # An integer array can never hold NaN, so ``==`` is sufficient there and
     # cheaper than numpy.isclose().
     is_integer_array = numpy.issubdtype(array.dtype, numpy.integer)
     if is_integer_array:
         return array == nodata
     return numpy.isclose(array, nodata, equal_nan=True)


 @contextlib.contextmanager
 def my_timeit(message):
     """Context manager that prints how long its ``with`` body took to run.

     Args:
         message (str): label printed in front of the elapsed time.

     Yields:
         None. On normal exit of the ``with`` body, prints
         ``"<message>: <seconds>s"`` with the duration rounded to 4 decimals.
         Nothing is printed if the body raises.
     """
     # perf_counter() is monotonic and high resolution, so the measured
     # interval cannot be skewed by system clock adjustments the way a
     # time.time() difference can.
     start_time = time.perf_counter()
     yield
     elapsed = round(time.perf_counter() - start_time, 4)
     print(f"{message}: {elapsed}s")


 def main(n=10000, shape=(1000, 100)):
     """Time three ways of masking random integer arrays with unset nodata.

     Each timed section generates ``n`` random integer arrays, indexes each
     one with a mask, and accumulates the size of the selection so the
     indexing result is actually used.

     Args:
         n (int): number of arrays generated per timed section; also the
             exclusive upper bound of the random integers.
         shape (tuple): shape of each random array.

     Returns:
         None. One timing line per section is printed by ``my_timeit``.
     """
     # Array generation happens inside every timed loop, so "Baseline" is the
     # reference cost that the other two sections are compared against.
     with my_timeit("Baseline"):
         count = 0
         for _ in range(n):
             array = numpy.random.randint(0, n, size=shape)
             # Index directly with the scalar False. The previous
             # ``mask = array[False]`` indexed the array twice (once to build
             # the "mask", once to apply it), inflating the baseline.
             mask = False
             count += array[mask].size

     with my_timeit("Current InVEST"):
         count = 0
         for _ in range(n):
             array = numpy.random.randint(0, n, size=shape)
             mask = array_equals_nodata(array, None)
             count += array[mask].size

     with my_timeit("Simplified None case"):
         count = 0
         for _ in range(n):
             array = numpy.random.randint(0, n, size=shape)
             mask = array_equals_nodata_allfalse(array, None)
             count += array[mask].size


 def main_timeit(indexing_options=("False", "numpy.zeros(shape, dtype=bool)"),
                 array_size_factors=(10, 100, 1000, 10000, 100000),
                 number=1000000):
     """Use ``timeit`` to compare indexing expressions across array sizes.

     For every indexing option and size factor, times the statement
     ``numpy.empty(shape, dtype=numpy.float32)[<option>]`` where ``shape`` is
     the integer ``10 * factor`` (a 1-D array), and prints the total runtime.

     Args:
         indexing_options (iterable of str): source snippets placed inside
             the index brackets. They may refer to ``numpy`` and ``shape``.
         array_size_factors (iterable of int): multipliers of 10 giving the
             array lengths to test.
         number (int): executions per measurement, passed to
             ``timeit.timeit``. The default equals timeit's own default, so
             calling with no arguments behaves as before.

     Returns:
         None. Results are printed.
     """
     for indexing_option in indexing_options:
         print(f"Indexing option: {indexing_option}")
         for array_size_factor in array_size_factors:
             runtime = timeit.timeit(
                 f"numpy.empty(shape, dtype=numpy.float32)[{indexing_option}]",
                 setup=f"import numpy; shape=(10*{array_size_factor})",
                 number=number)
             print(f"array size: 10*{array_size_factor}: {runtime}s")


 if __name__ == "__main__":
     # Run the coarse loop benchmark first, then the timeit comparison.
     for benchmark in (main, main_timeit):
         benchmark()
	import contextlib
	import time
	import timeit

	import numpy
	import numpy.random


	# Taken from https://github.com/natcap/invest/blob/8e3bc6d3a0011c21f6de275aa76476ed25f5e95a/src/natcap/invest/utils.py
	def array_equals_nodata(array, nodata):
	    """Check for the presence of ``nodata`` values in ``array``.

	    The comparison supports ``numpy.nan`` and unset (``None``) nodata
	    values.

	    Args:
	        array (numpy array): the array to mask for nodata values.
	        nodata (number): the nodata value to check for. Supports
	            ``numpy.nan``; ``None`` means no nodata value is defined.

	    Returns:
	        A boolean numpy array that is ``True`` where ``array`` is equal to
	        ``nodata`` and ``False`` otherwise.
	    """
	    # If nodata is undefined, nothing matches nodata.
	    if nodata is None:
	        return numpy.zeros(array.shape, dtype=bool)

	    # Comparing an integer array against numpy.nan works correctly and is
	    # faster than using numpy.isclose().
	    if numpy.issubdtype(array.dtype, numpy.integer):
	        return array == nodata
	    return numpy.isclose(array, nodata, equal_nan=True)


	def array_equals_nodata_allfalse(array, nodata):
	    """Check for ``nodata`` values without allocating an all-False mask.

	    The comparison supports ``numpy.nan`` and unset (``None``) nodata
	    values.

	    Args:
	        array (numpy array): the array to mask for nodata values.
	        nodata (number): the nodata value to check for. Supports
	            ``numpy.nan``; ``None`` means no nodata value is defined.

	    Returns:
	        The scalar ``False`` when ``nodata`` is ``None``; otherwise a
	        boolean numpy array that is ``True`` where ``array`` is equal to
	        ``nodata`` and ``False`` elsewhere.
	    """
	    # If nodata is undefined, nothing matches nodata. A bare False is
	    # returned instead of building a mask of the array's shape.
	    if nodata is None:
	        return False

	    # Comparing an integer array against numpy.nan works correctly and is
	    # faster than using numpy.isclose().
	    if numpy.issubdtype(array.dtype, numpy.integer):
	        return array == nodata
	    return numpy.isclose(array, nodata, equal_nan=True)


	@contextlib.contextmanager
	def my_timeit(message):
	    """Context manager that prints how long its ``with`` body took to run.

	    Args:
	        message (str): label printed in front of the elapsed time.

	    Yields:
	        None. On normal exit of the ``with`` body, prints
	        ``"<message>: <seconds>s"`` with the duration rounded to 4
	        decimals. Nothing is printed if the body raises.
	    """
	    # perf_counter() is monotonic and high resolution, so the measured
	    # interval cannot be skewed by system clock adjustments the way a
	    # time.time() difference can.
	    start_time = time.perf_counter()
	    yield
	    elapsed = round(time.perf_counter() - start_time, 4)
	    print(f"{message}: {elapsed}s")


	def main(n=10000, shape=(1000, 100)):
	    """Time three ways of masking random integer arrays with unset nodata.

	    Each timed section generates ``n`` random integer arrays, indexes each
	    one with a mask, and accumulates the size of the selection so the
	    indexing result is actually used.

	    Args:
	        n (int): number of arrays generated per timed section; also the
	            exclusive upper bound of the random integers.
	        shape (tuple): shape of each random array.

	    Returns:
	        None. One timing line per section is printed by ``my_timeit``.
	    """
	    # Array generation happens inside every timed loop, so "Baseline" is
	    # the reference cost that the other two sections are compared against.
	    with my_timeit("Baseline"):
	        count = 0
	        for _ in range(n):
	            array = numpy.random.randint(0, n, size=shape)
	            # Index directly with the scalar False. The previous
	            # ``mask = array[False]`` indexed the array twice (once to
	            # build the "mask", once to apply it), inflating the baseline.
	            mask = False
	            count += array[mask].size

	    with my_timeit("Current InVEST"):
	        count = 0
	        for _ in range(n):
	            array = numpy.random.randint(0, n, size=shape)
	            mask = array_equals_nodata(array, None)
	            count += array[mask].size

	    with my_timeit("Simplified None case"):
	        count = 0
	        for _ in range(n):
	            array = numpy.random.randint(0, n, size=shape)
	            mask = array_equals_nodata_allfalse(array, None)
	            count += array[mask].size


	def main_timeit(indexing_options=("False", "numpy.zeros(shape, dtype=bool)"),
	                array_size_factors=(10, 100, 1000, 10000, 100000),
	                number=1000000):
	    """Use ``timeit`` to compare indexing expressions across array sizes.

	    For every indexing option and size factor, times the statement
	    ``numpy.empty(shape, dtype=numpy.float32)[<option>]`` where ``shape``
	    is the integer ``10 * factor`` (a 1-D array), and prints the total
	    runtime.

	    Args:
	        indexing_options (iterable of str): source snippets placed inside
	            the index brackets. They may refer to ``numpy`` and ``shape``.
	        array_size_factors (iterable of int): multipliers of 10 giving the
	            array lengths to test.
	        number (int): executions per measurement, passed to
	            ``timeit.timeit``. The default equals timeit's own default, so
	            calling with no arguments behaves as before.

	    Returns:
	        None. Results are printed.
	    """
	    for indexing_option in indexing_options:
	        print(f"Indexing option: {indexing_option}")
	        for array_size_factor in array_size_factors:
	            runtime = timeit.timeit(
	                f"numpy.empty(shape, dtype=numpy.float32)[{indexing_option}]",
	                setup=f"import numpy; shape=(10*{array_size_factor})",
	                number=number)
	            print(f"array size: 10*{array_size_factor}: {runtime}s")


	if __name__ == '__main__':
	    # Run the coarse loop benchmark first, then the timeit comparison.
	    main()
	    main_timeit()