Out of the box performance of python numerical integration is poor. Here we present three python versions of the trapezoidal rule.
import math
import time
import numpy as np
# pip3 install numba
from numba import float64, int32, int64, jit, njit, prange
@njit
def f(x):
"""function to integrate"""
return np.sin(x)
def exact(a, b):
"""exact integral value"""
return -np.cos(b) + np.cos(a)
def trap_naive(a, b, n):
"""naive trapezoidal rule implementation"""
h = (b - a) / n
s = 0.5 * (f(a) + f(b))
for i in range(1, n):
xi = a + i * h
s += f(xi)
return h * s
def trap_vec(a, b, n):
"""vectorized trapezoidal rule"""
xi = np.linspace(a, b, n + 1)
h = xi[1] - xi[0]
s = 0.5 * (f(a) + f(b)) + np.sum(f(xi[1:-1]))
return h * s
@njit(float64(float64, float64, int64), parallel=True)
def trap_numba(a, b, n):
"""trapezoidal rule via numba"""
h = (b - a) / n
s = 0.5 * (f(a) + f(b))
for i in prange(1, n):
xi = a + i * h
s += f(xi)
return h * s
def measure(func, a, b, n):
start = time.time()
result = func(a, b, n)
duration = 1000 * (time.time() - start)
error = abs(result - exact(a, b))
print(f"{func.__name__} =", result)
print("error =", error)
print(f"took = {duration:.4f} ms\n")
# integration interval
a = 0
b = 100
# number of subintervals
n = int(40e6)
# measure naive version (poorly coded python)
measure(trap_naive, a, b, n)
# measure vectorized version (pure python)
measure(trap_vec, a, b, n)
# measure numba version (non-python code)
measure(trap_numba, a, b, n)
module integration
use, intrinsic :: iso_fortran_env, only: real64
implicit none
private
public :: trap, f, exata
integer, public, parameter :: rp = real64
real(rp), public, parameter :: pi = &
3.1415926535897932384626433832795028841972_rp
real(rp), public, parameter :: e = &
2.7182818284590452353602874713526624977572_rp
contains
function trap(a, b, n) result(res)
real(rp), intent(in) :: a, b
integer, intent(in) :: n
real(rp) :: h, soma, xi, res
integer :: i
h = (b - a)/n
soma = 0.5_rp*(f(a) + f(b))
do i = 1, n - 1
xi = a + i*h
soma = soma + f(xi)
end do
res = h*soma
end function trap
function f(x) result(res)
real(rp), intent(in) :: x
real(rp) :: res
res = sin(x)
end function f
function exata(a, b) result(res)
real(rp), intent(in) :: a, b
real(rp) :: res
res = -cos(b) + cos(a)
end function exata
end module integration
program trapezoidal
use integration
implicit none
real(rp) :: a, b, res, error
integer :: n, i, start_time, end_time, rate
a = 0
b = 100
n = 40000000
call system_clock(count_rate=rate)
call system_clock(count=start_time)
res = trap(a, b, n)
call system_clock(count=end_time)
error = abs(res - exata(a, b))
write (*, "(a,e0.14)") "result = ", res
write (*, "(a,e0.14)") "error = ", error
write (*, "(a,f8.4,a)") "took = ", real(end_time - start_time, kind=rp)/real(rate, kind=rp)*1000, " ms"
end program trapezoidal
result = 0.13768112771239
error = 0.69028116556069E-13
took = 199.0000 ms
command line: gfortran -O3 ./integration.f90 -o integration && ./integration
version: GNU Fortran (GCC) 12.2.1 20221121 (Red Hat 12.2.1-4)
result = .13768112771243+00
error = .11673995103934-12
took = 17.2000 ms
command line: ifort -O3 -parallel ./integration.f90 -o integration && ./integration
version: ifort (IFORT) 2021.8.0 20221119
machine: intel i5 12600
trap_naive = 0.13768112771238514
error = 6.902811655606911e-14
took = 5349.7036 ms
trap_vec = 0.137681127712245
error = 7.110978472724128e-14
took = 365.8445 ms
trap_numba = 0.13768112771236396
error = 4.785061236134425e-14
took = 34.9724 ms
command line: python3 ./integration_numba.py
version: Python 3.10.8 (miniconda)
machine: intel i5 12600
➠ Python numba is 5x faster than gfortran.
➠ Intel fortran (in parallel mode) is more than 2x faster than Python numba.
➠ Python numba with CUDA target (with GTX 1080 Ti) is 8x faster than Intel fortran (in parallel mode).