dneuman · May 30, 2021 19:26
diff --git a/lowess.py b/lowess.py
 import numpy as np
 import pandas as pd

 def Lowess(data, f=2./3., pts=None, itn=3, order=1):
    """Fits a nonparametric regression curve to a scatterplot.

    Parameters
    ----------
    data : pandas.Series
        Data points in the scatterplot. The
        function returns the estimated (smooth) values of y.

    **Optionals**

    f : float
        The fraction of the data set to use for smoothing. A
        larger value for f will result in a smoother curve.
    pts : int
        The explicit number of data points to be used for
        smoothing instead of f.
    itn : int
        The number of robustifying iterations. The function will run
        faster with a smaller number of iterations.
    order : int
        The order of the polynomial used for fitting. Defaults to 1
        (straight line). Values < 1 are made 1. Larger values should be
        chosen based on shape of data (# of peaks and valleys + 1)

    Returns
    -------
    pandas.Series containing the smoothed data.
    """
    # Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
    #            original
    #          Dan Neuman <https://github.com/dneuman>
    #            converted to Pandas series and extended to polynomials
    # License: BSD (3-clause)

    x = np.array(data.index, dtype=float)
    # condition x-values to be between 0 and 1 to reduce errors in linalg
    x = x - x.min()
    x = x / x.max()
    y = data.values
    n = len(data)
    if pts is None:
        f = np.min([f, 1.0])
        r = int(np.ceil(f * n))
    else:  # allow use of number of points to determine smoothing
        r = int(np.min([pts, n]))
    r = min([r, n-1])
    order = max([1, order])
    # Create matrix of 1, x, x**2, x**3, etc, by row
    xm = np.array([x**j for j in range(order+1)])
    # Create weight matrix, one column per data point
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    # Set up output
    yEst = np.zeros(n)
    delta = np.ones(n)  # Additional weights for iterations
    for iteration in range(itn):
        for i in range(n):
            weights = delta * w[:, i]
            xw = np.array([weights * x**j for j in range(order+1)])
            b = xw.dot(y)
            a = xw.dot(xm.T)
            beta = np.linalg.solve(a, b)
            yEst[i] = sum([beta[j] * x[i]**j for j in range(order+1)])
        # Set up weights to reduce effect of outlier points on next iteration
        residuals = y - yEst
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return pd.Series(yEst, index=data.index, name='Trend')
	import numpy as np
	import pandas as pd

	def Lowess(data, f=2./3., pts=None, itn=3, order=1):
	"""Fits a nonparametric regression curve to a scatterplot.

	Parameters
	----------
	data : pandas.Series
	Data points in the scatterplot. The
	function returns the estimated (smooth) values of y.

	Optionals

	f : float
	The fraction of the data set to use for smoothing. A
	larger value for f will result in a smoother curve.
	pts : int
	The explicit number of data points to be used for
	smoothing instead of f.
	itn : int
	The number of robustifying iterations. The function will run
	faster with a smaller number of iterations.
	order : int
	The order of the polynomial used for fitting. Defaults to 1
	(straight line). Values < 1 are made 1. Larger values should be
	chosen based on shape of data (# of peaks and valleys + 1)

	Returns
	-------
	pandas.Series containing the smoothed data.
	"""
	# Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
	# original
	# Dan Neuman <https://github.com/dneuman>
	# converted to Pandas series and extended to polynomials
	# License: BSD (3-clause)

	x = np.array(data.index, dtype=float)
	# condition x-values to be between 0 and 1 to reduce errors in linalg
	x = x - x.min()
	x = x / x.max()
	y = data.values
	n = len(data)
	if pts is None:
	f = np.min([f, 1.0])
	r = int(np.ceil(f * n))
	else: # allow use of number of points to determine smoothing
	r = int(np.min([pts, n]))
	r = min([r, n-1])
	order = max([1, order])
	# Create matrix of 1, x, x2, x3, etc, by row
	xm = np.array([x**j for j in range(order+1)])
	# Create weight matrix, one column per data point
	h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
	w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
	w = (1 - w 3) 3
	# Set up output
	yEst = np.zeros(n)
	delta = np.ones(n) # Additional weights for iterations
	for iteration in range(itn):
	for i in range(n):
	weights = delta * w[:, i]
	xw = np.array([weights * x**j for j in range(order+1)])
	b = xw.dot(y)
	a = xw.dot(xm.T)
	beta = np.linalg.solve(a, b)
	yEst[i] = sum([beta[j] * x[i]**j for j in range(order+1)])
	# Set up weights to reduce effect of outlier points on next iteration
	residuals = y - yEst
	s = np.median(np.abs(residuals))
	delta = np.clip(residuals / (6.0 * s), -1, 1)
	delta = (1 - delta 2) 2
	return pd.Series(yEst, index=data.index, name='Trend')