Last active
May 30, 2021 19:26
-
-
Save dneuman/a688299c3050ad5c561b1ae680f56b61 to your computer and use it in GitHub Desktop.
Lowess Smoothing Function for Python using Pandas and Numpy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def Lowess(data, f=2./3., pts=None, itn=3, order=1): | |
"""Fits a nonparametric regression curve to a scatterplot. | |
Parameters | |
---------- | |
data : pandas.Series | |
Data points in the scatterplot. The | |
function returns the estimated (smooth) values of y. | |
**Optionals** | |
f : float | |
The fraction of the data set to use for smoothing. A | |
larger value for f will result in a smoother curve. | |
pts : int | |
The explicit number of data points to be used for | |
smoothing instead of f. | |
itn : int | |
The number of robustifying iterations. The function will run | |
faster with a smaller number of iterations. | |
order : int | |
The order of the polynomial used for fitting. Defaults to 1 | |
(straight line). Values < 1 are made 1. Larger values should be | |
chosen based on shape of data (# of peaks and valleys + 1) | |
Returns | |
------- | |
pandas.Series containing the smoothed data. | |
""" | |
# Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr> | |
# original | |
# Dan Neuman <https://github.com/dneuman> | |
# converted to Pandas series and extended to polynomials | |
# License: BSD (3-clause) | |
x = np.array(data.index, dtype=float) | |
# condition x-values to be between 0 and 1 to reduce errors in linalg | |
x = x - x.min() | |
x = x / x.max() | |
y = data.values | |
n = len(data) | |
if pts is None: | |
f = np.min([f, 1.0]) | |
r = int(np.ceil(f * n)) | |
else: # allow use of number of points to determine smoothing | |
r = int(np.min([pts, n])) | |
r = min([r, n-1]) | |
order = max([1, order]) | |
# Create matrix of 1, x, x**2, x**3, etc, by row | |
xm = np.array([x**j for j in range(order+1)]) | |
# Create weight matrix, one column per data point | |
h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)] | |
w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0) | |
w = (1 - w ** 3) ** 3 | |
# Set up output | |
yEst = np.zeros(n) | |
delta = np.ones(n) # Additional weights for iterations | |
for iteration in range(itn): | |
for i in range(n): | |
weights = delta * w[:, i] | |
xw = np.array([weights * x**j for j in range(order+1)]) | |
b = xw.dot(y) | |
a = xw.dot(xm.T) | |
beta = np.linalg.solve(a, b) | |
yEst[i] = sum([beta[j] * x[i]**j for j in range(order+1)]) | |
# Set up weights to reduce effect of outlier points on next iteration | |
residuals = y - yEst | |
s = np.median(np.abs(residuals)) | |
delta = np.clip(residuals / (6.0 * s), -1, 1) | |
delta = (1 - delta ** 2) ** 2 | |
return pd.Series(yEst, index=data.index, name='Trend') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment