Last active
February 9, 2024 15:53
-
-
Save NSG650/d41804a056927f98db7b146243c47535 to your computer and use it in GitHub Desktop.
Random idea I had for a linear regression algorithm by using medians
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This was put together in a hurry, so it will be rough, but the key idea is:
# - we calculate the slope between each pair of neighbouring points and take the median of all those slopes
# - then, using the median slope, we calculate the b intercept for every point and take the median of all those b intercepts
# The line formed is y = median_slope * x + median_intercept
# This is probably not the best way. I am open to suggestions.
import matplotlib.pyplot as plt | |
import random | |
# Parameters of the synthetic line used to generate the sample data below.
# Both are drawn once at import time: slope in 1..10, intercept in 0..10
# (inclusive bounds). The slope is drawn first, then the intercept.
random_slope, random_intercept = random.randint(1, 10), random.randint(0, 10)
def median(a):
    """Return the lower median of the values in ``a``.

    With an odd number of values this is the middle value in sorted order.
    With an even number it is the smaller of the two middle values (no
    averaging), so the result is always one of the input values.

    The input is left untouched: a sorted copy is used instead of sorting
    ``a`` in place, which also means any iterable of mutually comparable
    values (list, tuple, generator, ...) is accepted.

    Raises:
        IndexError: if ``a`` is empty.
    """
    ordered = sorted(a)
    # (n + 1) // 2 is the 1-based position of the lower median; subtract one
    # to turn it into a list index.
    return ordered[(len(ordered) + 1) // 2 - 1]
def rise_over_run(x2, x1, y2, y1):
    """Return the slope of the segment from (x1, y1) to (x2, y2).

    Note the argument order: both x values come first (end, then start),
    followed by both y values in the same order.

    Raises:
        ZeroDivisionError: if the two x values are equal.
    """
    rise = y2 - y1
    run = x2 - x1
    return rise / run
def intercept(x, m, y):
    """Return the y-intercept b of the line with slope ``m`` through (x, y).

    Solves y = m * x + b for b.
    """
    slope_part = m * x
    return y - slope_part
def line(x, m, b):
    """Evaluate the line y = m * x + b at ``x`` and return y."""
    scaled = m * x
    return scaled + b
# Synthetic sample. The x values are consecutive integers starting at a random
# point in 0..10 and ending at 99.
data_x = list(range(random.randint(0, 10), 100))
# Each y is taken from a line whose slope and intercept are the base values
# plus a fresh random offset in [0, 0.1), drawn per point (slope offset first,
# then intercept offset).
data_y = [
    line(
        x,
        random_slope + random.random() / 10,
        random_intercept + random.random() / 10,
    )
    for x in data_x
]
def main():
    """Fit a line to the module-level sample using medians, report it and plot it.

    Steps:
      1. Order the (x, y) points by x.
      2. Compute the slope of each segment between neighbouring points and
         take the median of those slopes.
      3. With that slope, compute the intercept implied by every point and
         take the median of those intercepts.
      4. Print the fitted line and the median absolute error, then plot the
         sample and the fitted line with matplotlib.

    Reads ``data_x`` and ``data_y`` without modifying them. The sample needs
    at least two points with distinct x values, otherwise there is no slope
    to take the median of.
    """
    # Sort the points as (x, y) pairs. Sorting data_x and data_y separately
    # would attach each x to a y that belongs to a different point whenever
    # the noisy y values are not already increasing.
    points = sorted(zip(data_x, data_y), key=lambda point: point[0])
    xs = [x for x, _ in points]
    ys = [y for _, y in points]

    # Slopes between neighbouring points; segments with zero run are skipped
    # because their slope is undefined.
    slopes = [
        rise_over_run(x2, x1, y2, y1)
        for (x1, y1), (x2, y2) in zip(points, points[1:])
        if x2 != x1
    ]
    slope_median = median(slopes)
    print(f"slope median is {slope_median}")

    intercepts = [intercept(x, slope_median, y) for x, y in points]
    intercept_median = median(intercepts)
    print(f"intercept median is {intercept_median}")

    my_y = [line(x, slope_median, intercept_median) for x in xs]
    print(f"line formed y = {slope_median}x + {intercept_median}")

    # Absolute residual per point (abs() instead of squaring and taking the
    # square root, which computes the same quantity less directly).
    errors = [abs(actual - predicted) for actual, predicted in zip(ys, my_y)]
    print(f"Median error {median(errors)}")

    plt.plot(xs, ys)
    plt.plot(xs, my_y)
    plt.show()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment