Skip to content

Instantly share code, notes, and snippets.

@quinncnl
Created September 29, 2017 09:07
Show Gist options
  • Save quinncnl/70dd68b0b2d1a7edc69b8e1ae648ab9e to your computer and use it in GitHub Desktop.
Save quinncnl/70dd68b0b2d1a7edc69b8e1ae648ab9e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_iris
from sklearn.externals.six import StringIO
from sklearn import tree
import pydotplus
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot
from sklearn import neighbors, model_selection
from sklearn.model_selection import train_test_split
from pandas import plotting
import matplotlib.pyplot as plt
from sklearn import neighbors, model_selection, tree, ensemble
import seaborn as sns
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.svm import SVC
import os, sys
from sys import stderr
import gpxpy
import gpxpy.gpx
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
# Load the header-less CSV; without a header row the columns are addressed
# by their integer labels.
df = pd.read_csv('data.csv', header=None)

# Pandas selections converted to numpy.ndarray: columns 2 and 3 are the
# features, column 1 is the class label.
# NOTE(review): predict() feeds [average_speed, max_speed] to the models, so
# columns 2/3 presumably hold those values -- confirm against data.csv.
X = df[[2, 3]].values
y = df[1].values

# Reproducible 80/20 train/test split used by the classifier builders.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
def KNF():
    """Build a 3-nearest-neighbours classifier trained on the training split.

    Reads the module-level ``X_train`` / ``y_train`` arrays.

    Returns:
        The fitted ``KNeighborsClassifier``.
    """
    # fit() hands back the estimator itself, so building and training chain.
    return KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
def RandomForest():
    """Build a 10-tree random forest trained on the training split.

    Reads the module-level ``X_train`` / ``y_train`` arrays.  The forest is
    seeded with ``random_state=5``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier(n_estimators=10, random_state=5)
    # fit() trains in place and returns the same estimator object.
    model.fit(X_train, y_train)
    return model
def DecisionTree(pdf_path="iris.pdf"):
    """Fit a decision-tree classifier on the training split and render it.

    Reads the module-level ``X_train`` / ``y_train`` arrays.  As a side
    effect the fitted tree is exported to Graphviz DOT and written out as a
    PDF.

    Args:
        pdf_path: Destination of the rendered tree.  Defaults to
            ``"iris.pdf"``, the file name this function has always used.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)

    # With out_file=None export_graphviz returns the DOT source as a string,
    # so no intermediate StringIO buffer is needed.
    dot_source = tree.export_graphviz(clf, out_file=None)
    graph = pydotplus.graph_from_dot_data(dot_source)
    graph.write_pdf(pdf_path)
    return clf
def doDecisionTreeRegressor():
    """Fit a depth-2 regression tree on one feature and plot its predictions.

    Column 2 of the module-level ``df`` is the single input feature and
    column 0 is the regression target.  The data is split 80/20 with a fixed
    seed; the held-out samples are drawn as a scatter plot and the tree's
    predictions for them as a line, then the figure is shown.
    """
    target = df[0].values
    # Estimators expect a 2-D (n_samples, n_features) matrix; selecting the
    # column with a list keeps that shape instead of a flat 1-D vector.
    feature = df[[2]].values

    x_train, x_test, y_train, y_test = train_test_split(
        feature, target, test_size=0.2, random_state=42
    )

    regressor = DecisionTreeRegressor(max_depth=2)
    regressor.fit(x_train, y_train)
    predicted = regressor.predict(x_test)

    # The split shuffles the samples; order them by feature value so the
    # prediction line runs left to right instead of zig-zagging.
    order = np.argsort(x_test[:, 0])

    # Plot the results: true held-out values as points, model output as line.
    plt.scatter(x_test[:, 0], y_test, c="darkorange", label="data")
    plt.plot(
        x_test[order, 0],
        predicted[order],
        color="cornflowerblue",
        label="max_depth=2",
        linewidth=2,
    )
    plt.xlabel("data")
    plt.ylabel("target")
    plt.title("Decision Tree Regression")
    plt.legend()
    plt.show()
def visualize():
    """Plot the decision regions of a 6-nearest-neighbours classifier.

    The classifier is fitted on the full module-level ``X`` / ``y`` data.
    A mesh spanning both feature ranges (padded by 1 on each side, step 0.1)
    is classified and drawn as a coloured background, the samples are
    scattered on top, and the figure is shown.
    """
    # Light palette for the background regions, dark palette for the points.
    region_colours = ListedColormap(['#FFB0AA', '#FFE0AA', '#FFF4AA', '#F2FAA7', '#7AB793', '#748BA7'])
    point_colours = ListedColormap(['#550000', '#553D00', '#4B5300', '#004011', '#041F37', '#14073A'])
    mesh_step = .1  # spacing between neighbouring mesh points

    knn = neighbors.KNeighborsClassifier(6)
    knn.fit(X, y)

    first, second = X[:, 0], X[:, 1]
    grid_x, grid_y = np.meshgrid(
        np.arange(first.min() - 1, first.max() + 1, mesh_step),
        np.arange(second.min() - 1, second.max() + 1, mesh_step),
    )

    # Classify every mesh point, then fold the flat predictions back into
    # the grid's shape for the colour plot.
    mesh_points = np.c_[grid_x.ravel(), grid_y.ravel()]
    regions = knn.predict(mesh_points).reshape(grid_x.shape)

    plt.figure()
    plt.pcolormesh(grid_x, grid_y, regions, cmap=region_colours)
    # Overlay the samples the classifier was fitted on.
    plt.scatter(first, second, c=y, cmap=point_colours, s=10)
    plt.xlim(grid_x.min(), grid_x.max())
    plt.ylim(grid_y.min(), grid_y.max())
    plt.show()
def get_model_quality(model):
    """Print the model's score on the training and on the test split.

    Args:
        model: Fitted estimator exposing ``score(X, y)``; it is evaluated
            against the module-level train and test arrays, in that order.
    """
    reports = (
        ("Training set score: {:.3f}", X_train, y_train),
        ("Test set score: {:.3f}", X_test, y_test),
    )
    for template, features, labels in reports:
        print(template.format(model.score(features, labels)))
def predict(file, model):
    """Classify a GPX recording with an already-fitted model.

    Two features are derived from the moving data of the file's first track
    -- average moving speed (moving distance / moving time) and maximum
    speed -- and passed to ``model.predict``; the prediction is printed.

    Args:
        file: Path to a GPX file.  The ``.gpx`` extension is matched
            case-insensitively.
        model: Fitted estimator exposing ``predict``.

    Returns:
        None.  Nothing is printed when the extension is wrong (a message is
        written to stderr instead), when the file contains no tracks, or
        when the first track's moving time or maximum speed is zero.
    """
    _, extension = os.path.splitext(file)
    if extension.lower() != '.gpx':
        stderr.write('Please enter a valid GPX file.')
        return

    # The context manager closes the handle even if parsing raises.
    with open(file, 'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    if not gpx.tracks:
        return

    data = gpx.tracks[0].get_moving_data()
    # Zero moving time would divide by zero below; a zero max speed means
    # the track carries no usable movement either.
    if data.moving_time == 0 or data.max_speed == 0:
        return

    average_speed = data.moving_distance / data.moving_time
    print(model.predict([[average_speed, data.max_speed]]))
def main():
    """Run the script.

    With anything other than a bare invocation, the first command-line
    argument is treated as a GPX file and classified with the k-NN model.
    With no arguments, the decision-tree model is trained and its
    train/test scores are printed.
    """
    if len(sys.argv) != 1:
        predict(sys.argv[1], KNF())
        return

    # Alternative experiments that can be swapped in here:
    #   get_model_quality(KNF())
    #   get_model_quality(RandomForest())
    #   visualize()
    #   doDecisionTreeRegressor()
    get_model_quality(DecisionTree())


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment