Skip to content

Instantly share code, notes, and snippets.

@lgessler
Created August 6, 2019 18:39
Show Gist options
  • Save lgessler/b6bcf93ecf294ca6dbad2ffecf5e114f to your computer and use it in GitHub Desktop.
Save lgessler/b6bcf93ecf294ca6dbad2ffecf5e114f to your computer and use it in GitHub Desktop.
Brief demo of scikit-learn's major APIs
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error, accuracy_score
def main():
    """Walk through scikit-learn's fit/predict workflow on the Boston data.

    Loads the Boston housing data, uses the first 400 rows for training and
    the remainder for testing, and then:

    1. fits a ``LinearRegression`` and prints its coefficients and test MSE;
    2. fits a ``Lasso`` and prints the same two quantities;
    3. thresholds the target at 15 (MEDV is in $1000's) to get 0/1 labels,
       fits a ``LogisticRegression`` and prints its test accuracy.

    Everything is printed to stdout; nothing is returned.
    """
    # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
    # in 1.2, so this demo needs an older scikit-learn (or a replacement
    # dataset) to run.
    X, y = load_boston(return_X_y=True)

    print(list(X[1, :]))
    # Example output, one entry per feature:
    # [
    #   0.02731,  # per capita crime rate by town
    #   0.0,      # proportion of residential land zoned for lots over 25,000 sq.ft.
    #   7.07,     # proportion of non-retail business acres per town
    #   0.0,      # Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
    #   0.469,    # nitric oxides concentration (parts per 10 million)
    #   6.421,    # average number of rooms per dwelling
    #   78.9,     # proportion of owner-occupied units built prior to 1940
    #   4.9671,   # weighted distances to five Boston employment centres
    #   2.0,      # index of accessibility to radial highways
    #   242.0,    # full-value property-tax rate per $10,000
    #   17.8,     # pupil-teacher ratio by town
    #   396.9,    # 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
    #   9.14      # % lower status of the population
    # ]

    print(y[1])
    # Example output:
    # 21.6  # MEDV Median value of owner-occupied homes in $1000's

    # -------------------- train and evaluate
    # Plain positional split: first 400 rows train, the rest test.
    X_train = X[:400]
    X_test = X[400:]
    y_train = y[:400]
    y_test = y[400:]

    ## with linear regression
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_predicted = model.predict(X_test)
    # The coefficients
    print('Coefficients: \n', model.coef_)
    # The mean squared error; metrics take (y_true, y_pred) in that order.
    print("Mean squared error: %.2f"
          % mean_squared_error(y_test, y_predicted))

    ## with another kind of statistical model
    model = Lasso()
    model.fit(X_train, y_train)
    y_predicted = model.predict(X_test)
    # The coefficients
    print('Coefficients: \n', model.coef_)
    # The mean squared error
    print("Mean squared error: %.2f"
          % mean_squared_error(y_test, y_predicted))

    ## binary classification: over $15k or not?
    # Use a distinct loop name so the target array `y` is not shadowed.
    y_train_binary = [1 if value > 15 else 0 for value in y_train]
    y_test_binary = [1 if value > 15 else 0 for value in y_test]
    # Show a few (raw value, label) pairs to sanity-check the thresholding.
    print(list(zip(y_test[:15], y_test_binary[:15])))

    model = LogisticRegression(solver='liblinear')
    model.fit(X_train, y_train_binary)
    y_predicted_binary = model.predict(X_test)
    # The classification accuracy (fraction of test labels predicted correctly)
    print("Accuracy: %.2f" % accuracy_score(y_test_binary, y_predicted_binary))
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment