lgessler · August 6, 2019 18:39
diff --git a/sklearn_demo.py b/sklearn_demo.py
 from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso
 from sklearn.datasets import load_boston
 from sklearn.metrics import mean_squared_error, accuracy_score


 def main():
    """Fit and evaluate a few scikit-learn models on the Boston housing data.

    Loads the dataset, prints one sample row and its target, then trains on
    the first 400 rows and evaluates on the remaining rows:

    * LinearRegression and Lasso, reporting coefficients and mean squared error;
    * LogisticRegression on a binarized target (median value above 15, i.e.
      above $15k since the target is expressed in $1000's), reporting accuracy.

    All results are printed to stdout; nothing is returned.

    NOTE: ``load_boston`` was deprecated in scikit-learn 1.0 and removed in
    1.2, so this demo needs an older scikit-learn release to run.
    """
    X, y = load_boston(return_X_y=True)

    print(list(X[1, :]))
    # Values printed above (second row of the dataset), one per feature:
    #   0.02731  per capita crime rate by town
    #   0.0      proportion of residential land zoned for lots over 25,000 sq.ft.
    #   7.07     proportion of non-retail business acres per town
    #   0.0      Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
    #   0.469    nitric oxides concentration (parts per 10 million)
    #   6.421    average number of rooms per dwelling
    #   78.9     proportion of owner-occupied units built prior to 1940
    #   4.9671   weighted distances to five Boston employment centres
    #   2.0      index of accessibility to radial highways
    #   242.0    full-value property-tax rate per $10,000
    #   17.8     pupil-teacher ratio by town
    #   396.9    1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
    #   9.14     % lower status of the population
    print(y[1])
    # 21.6 -- MEDV: median value of owner-occupied homes in $1000's

    # -------------------- train and evaluate
    # Simple positional split: first 400 rows for training, the rest for testing.
    n_train = 400
    X_train, X_test = X[:n_train], X[n_train:]
    y_train, y_test = y[:n_train], y[n_train:]

    ## regression: ordinary least squares, then Lasso (L1-regularized)
    for model in (LinearRegression(), Lasso()):
        model.fit(X_train, y_train)
        y_predicted = model.predict(X_test)

        # The coefficients
        print('Coefficients: \n', model.coef_)
        # The mean squared error; scikit-learn metrics take (y_true, y_pred)
        print("Mean squared error: %.2f"
              % mean_squared_error(y_test, y_predicted))

    ## binary classification: over $15k or not?
    # The loop variable is deliberately not named `y`, to avoid shadowing the
    # target array.
    y_train_binary = [1 if value > 15 else 0 for value in y_train]
    y_test_binary = [1 if value > 15 else 0 for value in y_test]
    print(list(zip(y_test[:15], y_test_binary[:15])))

    model = LogisticRegression(solver='liblinear')
    model.fit(X_train, y_train_binary)
    y_predicted_binary = model.predict(X_test)

    # Fraction of test rows classified correctly
    print("Accuracy: %.2f" % accuracy_score(y_test_binary, y_predicted_binary))

 # Run the demo only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso
	from sklearn.datasets import load_boston
	from sklearn.metrics import mean_squared_error, accuracy_score


	def main():
	    """Fit and evaluate a few scikit-learn models on the Boston housing data.

	    Loads the dataset, prints one sample row and its target, then trains on
	    the first 400 rows and evaluates on the remaining rows:

	    * LinearRegression and Lasso, reporting coefficients and mean squared error;
	    * LogisticRegression on a binarized target (median value above 15, i.e.
	      above $15k since the target is expressed in $1000's), reporting accuracy.

	    All results are printed to stdout; nothing is returned.

	    NOTE: ``load_boston`` was deprecated in scikit-learn 1.0 and removed in
	    1.2, so this demo needs an older scikit-learn release to run.
	    """
	    X, y = load_boston(return_X_y=True)

	    print(list(X[1, :]))
	    # Values printed above (second row of the dataset), one per feature:
	    #   0.02731  per capita crime rate by town
	    #   0.0      proportion of residential land zoned for lots over 25,000 sq.ft.
	    #   7.07     proportion of non-retail business acres per town
	    #   0.0      Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
	    #   0.469    nitric oxides concentration (parts per 10 million)
	    #   6.421    average number of rooms per dwelling
	    #   78.9     proportion of owner-occupied units built prior to 1940
	    #   4.9671   weighted distances to five Boston employment centres
	    #   2.0      index of accessibility to radial highways
	    #   242.0    full-value property-tax rate per $10,000
	    #   17.8     pupil-teacher ratio by town
	    #   396.9    1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
	    #   9.14     % lower status of the population
	    print(y[1])
	    # 21.6 -- MEDV: median value of owner-occupied homes in $1000's

	    # -------------------- train and evaluate
	    # Simple positional split: first 400 rows for training, the rest for testing.
	    n_train = 400
	    X_train, X_test = X[:n_train], X[n_train:]
	    y_train, y_test = y[:n_train], y[n_train:]

	    ## regression: ordinary least squares, then Lasso (L1-regularized)
	    for model in (LinearRegression(), Lasso()):
	        model.fit(X_train, y_train)
	        y_predicted = model.predict(X_test)

	        # The coefficients
	        print('Coefficients: \n', model.coef_)
	        # The mean squared error; scikit-learn metrics take (y_true, y_pred)
	        print("Mean squared error: %.2f"
	              % mean_squared_error(y_test, y_predicted))

	    ## binary classification: over $15k or not?
	    # The loop variable is deliberately not named `y`, to avoid shadowing the
	    # target array.
	    y_train_binary = [1 if value > 15 else 0 for value in y_train]
	    y_test_binary = [1 if value > 15 else 0 for value in y_test]
	    print(list(zip(y_test[:15], y_test_binary[:15])))

	    model = LogisticRegression(solver='liblinear')
	    model.fit(X_train, y_train_binary)
	    y_predicted_binary = model.predict(X_test)

	    # Fraction of test rows classified correctly
	    print("Accuracy: %.2f" % accuracy_score(y_test_binary, y_predicted_binary))

	# Run the demo only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    main()