Last active
July 8, 2024 11:55
-
-
Save jmquintana79/0b194c83b7e693a07dbee302cc77c749 to your computer and use it in GitHub Desktop.
References:
- How to Create Pipelines in Scikit-learn for More Efficient Data Processing: https://www.statology.org/how-create-pipelines-scikit-learn-for-more-efficient-data-processing/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# example models and preprocessors
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

# Template: preprocessing + classifier bundled into one scikit-learn Pipeline.
#
# X, y are placeholders that must be defined before the final `fit` call.
# The ColumnTransformer below selects columns by name, so X is expected to be
# a pandas DataFrame containing the columns listed in `numerical_features`
# and `categorical_features`; y is the classification target.

# Numerical features preprocessing:
# fill missing values with the column median, then standardize.
numerical_features = ['age', 'income']
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical features preprocessing:
# fill missing values with the most frequent category, then one-hot encode.
# handle_unknown='ignore' keeps transform() from raising on categories that
# were not present when the encoder was fitted.
categorical_features = ['gender', 'occupation']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Combine preprocessing for numerical and categorical features.
# Each sub-pipeline is applied only to its own list of columns.
# NOTE: with ColumnTransformer's default `remainder='drop'`, columns of X that
# appear in neither list are discarded before reaching the classifier.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Create the pipeline with ML: preprocessing first, then the estimator, so the
# same fitted transformations are reused automatically at prediction time.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
])

# Fit preprocessor + model in a single call.
pipeline.fit(X, y)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment