jmquintana79 · July 8, 2024 11:55
diff --git a/pipeline_template_scikit.py b/pipeline_template_scikit.py
 import pandas as pd
 import numpy as np
 from sklearn.pipeline import Pipeline
 from sklearn.compose import ColumnTransformer
 # example models and preprocessors
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.impute import SimpleImputer
 from sklearn.linear_model import LogisticRegression

 # X, y are not defined in this template: supply the feature table and the
 # target before running the fit at the bottom.

 # Columns routed to each preprocessing branch (selected by name from X)
 numerical_features = ['age', 'income']
 categorical_features = ['gender', 'occupation']

 # Numerical branch: fill gaps with the column median, then standardize
 numerical_transformer = Pipeline([
     ('imputer', SimpleImputer(strategy='median')),
     ('scaler', StandardScaler()),
 ])

 # Categorical branch: fill gaps with the most frequent value, then one-hot
 # encode; categories unseen during fit are ignored instead of raising
 categorical_transformer = Pipeline([
     ('imputer', SimpleImputer(strategy='most_frequent')),
     ('onehot', OneHotEncoder(handle_unknown='ignore')),
 ])

 # Apply each branch to its own subset of columns
 preprocessor = ColumnTransformer([
     ('num', numerical_transformer, numerical_features),
     ('cat', categorical_transformer, categorical_features),
 ])

 # Chain the preprocessing and the estimator into a single object
 pipeline = Pipeline([
     ('preprocessor', preprocessor),
     ('classifier', LogisticRegression()),
 ])

 # Fit preprocessor + model in one call
 pipeline.fit(X, y)
	import pandas as pd
	import numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	# example models and preprocessors
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.linear_model import LogisticRegression

	# X, y are not defined in this template: supply the feature table and the
	# target before running the fit at the bottom.

	# Columns routed to each preprocessing branch (selected by name from X)
	numerical_features = ['age', 'income']
	categorical_features = ['gender', 'occupation']

	# Numerical branch: fill gaps with the column median, then standardize
	numerical_transformer = Pipeline([
	    ('imputer', SimpleImputer(strategy='median')),
	    ('scaler', StandardScaler()),
	])

	# Categorical branch: fill gaps with the most frequent value, then one-hot
	# encode; categories unseen during fit are ignored instead of raising
	categorical_transformer = Pipeline([
	    ('imputer', SimpleImputer(strategy='most_frequent')),
	    ('onehot', OneHotEncoder(handle_unknown='ignore')),
	])

	# Apply each branch to its own subset of columns
	preprocessor = ColumnTransformer([
	    ('num', numerical_transformer, numerical_features),
	    ('cat', categorical_transformer, categorical_features),
	])

	# Chain the preprocessing and the estimator into a single object
	pipeline = Pipeline([
	    ('preprocessor', preprocessor),
	    ('classifier', LogisticRegression()),
	])

	# Fit preprocessor + model in one call
	pipeline.fit(X, y)