Last active
May 6, 2024 22:23
-
-
Save datajoely/018607d5d721c747d742605494b822a3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_template_pipeline() -> Pipeline:
    """Build the reusable three-node template pipeline.

    The first node reads real feature datasets. Every other dataset and
    parameter name used here is a placeholder that callers are expected
    to remap when they wrap this template.
    """
    shared_tag = "data science"

    # Real feature inputs (never overridden); the three outputs are
    # placeholder names that get overridden by the wrapping pipeline.
    build_inputs = node(
        func=create_model_inputs,
        inputs=[
            "feat_days_since_last_shutdown",
            "feat_days_between_shutdown_last_maintenance",
            "feat_fte_maintenance_hours_last_6m",
        ],
        outputs=["spine", "training_set", "test_set"],
        tags=shared_tag,
    )

    # All three inputs are placeholders: two come from the node above,
    # and "model_type" is an overridable parameter placeholder.
    fit_model = node(
        func=train_model,
        inputs=["spine", "training_set", "model_type"],
        outputs="model_object",
        tags=shared_tag,
    )

    # These inputs inherit whatever names the upstream placeholder
    # outputs are remapped to; "model_params" is a parameter placeholder.
    run_estimate = node(
        func=estimate,
        inputs=["spine", "model_object", "test_set", "model_params"],
        outputs="model_output",
        tags=shared_tag,
    )

    return Pipeline([build_inputs, fit_model, run_estimate])
def create_data_science_pipeline(**kwargs) -> Pipeline:
    """Assemble one pipeline from two remapped copies of the template.

    `create_template_pipeline()` is instantiated twice; each copy has its
    placeholder parameters and dataset names mapped to concrete entries,
    and the two results are concatenated. `**kwargs` is accepted but not
    used inside this function.
    """
    # Prediction pipeline
    prediction_pipeline = pipeline(
        create_template_pipeline(),
        # Overridden input parameters
        inputs={
            "model_type": "params:model_type.sklearn",
            "model_params": "params:hyperparams.sklearn",
        },
        # Overridden output catalog entries
        outputs={
            "spine": "spine_time_series",
            "training_set": "train_time_series",
            "test_set": "test_time_series",
            "model_object": "sklearn_predictor",
            "model_output": "recommended_maintenance_schedule",
        },
    )

    # Classification pipeline
    classification_pipeline = pipeline(
        create_template_pipeline(),
        # Overridden input parameters
        inputs={
            "model_type": "params:model_type.pytorch",
            "model_params": "params:hyperparams.pytorch",
        },
        # Overridden output catalog entries
        outputs={
            "spine": "spine_equipment_level",
            "training_set": "train_equipment_level",
            "test_set": "test_equipment_level",
            "model_object": "pytorch_classifier",
            "model_output": "risk_scored_equipment",
        },
    )

    return prediction_pipeline + classification_pipeline
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The relevant catalog entries would look something like this...