Skip to content

Instantly share code, notes, and snippets.

View vanbrands's full-sized avatar

Felipe Brandão vanbrands

  • Typeform
  • Barcelona, Spain
View GitHub Profile
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import RandomizedSearchCV
# Candidate hyper-parameter values, keyed by parameter name.
# NOTE(review): the double-underscore keys look like scikit-learn's nested
# `<step>__<param>` addressing for a pipeline with 'preprocessing' and
# 'model' steps; the pipeline and the search call are not shown here — confirm.
search_space = {
    # Number of bins to try for the 'binfeatures' transformer.
    'preprocessing__binfeatures__n_bins': [10, 20],
    # Ensemble sizes to try for the model.
    'model__n_estimators': [500, 1000, 2000],
    # Class-weighting strategies to try for the model.
    'model__class_weight': ['balanced', 'balanced_subsample'],
}
preprocessing = ColumnTransformer(transformers=[
# Persist pipeline to disk
# The pipeline is fitted first, so the object written to 'model.joblib'
# is the fitted one.
pipeline.fit(x_train, y_train)
joblib.dump(pipeline, 'model.joblib')
# Load pipeline and make prediction
# NOTE(review): joblib files are pickle-based according to the joblib
# documentation — only load files from a trusted source.
pipeline = joblib.load('model.joblib')
# NOTE(review): `x` is not defined in this snippet; presumably the new
# samples to score, with the same columns as x_train — confirm.
y_hat = pipeline.predict(x)
from sklearn.compose import ColumnTransformer
# Define your transformers: one-hot encode 'categorical_column' and let
# every other column through untouched (remainder='passthrough').
preprocessing = ColumnTransformer(
    transformers=[
        (
            'encode_categorical_features',
            OneHotEncoder(),
            ['categorical_column'],
        ),
    ],
    remainder='passthrough',
)
# Classifier instance created with its default parameters.
estimator = GradientBoostingClassifier()
# Define steps in the pipeline.
from sklearn.preprocessing import OneHotEncoder
# Fit the encoder and one-hot encode the categorical column in one step.
encoder = OneHotEncoder()
# Select the column with a list (double brackets) so a 2-D DataFrame is
# passed: scikit-learn transformers expect input shaped
# (n_samples, n_features) and reject a 1-D Series.
dummies = encoder.fit_transform(df[['categorical_column']])
# You have your set of possible categories in a list, for example.
# (it could be anything really...)
possible_categories = ['foo', 'bar']
# Convert the categorical column to type Categorical, restricted to the
# categories listed above.
# NOTE(review): per the pandas documentation, values that are not in
# `categories` become NaN — confirm that is acceptable for this data.
df['categorical_column'] = pd.Categorical(
    values=df['categorical_column'],
    categories=possible_categories,
)
import pandas as pd
# Dummy-encode the two listed columns of df in a single call.
# NOTE(review): the names are spelled 'categorical_colum1' / 'categorical_colum2'
# (no "n" before the digit) — confirm they match the DataFrame's columns.
dummies = pd.get_dummies(df, columns=['categorical_colum1', 'categorical_colum2'])
# Dummy-encode a single column instead. This rebinds `dummies`, so the
# result of the previous call is discarded.
dummies = pd.get_dummies(df['categorical_column'])
# Deploy the template in cloudformation_notebook.yaml as the stack
# "notebook-tutorial". A trailing backslash continues the command on the
# next line; a forward slash would be passed to the CLI as an argument and
# the option lines would run as separate commands.
aws cloudformation deploy \
  --template-file cloudformation_notebook.yaml \
  --stack-name notebook-tutorial
@vanbrands
vanbrands / cloudformation_notebook.yaml
Last active September 7, 2019 18:40
pub_cloudformation_notebook
# Minimal CloudFormation template declaring a single SageMaker notebook
# instance. Nesting matters in YAML: the resource lives under `Resources`
# and its settings under `Properties`.
AWSTemplateFormatVersion: '2010-09-09'
Description: 'Cloudformation simples para subir um notebook no Sagemaker'
Resources:
  NotebookTutorial:
    Type: AWS::SageMaker::NotebookInstance
    Properties:
      # Notebook instance names may contain only alphanumeric characters
      # and hyphens, so the space was replaced with a hyphen.
      NotebookInstanceName: "Notebook-Tutorial"
      InstanceType: ml.t2.medium
      # NOTE(review): "String" is a placeholder — replace it with the ARN
      # of the IAM role the notebook instance should assume.
      RoleArn: String
      VolumeSizeInGB: 20
@vanbrands
vanbrands / table_rows.sql
Created May 15, 2019 14:32
[Redshift Management] #admin
-- List schema, table name and row count for base tables outside the
-- pg_catalog and information_schema schemas, keeping only tables whose
-- tbl_rows value is greater than 1.
SELECT t.table_schema,
       t.table_name,
       info.tbl_rows AS rows
FROM svv_tables AS t
INNER JOIN svv_table_info AS info
        ON t.table_schema = info.schema
       AND t.table_name = info.table
WHERE t.table_type = 'BASE TABLE'
  AND t.table_schema NOT IN ('pg_catalog','information_schema')
  AND info.tbl_rows > 1