This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grabbing the preprocessor
pre = fit_model.named_steps['preprocessor']

# Getting the numerical and categorical features from the pipeline.
# NOTE(review): assumes `pre` is a fitted ColumnTransformer whose first
# transformer handles the numeric columns and whose second is a pipeline
# containing a 'onehot' step -- confirm against the pipeline definition.
num_feats = pre.transformers_[0][2]
onehot = pre.transformers_[1][1]['onehot']
# get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
# and fall back to the old name only on releases that predate it.
if hasattr(onehot, 'get_feature_names_out'):
    cat_feats = onehot.get_feature_names_out(categorical_features)
else:
    cat_feats = onehot.get_feature_names(categorical_features)
# list() on both sides so "+" concatenates even when the numeric column spec
# is an Index/array rather than a plain list.
all_feats = list(num_feats) + list(cat_feats)
# Dataframe for visual examination of coefficients
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generated as example for Springboard mentees
import pandas as pd

# Toy data: a code column, a country column with some blank entries, and a
# column that is irrelevant to the lookup being built.
df = pd.DataFrame({
    'code': ['1', '1', '2', '3', '3', '3', '3', '4', '4'],
    'country': ['usa', '', 'france', 'japan', 'japan', '', 'japan', 'brazil', 'brazil'],
    'extracolumn': ['i', 'do', 'not', 'need', 'the', 'stuff', 'in', 'this', 'column'],
})

# Keep only the two columns of interest and collapse repeated
# (code, country) pairs; the original row labels are preserved.
new_df = df[['code', 'country']].drop_duplicates()
# Discard the rows whose country is the empty string.
new_df = new_df[new_df['country'] != '']
# Bare expression: displays the result when run in a notebook/REPL.
new_df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Strip a fixed number of leading characters from the name of every matching
# file in a directory. Replace the three <...> placeholders before running.
cd <path_to_directory_containing_files> && for file in *<file_type>; do
  # When nothing matches, the glob is left as a literal string; skip it
  # rather than handing a non-existent name to mv.
  [ -e "$file" ] || continue
  # "--" ends option parsing so file names beginning with "-" are safe.
  mv -- "$file" "${file:<number_of_leading_characters_to_remove>}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cosine_similarity_vec <- function(row_index, df){ | |
row <- df[row_index,] | |
mat <- df[-row_index,] | |
numerator <- rowSums(sweep(mat, MARGIN=2, row, "*")) | |
denominator <- sqrt(sum(row**2)) * sqrt(rowSums(mat**2)) | |
similarities <- numerator/denominator | |
game_numbers <- 1:dim(df)[1] | |
game_numbers <- game_numbers[! game_numbers %in% row_index] | |
df_similarity <- data.frame(game_numbers, similarities) | |
df_similarity <- df_similarity %>% arrange(desc(similarities)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# doc2vec parameters
# NOTE(review): these look like the arguments for a gensim Doc2Vec model; the
# call that consumes them is not visible here -- confirm the mapping there.
vector_size = 300
window_size = 15
min_count = 1
sampling_threshold = 1e-5
negative_size = 5
train_epoch = 100
dm = 0  # 0 = dbow; 1 = dmpv
worker_count = 1  # number of parallel processes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy #https://github.com/tweepy/tweepy | |
import csv

# Twitter API credentials.
# The values are empty placeholders in this snippet; supply real ones before
# making any API call.
consumer_key = ""
consumer_secret = ""
access_key = ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cat mtcars.csv
"","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb"
"Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4,4
"Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4,4
"Datsun 710",22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
"Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
"Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
"Valiant",18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 |