# weights for each style layer
# weighting earlier layers more will result in *larger* style artifacts
# notice we are excluding `conv4_2`, our content representation
style_weights = {'conv1_1': 1.,
                 'conv2_1': 0.75,
                 'conv3_1': 0.2,
                 'conv4_1': 0.2,
                 'conv5_1': 0.2}

content_weight = 1  # alpha
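
# A hedged sketch of how these weights typically enter the total loss in
# Gatys-style transfer; `gram_matrix`, `target_features`, `content_features`,
# and `style_grams` are assumed to exist as in the standard recipe, and
# style_weight is an assumed typical value, none of it shown in this preview
style_weight = 1e6  # beta

content_loss = torch.mean((target_features['conv4_2'] -
                           content_features['conv4_2']) ** 2)

style_loss = 0
for layer in style_weights:
    target_feature = target_features[layer]
    _, d, h, w = target_feature.shape
    layer_style_loss = style_weights[layer] * torch.mean(
        (gram_matrix(target_feature) - style_grams[layer]) ** 2)
    style_loss += layer_style_loss / (d * h * w)

# alpha * content + beta * style
total_loss = content_weight * content_loss + style_weight * style_loss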
def get_features(image, model, layers=None):
    """ Run an image forward through a model and get the features for
        a set of layers. Default layers are for VGGNet matching Gatys et al (2016).
    """
    ## Complete mapping of PyTorch VGG19 layer indices to the layer names from the paper
    ## (we need these layers for the content and style representations of an image)
    if layers is None:
        layers = {'0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1',
                  '19': 'conv4_1', '21': 'conv4_2',  # conv4_2 is the content layer
                  '28': 'conv5_1'}
    features = {}
    x = image
    # pass the image through each layer, keeping the activations we care about
    for name, layer in model._modules.items():
        x = layer(x)
        if name in layers:
            features[layers[name]] = x
    return features
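
# Assumed usage, following the standard notebook this gist appears to be based
# on: extract features once for the preprocessed content and style image
# tensors (`content` and `style` are assumed names; `vgg` is loaded below)
content_features = get_features(content, vgg)
style_features = get_features(style, vgg)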
# import resources
%matplotlib inline
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from torchvision import transforms, models
# get the "features" portion of VGG19 (we will not need the "classifier" portion)
vgg = models.vgg19(pretrained=True).features
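
# Assumed follow-up step from the same standard recipe: freeze all VGG weights,
# since style transfer optimizes the target image's pixels, not the network
for param in vgg.parameters():
    param.requires_grad_(False)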
# Compare the Credit_Limit distribution by Income_Category, split by
# Attrition_Flag, for the original data and each synthetic dataset
# (height/aspect are figure-level catplot parameters, so the axes-level
# violinplot cannot take them)
sns.violinplot(x='Credit_Limit', y='Income_Category', data=data, hue="Attrition_Flag",
               split=True, palette="Set3", scale="width").set_title('Original Data')
sns.violinplot(x='Credit_Limit', y='Income_Category', data=new_data_model_CTGAN, hue="Attrition_Flag",
               split=True, palette="Set3", scale="width").set_title('model_CTGAN')
sns.violinplot(x='Credit_Limit', y='Income_Category', data=new_data_model_CopulaGAN, hue="Attrition_Flag",
               split=True, palette="Set3", scale="width").set_title('model_CopulaGAN')
# pip install sdv

# importing the necessary libraries
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# import all 4 sdv models under the single-table scenario
from sdv.tabular import GaussianCopula
from sdv.tabular import CTGAN
from sdv.tabular import CopulaGAN
from sdv.tabular import TVAE
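
# A hedged sketch, not part of the gist preview, of how the synthetic frames
# compared in the violin plots above could be produced with this legacy
# sdv.tabular API; `data` is assumed to be the original DataFrame
model_CTGAN = CTGAN()
model_CTGAN.fit(data)
new_data_model_CTGAN = model_CTGAN.sample(len(data))

model_CopulaGAN = CopulaGAN()
model_CopulaGAN.fit(data)
new_data_model_CopulaGAN = model_CopulaGAN.sample(len(data))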
# Fit and transform the Vectorizer based on the feature selection results X_names
vectorizer = feature_extraction.text.CountVectorizer(vocabulary=X_names)
vectorizer.fit(corpus)
X_train = vectorizer.transform(corpus)
# The ML models under test: Naive Bayes, Random Forest, and Decision Tree
NB_Classifier = naive_bayes.MultinomialNB()
RForest_Classifier = RandomForestClassifier()
DTree_Classifier = DecisionTreeClassifier()
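
# An assumed usage sketch: fit one classifier on the vectorized corpus and
# inspect its confusion matrix (`y_train`, the labels aligned with `corpus`,
# is an assumed name not shown in the gist preview)
from sklearn.metrics import confusion_matrix
NB_Classifier.fit(X_train, y_train)
predictions = NB_Classifier.predict(X_train)
print(confusion_matrix(y_train, predictions))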
import re
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import feature_extraction, model_selection, naive_bayes, pipeline, manifold, preprocessing, feature_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
# Analyzing the most frequent bi-grams in interview questions of type Methodology with CountVectorizer
def counter(Q_A, category, data, n_gram_min, n_gram_max):
    # keep only the rows belonging to the requested question category
    data = data[data[category] == 1]
    word_vectorizer = CountVectorizer(ngram_range=(n_gram_min, n_gram_max), analyzer='word')
    sparse_matrix = word_vectorizer.fit_transform(data[Q_A])
    # total count of each n-gram across the selected questions
    frequencies = sum(sparse_matrix).toarray()[0]
    # pair each n-gram with its frequency, most frequent first
    return sorted(zip(word_vectorizer.get_feature_names_out(), frequencies),
                  key=lambda pair: -pair[1])
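
# Assumed usage (the DataFrame `df`, column name 'Question', and category
# label 'Methodology' are illustrative guesses, not taken from the preview):
top_bigrams = counter('Question', 'Methodology', df, 2, 2)
print(top_bigrams[:10])  # ten most frequent bi-grams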
import re
import pandas as pd
import nltk
from nltk import word_tokenize
nltk.download('wordnet')
nltk.download('punkt')
lemma = nltk.stem.WordNetLemmatizer()
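
# A small assumed usage example: tokenize a sentence, then lemmatize each token
tokens = word_tokenize("The interviewers were asking tougher questions")
print([lemma.lemmatize(token) for token in tokens])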
import pandas as pd
import numpy as np
import re
from nltk.corpus import stopwords
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
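
# A hedged sketch of the pattern these imports suggest: a custom text-cleaning
# transformer chained with TF-IDF in a scikit-learn Pipeline. The TextCleaner
# class and all column names here are illustrative assumptions, and the
# stopword list assumes nltk's 'stopwords' corpus has been downloaded.
class TextCleaner(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        # lowercase each document and collapse anything that isn't a letter
        return [re.sub(r'[^a-z\s]', ' ', text.lower()) for text in X]

text_pipeline = Pipeline([
    ('clean', TextCleaner()),
    ('tfidf', TfidfVectorizer(stop_words=stopwords.words('english'))),
])

# e.g. split an assumed df['text'] column, then text_pipeline.fit_transform(X_train)
X_train, X_test = train_test_split(df['text'], test_size=0.2)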