Timothy Renner timothyrenner

Machine learning engineer / data scientist. Aspiring squatcher.

timothyrenner / bigfoot_streamlit_app.py

Last active June 23, 2020 13:16

Bigfoot Sightings Streamlit App

	import streamlit as st
	import pandas as pd
	import altair as alt
	import pydeck as pdk
	import os

	from dateutil.parser import parse

	try:
	from dotenv import load_dotenv, find_dotenv

timothyrenner / gfl_with_h3.py

Created December 11, 2019 13:26

Graph Fused Lasso + H3

	from h3 import h3
	from pygfl.easy import solve_gfl

	def build_neighbor_edges(hexids):
	# Hash the hexid to the position so we can easily look
	# up where the original hexid position is in the array.
	hexid_to_position = {h:ii for ii,h in enumerate(hexids)}
	edges = []
	for h in hexids:
	for n in h3.k_ring(h,1):

timothyrenner / pyspark_pandas_udf_call.py

Created January 30, 2019 22:26

Pyspark Pandas UDF Call

	# Build the column expression by applying predict_pandas_udf to the
	# feature columns, then attach it under the name "prediction".
	# NOTE(review): `...` is kept verbatim from the original; presumably it
	# stands in for the remaining feature columns - confirm before running.
	prediction_column = predict_pandas_udf(col("feature1"), col("feature2"), ...)
	data_frame.withColumn("prediction", prediction_column)

timothyrenner / pyspark_pandas_udf_creation.py

Created January 30, 2019 22:19

Pyspark Pandas UDF Creation

	import pandas as pd

	from pyspark.sql.functions import pandas_udf
	from pyspark.sql.types import DoubleType


	@pandas_udf(returnType=DoubleType())
	def predict_pandas_udf(*features):
	""" Executes the prediction using numpy arrays.

timothyrenner / pyspark_partition_call.py

Created January 30, 2019 22:13

Pyspark Partition Call

# Hand predict_partition to mapPartitions on the DataFrame's underlying RDD,
# then convert the resulting RDD back into a DataFrame.
predictions_rdd = my_data.rdd.mapPartitions(predict_partition)
predictions_rdd.toDF()

timothyrenner / pyspark_partition_definition.py

Created January 30, 2019 16:06

Pyspark Partition Definition

	import pandas as pd


	# Feature names in the column order the model requires. Kept in one
	# place because the list is needed more than once.
	FEATURES = [
	    "feature1",
	    "feature2",
	    "feature3",
	    ...,
	]


	def predict_partition(rows):
	""" Calls a vectorized prediction by loading the partition into memory.

timothyrenner / pyspark_udf_call.py

Created January 30, 2019 16:02

Pyspark UDF Call

	# Build the column expression by applying predict_udf to the feature
	# columns, then attach it under the name "predicted_score".
	# NOTE(review): `...` is kept verbatim from the original; presumably it
	# stands in for the remaining feature columns - confirm before running.
	score_column = predict_udf(col("feature1"), col("feature2"), ...)
	my_df.withColumn("predicted_score", score_column)

timothyrenner / pyspark_udf_creation.py

Created January 30, 2019 15:59

Pyspark UDF Creation

	from pyspark.sql.functions import udf
	from pyspark.sql.types import DoubleType


	# Wrap the plain-Python `predict` function as a Spark UDF whose declared
	# return type is DoubleType.
	predict_udf = udf(f=predict, returnType=DoubleType())

timothyrenner / pyspark_udf_definition.py

Created January 30, 2019 15:57

Pyspark UDF Definition

	import numpy as np

	def predict(*features):
	""" Performs a prediction on the features.

	Parameters
	----------
	features : List[float]
	The feature values the model needs to make a prediction.

timothyrenner / average_agreement.py

Created September 13, 2018 15:39

Average Agreement, Attempt 3

	def average_agreement(list1, list2, max_depth):

	# Empty lists evaluate to false.
	if (not list1) or (not list2):
	return 0.0

	### NEW CODE ###
	# Truncate the depth
	max_list_len = max(len(list1), len(list2))
	max_depth = min(max_depth, max_list_len)