Skip to content

Instantly share code, notes, and snippets.

@alksl
Created May 8, 2015 08:57
Show Gist options
  • Save alksl/93a5a35eccc9dd315e0f to your computer and use it in GitHub Desktop.
Save alksl/93a5a35eccc9dd315e0f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import os
import sys
import tables
import numpy as np
# Command-line contract: argv[1] is the input taste-profile HDF5 file and
# argv[2] is the statistics HDF5 file to write. Fail with a readable usage
# message instead of a bare IndexError traceback when either one is missing.
# Extra trailing arguments are still tolerated (and ignored), as before.
if len(sys.argv) < 3:
    sys.exit("usage: <script> TASTE_PROFILE_FILE TASTE_PROFILE_STATISTICS_FILE")

# Absolute paths, so the echoed values are unambiguous regardless of the
# directory the script was started from.
TASTE_PROFILE_FILE = os.path.abspath(sys.argv[1])
TASTE_PROFILE_STATISTICS_FILE = os.path.abspath(sys.argv[2])

print("TASTE_PROFILE_FILE: ", TASTE_PROFILE_FILE)
print("TASTE_PROFILE_STATISTICS_FILE: ", TASTE_PROFILE_STATISTICS_FILE)
def create_array(stat_file, name, shape):
    """Create a float64 chunked array directly under the root of *stat_file*.

    Args:
        stat_file: Open, writable PyTables file handle that receives the node.
        name: Name of the new node under ``stat_file.root``.
        shape: Shape tuple of the array to allocate.

    Returns:
        Whatever ``stat_file.create_carray`` returns (the newly created
        array node), ready to be filled by index or slice assignment.
    """
    # Maximum zlib compression level, plus a Fletcher32 checksum per chunk.
    storage_filters = tables.Filters(complevel=9, complib='zlib', fletcher32=True)
    element_type = tables.Float64Atom()
    return stat_file.create_carray(
        stat_file.root,
        name,
        element_type,
        shape,
        filters=storage_filters,
    )
def _grouped_mean_and_variance(keys, values):
    """Return the per-key mean and population variance of *values*.

    Args:
        keys: 1-D array-like of group identifiers, one per observation.
        values: 1-D array-like of numeric observations aligned with *keys*.

    Returns:
        A ``(means, variances)`` pair of float64 arrays holding one entry per
        distinct key, ordered by ascending key (the order ``np.unique``
        produces). The variance is the population variance (``ddof=0``), the
        same definition ``np.var`` uses by default.
    """
    values = np.asarray(values, dtype=np.float64)
    # ``group[i]`` is the position of ``keys[i]`` among the sorted distinct
    # keys, so it can be used directly as a bin index.
    _, group = np.unique(np.asarray(keys), return_inverse=True)
    # Every bin index occurs at least once, so ``counts`` has exactly one
    # entry per distinct key and never contains a zero.
    counts = np.bincount(group)
    means = np.bincount(group, weights=values) / counts
    # Two-pass variance: subtract each group's own mean first, then average
    # the squared deviations within the group.
    deviations = values - means[group]
    variances = np.bincount(group, weights=deviations * deviations) / counts
    return means, variances


def _store_playcount_statistics(stat_file, table, id_column,
                                mean_name, variance_name):
    """Write per-id playcount mean and variance arrays into *stat_file*.

    The table is read once, instead of issuing one ``where()`` query (a full
    scan) per distinct id, and no id value is interpolated into a query
    string. Entry ``i`` of each output array belongs to the ``i``-th distinct
    id in ascending order, which makes the output reproducible; previously
    the order followed arbitrary set-iteration order.

    Args:
        stat_file: Open, writable PyTables file receiving the two arrays.
        table: Table providing *id_column* and a ``playcount`` column.
        id_column: Name of the column to group by.
        mean_name: Node name for the array of per-id means.
        variance_name: Node name for the array of per-id variances.
    """
    ids = table.read(field=id_column)
    playcounts = table.read(field='playcount')
    means, variances = _grouped_mean_and_variance(ids, playcounts)

    shape = (len(means),)
    mean_array = create_array(stat_file, mean_name, shape)
    variance_array = create_array(stat_file, variance_name, shape)
    # One bulk write per array rather than one tiny write per element.
    mean_array[:] = means
    variance_array[:] = variances


with tables.open_file(TASTE_PROFILE_STATISTICS_FILE, mode="w") as statistics_file:
    with tables.open_file(TASTE_PROFILE_FILE, mode="r") as summary_file:
        _store_playcount_statistics(
            statistics_file,
            summary_file.root.user_playcount,
            "user_id",
            "user_playcount_mean",
            "user_playcount_variance",
        )
        _store_playcount_statistics(
            statistics_file,
            summary_file.root.song_playcount,
            "song_id",
            "song_playcount_mean",
            "song_playcount_variance",
        )
    statistics_file.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment