Created
May 8, 2015 08:57
-
-
Save alksl/93a5a35eccc9dd315e0f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import sys | |
import tables | |
import numpy as np | |
# Both locations are taken from the command line and made absolute up front:
#   argv[1] -> TASTE_PROFILE_FILE             (HDF5 file opened read-only below)
#   argv[2] -> TASTE_PROFILE_STATISTICS_FILE  (HDF5 file opened for writing below)
TASTE_PROFILE_FILE, TASTE_PROFILE_STATISTICS_FILE = map(
    os.path.abspath, (sys.argv[1], sys.argv[2])
)

# Echo the resolved paths so a run's log shows exactly which files were used.
print("TASTE_PROFILE_FILE: ", TASTE_PROFILE_FILE)
print("TASTE_PROFILE_STATISTICS_FILE: ", TASTE_PROFILE_STATISTICS_FILE)
def create_array(stat_file, name, shape):
    """Create a compressed float64 CArray directly under the file's root.

    Args:
        stat_file: Open, writable PyTables file handle.
        name: Name of the new node under ``stat_file.root``.
        shape: Shape tuple of the array to allocate.

    Returns:
        Whatever ``stat_file.create_carray`` returns for the new node.
    """
    # Maximum zlib compression, with Fletcher32 checksums enabled.
    compression = tables.Filters(complevel=9, complib='zlib', fletcher32=True)
    element_type = tables.Float64Atom()
    return stat_file.create_carray(
        stat_file.root, name, element_type, shape, filters=compression
    )
def _grouped_mean_and_variance(ids, playcounts):
    """Return the per-id mean and population variance of ``playcounts``.

    Args:
        ids: 1-D sequence with one group key per row.
        playcounts: 1-D sequence of numeric values aligned with ``ids``.

    Returns:
        A ``(means, variances)`` pair of float64 arrays holding one entry per
        distinct id, ordered by ascending id (the order of ``np.unique(ids)``).
        The variance is the population variance (``ddof=0``), which is what
        ``np.var`` computes by default.
    """
    ids = np.asarray(ids).ravel()
    values = np.asarray(playcounts, dtype=np.float64).ravel()

    # ``group[i]`` is the position of ``ids[i]`` among the sorted distinct ids.
    _, group, counts = np.unique(ids, return_inverse=True, return_counts=True)
    group = group.reshape(-1)

    means = np.bincount(group, weights=values) / counts
    # Two-pass form (deviations from the group mean) rather than
    # E[x^2] - E[x]^2, to avoid cancellation on large play counts.
    deviations = values - means[group]
    variances = np.bincount(group, weights=deviations * deviations) / counts
    return means, variances


def _write_playcount_statistics(stat_file, table, id_column, mean_name, variance_name):
    """Store the per-id play-count mean and variance of ``table``.

    The id and ``playcount`` columns are each read once and grouped in memory,
    instead of issuing one ``where()`` query per distinct id.

    Args:
        stat_file: Open, writable PyTables file that receives the two arrays.
        table: PyTables table with ``id_column`` and ``playcount`` columns.
        id_column: Name of the column to group by.
        mean_name: Node name for the array of per-id means.
        variance_name: Node name for the array of per-id variances.
    """
    means, variances = _grouped_mean_and_variance(
        table.col(id_column), table.col("playcount")
    )
    mean_array = create_array(stat_file, mean_name, means.shape)
    variance_array = create_array(stat_file, variance_name, variances.shape)
    if means.size:  # nothing to copy when the table has no rows
        mean_array[:] = means
        variance_array[:] = variances


# Row i of every output array belongs to the i-th smallest distinct id of the
# corresponding table, so the output is reproducible from run to run and can
# be matched back to ids via np.unique() on the id column.
with tables.open_file(TASTE_PROFILE_STATISTICS_FILE, mode="w") as statistics_file:
    with tables.open_file(TASTE_PROFILE_FILE, mode="r") as summary_file:
        _write_playcount_statistics(
            statistics_file,
            summary_file.root.user_playcount,
            "user_id",
            "user_playcount_mean",
            "user_playcount_variance",
        )
        _write_playcount_statistics(
            statistics_file,
            summary_file.root.song_playcount,
            "song_id",
            "song_playcount_mean",
            "song_playcount_variance",
        )
    statistics_file.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment