Last active
March 16, 2017 06:10
-
-
Save gorkemgoknar/3af6b91318647f75c32ddab7dfd67ecb to your computer and use it in GitHub Desktop.
Calculates the distinct song play count per client (in C#). Uses input.csv as the input file and 10/08/2016 (10 Aug 2016) as the date by default when no arguments are given.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
// Computes, for one calendar day, how many distinct songs each client played and
// how many clients share each distinct-song count. Input is a whitespace-delimited
// file with the columns PLAY_ID SONG_ID CLIENT_ID PLAY_TS (PLAY_TS = "dd/MM/yyyy HH:mm:ss").
class Program {
    // Client database: one entry per client seen on the requested date, holding
    // the distinct songs that client played.
    public static List<Client> clientAccessList = new List<Client>();

    // Histogram: key = number of distinct songs played by a client (decimal string),
    // value = number of clients that played exactly that many distinct songs.
    public static Dictionary<string, int> distinctCount = new Dictionary<string, int>();

    // Format used for parsing and printing dates (day/month/year).
    public static string dateFormat = "dd/MM/yyyy";

    // A client together with the distinct songs it accessed on a given day.
    public class Client {
        public int clientID;
        public DateTime dateAccessed;
        public List<string> songAccessList;
    }

    // convertDate
    // Converts a date string to a DateTime according to dateFormat.
    // Throws FormatException when the string does not match dateFormat.
    public static DateTime convertDate(string dateToConvert) {
        return DateTime.ParseExact(dateToConvert, dateFormat, CultureInfo.InvariantCulture);
    }

    // readAndParseCSV
    // Reads the whitespace-delimited file fileInput and, for every row whose date
    // equals accessDate, records the row's song under the row's client in
    // clientAccessList (each song at most once per client).
    // Blank lines are ignored; rows that are too short or carry an unparsable date
    // or client id are reported on stderr and skipped instead of aborting the run.
    // Terminates the process with exit code 1 when fileInput does not exist.
    public static void readAndParseCSV(string fileInput, string accessDate) {
        DateTime dateRequired = convertDate(accessDate).Date;
        if (!File.Exists(fileInput)) {
            Console.Error.WriteLine("ERROR: Input file does not exist...");
            Environment.Exit(1); // non-zero: a missing input is a failure, not a success
        }

        // Index of already known clients so each row needs one hash lookup rather
        // than a linear scan. Seeded from clientAccessList so repeated calls keep
        // extending the same clients; on duplicate ids the first entry wins.
        Dictionary<int, Client> clientsById = new Dictionary<int, Client>();
        foreach (Client known in clientAccessList) {
            if (!clientsById.ContainsKey(known.clientID)) {
                clientsById.Add(known.clientID, known);
            }
        }

        int lineCounter = 0;
        Console.Write("--------------\nBegin Processing.\n");
        using (StreamReader reader = new StreamReader(fileInput)) { // using will close reader
            string line;
            while ((line = reader.ReadLine()) != null) {
                lineCounter++;
                Console.Write("Processing Line {0}\n", lineCounter);

                // A null separator means "split on any whitespace"; dropping empty
                // entries keeps the columns aligned when fields are padded with
                // several spaces or tabs.
                string[] columns = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                if (columns.Length == 0) {
                    continue; // blank line
                }
                // Skip the header row.
                if (string.Equals(columns[0], "PLAY_ID", StringComparison.Ordinal)) {
                    continue;
                }
                /*
                columns[0] PLAY_ID
                columns[1] SONG_ID
                columns[2] CLIENT_ID
                columns[3] PLAY_TS (date part; the time part lands in columns[4])
                */
                if (columns.Length < 4) {
                    Console.Error.WriteLine("WARNING: Skipping line {0}: expected at least 4 columns.", lineCounter);
                    continue;
                }

                // Only the date part matters; the time of day is ignored.
                DateTime dateOfLine;
                if (!DateTime.TryParseExact(columns[3], dateFormat, CultureInfo.InvariantCulture,
                                            DateTimeStyles.None, out dateOfLine)) {
                    Console.Error.WriteLine("WARNING: Skipping line {0}: invalid date '{1}'.", lineCounter, columns[3]);
                    continue;
                }
                if (dateOfLine.Date != dateRequired) {
                    continue; // not the requested day
                }

                int clientId;
                if (!Int32.TryParse(columns[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out clientId)) {
                    Console.Error.WriteLine("WARNING: Skipping line {0}: invalid client id '{1}'.", lineCounter, columns[2]);
                    continue;
                }

                string songId = columns[1];
                Client client;
                if (clientsById.TryGetValue(clientId, out client)) {
                    // Known client: record the song only if it is new for this client.
                    if (!client.songAccessList.Contains(songId)) {
                        client.songAccessList.Add(songId);
                    }
                } else {
                    // First row for this client: create it with this song.
                    client = new Client();
                    client.dateAccessed = dateOfLine.Date;
                    client.clientID = clientId;
                    client.songAccessList = new List<string>();
                    client.songAccessList.Add(songId);
                    clientAccessList.Add(client);
                    clientsById.Add(clientId, client);
                }
            }
        }
        Console.Write("End of processing.\n----------------\n");
    }

    // getSongAccessCount
    // Rebuilds distinctCount from clientAccessList:
    //   DISTINCT_PLAY_COUNT, CLIENT_COUNT
    //   1 , x
    //   2 , y
    //   ..
    // The table is cleared first so calling this more than once does not count
    // the same clients twice.
    public static void getSongAccessCount() {
        distinctCount.Clear();
        foreach (Client client in clientAccessList) {
            string key = client.songAccessList.Count.ToString(CultureInfo.InvariantCulture);
            int currentCount;
            // currentCount stays zero when the key does not exist yet.
            distinctCount.TryGetValue(key, out currentCount);
            distinctCount[key] = currentCount + 1;
        }
    }

    // printDistinctPlays
    // Debug output: prints every row of the distinctCount table to the console.
    public static void printDistinctPlays() {
        foreach (KeyValuePair<string, int> pair in distinctCount) {
            Console.WriteLine("DISTINCT_PLAY_COUNT: {0} CLIENT_COUNT: {1}",
                pair.Key,
                pair.Value);
        }
    }

    // writeCSVOutput
    // Writes the distinctCount table to outputFile as CSV, header line first.
    public static void writeCSVOutput(string outputFile) {
        using (StreamWriter sw = new StreamWriter(outputFile)) {
            sw.WriteLine("DISTINCT_PLAY_COUNT, CLIENT_COUNT");
            foreach (KeyValuePair<string, int> pair in distinctCount) {
                sw.WriteLine(pair.Key + "," + pair.Value);
            }
        }
    }

    // outputClientsToConsole
    // Debug output: prints every client with its access date and its songs.
    public static void outputClientsToConsole() {
        foreach (Client client in clientAccessList) {
            Console.Write("Client ID: " + client.clientID + "\n");
            // Invariant culture keeps '/' literal; otherwise it is replaced by the
            // current culture's date separator.
            Console.Write("Date Access: " + client.dateAccessed.ToString(dateFormat, CultureInfo.InvariantCulture) + "\n");
            foreach (string song in client.songAccessList) {
                Console.Write("Song ID: " + song + "\n");
            }
        }
    }

    // getClientCountForDistinctSong
    // Prints how many clients played exactly distinctSong distinct songs and the
    // largest distinct-song count present in distinctCount (0 when it is empty).
    public static void getClientCountForDistinctSong(int distinctSong) {
        int userCount;
        // userCount stays zero when no client has that many distinct songs.
        distinctCount.TryGetValue(distinctSong.ToString(CultureInfo.InvariantCulture), out userCount);

        int maxDistinct = 0;
        foreach (string key in distinctCount.Keys) {
            int keyValue = Int32.Parse(key, CultureInfo.InvariantCulture);
            if (keyValue > maxDistinct) {
                maxDistinct = keyValue;
            }
        }

        Console.Write("Users playing " + distinctSong + " distinct songs: " + userCount + "\n");
        Console.Write("Max distinct : " + maxDistinct + "\n");
    }

    // MAIN function
    // Usage: main.exe [inputcsv] ["DATE"] [ReqDistincts]
    // Defaults: input.csv, 10/08/2016, 346.
    static void Main(string[] args) {
        string inputFileName = "input.csv";
        string inputDate = "10/08/2016";
        int distCount = 346;

        // Short-circuit || so a null array is never dereferenced.
        if (args == null || args.Length == 0) {
            Console.WriteLine("Using input: input.csv, date:10/08/2016");
        } else {
            inputFileName = args[0];
            if (args.Length >= 2) {
                inputDate = args[1];
            }
            if (args.Length >= 3) {
                int requested;
                if (!Int32.TryParse(args[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out requested)) {
                    Console.Error.WriteLine("ERROR: ReqDistincts must be an integer, got '" + args[2] + "'.");
                    Environment.ExitCode = 1;
                    return;
                }
                distCount = requested;
            }
        }

        // Read the input file and fill the client song access list for the date.
        readAndParseCSV(inputFileName, inputDate);

        // Print clients and their accessed songs.
        Console.Write("--------------\nClient Output:\n");
        outputClientsToConsole();
        Console.Write("End of Client Output.\n--------------\n");

        getSongAccessCount();

        Console.Write("--------------\nDistinct Songs:\n");
        printDistinctPlays();
        Console.Write("End of Distinct Songs.\n--------------\n");

        getClientCountForDistinctSong(distCount);
        writeCSVOutput("output.csv");
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Calculates distinct song access counts together with client counts for a given input.csv, where the input columns are "PLAY_ID SONG_ID CLIENT_ID PLAY_TS" and the date is 10/08/2016.
Sample input:
PLAY_ID SONG_ID CLIENT_ID PLAY_TS
44BB190BC2493964E053CF0A000AB546 6164 249 09/08/2016 09:16:34
44BB190BC24A3964E053CF0A000AB546 544 86 10/08/2016 13:54:52
44BB190BC24B3964E053CF0A000AB546 9648 589 08/08/2016 06:08:53
Sample output
Distinct Songs:
DISTINCT_PLAY_COUNT: 1 CLIENT_COUNT: 3
Max distinct : 1
Execution:
main.exe [inputcsv] ["DATE"] [ReqDistincts]
Running with no arguments uses input.csv as input and 10/08/2016 as the date, with 346 ReqDistincts.