Skip to content

Instantly share code, notes, and snippets.

@adamhaile
Created April 24, 2014 19:04
Show Gist options
  • Save adamhaile/11265756 to your computer and use it in GitHub Desktop.
Save adamhaile/11265756 to your computer and use it in GitHub Desktop.
TriNUG F# / Data Analytics Twitter Analysis using idiomatic functional style
namespace NewCo.TwitterAnalysis.Idiomatic
open System
/// A single tweet as (creation timestamp, retweet count, text).
type tweet = DateTime * int * string
/// A stock volume observation as (date, volume).
type stockVolume = DateTime * int
/// Given a string (the providers in this file are called with a stock symbol),
/// returns a sequence of tweets.
type tweetProvider = string -> tweet seq
/// Given a string (the providers in this file are called with a stock symbol),
/// returns a sequence of stock volume observations.
type stockVolumeProvider = string -> stockVolume seq
module Analysis =
    open MathNet.Numerics.Statistics

    /// Computes the Pearson correlation between the number of tweets per
    /// calendar day and the stock volume on that day, using only the days
    /// that appear in both inputs.
    ///
    /// tweets       - tweets to count; grouped by the date part of their timestamp.
    /// stockVolumes - (date, volume) observations; also keyed by the date part,
    ///                so a time-of-day component cannot prevent a match.
    ///                If several observations fall on the same day the last one
    ///                wins (Map.ofSeq semantics).
    /// Returns the value produced by Correlation.Pearson for the paired series.
    let correlate (tweets : tweet seq) (stockVolumes : stockVolume seq) =
        // Tweets per calendar day. Building the map directly enumerates the
        // input exactly once.
        let tweetCalendar =
            tweets
            |> Seq.countBy (fun (created, _, _) -> created.Date)
            |> Map.ofSeq
        // Volume per calendar day, normalised the same way as the tweet side.
        let stockCalendar =
            stockVolumes
            |> Seq.map (fun (day, volume) -> day.Date, volume)
            |> Map.ofSeq
        // Join on day. Walking one map and looking up the other keeps the two
        // series aligned pair by pair, in ascending date order.
        let paired =
            tweetCalendar
            |> Map.toList
            |> List.choose (fun (day, tweetCount) ->
                stockCalendar
                |> Map.tryFind day
                |> Option.map (fun volume -> double tweetCount, double volume))
        let tweetDayVolumes = paired |> List.map fst
        let stockDayVolumes = paired |> List.map snd
        Correlation.Pearson(tweetDayVolumes, stockDayVolumes)

    /// Calls both providers with the same `stock` argument and correlates
    /// the tweets and stock volumes they return (see `correlate`).
    let correlateProviders tweetProvider stockVolumeProvider stock =
        correlate (tweetProvider stock) (stockVolumeProvider stock)
module TwitterProvider =
    open System.Configuration
    open Tweetinvi

    /// Tweet provider backed by a Tweetinvi search.
    /// Reads the four credential values from the application's AppSettings,
    /// hands them to TwitterCredentials.SetCredentials, searches for `stock`
    /// and projects each result to (CreatedAt, RetweetCount, Text).
    let getTweets : tweetProvider = fun stock ->
        let settings = ConfigurationManager.AppSettings
        let accessToken = settings.["accessToken"]
        let accessTokenSecret = settings.["accessTokenSecret"]
        let consumerKey = settings.["consumerKey"]
        let consumerSecret = settings.["consumerSecret"]
        TwitterCredentials.SetCredentials(accessToken, accessTokenSecret, consumerKey, consumerSecret)
        Search.SearchTweets stock
        |> Seq.map (fun status -> status.CreatedAt, status.RetweetCount, status.Text)
module YahooProvider =
    open FSharp.Data

    /// CSV type whose schema is inferred from a sample download for MSFT.
    // NOTE(review): the type provider needs this sample URL to be reachable
    // at compile time - confirm the ichart endpoint is still available.
    type YahooStocksCsv = CsvProvider<"http://ichart.finance.yahoo.com/table.csv?s=MSFT">

    /// Stock volume provider backed by the Yahoo CSV download.
    /// Loads the table for `stock` and projects each row to (Date, Volume).
    let getStockVolumes : stockVolumeProvider = fun stock ->
        // Escape the symbol before placing it in the query string so that
        // reserved characters (e.g. '^' or '&') cannot corrupt the URL.
        let url = "http://ichart.finance.yahoo.com/table.csv?s=" + System.Uri.EscapeDataString(stock)
        let data = YahooStocksCsv.Load(url)
        data.Rows |> Seq.map (fun d -> d.Date, d.Volume)
module FileProvider =
    open System.IO
    open System.Reflection
    open FSharp.Data

    /// CSV type whose schema is inferred from the sample file TweetData.csv.
    type TweetsCsv = CsvProvider<"TweetData.csv">
    /// CSV type whose schema is inferred from the sample file StockData.csv.
    type StockVolumesCsv = CsvProvider<"StockData.csv">

    /// Returns `file` unchanged when it is already rooted; otherwise resolves
    /// it against the directory of the executing assembly.
    let private absolute (file : string) =
        match Path.IsPathRooted file with
        | true -> file
        | false ->
            // The assembly's CodeBase is used rather than .Location because
            // .Location doesn't work in NUnit.
            let codeBase = Uri(Assembly.GetExecutingAssembly().CodeBase)
            let assemblyDir = Path.GetDirectoryName(codeBase.LocalPath)
            Path.Combine(assemblyDir, file)

    /// Builds a tweet provider that reads from the CSV at `file`.
    /// The provider's stock-symbol argument is ignored; every row is returned
    /// as (Date, Retweets, Text).
    let getTweets : string -> tweetProvider = fun file _stock ->
        let csv = TweetsCsv.Load(absolute file)
        seq { for row in csv.Rows -> row.Date, row.Retweets, row.Text }

    /// Builds a stock volume provider that reads from the CSV at `file`.
    /// The provider's stock-symbol argument is ignored; every row is returned
    /// as (Date, Volume).
    let getStockVolumes : string -> stockVolumeProvider = fun file _stock ->
        let csv = StockVolumesCsv.Load(absolute file)
        seq { for row in csv.Rows -> row.Date, row.Volume }
module InMemoryProvider =

    /// Fixed tweet fixture; the stock-symbol argument is ignored.
    /// Day n (n = 1..6, i.e. 15-20 April 2014) carries n tweets with zero
    /// retweets and the text "Test<n>.<k>", for 21 tweets in total.
    let getTweets : tweetProvider = fun _ ->
        [ for day in 1 .. 6 do
            for k in 1 .. day ->
                (DateTime(2014, 4, 14 + day), 0, sprintf "Test%d.%d" day k) ]
        |> Seq.ofList

    /// Fixed stock volume fixture; the stock-symbol argument is ignored.
    /// Day n (n = 1..6, i.e. 15-20 April 2014) has a volume of n * 10000.
    let getStockVolumes : stockVolumeProvider = fun _ ->
        [ for day in 1 .. 6 -> (DateTime(2014, 4, 14 + day), day * 10000) ]
        |> Seq.ofList
module Tests =
    open NUnit.Framework
    open FsUnit

    /// Tests for Analysis.correlateProviders.
    [<TestFixture>]
    type ``Analysis Tests`` () =

        [<Test>]
        member x.``Analysis.correlateProviders using InMemoryProviders should return 1`` () =
            let correlateInMemory =
                Analysis.correlateProviders InMemoryProvider.getTweets InMemoryProvider.getStockVolumes
            // The coefficient is a computed float, so compare with a tolerance
            // instead of demanding bit-exact equality.
            correlateInMemory "IBM" |> should (equalWithin 1e-9) 1.0

        // Uses the live Twitter and Yahoo providers; passes as long as no
        // exception escapes.
        [<Test>]
        member x.``Analysis.correlateProviders using live Twitter and Yahoo on IBM should not throw an exception`` () =
            let correlateLive =
                Analysis.correlateProviders TwitterProvider.getTweets YahooProvider.getStockVolumes
            correlateLive "IBM" |> ignore

    /// Tests for the live Twitter provider.
    [<TestFixture>]
    type ``TwitterProvider Tests`` () =

        [<Test>]
        member x.``TwitterProvider.getTweets when called with IBM should return non-null`` () =
            TwitterProvider.getTweets "IBM" |> should not' (equal null)

    /// Tests for the live Yahoo provider.
    [<TestFixture>]
    type ``YahooProvider Tests`` () =

        // Name corrected: the assertion checks that the result is NOT null.
        [<Test>]
        member x.``YahooProvider.getStockVolumes when called with IBM should return non-null`` () =
            YahooProvider.getStockVolumes "IBM" |> should not' (equal null)

    /// Tests for the CSV-file-backed providers.
    [<TestFixture>]
    type ``FileProvider Tests`` () =

        [<Test>]
        member x.``FileProvider.getTweets returns expected items`` () =
            FileProvider.getTweets "TweetData.csv" "IBM" |> Seq.length |> should equal 2

        [<Test>]
        member x.``FileProvider.getStockVolumes returns expected items`` () =
            FileProvider.getStockVolumes "StockData.csv" "IBM" |> Seq.length |> should equal 2

    /// Tests for the in-memory fixture providers.
    [<TestFixture>]
    type ``InMemoryProvider Tests`` () =

        [<Test>]
        member x.``InMemoryProvider.getTweets returns expected items`` () =
            InMemoryProvider.getTweets "IBM" |> Seq.length |> should equal 21

        [<Test>]
        member x.``InMemoryProvider.getStockVolumes returns expected items`` () =
            InMemoryProvider.getStockVolumes "IBM" |> Seq.length |> should equal 6
@jamessdixon
Copy link

Love it!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment