Skip to content

Instantly share code, notes, and snippets.

@gilles-leblanc
Created May 3, 2017 22:44
Show Gist options
  • Save gilles-leblanc/033895b5b935085e782eb4ea67a81cc9 to your computer and use it in GitHub Desktop.
Save gilles-leblanc/033895b5b935085e782eb4ea67a81cc9 to your computer and use it in GitHub Desktop.
Name Generator for blog post
module NameLength
open System
open System.Configuration
open MathNet.Numerics.Distributions
// A record type that contains the mean and standard deviation of the names world length from an
// input file.
type NameLengthInfo = { mean:float; standardDeviation: float }
// Given an input string returns the NameLengthInfo record that can be later used to draw a random
// value from the normal distribution.
let internal getNameLengthInfo (input:string) : NameLengthInfo =
let names = input.Split [|' '|]
let numberOfNames = float names.Length
let namesLengths = names |> Array.map (fun name -> float name.Length)
let mean = namesLengths |> Array.average
let standardDeviation = sqrt (Array.averageBy (fun x -> (x - mean)**2.0) namesLengths)
{ mean = mean; standardDeviation = standardDeviation }
// Given a NameLengthInfo returns a random value drawn from a normal (gaussian) distribution
let internal getNameLength (nameLengthInfo:NameLengthInfo) =
let mean = nameLengthInfo.mean
let standardDeviation = nameLengthInfo.standardDeviation
let normalDistribution = Normal(mean, standardDeviation)
let length = normalDistribution.Sample() |> Math.Round |> int
let minimumLength = ConfigurationManager.AppSettings.Item("minimumNameLength") |> int
if length >= (minimumLength) then length else minimumLength
module ProbabilityTable
open System
open System.IO
open Newtonsoft.Json
open MapConverter
open NameLength
type ProbabilityTable = { probabilities:Map<string, Map<string, float>>;
nameLengthInfo:NameLengthInfo }
// Parses a string and count the total number of occurrences of substrings of size length
let rec countOccurrences input (occurrenceTable:Map<string, float>) length =
let adjLen = length - 1
match input |> Seq.toList with
| head :: tail when tail.Length >= adjLen ->
let other = Seq.take adjLen tail |> Seq.toList
let occurrence = head :: other |> Array.ofList |> String
// add current occurrence to the occurrence table
let updatedMap = match occurrenceTable.ContainsKey (occurrence) with
| true -> occurrenceTable.Add(occurrence, occurrenceTable.[occurrence] + 1.0)
| false -> occurrenceTable.Add(occurrence, 1.0)
// call the function recursively with the rest of the string
countOccurrences (tail |> Array.ofList |> String) updatedMap length
| _ -> occurrenceTable
// Return a new probability table with the key value pair added.
// Given letter X, a probability table gives a percentage for letter Y to appear following letter X.
let private addProbability (key:string) value (probabilityTable:Map<string, Map<string, float>>) length =
let mainKey = Char.ToString key.[0]
let subKey = key.[1..]
match Seq.forall Char.IsLower subKey with
| false -> probabilityTable // do not add a subkey containing a white space
| _ -> match probabilityTable.ContainsKey(mainKey) with
| true -> let subMap = Map.find mainKey probabilityTable
match subMap.ContainsKey(subKey) with
| true -> failwithf "subkey %s already added in probabilityTable" subKey
| false -> let newSubMap = subMap.Add(subKey, value)
probabilityTable.Add(mainKey, newSubMap)
| false -> let subMap = Map.empty.Add(subKey, value)
probabilityTable.Add(mainKey, subMap)
// Cumulate the submap to transform to probabilities of the form 0.75 0.25 0.0.
// Notice that the order is decreasing. Instead of using the more tradational increasing order
// of 0.25 0.75 1.0, we are presenting the values in decreasing order starting from 1 to make
// picking the right value easier later on. When we will pick the letters we will draw a random
// number and check if it is greater than the value.
let private cumulate map =
let total = Map.fold (fun acc key value -> acc + value) 0.0 map
let _, cumulativeSubMap =
// map into probability
Map.map (fun key value -> value / total) map
// fold into a cumulative probability result
|> Map.fold (fun (t, (m:Map<string, float>)) key value ->
(t - value, m.Add(key, t - value))
) (1.0, Map.empty)
Map.map (fun key (value:float) -> Math.Round(value, 6)) cumulativeSubMap
// Given an input string creates a probability table for the different letters in the string.
let buildProbabilityTable (input:string) length : ProbabilityTable =
let nameLengths = getNameLengthInfo input
let occurrencesTable = countOccurrences (input.ToLower()) Map.empty length
let adjLen = length - 1
let table = Map.fold (fun acc key value -> addProbability key value acc adjLen)
Map.empty occurrencesTable
|> Map.map (fun key value -> cumulate value)
{ probabilities = table; nameLengthInfo = nameLengths }
// Given an input file path, creates a probability table calling buildProbabilityTable
let buildProbabilityTableFromMediaFile filePath length : ProbabilityTable =
let input = File.ReadAllText(filePath)
buildProbabilityTable input length
// Given an input file path for an already built serialized probabilityTable, return this table
let buildProbabilityTableFromSerializationFile filePath length : ProbabilityTable =
let json = File.ReadAllText(filePath)
JsonConvert.DeserializeObject<ProbabilityTable>(json, mapConverter)
// Serialize a ProbabilityTable to file
let serializeProbabilityTable filePath (table:ProbabilityTable) =
let json = JsonConvert.SerializeObject table
File.WriteAllText(filePath, json)
module NameGenerator
open System
open System.Collections.Generic
open NameLength
open ProbabilityTable
let rnd = System.Random()
// Randomly returns a string from values based on it's probability
let private pickString (values:Map<string, float>) =
let randomValue = rnd.NextDouble()
let pick = values
|> Map.tryPick (fun key value -> if randomValue >= value then Some(key) else None)
match pick with
| Some v -> v
| None -> failwith "Can't pick letter"
// Recursively creates a new name.
let rec private buildName (nameSoFar:string) (charLeft:int) (probabilityTable:ProbabilityTable) =
let lastChar = Char.ToString nameSoFar.[nameSoFar.Length - 1]
let addition = match Map.containsKey lastChar probabilityTable.probabilities with
// if our character exists pick one of it's subkeys
| true -> pickString probabilityTable.probabilities.[lastChar]
// otherwise start a new sequence of character with a name starting character
| false -> pickString probabilityTable.probabilities.[" "]
let newName = nameSoFar + addition
let newCharLeft = charLeft - addition.Length
match newCharLeft with
| ln when ln > 0 -> buildName newName newCharLeft probabilityTable // we need more
| ln when ln < 0 -> newName.[0..newName.Length - 1] // we went one char to long
| _ -> newName // we are exactly where we want to be
// Given a pre-built probability table generates a random name.
let generateRandomName (probabilityTable:ProbabilityTable) =
let nameLength = int (getNameLength probabilityTable.nameLengthInfo)
// We pass in the whitespace char to start the name as this will allow us to find letters after
// spaces in our probability table. These are the letters that start name.
// We must remember to take this whitespace into account in our nameLength and later when
// returning the name
let lowerCaseName = buildName " " nameLength probabilityTable
(Char.ToUpper lowerCaseName.[1] |> Char.ToString) + lowerCaseName.[2..]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment