Last active
April 17, 2018 11:02
-
-
Save allbinmani/c1d5362cb88fb354eb9a4093b18853ca to your computer and use it in GitHub Desktop.
Markov in Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.nio.file.Path; | |
import java.util.List; | |
import java.util.Random; | |
import java.util.ArrayList; | |
public class Markov { | |
/** Regular expression for breaking up words. */ | |
private static final String WORD_REGEX = "(?<=\\w\\W)"; | |
/** Regular expression for getting individual characters. */ | |
private static final String CHAR_REGEX = "(?<=.)"; | |
public static void main(String[] args) { | |
StringChain sc = new StringChain(2); | |
List<String> words = new ArrayList<String>(); | |
words.add("markov"); | |
words.add("chain"); | |
words.add("how"); | |
words.add("are"); | |
words.add("markov"); | |
words.add("chains"); | |
words.add("built"); | |
words.add("you"); | |
words.add("ask?"); | |
words.add("later"); | |
words.add("markov"); | |
words.add("chains"); | |
words.add("will"); | |
words.add("predict"); | |
words.add("text"); | |
sc.addItems(words.iterator()); | |
System.out.println( String.join(" ", sc.generate(10, new Random()) ) ); | |
} | |
private static void addFile(StringChain chain, String regex, Path file) { | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.HashMap; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.Random; | |
import java.util.stream.Collectors; | |
public class StringChain { | |
private HashMap<String, List<Suffix>> map; | |
private int ORDER; | |
public StringChain(int order){ | |
ORDER = order; | |
map = new HashMap<String, List<Suffix>>(); | |
} | |
class Prefix { | |
public String prefix; | |
Prefix(List<String> pfx) { | |
prefix = String.join("|+|", pfx); | |
} | |
} | |
class Suffix { | |
public String suffix; | |
public int count; | |
Suffix(String sfx) { | |
suffix = sfx; | |
count = 1; | |
} | |
void increaseCount() { | |
count++; | |
} | |
} | |
private List<String> initialPrefix() { | |
ArrayList<String> prefix = new ArrayList<String>(ORDER); | |
for(int i=0; i < ORDER; i++) { | |
prefix.add("<BLANK>"); | |
} | |
return prefix; | |
} | |
public void addItems(Iterator<String>itemIter) { | |
List<String> prefix = initialPrefix(); | |
while(itemIter.hasNext()) { | |
String suffix = itemIter.next(); | |
Prefix pfx = new Prefix(prefix); | |
List<Suffix> sfxs = map.get(pfx.prefix); | |
if(sfxs == null) { | |
sfxs = new ArrayList<Suffix>(); | |
map.put(pfx.prefix, sfxs); | |
sfxs.add(new Suffix(suffix)); | |
} else { | |
// Suffix exists? | |
Suffix existing = null; | |
for(int i=0; i < sfxs.size(); i++) { | |
if(sfxs.get(i).suffix.equals(suffix)) { | |
existing = sfxs.get(i); | |
existing.increaseCount(); | |
break; | |
} | |
} | |
if(existing == null) { | |
sfxs.add(new Suffix(suffix)); | |
} | |
} | |
prefix.remove(0); | |
prefix.add(suffix); | |
} | |
} | |
public List<String> generate(int n, Random rand) { | |
List<String> prefix = initialPrefix(); | |
List<String> output = new ArrayList<String>(n); | |
for(int i=0; i < n; i++) { | |
String next = null; | |
Prefix pfx = new Prefix(prefix); | |
List<Suffix> sfxs = map.get(pfx.prefix); | |
if(sfxs == null) { | |
next = "<BLANK>"; | |
} else { | |
// Pick from sfxs | |
float sum=0; | |
float r = rand.nextFloat(); | |
int picked = 0; | |
for(int j=0; j < sfxs.size(); j++) { | |
Suffix s = sfxs.get(j); | |
sum += s.count; | |
} | |
for(int j=0; j < sfxs.size(); j++) { | |
Suffix s = sfxs.get(j); | |
float prob = (float)s.count / sum; | |
if(prob > r) { | |
picked = j; | |
break; | |
} | |
} | |
next = sfxs.get(picked).suffix; | |
} | |
if(next != null) { | |
output.add(next); | |
prefix.remove(0); | |
prefix.add(next); | |
} | |
} | |
return output; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment