Created
October 5, 2011 07:38
-
-
Save metaxy/1263869 to your computer and use it in GitHub Desktop.
test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby

# Cleaned text of the whole input file; readFile appends to it.
$data = ""

# Step widths handed to getString: each value is added to a running position
# and the letter at that position is picked.
$lines = [
  1, 30, 21, 8, 5, 19, 46, 3, 22, 20, 74, 9, 1, 86, 1, 12, 12, 8, 1, 13,
  295, 4, 25, 96, 2, 2, 327, 37, 1, 1, 9, 34, 11, 3, 3, 9, 1, 5, 2, 13,
  20, 79, 1, 1, 1, 1, 4, 55, 1, 17, 1, 1, 1, 4, 2, 104, 12, 235, 37
]

# Offset into $data at which each input line starts; filled by readFile.
# NOTE(review): 11842 is presumably the line count of the input text - confirm.
$linesStart = Array.new(11842)

# Lower-cased entries of the word list; filled by readWordList.
# NOTE(review): 1000 presumably matches the size of the word list file - confirm.
$wordList = Array.new(1000)
# Reduces a string to its ASCII letters in lower case.
#
# Every character outside a-z / A-Z is dropped (whitespace, digits,
# punctuation, and also non-ASCII letters such as umlauts); what remains is
# down-cased.
#
# @param string [String] raw text
# @return [String] the ASCII letters of +string+, lower-cased
def removeStuff(string)
  letters_only = string.delete("^a-zA-Z")
  letters_only.downcase
end
# Reads the input text, strips every line down to its lower-case ASCII
# letters (via removeStuff) and stores the result in the globals:
#
# * $data          - the cleaned lines are appended to it, without separators
# * $linesStart[i] - running total of the cleaned lengths of lines 0...i,
#                    i.e. the offset of line i inside the text read by this call
#
# The file is opened in block form so the handle is closed even when reading
# or cleaning raises. The cleaned lines are joined once at the end instead of
# growing $data with one new string per line.
#
# @param path [String] file to read; defaults to the original hard-coded path
# @return [nil]
def readFile(path = "/home/paul/Dokumente/schule/bwinf/EffiBriest.txt")
  offset = 0
  cleaned_lines = []
  File.open(path, "r") do |file|
    file.each_with_index do |raw_line, index|
      cleaned = removeStuff(raw_line)
      $linesStart[index] = offset
      cleaned_lines << cleaned
      offset += cleaned.size
    end
  end
  $data += cleaned_lines.join
  nil
end
# Loads the word list into $wordList, one entry per line of the file.
#
# Each line is lower-cased and stripped of trailing whitespace (including the
# line break) and stored at the index of its line number, so slots beyond the
# number of lines in the file keep whatever they held before.
#
# The file is opened in block form so the handle is closed even when reading
# raises.
#
# @param path [String] word list file; defaults to the original hard-coded path
# @return [nil]
def readWordList(path = "/home/paul/Dokumente/schule/bwinf/germanwordlist_1000.txt")
  File.open(path, "r") do |file|
    file.each_with_index do |line, index|
      $wordList[index] = line.downcase.rstrip
    end
  end
  nil
end
# Picks one character from +text+ per entry of +lines+.
#
# The entries of +lines+ are step widths: they are summed up one after the
# other and, after each addition, the character at that (1-based) running
# position is appended to the result.
#
# When the running position moves past the end of +text+ the selection stops
# and the characters collected so far are returned, instead of failing on the
# missing character.
#
# @param text [String] text to pick characters from
# @param lines [Array<Integer>] step widths between the picked characters
# @return [String] the picked characters, in order
def getString(text, lines)
  picked = "".dup
  position = 0
  lines.each do |step|
    position += step
    letter = text[position - 1]
    break if letter.nil? # ran off the end of the text
    picked << letter.chr
  end
  picked
end
# Computes the Shannon entropy (in bits per byte) of the byte distribution of
# +string+.
#
# The byte frequencies are collected in a single pass over the string rather
# than re-counting the whole string once per byte.
#
# @param string [String] text to analyse; it is processed byte by byte
# @return [Numeric] the entropy; 0 for an empty string
def getEntropy(string)
  total = string.bytesize.to_f
  counts = Hash.new(0)
  string.each_byte { |byte| counts[byte] += 1 }

  entropy = 0
  counts.each_value do |count|
    probability = count / total
    entropy += probability * Math.log2(probability)
  end
  -entropy
end
# Scores +string+ by the words from $wordList it contains.
#
# Every entry of $wordList that occurs anywhere in +string+ contributes its
# length once, no matter how often it occurs. Unfilled (nil) slots of
# $wordList are skipped.
#
# @param string [String] candidate text
# @return [Integer] sum of the lengths of all contained words
def getGermanWordsCount(string)
  $wordList.inject(0) do |total, word|
    if word && string.include?(word)
      total + word.size
    else
      total
    end
  end
end
# Tells whether +string+ begins with any word from $wordList.
#
# Unfilled (nil) slots are skipped, and so are empty entries: an empty word
# is a prefix of every string and would make the check always succeed.
#
# @param string [String] candidate text
# @return [Boolean] true if at least one non-empty word is a prefix of +string+
def startWithGermanWord(string)
  $wordList.any? do |word|
    word && !word.empty? && string.start_with?(word)
  end
end
# Brute-force search over all line starts of the input text.
#
# Loads the text and the word list, then, for every recorded line start,
# applies the step widths in $lines to the text beginning at that line
# (getString) and prints the resulting string when it starts with a word from
# the word list.
#
# Only real line starts are visited: unfilled (nil) slots of $linesStart are
# skipped, and so are starts whose remaining text is too short to apply all
# step widths.
#
# The printed "count" is always 0: scoring the candidates with getEntropy or
# getGermanWordsCount, and tracking the best-scoring candidate, were earlier
# experiments that are currently disabled.
def dd
  readFile
  readWordList
  puts "start"

  # Number of characters a candidate needs so that every step lands inside
  # the text (the step widths in $lines are all positive).
  needed = $lines.inject(0) { |sum, step| sum + step }

  $linesStart.each_with_index do |start, i|
    next if start.nil?

    text = $data[start..-1]
    next if text.nil? || text.size < needed

    string = getString(text, $lines)
    count = 0
    if startWithGermanWord(string)
      puts string + " i = " + i.to_s + " count = " + count.to_s
    end
  end
end
# Smoke test on a short sample passage.
#
# Loads the word list, reduces the sample to lower-case ASCII letters and
# prints, in this order: the letters picked by a fixed list of step widths,
# the cleaned sample itself, and its word-list score.
def test
  readWordList
  data = "Ganz einfach. So geweckt und temperamentvoll und beinahe leidenschaftlich sie ist, oder
vielleicht auch, weil sie es ist, sie gehört nicht zu denen, die so recht eigentlich auf Liebe
gestellt sind, wenigstens nicht auf das, was den Namen ehrlich verdient."
  steps = [13, 34, 7, 13, 11, 3, 9, 58, 1]
  cleaned = removeStuff(data)

  puts getString(cleaned, steps)
  puts cleaned
  puts getGermanWordsCount(cleaned)
end

# Run the smoke test when the script is executed.
test
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment