Last active
July 25, 2024 16:33
-
-
Save thomasjslone/784062d2716e07842aa224eff9402595 to your computer and use it in GitHub Desktop.
redid my old dictionary database with efficiency in mind
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#@# dictionarydatabank.rb - version 1.0 tested on 2024.07.26 - thomas j slone | |
## A simple database designed for dictionary builders: it stores billions of words by dividing massive arrays into bank files of limited size.
## final todos
##
## enforce the entry (string) size limit in add
## add a config file for banks so that bank and string sizes only have to be given when a bank is first created, not on every reload
## add methods for getting and setting size limits, and possibly for renaming a bank (though that requires moving the entire directory)
## Bank - a directory-backed store for very large word lists.
##
## Entries (strings) live in numbered "bank" files inside <dir>/<name>, one
## entry per line (bank files have no extension). At most one bank is loaded
## in memory at a time; the plain methods (add, remove, include, search, size)
## work on the loaded bank, the bank_* methods work on one named bank and the
## banks_* methods work across every bank.
##
## All errors are raised as RuntimeError with a descriptive message.
## NOTE: entries containing the separator ("\n") are split into several
## entries when a bank is reloaded.
class Bank
  DEFAULT_BANK_SIZE_LIMIT = 1_000_000 ##bytes
  DEFAULT_ENTRY_SIZE_LIMIT = 200
  DEFAULT_SAVE_FREQUENCY = 1000

  ## Bank.new(dir, name, banksize = nil, entrysize = nil, save_frequency = nil)
  ##   dir            - existing parent directory
  ##   name           - bank set name; <dir>/<name> is created when missing
  ##   banksize       - Integer >= 99, soft size limit of one bank in bytes
  ##   entrysize      - Integer >= 1, stored but not enforced by add
  ##   save_frequency - Integer >= 1, loaded bank is saved automatically
  ##                    after this many unsaved changes
  ## Each optional argument may be omitted or nil to get its default.
  def initialize(*args) # dir, name, banksize, entrysize, save_frequency
    dir, name, banksize, entrysize, savefrequency = args

    ##validate init arguments
    raise "ARGUEMENT ERROR: args[0] dir is not a valid string." if dir.to_s.empty?
    raise "ARGUEMENT ERROR: args[1] name is not a valid string." if name.to_s.empty?
    unless banksize.nil?
      raise "ARGUEMENT ERROR: args[2]: banksize is not an integer." unless banksize.is_a?(Integer)
      raise "ARGUEMENT ERROR: args[2] banksize: must be larger than 98." if banksize < 99
    end
    unless entrysize.nil?
      raise "ARGUEMENT ERROR: args[3]: entrysize is not an integer." unless entrysize.is_a?(Integer)
      raise "ARGUEMENT ERROR: args[3] entrysize: must be larger than 0." if entrysize < 1
    end
    unless savefrequency.nil?
      raise "ARGUEMENT ERROR: args[4]: savefrequency must be an integer." unless savefrequency.is_a?(Integer)
      raise "ARGUEMENT ERROR: args[4] must be larger than 0." if savefrequency < 1
    end

    ##check directory
    @dir = "#{dir}/#{name}"
    unless File.directory?(@dir)
      begin
        Dir.mkdir(@dir)
      rescue StandardError
        raise "ERROR: UNABLE TO MAKE DIRECTORY: " + @dir
      end
    end

    @bank_size_limit = banksize || DEFAULT_BANK_SIZE_LIMIT
    @entry_size_limit = entrysize || DEFAULT_ENTRY_SIZE_LIMIT
    @save_frequency = savefrequency || DEFAULT_SAVE_FREQUENCY

    ## possibly add options for file encoding and illegal/legal characters
    @bank = nil             ## name of the loaded bank, nil when none is loaded
    @bank_data = []         ## entries of the loaded bank
    @bank_size = 0          ## bytes of all loaded entries, separators excluded
    @bank_seperator = "\n"
    @unsaved_changes = 0    ## changes to the loaded bank since the last save
    @banks = bank_files     ## names of every known bank file
  end

  ## Add a string to the loaded bank. When no bank is loaded, or the loaded
  ## bank has reached its size limit, an open bank is loaded (or created)
  ## first. Always returns true.
  ## The limit is checked before adding, so a bank may end slightly over it.
  def add(string) ##add a string to bank
    entry = string.to_s
    full = @bank_size >= @bank_size_limit && @bank_size + entry.bytesize > @bank_size_limit
    load_open_bank if @bank.nil? || full
    append_to_memory(entry)
    true
  end

  ## Remove every occurrence of string from the loaded bank.
  ## Returns true when something was removed, false otherwise.
  def remove(string)
    return false unless drop_from_memory(string.to_s)

    record_change
    true
  end

  ## Add a string to the specified bank without loading it.
  ## Returns true when added, false when the string would push the bank over
  ## the size limit. Raises when the bank is unknown or cannot be accessed.
  def bank_add(bank, string)
    name = bank.to_s
    entry = string.to_s
    raise "ARGUEMENT ERROR: no such bank: " + name unless @banks.include?(name)

    ## the loaded bank is changed in memory so a later save does not undo this
    if name == @bank.to_s
      return false if @bank_size + entry.bytesize > @bank_size_limit

      append_to_memory(entry)
      return true
    end

    begin
      path = bank_path(name)
      size = File.size(path)
      prefix = size.zero? ? "" : @bank_seperator
      return false if size + prefix.bytesize + entry.bytesize > @bank_size_limit

      File.open(path, "a") { |f| f.write(prefix + entry) }
      true
    rescue StandardError
      raise "ERROR while accessing bank file: " + name
    end
  end

  ## remove string from specified bank
  ## Returns true when something was removed, false otherwise.
  def bank_remove(bank, string)
    name = bank.to_s
    entry = string.to_s
    raise "ARGUEMENT ERROR: no such bank: " + name unless @banks.include?(name)

    removed = false
    begin
      entries = read_entries(name)
      unless entries.delete(entry).nil?
        write_entries(name, entries)
        removed = true
      end
    rescue StandardError
      raise "ERROR accessing bank file: " + name
    end
    ## keep the in-memory copy in step, otherwise the next save restores the entry
    removed = true if name == @bank.to_s && drop_from_memory(entry)
    removed
  end

  ## save loaded bank to file
  ## Returns true. Raises when no bank is loaded or the file cannot be written.
  def save_bank
    raise "Unable to save bank." if @bank.nil?

    begin
      write_entries(@bank, @bank_data)
    rescue StandardError
      raise "ERROR occured while saving bank: " + @bank.to_s
    end
    @unsaved_changes = 0
    true
  end

  ##load specified bank
  ## Returns the bank name, or false when no such bank file exists.
  ## Unsaved changes to the previously loaded bank are discarded.
  def load_bank(bank)
    name = bank.to_s
    return false unless File.file?(bank_path(name))

    begin
      entries = read_entries(name)
    rescue StandardError
      raise "ERROR: Unable to load bank data: " + name
    end
    ## state is only replaced once the file was read successfully
    @bank = name
    @bank_data = entries
    @bank_size = entries.sum(&:bytesize)
    @unsaved_changes = 0
    @bank
  end

  ## unload current bank from memory, use if working with multiple massive banks
  ## Unsaved changes are discarded. Returns true.
  def unload_bank
    @bank = nil
    @bank_data = []
    @bank_size = 0
    @unsaved_changes = 0
    true
  end

  ## find a bank that isnt full and load it, create one if all are full or none exist
  ## Pending changes of the loaded bank are saved first so switching never loses data.
  ## Returns the name of the bank that was loaded.
  def load_open_bank
    save_bank if @bank && @unsaved_changes > 0

    candidates = bank_files
    ##remove current bank from search list if loaded and full
    candidates.delete(@bank) if @bank && @bank_size >= @bank_size_limit

    open_bank = candidates.find { |b| File.size(bank_path(b)) < @bank_size_limit }
    load_bank(open_bank || new_bank.to_s)
  end

  ## Create a new empty bank file numbered one above the highest existing bank.
  ## Returns the new bank number (Integer).
  def new_bank
    existing = bank_files
    index = existing.empty? ? 0 : existing.map(&:to_i).max + 1
    name = index.to_s
    write_entries(name, [])
    @banks << name unless @banks.include?(name)
    index
  end

  ## delete a specified bank
  ## Returns true. Raises when the bank is unknown or cannot be deleted.
  def delete_bank(bank)
    name = bank.to_s
    unless @banks.include?(name) && File.file?(bank_path(name))
      raise "ARGUEMENT ERROR: No such bank: " + name
    end

    begin
      File.delete(bank_path(name))
    rescue StandardError
      raise "ERROR: Unable to delete bank file: " + name
    end
    unload_bank if @bank.to_s == name ##if bank to be deleted is loaded, unload it
    @banks.delete(name)
    true
  end

  ##delete everything inside a bank but not the bank file
  ## Returns true. Raises when the bank is unknown or cannot be written.
  def empty_bank(bank)
    name = bank.to_s
    if @bank == name ##empty loaded bank and save
      @bank_data = []
      @bank_size = 0
      save_bank
    elsif @banks.include?(name) ##check bank files and empty the specified one
      begin
        write_entries(name, [])
      rescue StandardError
        raise "ERROR accessing bank file: " + name
      end
    else
      raise "ARGUEMENT ERROR: No such bank: " + name
    end
    true
  end

  ##check loaded bank for string
  def include(string)
    @bank_data.include?(string.to_s)
  end

  ##check specified bank for string
  def bank_include(bank, string)
    name = bank.to_s
    raise "ARGUEMENT ERROR, no such bank: " + name unless @banks.include?(name)

    begin
      read_entries(name).include?(string.to_s)
    rescue StandardError
      raise "ERROR accessing bank: " + name
    end
  end

  ## check all banks for string, returns false if not found or an array of banks string was found in
  ## The loaded bank is checked in memory (and listed first), all others on disk.
  def banks_include(string)
    entry = string.to_s
    found = []
    found << @bank if @bank && include(entry)
    other_banks.each { |b| found << b if bank_include(b, entry) }
    found.empty? ? false : found
  end

  ## search loaded bank, return an array with the index of every instance found or false if none
  def search(string)
    found = indexes_of(@bank_data, string.to_s)
    found.empty? ? false : found
  end

  ## search specified bank and return an array of indexes for all instances found or false if none
  def search_bank(bank, string)
    name = bank.to_s
    raise "No such bank file: " + name unless File.file?(bank_path(name))

    found = indexes_of(read_entries(name), string.to_s)
    found.empty? ? false : found
  end

  ## search all banks for string, return an array of [bank, index] pairs for all instances found or false if none
  def search_banks(string)
    entry = string.to_s
    found = []
    ## check current bank first if loaded
    indexes_of(@bank_data, entry).each { |i| found << [@bank, i] } if @bank
    ## check remaining banks
    other_banks.each do |b|
      indexes_of(read_entries(b), entry).each { |i| found << [b, i] }
    end
    found.empty? ? false : found
  end

  ## get size of current bank in bytes, this size does not include the data seperators written to the file
  def size
    @bank_size
  end

  ##get size of a bank file, this one does include seperators
  def bank_size(bank)
    name = bank.to_s
    raise "ARGUEMENT ERROR no such bank: " + name unless @banks.include?(name)

    File.size(bank_path(name))
  end

  ## total size of all banks; the loaded bank counts with its in-memory size
  ## beware if current bank is not saved size may not be accurate to the total file size
  def banks_size
    return 0 if @banks.empty?

    total = @bank ? @bank_size : 0
    other_banks.each do |b|
      begin
        total += File.size(bank_path(b))
      rescue StandardError
        raise "ERROR occured while accessing bank file: " + b.to_s
      end
    end
    total
  end

  private

  ## full path of a bank file
  def bank_path(bank)
    "#{@dir}/#{bank}"
  end

  ## names of all bank files on disk, in numeric order
  def bank_files
    names = Dir.entries(@dir) - %w[. ..]
    names.select { |n| File.file?(bank_path(n)) }.sort_by { |n| [n.to_i, n] }
  end

  ## known banks except the loaded one (a copy, @banks itself is never modified)
  def other_banks
    @banks.reject { |b| b == @bank }
  end

  ## read a bank file and return its entries
  def read_entries(bank)
    File.read(bank_path(bank)).split(@bank_seperator)
  end

  ## overwrite a bank file with the given entries
  def write_entries(bank, entries)
    File.write(bank_path(bank), entries.join(@bank_seperator))
  end

  ## indexes of every element of entries equal to entry
  def indexes_of(entries, entry)
    entries.each_index.select { |i| entries[i] == entry }
  end

  ## append an entry to the loaded bank data and count it as a change
  def append_to_memory(entry)
    @bank_data << entry
    @bank_size += entry.bytesize
    record_change
  end

  ## delete every occurrence of entry from the loaded bank data, true if any was deleted
  def drop_from_memory(entry)
    occurrences = @bank_data.count(entry)
    return false if occurrences.zero?

    @bank_data.delete(entry)
    @bank_size -= occurrences * entry.bytesize
    true
  end

  ## count one unsaved change and save once save_frequency changes piled up
  def record_change
    @unsaved_changes += 1
    save_bank if @bank && @unsaved_changes >= @save_frequency
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment