meesterdude · December 12, 2015 09:59
diff --git a/_before.rb b/_before.rb
 require "semantic"
 require "pp"

 # Start from an empty corpus configured with the :LSA transform.
 corpus = Semantic::Corpus.new([], transforms: [:LSA])

 # Wrap every .txt file under /tmp/some_texts/ in a document tagged with its
 # base filename, and append that document to the corpus.
 # NOTE(review): open(path) is called without a block and nothing in this
 # script closes the handle -- confirm whether Semantic::Document does.
 text_paths = Dir.glob("/tmp/some_texts/*.txt")
 text_paths.each do |path|
   document = Semantic::Document.new(open(path), name: File.basename(path))
   corpus << document
 end

 # The index must be built before asking for related documents.
 corpus.build_index

 # Look up the document named "dog.txt" and pretty-print what it is related to.
 dog_document = corpus.documents.find { |doc| doc[:name] == "dog.txt" }
 pp dog_document.related
diff --git a/after.rb b/after.rb
 require "semantic"
 require "pp"

 # Start from an empty corpus configured with the :LSA transform.
 corpus = Semantic::Corpus.new([], transforms: [:LSA])

 # The directory /tmp/some_texts/ contains Wikipedia articles about
 # various animals (beagle.txt canidae.txt cat.txt dog.txt gecko.txt
 # german_shepherd.txt reptile.txt salamander.txt).
 article_paths = Dir.glob("/tmp/some_texts/*.txt")
 article_paths.each do |path|
   # Add each article to the corpus (the collection of documents).
   # `:name` is an attribute we assign to the document. There is no
   # limit on the names or the number of attributes you can attach to
   # a document.
   # NOTE(review): open(path) is called without a block and nothing in this
   # script closes the handle -- confirm whether Semantic::Document does.
   article = Semantic::Document.new(open(path), name: File.basename(path))
   corpus << article
 end

 # Build the index. Depending on the CPU, the available memory and, most
 # importantly, the size of the corpus, this can take some time.
 corpus.build_index

 # Compare the article about dogs to the articles in the corpus and get a
 # similarity score for each one (the dog article itself is included, as
 # the example output shows).
 #
 # In the example output we can see that rsemantic correctly reports
 # "dog.txt" as most related to german_shepherd.txt, beagle.txt and
 # canidae.txt, and not to e.g. the reptile articles.
 dog_article = corpus.documents.find { |doc| doc[:name] == "dog.txt" }
 pp dog_article.related
 # {#<Semantic::Corpus 8 documents, @options={:transforms=>[:LSA]}>=>
 #   [#<Semantic::SearchResult:0x0000000283db30
 #     @document=#<Semantic::Document @attributes={:name=>"gecko.txt"}>,
 #     @score=0.05935175420757509>,
 #    #<Semantic::SearchResult:0x0000000283dbd0
 #     @document=#<Semantic::Document @attributes={:name=>"reptile.txt"}>,
 #     @score=0.09322815190241214>,
 #    #<Semantic::SearchResult:0x0000000283db08
 #     @document=#<Semantic::Document @attributes={:name=>"cat.txt"}>,
 #     @score=0.14170011833185264>,
 #    #<Semantic::SearchResult:0x0000000283db80
 #     @document=#<Semantic::Document @attributes={:name=>"salamander.txt"}>,
 #     @score=0.17269285169838045>,
 #    #<Semantic::SearchResult:0x0000000283db58
 #     @document=#<Semantic::Document @attributes={:name=>"canidae.txt"}>,
 #     @score=0.2107975969277189>,
 #    #<Semantic::SearchResult:0x0000000283dba8
 #     @document=#<Semantic::Document @attributes={:name=>"beagle.txt"}>,
 #     @score=0.4041096720670102>,
 #    #<Semantic::SearchResult:0x0000000283dbf8
 #     @document=#<Semantic::Document @attributes={:name=>"german_shepherd.txt"}>,
 #     @score=0.508139138880475>,
 #    #<Semantic::SearchResult:0x0000000283dc20
 #     @document=#<Semantic::Document @attributes={:name=>"dog.txt"}>,
 #     @score=1.0000000000000688>]}
	require "semantic"
	require "pp"

	# Start from an empty corpus configured with the :LSA transform.
	corpus = Semantic::Corpus.new([], :transforms => [:LSA])

	# Add every .txt file under /tmp/some_texts/ to the corpus as a document
	# whose :name attribute is the file's base name.
	# Block parameters are delimited by plain pipes; the backslash-escaped
	# form (`\|file\|`) is markdown escaping and is not valid Ruby.
	# NOTE(review): open(file) is called without a block and nothing in this
	# script closes the handle -- confirm whether Semantic::Document does.
	Dir.glob("/tmp/some_texts/*.txt").each do |file|
	  corpus << Semantic::Document.new(open(file), :name => File.basename(file))
	end

	# The index must be built before asking for related documents.
	corpus.build_index

	# Look up the document named "dog.txt" and pretty-print what it is related to.
	pp corpus.documents.find { |d| d[:name] == "dog.txt" }.related