Last active
January 23, 2021 07:31
-
-
Save d2a-raudenaerde/93a490e5b0d17b2fa88862473429aeb3 to your computer and use it in GitHub Desktop.
lucene-docs-retrieval-bench
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.audenaerde.lucene; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.StringField; | |
import org.apache.lucene.index.*; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.TermQuery; | |
import org.apache.lucene.search.TopDocs; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.FSDirectory; | |
import org.apache.lucene.store.NRTCachingDirectory; | |
import org.openjdk.jmh.annotations.*; | |
import org.openjdk.jmh.infra.Blackhole; | |
import org.openjdk.jmh.runner.Runner; | |
import org.openjdk.jmh.runner.RunnerException; | |
import org.openjdk.jmh.runner.options.Options; | |
import org.openjdk.jmh.runner.options.OptionsBuilder; | |
import java.io.File; | |
import java.io.IOException; | |
import java.math.BigInteger; | |
import java.security.MessageDigest; | |
import java.security.NoSuchAlgorithmException; | |
import java.util.Random; | |
import java.util.concurrent.TimeUnit; | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.SECONDS) | |
@State(Scope.Benchmark) | |
@Fork(value = 2, jvmArgs = {"-Xms2G", "-Xmx2G"}) | |
@Warmup(iterations = 2) | |
@Measurement(iterations = 4) | |
public class DocRetrievalBenchmark { | |
IndexSearcher searcher; | |
Random random; | |
public void createIndex(int version) throws IOException, NoSuchAlgorithmException | |
{ | |
File f = new File("/tmp/bench" + version); | |
if (!f.exists()) { | |
Directory index = createDirectory(f); | |
IndexWriterConfig config = new IndexWriterConfig(); | |
config.setIndexDeletionPolicy(new PersistentSnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), index)); | |
IndexWriter writer = new IndexWriter(index, config); | |
String value2 = "Wikipedia is hosted by the Wikimedia Foundation, a non-profit organization that also hosts a range of other projects."; | |
String value3 = "Save your favorite articles to read offline, sync your reading lists across devices and customize your reading experience with the official Wikipedia app."; | |
MessageDigest digest = MessageDigest.getInstance("MD5"); | |
for (int i = 0; i < 1_000_000; i++) { | |
digest.reset(); | |
digest.update(Integer.toString(i).getBytes()); | |
String value1 = new BigInteger(1, digest.digest()).toString(16); | |
writer.addDocument(createDocument(i, value1, value2, value3)); | |
if (i % 10_000 == 0) { | |
System.out.println(i); | |
} | |
} | |
writer.commit(); | |
index.close(); | |
} | |
random = new Random(123); | |
searcher = new IndexSearcher(DirectoryReader.open(createDirectory(f))); | |
} | |
private static Document createDocument(int i, String value1, String value2, String value3) | |
{ | |
Document document = new Document(); | |
document.add(new StringField("field1", value1, Field.Store.YES)); | |
document.add(new StringField("field2", value2 + i, Field.Store.YES)); | |
document.add(new StringField("field3", value3 + i, Field.Store.YES)); | |
document.add(new StringField("field4", String.valueOf(i % 1000), Field.Store.YES)); | |
return document; | |
} | |
static Directory createDirectory(File file) throws IOException | |
{ | |
return new NRTCachingDirectory(FSDirectory.open(file.toPath()), 1.0, 10.0); | |
} | |
public static void main(String[] args) throws RunnerException { | |
Options opt = new OptionsBuilder() | |
.include(DocRetrievalBenchmark.class.getSimpleName()) | |
.forks(1) | |
.build(); | |
new Runner(opt).run(); | |
} | |
@Setup | |
public void setup() { | |
try { | |
createIndex(87); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} catch (NoSuchAlgorithmException e) { | |
e.printStackTrace(); | |
} | |
} | |
@Benchmark | |
public void retrieveDocuments(Blackhole bh) throws IOException { | |
//We find 1000 random docs. Assumed is that searching is a lot faster than retrieval. | |
TopDocs topDocs = searcher.search(new TermQuery(new Term("field4", String.valueOf(random.nextInt(1000)))), Integer.MAX_VALUE); | |
ScoreDoc[] docs = topDocs.scoreDocs; | |
for (ScoreDoc doc : docs) | |
{ | |
bh.consume(searcher.doc(doc.doc)); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Lucene document-retrieval benchmark; JMH is started through exec-maven-plugin. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>lucene-bench-docs</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <!-- Single place for the JMH version so jmh-core and the annotation processor stay in sync. -->
        <jmh.version>1.21</jmh.version>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <!-- Pinned so the build is reproducible and Maven does not warn about a missing plugin version. -->
                <version>3.0.0</version>
                <executions>
                    <execution>
                        <id>run-benchmarks</id>
                        <!-- Runs org.openjdk.jmh.Main with the pattern ".*" during the integration-test phase. -->
                        <phase>integration-test</phase>
                        <goals>
                            <goal>exec</goal>
                        </goals>
                        <configuration>
                            <classpathScope>test</classpathScope>
                            <executable>java</executable>
                            <arguments>
                                <argument>-classpath</argument>
                                <classpath/>
                                <argument>org.openjdk.jmh.Main</argument>
                                <argument>.*</argument>
                            </arguments>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>8.7.0</version>
        </dependency>
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-core</artifactId>
            <version>${jmh.version}</version>
        </dependency>
        <dependency>
            <groupId>org.openjdk.jmh</groupId>
            <artifactId>jmh-generator-annprocess</artifactId>
            <version>${jmh.version}</version>
            <!-- Annotation processor: only needed while compiling, not at benchmark run time. -->
            <scope>provided</scope>
        </dependency>
        <!-- Alternative Lucene version, left disabled; presumably kept to compare results across releases. -->
        <!-- <dependency>-->
        <!-- <groupId>org.apache.lucene</groupId>-->
        <!-- <artifactId>lucene-core</artifactId>-->
        <!-- <version>7.5.0</version>-->
        <!-- </dependency>-->
    </dependencies>
</project>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment