@remeniuk · Last active August 29, 2015
Lucene memory matcher
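Matches individual users against segmentation filters without a shared index: each User mixes in a throwaway, single-document Lucene MemoryIndex built from its attributes, a small filter DSL (and / or / in / pattern / neql) is marshalled into Lucene queries, and matching a user amounts to scoring the query against that one-document index. The code appears to target Lucene 4.5.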
// Targets Lucene 4.5 (see Version.LUCENE_45 below); imports for the query and
// analysis classes used further down are spelled out here.
import java.io.Reader
import java.util.Date
import java.util.regex.Pattern

import org.apache.lucene
import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents
import org.apache.lucene.analysis.charfilter.{MappingCharFilter, NormalizeCharMap}
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer
import org.apache.lucene.analysis.pattern.PatternCaptureGroupTokenFilter
import org.apache.lucene.analysis.standard.StandardTokenizer
import org.apache.lucene.index.Term
import org.apache.lucene.index.memory.MemoryIndex
import org.apache.lucene.search.{BooleanClause, BooleanQuery, MatchAllDocsQuery, RegexpQuery, TermQuery}
import org.apache.lucene.search.spans.{SpanMultiTermQueryWrapper, SpanNearQuery, SpanOrQuery}
import org.apache.lucene.util.Version
import org.joda.time.format.{DateTimeFormat, ISODateTimeFormat}
/** Mixes a single-document, in-memory Lucene index into a User:
  * every user attribute becomes an indexed field that segmentation
  * queries can later be scored against. */
trait LuceneUserIndex extends UserLuceneAnalyzers {
  self: User =>

  @transient lazy val memoryIndex = {
    val memoryIndex = new MemoryIndex

    // Custom fields: index all non-empty values under the field's id
    for {
      customField <- customFields if !customField.value.isEmpty
    } memoryIndex.addField(customField.id, customField.value.mkString(" "), defaultAnalyzer)

    // Core account attributes
    memoryIndex.addField(f_id, user_id, defaultAnalyzer)
    memoryIndex.addField(f_api_key, api_key, defaultAnalyzer)
    memoryIndex.addField(f_login_type, login_type.toString, defaultAnalyzer)
    memoryIndex.addField(f_application_user_id, application_user_id, defaultAnalyzer)
    registration_date.foreach(d => memoryIndex.addField(f_registration_date, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    memoryIndex.addField(f_events, events, eventStringAnalyzer)
    memoryIndex.addField(f_platforms, platforms.mkString(" "), defaultAnalyzer)

    // Optional profile attributes
    import profile._
    nick_name.foreach(memoryIndex.addField(f_nick_name, _, defaultAnalyzer))
    first_name.foreach(memoryIndex.addField(f_first_name, _, defaultAnalyzer))
    last_name.foreach(memoryIndex.addField(f_last_name, _, defaultAnalyzer))
    source.foreach(memoryIndex.addField(f_source, _, defaultAnalyzer))
    email.foreach(memoryIndex.addField(f_email, _, defaultAnalyzer))
    birth_day.foreach(d => memoryIndex.addField(f_birth_day, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    gender.foreach(g => memoryIndex.addField(f_gender, g.toString, defaultAnalyzer))
    lang_code.foreach(memoryIndex.addField(f_lang_code, _, defaultAnalyzer))
    country_code.foreach(memoryIndex.addField(f_country, _, defaultAnalyzer))

    // Purchase and online-activity statistics
    purchaseStats.stats_first_deposit_date.foreach(d => memoryIndex.addField(stats_first_deposit_date.fqn, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    purchaseStats.stats_last_deposit_date.foreach(d => memoryIndex.addField(stats_last_deposit_date.fqn, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    purchaseStats.stats_total_purchase_amount.foreach(pa => memoryIndex.addField(stats_total_purchase_amount.fqn, pa.toString, defaultAnalyzer))
    purchaseStats.stats_purchases_count.foreach(pc => memoryIndex.addField(stats_purchase_count.fqn, pc.toString, defaultAnalyzer))
    onlineStats.stats_last_login_date.foreach(d => memoryIndex.addField(stats_last_login_date.fqn, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    onlineStats.stats_logins_count.foreach(l => memoryIndex.addField(stats_logins_count.fqn, l.toString, defaultAnalyzer))
    onlineStats.stats_total_session_time_millis.foreach(t => memoryIndex.addField(stats_total_session_time_millis.fqn, t.toString, defaultAnalyzer))

    memoryIndex
  }
}
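// ---------------------------------------------------------------------------
// Usage sketch (not part of the original gist): a minimal, self-contained
// illustration of the matching idea, assuming Lucene 4.5. MemoryIndex holds
// exactly one transient document, and search() returns a relevance score that
// is 0.0f when the document does not match. Field names are made up for the
// example.
object MemoryIndexDemo extends UserLuceneAnalyzers {
  def main(args: Array[String]): Unit = {
    val idx = new MemoryIndex
    idx.addField("country_code", "us", defaultAnalyzer)
    idx.addField("platforms", "ios android", defaultAnalyzer)

    val q = new BooleanQuery
    q.add(new TermQuery(new Term("country_code", "us")), BooleanClause.Occur.MUST)
    q.add(new TermQuery(new Term("platforms", "ios")), BooleanClause.Occur.MUST)

    println(idx.search(q) > 0.0f) // true: this "user" satisfies both conditions
  }
}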
/** Translates a segmentation filter from the internal query DSL
  * (and / or / in / pattern / neql / ...) into an executable Lucene query. */
def marshal(filter: Query): lucene.search.Query = {
  filter match {
    case f: and =>
      val parentQuery = new BooleanQuery
      f.filters.foreach {
        // Negation: a MUST_NOT clause alone matches nothing, so it is paired
        // with MatchAllDocsQuery ("everything except ...")
        case subFilter: neql =>
          parentQuery.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST))
          parentQuery.add(subFilter.toLuceneQuery, BooleanClause.Occur.MUST_NOT)
        case subFilter =>
          Option(marshal(subFilter))
            .foreach(parentQuery.add(_, BooleanClause.Occur.MUST))
      }
      parentQuery
    case f: or =>
      val parentQuery = new BooleanQuery
      f.filters.foreach(subFilter => parentQuery.add(marshal(subFilter), BooleanClause.Occur.SHOULD))
      parentQuery
    case f: in =>
      // Every listed value must be present in the (multi-valued) field
      val subQuery = new BooleanQuery
      f.value.asInstanceOf[Iterable[Any]].foreach { value =>
        subQuery.add(
          new TermQuery(new Term(f.column.fqn, value.toString)),
          BooleanClause.Occur.MUST
        )
      }
      subQuery
    case p: pattern =>
      // Each sequence of regexps must match as an ordered span of terms;
      // MAX_SLOP comes from the surrounding scope
      val parentQuery = new SpanOrQuery
      p.value.foreach { sequence =>
        parentQuery.addClause(new SpanNearQuery(
          sequence.map { regexTerm =>
            new SpanMultiTermQueryWrapper(new RegexpQuery(new Term(p.column.fqn, regexTerm)))
          }.toArray, MAX_SLOP, true
        ))
      }
      parentQuery
    case other => other.toLuceneQuery
  }
}
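// ---------------------------------------------------------------------------
// Negation sketch (not part of the original gist): the neql branch above
// relies on a Lucene rule worth spelling out. A BooleanQuery consisting only
// of MUST_NOT clauses matches nothing, so "everything except X" needs an
// explicit MatchAllDocsQuery MUST clause. The helper name is hypothetical.
def notEqual(field: String, value: String): BooleanQuery = {
  val q = new BooleanQuery
  q.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST))
  q.add(new TermQuery(new Term(field, value)), BooleanClause.Occur.MUST_NOT)
  q
}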
trait UserLuceneAnalyzers {

  // Splits on whitespace only, preserving the original case
  protected def defaultAnalyzer =
    new PatternAnalyzer(Version.LUCENE_45, PatternAnalyzer.WHITESPACE_PATTERN, false, null)

  // Analyzes event strings such as "purchase:12": the char filter rewrites
  // ':' to '_' so the tokenizer keeps the whole event as a single token, and
  // the capture-group filter additionally emits the event name and its
  // two-letter prefix alongside the original token.
  protected def eventStringAnalyzer = {
    val builder = new NormalizeCharMap.Builder
    builder.add(":", "_")
    val mapping = builder.build

    new Analyzer {
      def createComponents(fieldName: String, reader: Reader): TokenStreamComponents = {
        val source = new StandardTokenizer(Version.LUCENE_45,
          new MappingCharFilter(mapping, reader))
        val filter = new PatternCaptureGroupTokenFilter(source,
          true, Pattern.compile("(([0-9a-z]{2})(?:[0-9a-z]{1,})?)_[0-9]+"))
        new TokenStreamComponents(source, filter)
      }
    }
  }
}
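// ---------------------------------------------------------------------------
// Analyzer sketch (not part of the original gist): what eventStringAnalyzer
// should produce for an event such as "purchase:12". MappingCharFilter
// rewrites it to "purchase_12" (which StandardTokenizer keeps as one token),
// and PatternCaptureGroupTokenFilter additionally emits the capture groups:
// the event name and its two-letter prefix.
object EventAnalyzerDemo extends UserLuceneAnalyzers {
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute

  def main(args: Array[String]): Unit = {
    val ts = eventStringAnalyzer.tokenStream("events", new java.io.StringReader("purchase:12"))
    val term = ts.addAttribute(classOf[CharTermAttribute])
    ts.reset()
    while (ts.incrementToken()) println(term.toString) // expected: purchase_12, purchase, pu
    ts.end()
    ts.close()
  }
}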