Created
May 16, 2019 12:26
-
-
Save tysm/195578566e227d869d166852c5706b01 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. | |
package com.potelo.prelude.searcher; | |
import com.potelo.prelude.hitfield.HighlightRange; | |
import com.yahoo.component.chain.dependencies.After; | |
import com.yahoo.component.chain.dependencies.Before; | |
import com.yahoo.component.chain.dependencies.Provides; | |
import com.yahoo.prelude.Index; | |
import com.yahoo.prelude.IndexFacts; | |
import com.yahoo.prelude.fastsearch.FastHit; | |
import com.yahoo.prelude.hitfield.HitField; | |
import com.yahoo.prelude.searcher.JuniperSearcher; | |
import com.yahoo.search.Query; | |
import com.yahoo.search.Result; | |
import com.yahoo.search.Searcher; | |
import com.yahoo.search.result.Hit; | |
import com.yahoo.search.searchchain.Execution; | |
import com.yahoo.search.searchchain.PhaseNames; | |
import java.util.*; | |
/** | |
* Lists all HighlightRange from Juniper highlighting tags. | |
* <p> | |
* Note: This searchers only gathers information over the position of backend binary | |
* highlighting tags in highlighted fields. Based on JuniperSearcher. | |
* | |
* @author Thalles Medrado | |
*/ | |
@After(com.yahoo.prelude.searcher.JuniperSearcher.JUNIPER_TAG_REPLACING) | |
@Provides(com.potelo.prelude.searcher.JuniperMapSearcher.HIGHLIGHTING_TAG_INFORMATION) | |
public class JuniperMapSearcher extends Searcher { | |
private final static char RAW_HIGHLIGHT_CHAR = JuniperSearcher.RAW_HIGHLIGHT_CHAR; | |
private final static char RAW_SEPARATOR_CHAR = JuniperSearcher.RAW_SEPARATOR_CHAR; | |
// The name of the field containing document type | |
private static final String MAGIC_FIELD = Hit.SDDOCNAME_FIELD; | |
public static final String HIGHLIGHTING_TAG_INFORMATION = "HighlightingTagInformation"; | |
/** | |
* Produce a List of HighlightRange from Juniper highlighting tags for each field of a Hit. | |
*/ | |
@Override | |
public Result search(Query query, Execution execution) { | |
Result result = execution.search(query); // get results from previous components in the chain. | |
processHits(query.getPresentation().getBolding(), result.hits().deepIterator(), null, | |
execution.context().getIndexFacts().newSession(query)); | |
return result; | |
} | |
@Override | |
public void fill(Result result, String summaryClass, Execution execution) { | |
int worstCase = result.getHitCount(); | |
List<Hit> hits = new ArrayList<>(worstCase); | |
for (Iterator<Hit> i = result.hits().deepIterator(); i.hasNext();) { | |
Hit sniffHit = i.next(); | |
if ( ! (sniffHit instanceof FastHit)) continue; | |
FastHit hit = (FastHit) sniffHit; | |
if (hit.isFilled(summaryClass)) continue; | |
hits.add(hit); | |
} | |
execution.fill(result, summaryClass); | |
processHits(result.getQuery().getPresentation().getBolding(), hits.iterator(), summaryClass, | |
execution.context().getIndexFacts().newSession(result.getQuery())); | |
} | |
private void processHits(boolean bolding, Iterator<Hit> hitsToProcess, | |
String summaryClass, IndexFacts.Session indexFacts) { | |
while (hitsToProcess.hasNext()) { | |
Hit hit = hitsToProcess.next(); | |
Object previousFieldValue = hit.setField("hiranges", null); | |
assert(previousFieldValue == null); | |
if ( ! (hit instanceof FastHit)) continue; | |
FastHit fastHit = (FastHit) hit; | |
if (summaryClass != null && ! fastHit.isFilled(summaryClass)) continue; | |
Object searchDefinitionField = fastHit.getField(MAGIC_FIELD); | |
if (searchDefinitionField == null) continue; | |
Map<String, Object> hiRanges = new HashMap<>(); | |
for (Index index : indexFacts.getIndexes(searchDefinitionField.toString())) { | |
if (index.getDynamicSummary() || index.getHighlightSummary()) { | |
HitField fieldValue = fastHit.buildHitField(index.getName(), true); | |
if (fieldValue != null) { | |
List<HighlightRange> fieldHiRanges = gatherPositions(fieldValue, bolding); | |
if (fieldHiRanges != null && fieldHiRanges.size() > 0) | |
hiRanges.put(index.getName(), fieldHiRanges); | |
} | |
} | |
} | |
if (hiRanges.size() > 0 ) | |
hit.setField("hiranges", hiRanges); | |
} | |
} | |
private List<HighlightRange> gatherPositions(HitField field, boolean bolding) { | |
List<HighlightRange> highlightRanges = null; | |
int tagCount = 0; | |
Character lastChar = null; | |
boolean insideHighlight = false; | |
String toProcess = field.getContent(); | |
for (int i = 0; i < toProcess.length(); ++i) { | |
char key = toProcess.charAt(i); | |
switch (key) { | |
case RAW_HIGHLIGHT_CHAR: | |
highlightRanges = initHighlightRanges(highlightRanges); | |
addPosition(bolding, insideHighlight, highlightRanges, i, tagCount, lastChar); | |
insideHighlight = !insideHighlight; | |
case RAW_SEPARATOR_CHAR: | |
tagCount++; | |
break; | |
default: | |
break; | |
} | |
lastChar = key; | |
} | |
return highlightRanges; | |
} | |
private void addPosition(boolean bolding, boolean insideHighlight, List<HighlightRange> highlightRanges, | |
int i, int tagCount, Character lastChar) { | |
if (bolding) { | |
int rawPos = i - tagCount + 1; | |
if (insideHighlight) { | |
assert(highlightRanges.size() > 0); // if insideHighlight... Doesn't make sense to fail. | |
HighlightRange lastPosition = highlightRanges.get(highlightRanges.size() - 1); | |
lastPosition.setLength(rawPos - lastPosition.getOffset()); | |
} else { | |
if (lastChar != null && lastChar.equals(RAW_HIGHLIGHT_CHAR)) { | |
// if not insideHighlight and lastChar == RAW_HIGHLIGHT_CHAR | |
// means that lastChar was a boldCloseTag and now we're on a | |
// boldOpenTag, so we should merge these intervals instead of | |
// open a new HighlightRange since JuniperSearcher does the | |
// same. | |
// Due this case, we just need to reopen the last interval | |
// and let this algorithm close the tag. | |
assert(highlightRanges.size() > 0); // if lastChar.equals(RAW_HIGHLIGHT_CHAR)... | |
HighlightRange lastPosition = highlightRanges.get(highlightRanges.size() - 1); | |
lastPosition.setLength(null); | |
} | |
else { | |
// otherwise we just need to open a new boldOpenTag :3 | |
highlightRanges.add(new HighlightRange(rawPos, null)); | |
} | |
} | |
} | |
} | |
private List<HighlightRange> initHighlightRanges(List<HighlightRange> highlightRanges) { | |
if (highlightRanges == null) | |
highlightRanges = new ArrayList<>(); | |
return highlightRanges; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment