Skip to content

Commit

Permalink
Merge pull request #21 from oreillymedia/SPIDR-1451
Browse files Browse the repository at this point in the history
SPIDR-1451 keep highest weight, de-dupe on text AND context
  • Loading branch information
anthonygroves committed Dec 11, 2019
2 parents bc28e8f + 7feb1c7 commit f743223
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>ifpress</groupId>
<artifactId>ifpress-solr-plugin</artifactId>
<version>1.5.7</version>
<version>1.5.8</version>
<name>ifpress solr plugin</name>
<description>Contains plugins to be installed in the solr server</description>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,13 @@
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SafariInfixSuggester extends AnalyzingInfixSuggester {

private final boolean highlight;
private Set<BytesRef> suggestionSet;
private Map<Suggestion, Long> suggestWeightMap;
private static final Logger LOG = LoggerFactory.getLogger(SafariInfixSuggester.class);

public enum Context {
Expand All @@ -40,7 +39,7 @@ public SafariInfixSuggester(

showContext = Collections.singleton(new BytesRef(new byte[] { (byte) Context.SHOW.ordinal() }));
hideContext = Collections.singleton(new BytesRef(new byte[] { (byte) Context.HIDE.ordinal() }));
suggestionSet = new HashSet<>();
suggestWeightMap = new HashMap<>();

if (!DirectoryReader.indexExists(dir)) {
// no index in place -- build an empty one so we are prepared for updates
Expand All @@ -61,7 +60,7 @@ public void update(BytesRef bytes, long weight) throws IOException {
/**
 * Rebuilds the suggester from the given iterator.
 * The de-duplication state held by this class is replaced with fresh, empty
 * collections before the work is delegated to the superclass build.
 *
 * @param iter iterator supplying the suggestions to build from
 * @throws IOException propagated from the superclass build
 */
public void build(InputIterator iter) throws IOException {
// Reset the de-duplication state so entries recorded before this build are discarded
LOG.info("\n\nStarting suggestion build.");
suggestionSet = new HashSet<>();
suggestWeightMap = new HashMap<>();
super.build(iter);
}

Expand All @@ -82,12 +81,32 @@ public void add(Dictionary dict) throws IOException {
}
}

// Override add method used during SuggestComponent suggest build to filter duplicates using HashSet.
/**
* Adds suggestion, only de-duplicating for the same text AND context,
* and keeping the duplicate with the highest weight.
* Calls AnalyzingInfix's add method if adding new suggestion,
* or AnalyzingInfix's update method if updating with a higher weight for existing suggestion.
* @param text BytesRef representing the text of suggestion
* @param contexts Set<BytesRef> representing the filter contexts for the suggestion
* @param weight the long weight of suggestion
* @param payload BytesRef payload of suggestion, usually used to store more metadata about suggestion
* @throws IOException
*/
@Override
public void add(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
if(suggestionSet.add(text)) {
Suggestion suggestion = new Suggestion(text, contexts, weight, payload);
Long currentSuggestWeight = suggestWeightMap.get(suggestion);

// Add suggestion if it has not yet been added.
if(currentSuggestWeight == null) {
suggestWeightMap.put(suggestion, weight);
super.add(text, contexts, weight, payload);
}
// If suggestion was already added with a lower weight, update suggestion with this weight
else if(currentSuggestWeight.doubleValue() < weight) {
suggestWeightMap.put(suggestion, weight);
super.update(text, contexts, weight, payload);
}
}

/*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package com.ifactory.press.db.solr.spelling.suggest;

import org.apache.lucene.util.BytesRef;

import java.util.Objects;
import java.util.Set;

/**
 * Value holder for a single suggestion: its text, filter contexts, weight and payload.
 * <p>
 * Equality and hashing are based on text and contexts only, so two suggestions that
 * differ just in weight or payload are treated as the same entry. Instances are used
 * as keys of a {@code Map<Suggestion, Long>}; changing text or contexts through the
 * setters while an instance is held as a map key would make it unreachable there.
 */
public class Suggestion {

    private String text;
    private String payload;
    private Set<BytesRef> contexts;
    private long weight;

    /**
     * Creates a suggestion, decoding text and payload from UTF-8.
     *
     * @param text     suggestion text; may be null, in which case the stored text is null
     * @param contexts filter contexts; stored by reference, not copied
     * @param weight   suggestion weight
     * @param payload  suggestion payload; may be null, in which case the stored payload is null
     */
    public Suggestion(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) {
        this.text = text != null ? text.utf8ToString() : null;
        this.contexts = contexts;
        this.weight = weight;
        this.payload = payload != null ? payload.utf8ToString() : null;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    public String getPayload() {
        return payload;
    }

    public void setPayload(String payload) {
        this.payload = payload;
    }

    public Set<BytesRef> getContexts() {
        return contexts;
    }

    public void setContexts(Set<BytesRef> contexts) {
        this.contexts = contexts;
    }

    public long getWeight() {
        return weight;
    }

    public void setWeight(long weight) {
        this.weight = weight;
    }

    /**
     * Overridden equals() method that only uses text and contexts variables for equality check.
     * Null text or contexts are tolerated: a null field only equals another null field.
     *
     * @param other the other Object to be checked for equality to this Suggestion
     * @return true if 'other' is an instance of Suggestion and has the same text and
     * contexts as this Suggestion object.
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }

        if (!(other instanceof Suggestion)) {
            return false;
        }

        Suggestion otherSuggestion = (Suggestion) other;

        // Intentionally only using text and context to signify that two suggestions are equal.
        return Objects.equals(this.text, otherSuggestion.text)
                && Objects.equals(this.contexts, otherSuggestion.contexts);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: derived from text and contexts only.
     * A null field contributes 0, so a suggestion with null text or contexts can still be hashed.
     *
     * @return hash of text and contexts
     */
    @Override
    public int hashCode() {
        int result = 17;
        result = 31 * result + Objects.hashCode(text);
        result = 31 * result + Objects.hashCode(contexts);
        return result;
    }
}

0 comments on commit f743223

Please sign in to comment.