SearchAbstract.java
/*
* Copyright 2019 Gregory Graham.
*
* Commercial licenses are available, please contact info@gregs.co.nz for details.
*
* This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
* To view a copy of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/
* or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
*
* You are free to:
* Share - copy and redistribute the material in any medium or format
* Adapt - remix, transform, and build upon the material
*
* The licensor cannot revoke these freedoms as long as you follow the license terms.
* Under the following terms:
*
* Attribution -
* You must give appropriate credit, provide a link to the license, and indicate if changes were made.
* You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
* NonCommercial -
* You may not use the material for commercial purposes.
* ShareAlike -
* If you remix, transform, or build upon the material,
* you must distribute your contributions under the same license as the original.
* No additional restrictions -
* You may not apply legal terms or technological measures that legally restrict others from doing anything the
* license permits.
*
* Check the Creative Commons website for any details, legalese, and updates.
*/
package nz.co.gregs.dbvolution.expressions.search;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nz.co.gregs.dbvolution.expressions.AnyExpression;
import nz.co.gregs.dbvolution.expressions.NumberExpression;
import nz.co.gregs.dbvolution.expressions.StringExpression;
import nz.co.gregs.dbvolution.results.ExpressionHasStandardStringResult;
/**
 * Standardised searching using string terms and expression aliases.
 *
 * <p>
 * Designed to provide easy access to complex user-driven searching such as
 * 'terminator -schwarzenagger "come with me if" desc:quote author:+"james
 * cameron"'.</p>
 *
 * <p>
 * Search terms can be single words, sequences of words, or quoted phrases.
 * Terms can also be prioritized with + and - and restricted to a single column
 * using an alias followed by a colon (alias:term). Searching for an empty value
 * can be done with an alias followed by empty quotes, for example
 * description:""</p>
 *
 * <p>
 * Use with a single column using
 * {@link StringExpression#searchFor(nz.co.gregs.dbvolution.expressions.search.SearchString)}
 * and
 * {@link StringExpression#searchForRanking(nz.co.gregs.dbvolution.expressions.search.SearchString)}:
 * marq.column(marq.name).searchFor(searchString). If you have individual
 * strings use {@link StringExpression#searchFor(java.lang.String...)} and
 * {@link StringExpression#searchForRanking(java.lang.String...)}.</p>
 *
 * <p>
 * searchForRanking produces a number value that can be used for sorting.</p>
 *
 * @author gregorygraham
 */
public abstract class SearchAbstract {

  /**
   * The raw, unparsed search text; null until a search string has been set or
   * added.
   */
  protected String searchString;

  /**
   * Matches one search term: an optional {@code alias:} prefix, an optional
   * {@code +} or {@code -} mode symbol, then either a run of word characters or
   * a double-quoted phrase whose closing quote may be missing.
   */
  private static final Pattern TERM_PATTERN = Pattern.compile("((\\w+):){0,1}([+-]{0,1})((\\w+)|(\"([^\"]*)\"?))");
  /** Capture group holding the alias without its trailing colon. */
  private static final int ALIAS_GROUP = 2;
  /** Capture group holding the mode symbol; empty when no symbol was given. */
  private static final int MODE_GROUP = 3;
  /** Capture group holding the whole term text (quotes included for phrases). */
  private static final int SIMPLE_TERM_GROUP = 4;
  /** Capture group holding the text between the quotes of a quoted phrase. */
  private static final int QUOTED_TERM_GROUP = 7;
  /** Separator used to break a multi-word argument into individual words. */
  private static final Pattern WORD_SEPARATOR = Pattern.compile("\\s+");

  /**
   * Creates a search with no search string.
   */
  public SearchAbstract() {
  }

  /**
   * Creates a search for the given search string.
   *
   * @param search the raw search text
   */
  public SearchAbstract(String search) {
    this();
    setSearchString(search);
  }

  /**
   * Replaces the current search string.
   *
   * @param search the new raw search text
   * @return this instance
   */
  protected SearchAbstract setSearchString(String search) {
    this.searchString = search;
    return this;
  }

  /**
   * Appends text to the search string, separated from any existing text by a
   * single space.
   *
   * @param string the text to append; null is ignored
   * @return this instance
   */
  public SearchAbstract addToSearchString(String string) {
    if (string == null) {
      // Concatenating null would add the literal word "null" to the search.
      return this;
    }
    final String current = getSearchString();
    if (current == null || current.isEmpty()) {
      this.searchString = string;
    } else {
      this.searchString = current + " " + string;
    }
    return this;
  }

  /**
   * Appends the text wrapped in double quotes so it is parsed as one phrase.
   *
   * @param string the phrase to quote and append
   * @return this instance
   */
  public SearchAbstract addQuotedTermToSearchString(String string) {
    return addToSearchString("\"" + string + "\"");
  }

  /**
   * Appends every whitespace-separated word of the text with a {@code +}
   * prefix.
   *
   * @param string one or more words; null or blank adds nothing
   * @return this instance
   */
  public SearchAbstract addPreferredTermToSearchString(String string) {
    return addPrefixedWords("+", string);
  }

  /**
   * Appends every whitespace-separated word of the text with a {@code -}
   * prefix.
   *
   * @param string one or more words; null or blank adds nothing
   * @return this instance
   */
  public SearchAbstract addReducedTermToSearchString(String string) {
    return addPrefixedWords("-", string);
  }

  /**
   * Appends each word of {@code words} to the search string with the given
   * prefix.
   *
   * <p>
   * Splitting on whitespace runs and skipping empty pieces avoids adding a bare
   * prefix when the input contains repeated, leading or trailing spaces.</p>
   */
  private SearchAbstract addPrefixedWords(String prefix, String words) {
    if (words != null) {
      for (String word : WORD_SEPARATOR.split(words)) {
        if (!word.isEmpty()) {
          addToSearchString(prefix + word);
        }
      }
    }
    return this;
  }

  /**
   * Parses the search string into its individual terms.
   *
   * @return the terms in the order they appear; an empty array when there is no
   * search string or nothing in it matches the term pattern
   */
  protected final Term[] getSearchTerms() {
    final String search = getSearchString();
    if (search == null) {
      // The no-arg constructor leaves the search string unset.
      return new Term[0];
    }
    final List<Term> terms = new ArrayList<>();
    final Matcher matcher = TERM_PATTERN.matcher(search);
    while (matcher.find()) {
      final String alias = matcher.group(ALIAS_GROUP);
      final Mode mode = modeFor(matcher.group(MODE_GROUP));
      // The quoted group only participates in the match for quoted phrases.
      final String quotedTerm = matcher.group(QUOTED_TERM_GROUP);
      if (quotedTerm != null) {
        terms.add(new Term(quotedTerm, true, mode, alias));
      } else {
        terms.add(new Term(matcher.group(SIMPLE_TERM_GROUP), false, mode, alias));
      }
    }
    return terms.toArray(new Term[0]);
  }

  /**
   * Translates the mode symbol captured by the term pattern into a
   * {@link Mode}; anything other than "+" or "-" is {@link Mode#NORMAL}.
   */
  private static Mode modeFor(String symbol) {
    if ("+".equals(symbol)) {
      return Mode.PLUS;
    } else if ("-".equals(symbol)) {
      return Mode.MINUS;
    } else {
      return Mode.NORMAL;
    }
  }

  /**
   * Builds the ranking expression for one aliased column by summing the ranking
   * of every search term against that column.
   *
   * @param col the expression and alias to rank against
   * @return the sum of the per-term rankings, or the value -1.0 when the
   * expression is not an {@link ExpressionHasStandardStringResult}
   */
  protected final NumberExpression getRankingExpression(ExpressionAlias col) {
    final AnyExpression<?, ?, ?> column = col.getExpr();
    if (column instanceof ExpressionHasStandardStringResult) {
      final StringExpression stringExpression = ((ExpressionHasStandardStringResult) column).stringResult();
      NumberExpression expr = new NumberExpression(0);
      for (Term term : this.getSearchTerms()) {
        expr = expr.plus(getRankingExpressionForTerm(stringExpression, term, col.getAlias()));
      }
      return expr;
    }
    return NumberExpression.value(-1.0);
  }

  /**
   * Builds the ranking expression for a single term against a single column.
   *
   * <p>
   * A term with text is ranked when it has no alias or its alias matches the
   * column alias. A quoted, aliased term without text (alias:"") ranks the
   * column being the empty string. Any other combination contributes zero.</p>
   *
   * @param stringExpression the column's string form
   * @param term the term to rank
   * @param columnAlias the alias of the column being ranked
   * @return the ranking contribution of the term for this column
   */
  protected final NumberExpression getRankingExpressionForTerm(StringExpression stringExpression, Term term, String columnAlias) {
    if (term.hasString()
        && (term.hasNoAlias() || term.aliasMatches(columnAlias))) {
      final String text = term.getString();
      // The "word" checks look for the term with a space on both sides.
      final String spacedText = " " + text + " ";
      final double value = term.getValue();
      // the term exactly is worth the normal value
      NumberExpression newExpr = stringExpression.contains(text).ifThenElse(value, 0.0);
      // exactly as a word is worth twice the value
      newExpr = newExpr.plus(stringExpression.contains(spacedText).ifThenElse(value * 2, 0.0));
      // as a case-insensitive word is worth the normal value
      newExpr = newExpr.plus(stringExpression.containsIgnoreCase(spacedText).ifThenElse(value, 0.0));
      // as a case-insensitive sequence is worth half the value
      newExpr = newExpr.plus(stringExpression.containsIgnoreCase(text).ifThenElse(value / 2, 0.0));
      return newExpr;
    } else if (term.hasAlias() && term.isQuoted() && term.hasNoString() && term.aliasMatches(columnAlias)) {
      // alias:"" searches for an empty value in the aliased column
      return stringExpression.is("").ifThenElse(term.getValue(), 0);
    } else {
      return new NumberExpression(0);
    }
  }

  /**
   * @return the searchString
   */
  protected final String getSearchString() {
    return searchString;
  }

  /**
   * Appends the text to the search string in {@code alias:term} form.
   *
   * @param string the term text
   * @param alias the alias the term is restricted to
   * @return this instance
   */
  protected SearchAbstract addAliasedTermToSearchString(String string, String alias) {
    addToSearchString(alias + ":" + string);
    return this;
  }

  /**
   * One parsed search term: its text, optional alias, mode, whether it was
   * quoted, and the ranking value derived from those properties.
   */
  public static class Term {

    private final double value;
    private final String string;
    private final String alias;
    private final boolean isQuoted;
    private final Mode mode;
    /** The alias stored for terms that were written without one. */
    public static final String EMPTY_ALIAS = "";
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    // The following constants are not referenced by the code in this file.
    static final double CONTAINS_EXACT_MATCH_VALUE = 10000;
    static final double CONTAINS_INSENSITIVE_MATCH_VALUE = 1000;
    static final double CONTAINS_QUOTED_SEARCH_WORD_VALUE = 100;
    static final double CONTAINS_WANTED_SEARCH_WORD_VALUE = 100;
    static final double CONTAINS_SEARCH_WORD_VALUE = 10;
    static final double CONTAINS_UNWANTED_SEARCH_WORD_VALUE = -2;

    private Term(String string, boolean isQuoted, Mode mode, String alias) {
      this.string = string;
      this.mode = mode;
      this.isQuoted = isQuoted;
      this.alias = alias == null ? EMPTY_ALIAS : alias;
      // Must come last: the value is computed from the fields assigned above.
      this.value = calculateValue();
    }

    /**
     * @return the text of the term, without surrounding quotes
     */
    public String getString() {
      return string;
    }

    /**
     * @return the ranking value of the term
     */
    public double getValue() {
      return value;
    }

    /**
     * @return true if the term was written without an alias
     */
    public boolean hasNoAlias() {
      return alias.equals(EMPTY_ALIAS);
    }

    /**
     * @return true if the term was written with an alias
     */
    public final boolean hasAlias() {
      return !hasNoAlias();
    }

    /**
     * @return true if the term's text is null, empty or only whitespace
     */
    public boolean hasNoString() {
      return isInvalidTerm(string);
    }

    /**
     * @param termString the text to check
     * @return true if the text contains at least one non-whitespace character
     */
    public static boolean isValidTerm(String termString) {
      return !isInvalidTerm(termString);
    }

    /**
     * @param termString the text to check
     * @return true if the text is null, empty or only whitespace
     */
    public static boolean isInvalidTerm(String termString) {
      return termString == null
          || WHITESPACE.matcher(termString).replaceAll("").isEmpty();
    }

    /**
     * @return true if the term's text has at least one non-whitespace character
     */
    public final boolean hasString() {
      return !hasNoString();
    }

    /**
     * @return the alias of the term, or {@link #EMPTY_ALIAS} if it has none
     */
    public String getAlias() {
      return alias;
    }

    /**
     * Computes the ranking value: a base of 11 (20 when quoted), multiplied by
     * 10 for an aliased term, by 10 for a plus term and by -7 for a minus term.
     */
    private double calculateValue() {
      return (11.0 + (isQuoted() ? 9 : 0))
          * (hasAlias() ? 10 : 1)
          * (isPlus() ? 10 : 1)
          * (isMinus() ? -7 : 1);
    }

    /**
     * @return true if the term was prefixed with "-"
     */
    public boolean isMinus() {
      return this.mode == Mode.MINUS;
    }

    /**
     * @return true if the term was prefixed with "+"
     */
    public boolean isPlus() {
      return this.mode == Mode.PLUS;
    }

    /**
     * @return true if the term was written as a quoted phrase
     */
    public boolean isQuoted() {
      return this.isQuoted;
    }

    /**
     * @param columnAlias the alias to compare with
     * @return true if the term's alias equals the given alias, ignoring case
     */
    public boolean aliasMatches(String columnAlias) {
      return getAlias().equalsIgnoreCase(columnAlias);
    }
  }

  /**
   * The priority marker of a term: "+" gives PLUS, "-" gives MINUS, and no
   * marker gives NORMAL.
   */
  public enum Mode {
    PLUS,
    MINUS,
    NORMAL
  }
}