Remove results below a certain score threshold in Solr/Lucene?

You could write your own Collector that would ignore collecting those documents that the scorer places below your threshold. Below is a simple example of this using Lucene.Net 2.9.1.2 and C#. You’ll need to modify the example if you want to keep the calculated score.

using System;
using System.Collections.Generic;
using Lucene.Net.Index;
using Lucene.Net.Search;

public class ScoreLimitingCollector : Collector {
    private readonly Single _lowerInclusiveScore;
    private readonly List<Int32> _docIds = new List<Int32>();
    private Scorer _scorer;
    private Int32 _docBase;

    public IEnumerable<Int32> DocumentIds {
        get { return _docIds; }
    }

    public ScoreLimitingCollector(Single lowerInclusiveScore) {
        _lowerInclusiveScore = lowerInclusiveScore;
    }

    public override void SetScorer(Scorer scorer) {
        _scorer = scorer;
    }

    public override void Collect(Int32 doc) {
        var score = _scorer.Score();
        if (_lowerInclusiveScore <= score)
            _docIds.Add(_docBase + doc);
    }

    public override void SetNextReader(IndexReader reader, Int32 docBase) {
        _docBase = docBase;
    }

    public override bool AcceptsDocsOutOfOrder() {
        return true;
    }
}

Leave a Comment