From be24e9a05a0dff1a0c37990e068eb0b21499c90e Mon Sep 17 00:00:00 2001 From: yonik Date: Sat, 6 Sep 2014 10:50:01 -0400 Subject: [PATCH] use a bitset to create sorted int set between collectLimit and smallSetSize --- .../solr/search/DedupDocSetCollector.java | 46 +++++++++++++------ solr/native/make.sh | 1 + 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/search/DedupDocSetCollector.java b/solr/core/src/java/org/apache/solr/search/DedupDocSetCollector.java index 9d569112606..75680675932 100644 --- a/solr/core/src/java/org/apache/solr/search/DedupDocSetCollector.java +++ b/solr/core/src/java/org/apache/solr/search/DedupDocSetCollector.java @@ -19,6 +19,7 @@ import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Scorer; import org.apache.solr.core.HS; @@ -33,15 +34,18 @@ public class DedupDocSetCollector extends Collector implements AutoCloseable { private long buffer; private BitDocSetNative bits; + private int globalPos = 0; private int pos=0; private final int maxDoc; private final int smallSetSize; + private final int collectLimit; private int base; private final int bufferSize = HS.BUFFER_SIZE_BYTES >>> 2; private List bufferList; public DedupDocSetCollector(int smallSetSize, int maxDoc) { this.smallSetSize = smallSetSize; + this.collectLimit = Math.min((smallSetSize>>1) + 5, smallSetSize); this.maxDoc = maxDoc; allocBuffer(); } @@ -62,21 +66,22 @@ public void collect(int doc) throws IOException { } private int bufferedSize() { - int nBuffers = bufferList==null ? 0 : bufferList.size(); - return (nBuffers * bufferSize) + pos; + return globalPos + pos; } private void newBuffer() { - if (bits == null && bufferedSize() > smallSetSize) { - bits = new BitDocSetNative(maxDoc); + assert pos == bufferSize; + globalPos += pos; + pos = 0; // do this here so bufferedSize will work + + if (bits == null && bufferedSize() > collectLimit) { + bits = new BitDocSetNative(maxDoc); } // if we've already transitioned to a bitset, then just set the bits // and reuse this buffer. if (bits != null) { - assert pos == bufferSize; - setBits(buffer, pos); - pos = 0; + setBits(buffer, bufferSize); return; } @@ -87,7 +92,6 @@ private void newBuffer() { bufferList.add(buffer); buffer = 0; // zero out in case allocBuffer fails allocBuffer(); - pos = 0; } private void setBits(long buf, int sz) { @@ -99,11 +103,23 @@ private void setBits(long buf, int sz) { **/ } + private static DocSet makeSmallSet(BitDocSetNative bits) throws IOException { + int numDocs = (int)bits.cardinality(); + long answer = HS.allocArray(numDocs, 4, false); + DocIdSetIterator iter = bits.docIterator(); + for(int i=0; i smallSetSize) { + + if (bits == null && sz > collectLimit) { bits = new BitDocSetNative(maxDoc); } @@ -116,9 +132,13 @@ public DocSet getDocSet() { } } - DocSet answer = bits; - bits = null; // null out so we know we don't need to free later - return answer; + if (sz > smallSetSize) { + DocSet answer = bits; + bits = null; // null out so we know we don't need to free later + return answer; + } else { + return makeSmallSet(bits); + } } // make a small set diff --git a/solr/native/make.sh b/solr/native/make.sh index 98cd1400463..ac8962cffe5 100755 --- a/solr/native/make.sh +++ b/solr/native/make.sh @@ -69,6 +69,7 @@ javah -d $BUILD/inc -force -classpath ${CLASSES} org.apache.solr.search.facet.Si CPPFILES="$CLASS.cpp docset.cpp facet.cpp" INC="$JNI_INC -I$BUILD/inc" $GPP $DEBUG $OPT -Wall $CFLAGS $INC -shared -fPIC $CPPFILES -o $BUILD/$OUT +#$GPP -S $DEBUG $OPT -Wall $CFLAGS $INC -shared -fPIC docset.cpp $GPP $OPT -Wall $CFLAGS $INC -fPIC $CPPFILES test.cpp -o $BUILD/test.exe # $GPP -S $OPT -Wall $CFLAGS $INC -fPIC $CPPFILES test.cpp