Skip to content

Commit

Permalink
use a bitset to create sorted int set between collectLimit and smallS…
Browse files Browse the repository at this point in the history
…etSize
  • Loading branch information
yonik committed Sep 6, 2014
1 parent 41c997f commit be24e9a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 13 deletions.
46 changes: 33 additions & 13 deletions solr/core/src/java/org/apache/solr/search/DedupDocSetCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.solr.core.HS;

Expand All @@ -33,15 +34,18 @@
public class DedupDocSetCollector extends Collector implements AutoCloseable {
private long buffer;
private BitDocSetNative bits;
private int globalPos = 0;
private int pos=0;
private final int maxDoc;
private final int smallSetSize;
private final int collectLimit;
private int base;
private final int bufferSize = HS.BUFFER_SIZE_BYTES >>> 2;
private List<Long> bufferList;

public DedupDocSetCollector(int smallSetSize, int maxDoc) {
this.smallSetSize = smallSetSize;
this.collectLimit = Math.min((smallSetSize>>1) + 5, smallSetSize);
this.maxDoc = maxDoc;
allocBuffer();
}
Expand All @@ -62,21 +66,22 @@ public void collect(int doc) throws IOException {
}

// Returns the number of buffered doc ids as an int.
// NOTE(review): this span comes from a rendered diff that carries no +/- markers, so two
// alternative bodies appear back to back: one computing (nBuffers * bufferSize) + pos from
// bufferList, and one returning globalPos + pos. Presumably only the globalPos form is the
// post-commit code -- confirm against the real file before relying on either.
private int bufferedSize() {
int nBuffers = bufferList==null ? 0 : bufferList.size();
return (nBuffers * bufferSize) + pos;
return globalPos + pos;
}

private void newBuffer() {
if (bits == null && bufferedSize() > smallSetSize) {
bits = new BitDocSetNative(maxDoc);
assert pos == bufferSize;
globalPos += pos;
pos = 0; // do this here so bufferedSize will work

if (bits == null && bufferedSize() > collectLimit) {
bits = new BitDocSetNative(maxDoc);
}

// if we've already transitioned to a bitset, then just set the bits
// and reuse this buffer.
if (bits != null) {
assert pos == bufferSize;
setBits(buffer, pos);
pos = 0;
setBits(buffer, bufferSize);
return;
}

Expand All @@ -87,7 +92,6 @@ private void newBuffer() {
bufferList.add(buffer);
buffer = 0; // zero out in case allocBuffer fails
allocBuffer();
pos = 0;
}

private void setBits(long buf, int sz) {
Expand All @@ -99,11 +103,23 @@ private void setBits(long buf, int sz) {
**/
}

/**
 * Builds a {@code SortedIntDocSetNative} holding every doc id produced by the given
 * bitset's iterator.
 *
 * <p>The ids are written into an array obtained from {@code HS.allocArray} in the order
 * the iterator yields them. The returned set is constructed directly over that array.
 *
 * @param bits the bitset to copy doc ids from; it is only read here
 * @return a new {@code SortedIntDocSetNative} with {@code bits.cardinality()} entries
 * @throws IOException if advancing the doc id iterator fails
 */
private static DocSet makeSmallSet(BitDocSetNative bits) throws IOException {
  final int count = (int) bits.cardinality();
  // NOTE(review): the literal 4 is presumably the element size in bytes (one int per doc id),
  // and the meaning of the trailing 'false' flag is not visible here -- confirm against HS.allocArray.
  final long array = HS.allocArray(count, 4, false);
  final DocIdSetIterator docs = bits.docIterator();

  int slot = 0;
  while (slot < count) {
    HS.setInt(array, slot, docs.nextDoc());
    slot++;
  }

  // After 'count' ids the iterator must be exhausted; only checked when assertions are enabled.
  assert docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
  return new SortedIntDocSetNative(array, count);
}


public DocSet getDocSet() {
public DocSet getDocSet() throws IOException {
int sz = bufferedSize();
if (bits == null && sz > smallSetSize) {

if (bits == null && sz > collectLimit) {
bits = new BitDocSetNative(maxDoc);
}

Expand All @@ -116,9 +132,13 @@ public DocSet getDocSet() {
}
}

DocSet answer = bits;
bits = null; // null out so we know we don't need to free later
return answer;
if (sz > smallSetSize) {
DocSet answer = bits;
bits = null; // null out so we know we don't need to free later
return answer;
} else {
return makeSmallSet(bits);
}
}

// make a small set
Expand Down
1 change: 1 addition & 0 deletions solr/native/make.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ javah -d $BUILD/inc -force -classpath ${CLASSES} org.apache.solr.search.facet.Si
CPPFILES="$CLASS.cpp docset.cpp facet.cpp"
INC="$JNI_INC -I$BUILD/inc"
$GPP $DEBUG $OPT -Wall $CFLAGS $INC -shared -fPIC $CPPFILES -o $BUILD/$OUT
#$GPP -S $DEBUG $OPT -Wall $CFLAGS $INC -shared -fPIC docset.cpp

$GPP $OPT -Wall $CFLAGS $INC -fPIC $CPPFILES test.cpp -o $BUILD/test.exe
# $GPP -S $OPT -Wall $CFLAGS $INC -fPIC $CPPFILES test.cpp
Expand Down

0 comments on commit be24e9a

Please sign in to comment.