Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partial/incremental decompression of clusters #411

Closed
wants to merge 13 commits into from
97 changes: 97 additions & 0 deletions src/decodeddatastream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#ifndef ZIM_DECODECDATASTREAM_H
#define ZIM_DECODECDATASTREAM_H

#include <algorithm>
#include <memory>
#include <stdexcept>
#include <utility>

#include "compression.h"
#include "idatastream.h"

namespace zim
{

template<typename Decoder>
class DecodedDataStream : public IDataStream
{
private: // constants
enum { CHUNK_SIZE = 1024 };
veloman-yunkan marked this conversation as resolved.
Show resolved Hide resolved

public: // functions
DecodedDataStream(std::unique_ptr<IDataStream> inputData, size_t inputSize)
: encodedDataStream_(std::move(inputData))
, inputBytesLeft_(inputSize)
, encodedDataChunk_()
{
Decoder::init_stream_decoder(&decoderState_, nullptr);
readNextChunk();
}

~DecodedDataStream()
{
Decoder::stream_end_decode(&decoderState_);
}

private: // functions
void readNextChunk()
{
const size_t n = std::min(size_t(CHUNK_SIZE), inputBytesLeft_);
encodedDataChunk_ = encodedDataStream_->readBlob(n);
inputBytesLeft_ -= n;
// XXX: ugly C-style cast (casting away constness) on the next line
decoderState_.next_in = (unsigned char*)encodedDataChunk_.data();
decoderState_.avail_in = encodedDataChunk_.size();
}

CompStatus decodeMoreBytes()
{
CompStep step = CompStep::STEP;
if ( decoderState_.avail_in == 0 )
{
if ( inputBytesLeft_ == 0 )
step = CompStep::FINISH;
else
readNextChunk();
}

return Decoder::stream_run_decode(&decoderState_, step);
}

void readImpl(void* buf, size_t nbytes) override
{
decoderState_.next_out = (unsigned char*)buf;
decoderState_.avail_out = nbytes;
while ( decoderState_.avail_out != 0 )
{
decodeMoreBytes();
}
}

private: // types
typedef typename Decoder::stream_t DecoderState;

private: // data
std::unique_ptr<IDataStream> encodedDataStream_;
size_t inputBytesLeft_; // count of bytes left in the input stream
DecoderState decoderState_;
IDataStream::Blob encodedDataChunk_;
};

} // namespace zim

#endif // ZIM_DECODECDATASTREAM_H
1 change: 1 addition & 0 deletions src/idatastream.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class IDataStream
typedef std::shared_ptr<const char> DataPtr;

public: // functions
Blob() : data_(), size_(0) {}
Blob(const DataPtr& data, size_t size) : data_(data) , size_(size) {}

const char* data() const { return data_.get(); }
Expand Down
100 changes: 100 additions & 0 deletions test/decodeddatastream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#include "decodeddatastream.h"
#include "bufdatastream.h"

#include "gtest/gtest.h"

namespace
{

template<class CompressionInfo>
std::string
compress(const std::string& data)
{
zim::Compressor<CompressionInfo> compressor(data.size());
compressor.init(const_cast<char*>(data.c_str()));
compressor.feed(data.c_str(), data.size());
zim::zsize_t comp_size;
const auto comp_data = compressor.get_data(&comp_size);
return std::string(comp_data.get(), comp_size.v);
}

// Returns the string `s` repeated N times (an empty string for N == 0).
std::string operator*(const std::string& s, unsigned N)
{
  std::string repeated;
  for (unsigned remaining = N; remaining != 0; --remaining)
  {
    repeated.append(s);
  }
  return repeated;
}

// Copies the bytes referenced by `blob` into a std::string.
std::string toString(const zim::IDataStream::Blob& blob)
{
  const char* const bytes = blob.data();
  const auto byteCount = blob.size();
  return std::string(bytes, byteCount);
}

// Typed test fixture: exposes the compression type under test as the
// nested type CompressionInfo.
template<typename T>
class DecodedDataStreamTest : public testing::Test
{
protected:
  using CompressionInfo = T;
};

// The list of compression types the typed tests below are instantiated for.
// ZIP_INFO is included only when the build defines ENABLE_ZLIB.
using CompressionTypes = ::testing::Types<
LZMA_INFO,
ZSTD_INFO
#if defined(ENABLE_ZLIB)
,ZIP_INFO
#endif
>;

// Binds the DecodedDataStreamTest fixture to the above list of types.
TYPED_TEST_CASE(DecodedDataStreamTest, CompressionTypes);

// The input consists of the compressed data only: reading the decoded
// stream piecewise must return the original text, one repetition per read.
TYPED_TEST(DecodedDataStreamTest, justCompressedData) {
  using CompressionInfo = typename TestFixture::CompressionInfo;

  const int repetitions = 10;
  const std::string phrase("DecodedDataStream should work correctly");
  const std::string encoded = compress<CompressionInfo>(phrase*repetitions);

  std::unique_ptr<zim::IDataStream> source(
      new zim::BufDataStream(encoded.data(), encoded.size()));
  zim::DecodedDataStream<CompressionInfo> decoded(std::move(source), encoded.size());

  for (int i = 0; i != repetitions; ++i)
  {
    ASSERT_EQ(phrase, toString(decoded.readBlob(phrase.size()))) << "i: " << i;
  }
}

// The compressed data is followed by 10 extra zero bytes that are also made
// available to the decoder: the decoded output must not be affected by them.
TYPED_TEST(DecodedDataStreamTest, compressedDataFollowedByGarbage) {
  using CompressionInfo = typename TestFixture::CompressionInfo;

  const int repetitions = 10;
  const std::string phrase("DecodedDataStream should work correctly");
  const std::string encoded = compress<CompressionInfo>(phrase*repetitions);
  const std::string trailingGarbage(10, '\0');
  const std::string input = encoded + trailingGarbage;

  std::unique_ptr<zim::IDataStream> source(
      new zim::BufDataStream(input.data(), input.size()));
  zim::DecodedDataStream<CompressionInfo> decoded(std::move(source), input.size());

  for (int i = 0; i != repetitions; ++i)
  {
    ASSERT_EQ(phrase, toString(decoded.readBlob(phrase.size()))) << "i: " << i;
  }
}

} // unnamed namespace
3 changes: 2 additions & 1 deletion test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ tests = [
'compression',
'impl_find',
'idatastream',
'readerdatastreamwrapper'
'readerdatastreamwrapper',
'decodeddatastream'
]

if gtest_dep.found() and not meson.is_cross_build()
Expand Down