Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partial/incremental decompression of clusters #411

Closed
wants to merge 13 commits into from
33 changes: 33 additions & 0 deletions src/idatastream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#include "idatastream.h"

namespace zim
{

// Default blob reader: allocates a fresh buffer of 'size' bytes, fills it
// through readImpl() and returns a blob that shares ownership of that buffer.
IDataStream::Blob
IDataStream::readBlobImpl(size_t size)
{
  char* const rawData = new char[size];
  // The array deleter is required because the buffer comes from new[]
  std::shared_ptr<char> owner(rawData, std::default_delete<char[]>());
  readImpl(rawData, size);
  return Blob(owner, size);
}

} // namespace zim
116 changes: 116 additions & 0 deletions src/idatastream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#ifndef ZIM_IDATASTREAM_H
#define ZIM_IDATASTREAM_H

#include <exception>
#include <memory>

#include "endian_tools.h"

namespace zim
{

// IDataStream is a simple interface for sequential iteration over a stream
// of values of built-in/primitive types and/or opaque binary objects (blobs).
// An example usage:
//
// void foo(IDataStream& s)
// {
// const uint32_t n = s.read<uint32_t>();
// for(uint32_t i=0; i < n; ++i)
// {
// const uint16_t blobSize = s.read<uint16_t>();
// IDataStream::Blob blob = s.readBlob(blobSize);
// bar(blob, blobSize);
// }
// }
//
class IDataStream
{
public: // types
// A read-only chunk of binary data. The bytes are held through a
// std::shared_ptr, so copies of a Blob share the same underlying buffer.
class Blob
{
private: // types
  using DataPtr = std::shared_ptr<const char>;

public: // functions
  // Creates a blob referring to the buffer held by 'data' and reporting
  // 'size' as its size. The blob keeps its own reference to the buffer.
  Blob(const DataPtr& data, size_t size)
    : data_(data)
    , size_(size)
  {
  }

  // Pointer to the beginning of the blob's data
  const char* data() const { return data_.get(); }

  // The size that was given at construction
  size_t size() const { return size_; }

private: // data
  DataPtr data_;
  size_t size_;
};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use zim::Blob?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My intention is to promote IDataStream::Blob to zim::Blob, so eventually we will use zim::Blob here 😉

This version of a blob implementation has several advantages over the current version of zim::Blob:

  1. It is a little more lightweight.
  2. After minor enhancements, it can still replace both zim::Blob and zim::Buffer.


public: // functions
// Virtual destructor, so that stream implementations can be destroyed
// through a pointer to IDataStream.
virtual ~IDataStream() = default;

// Reads a value of the said type from the stream
//
// sizeof(T) bytes are fetched via readImpl() and decoded with
// fromLittleEndian<T>(), i.e. the stream contents are treated as
// little-endian data.
//
// For best portability this function should be used with types of known
// bit-width (int32_t, uint16_t, etc) rather than builtin types with
// unknown bit-width (int, unsigned, etc).
template<typename T> T read();

// Reads a blob of the specified size from the stream.
// The call is forwarded to the virtual readBlobImpl().
Blob readBlob(size_t size);

private: // virtual methods
// Reads exactly 'nbytes' bytes into the provided buffer 'buf'
// (which must be at least that big). Throws an exception if
// more bytes are requested than can be retrieved.
//
// Pure virtual: every concrete stream must provide it. Both read<T>()
// and the default readBlobImpl() obtain their data through this function.
virtual void readImpl(void* buf, size_t nbytes) = 0;

// By default a blob is returned as an independent object owning
// its own buffer (the default implementation allocates a new buffer
// of 'size' bytes and fills it via readImpl()). However, the function
// readBlobImpl() can be overridden so that it returns a blob referring
// to arbitrary pre-existing memory.
virtual Blob readBlobImpl(size_t size);
};

////////////////////////////////////////////////////////////////////////////////
// Implementation of IDataStream
////////////////////////////////////////////////////////////////////////////////

// Fetches sizeof(T) bytes from the stream and decodes them into a T.
// XXX: The bytes obtained from 'readImpl()' are assumed to hold the
// XXX: value in little-endian form.
template<typename T>
inline T
IDataStream::read()
{
  char bytes[sizeof(T)];
  readImpl(bytes, sizeof(T));
  return fromLittleEndian<T>(bytes); // XXX: This handles only integral types
}

// Public entry point for blob reads; the actual work is delegated to the
// virtual readBlobImpl().
inline IDataStream::Blob IDataStream::readBlob(size_t size)
{
  return this->readBlobImpl(size);
}

} // namespace zim

#endif // ZIM_IDATASTREAM_H
1 change: 1 addition & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ common_sources = [
'levenshtein.cpp',
'tools.cpp',
'compression.cpp',
'idatastream.cpp',
'writer/creator.cpp',
'writer/article.cpp',
'writer/cluster.cpp',
Expand Down
58 changes: 58 additions & 0 deletions test/idatastream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (C) 2020 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#include "idatastream.h"

#include <cstring>

#include "gtest/gtest.h"

namespace
{

using zim::IDataStream;

// Implement the IDataStream interface in the simplest way: every read
// request, whatever its size, is satisfied with zero bytes.
class InfiniteZeroStream : public IDataStream
{
  // Fills the first 'nbytes' bytes of 'buf' with zeros.
  // std::memset comes from <cstring>, which this file includes explicitly
  // rather than relying on another header to pull it in.
  void readImpl(void* buf, size_t nbytes) { std::memset(buf, 0, nbytes); }
};

// ... and test that it compiles and works as intended

// read<T>() applied to a stream of zero bytes must produce a zero value
TEST(IDataStream, read)
{
  InfiniteZeroStream zeros;
  IDataStream& stream = zeros;
  EXPECT_EQ(0, stream.read<int>());
  EXPECT_EQ(0L, stream.read<long>());

  // zim::fromLittleEndian() handles only integer types
  // EXPECT_EQ(0.0, stream.read<double>());
}

// readBlob() applied to a stream of zero bytes must produce a blob of the
// requested size whose contents are all zeros
TEST(IDataStream, readBlob)
{
  const size_t N = 16;
  const char zerobuf[N] = {0};
  InfiniteZeroStream izs;
  IDataStream& ids = izs;
  const IDataStream::Blob blob = ids.readBlob(N);

  // Fatal assertion: the comparison below reads N bytes from the blob,
  // so it must not run if the blob reports a different size.
  ASSERT_EQ(N, blob.size());
  EXPECT_EQ(0, std::memcmp(blob.data(), zerobuf, N));
}

} // unnamed namespace
3 changes: 2 additions & 1 deletion test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ tests = [
'iterator',
'find',
'compression',
'impl_find'
'impl_find',
'idatastream'
]

if gtest_dep.found() and not meson.is_cross_build()
Expand Down