Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Streamreader #421

Merged
merged 29 commits into from
Sep 23, 2020
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
bd6b210
Introduce SharedBuffer.
mgautierfr Sep 14, 2020
da21218
Dropped MemoryViewBuffer
veloman-yunkan Sep 9, 2020
2767c04
Dropped MemoryBuffer
veloman-yunkan Sep 9, 2020
269c942
Dropped MMapBuffer
veloman-yunkan Sep 9, 2020
fe9754f
Remove SharedBuffer and make Buffer the only class to contain data.
mgautierfr Sep 14, 2020
6382f68
Blob do not depend of Buffer.
mgautierfr Sep 14, 2020
2a025ec
Do not use external shared_ptr to keep buffer memory alive.
mgautierfr Sep 14, 2020
f19fd25
Introduced zim::IDataStream
veloman-yunkan Aug 28, 2020
86ef980
IStreamReader allow to get a reader.
mgautierfr Sep 14, 2020
a4ed832
Enter DecodedDataStream
veloman-yunkan Aug 30, 2020
695fb9f
Adapt DecoderStreamReader to wrap a Reader instead of a InputStream.
mgautierfr Sep 15, 2020
227df39
zim::ReaderDataStreamWrapper
veloman-yunkan Aug 29, 2020
9c469f8
Adapt RawStreamReader to wrap a reader.
mgautierfr Sep 15, 2020
b8f3eb7
Enter BufDataStream
veloman-yunkan Aug 30, 2020
d796085
Adapt BufferStreamer to wrap a `Buffer` instead of raw data.
mgautierfr Sep 16, 2020
480780a
Got rid of read_size() in cluster.cpp
veloman-yunkan Aug 29, 2020
1b5f8e7
Make the Cluster use `IStreamReader`.
mgautierfr Sep 15, 2020
76c60b4
Make `Dirent` use BufferStreamer.
mgautierfr Sep 15, 2020
9d358d4
Make FileHeader use `BufferStreamer`.
mgautierfr Sep 15, 2020
8b83dc1
Faster Blob/Buffer constructor for non-owned data case
veloman-yunkan Sep 8, 2020
04c4020
fixup! Adapt BufferStreamer to wrap a `Buffer` instead of raw data.
mgautierfr Sep 17, 2020
39533c7
fixup! Adapt DecoderStreamReader to wrap a Reader instead of a InputS…
mgautierfr Sep 17, 2020
04843d9
fixup! Adapt RawStreamReader to wrap a reader
mgautierfr Sep 17, 2020
4672b19
Move `BufferReader` to its own file.
mgautierfr Sep 17, 2020
b3e64fe
Remove `Buffer.as` method.
mgautierfr Sep 17, 2020
8a816f2
fixup! Do not use external shared_ptr to keep buffer memo
mgautierfr Sep 23, 2020
12218e2
Rename tempfile.(cpp|h) to tools.(cpp|h)
mgautierfr Sep 23, 2020
f5e682d
Move `write_to_buffer` test function to a generic helper function.
mgautierfr Sep 23, 2020
004afcb
Remove a few useless empty lines
kelson42 Sep 23, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions include/zim/blob.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,25 @@

namespace zim
{
class Buffer;
class Blob
{
const char* _data;
size_type _size;
std::shared_ptr<const Buffer> _buffer;
public: // types
using DataPtr = std::shared_ptr<const char>;

public:
public: // functions
Blob();
Blob(const char* data, size_type size);
Blob(std::shared_ptr<const Buffer> buffer);
Blob(const DataPtr& buffer, size_type size);

operator std::string() const { return std::string(_data, _size); }
const char* data() const { return _data; }
const char* end() const { return _data + _size; }
operator std::string() const { return std::string(_data.get(), _size); }
const char* data() const { return _data.get(); }
const char* end() const { return _data.get() + _size; }
size_type size() const { return _size; }

private:
DataPtr _data;
size_type _size;

};

inline std::ostream& operator<< (std::ostream& out, const Blob& blob)
Expand Down
2 changes: 1 addition & 1 deletion include/zim/fileheader.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ namespace zim
{}

void write(int out_fd) const;
void read(std::shared_ptr<const Buffer> buffer);
void read(const Buffer& buffer);

// Do some sanity checks; raise a ZimFileFormateError if
// something is wrong.
Expand Down
20 changes: 15 additions & 5 deletions src/blob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,33 @@

namespace zim {

namespace
{

// Deleter that intentionally does nothing. Handing it to a std::shared_ptr
// lets the smart pointer refer to memory without freeing it when the last
// reference goes away.
struct NoDelete
{
  template<typename Pointee>
  void operator()(Pointee* /*not owned*/) {}
};

} // unnamed namespace


// Builds an empty blob: a null data pointer and a size of zero.
// `nullptr` is used instead of the literal 0 so the null-pointer intent is
// explicit for the pointer member.
Blob::Blob()
  : _data(nullptr),
    _size(0)
{}

Blob::Blob(const char* data, size_type size)
: _data(data),
: _data(DataPtr(data, NoDelete())),
_size(size)
{
ASSERT(size, <, SIZE_MAX);
ASSERT(data, <, (void*)(SIZE_MAX-size));
}

Blob::Blob(std::shared_ptr<const Buffer> buffer)
: _data(buffer->data()),
_size(size_type(buffer->size())),
_buffer(buffer)
// Builds a blob of `size` bytes on top of `buffer`. The shared pointer is
// copied into the blob, so the blob holds its own reference to the data.
Blob::Blob(const DataPtr& buffer, size_type size)
  : _data(buffer)
  , _size(size)
{
}


Expand Down
120 changes: 34 additions & 86 deletions src/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,111 +34,59 @@

namespace zim {

namespace {

class SubBuffer : public Buffer {
public:
SubBuffer(const std::shared_ptr<const Buffer> src, offset_t offset, zsize_t size)
: Buffer(size),
_data(src, src->data(offset))
{
ASSERT(offset.v, <=, src->size().v);
ASSERT(offset.v+size.v, <=, src->size().v);
}

const char* dataImpl(offset_t offset) const {
return _data.get() + offset.v;
}

private:
const std::shared_ptr<const char> _data;
namespace
{

// Deleter that intentionally does nothing. Used with std::shared_ptr so a
// Buffer can reference memory without releasing it when the last reference
// is dropped.
struct NoDelete
{
  template<typename Pointee>
  void operator()(Pointee* /*not owned*/) {}
};

} // unnamed namespace

std::shared_ptr<const Buffer> Buffer::sub_buffer(offset_t offset, zsize_t size) const
const Buffer Buffer::sub_buffer(offset_t offset, zsize_t size) const
{
return std::make_shared<SubBuffer>(shared_from_this(), offset, size);
ASSERT(offset.v, <=, m_size.v);
ASSERT(offset.v+size.v, <=, m_size.v);
auto sub_data = DataPtr(m_data, data(offset));
return Buffer(sub_data, size);
}

////////////////////////////////////////////////////////////////////////////////
// MemoryViewBuffer
////////////////////////////////////////////////////////////////////////////////

MemoryViewBuffer::MemoryViewBuffer(const char* buffer, zsize_t size)
: Buffer(size)
, _data(buffer)
{}

const char*
MemoryViewBuffer::dataImpl(offset_t offset) const {
return _data + offset.v;
// Factory: builds a read-only Buffer of `size` bytes on top of data that is
// already held by a shared pointer. The Buffer keeps its own copy of that
// shared pointer.
const Buffer Buffer::makeBuffer(const DataPtr& data, zsize_t size)
{
  const Buffer wrapped(data, size);
  return wrapped;
}

////////////////////////////////////////////////////////////////////////////////
// MemoryBuffer
////////////////////////////////////////////////////////////////////////////////

MemoryBuffer::MemoryBuffer(zsize_t size)
: Buffer(size)
, _data(new char[size.v])
{}

MemoryBuffer::MemoryBuffer(std::unique_ptr<char[]> buffer, zsize_t size)
: Buffer(size)
, _data(std::move(buffer))
{}

const char*
MemoryBuffer::dataImpl(offset_t offset) const {
return _data.get() + offset.v;
// Factory: builds a read-only Buffer of `size` bytes over a raw pointer.
// The pointer is wrapped with the no-op NoDelete deleter, so the Buffer
// never frees `data`.
const Buffer Buffer::makeBuffer(const char* data, zsize_t size)
{
  const DataPtr unowned(data, NoDelete());
  return Buffer(unowned, size);
}

////////////////////////////////////////////////////////////////////////////////
// MMapBuffer
////////////////////////////////////////////////////////////////////////////////

#ifdef ENABLE_USE_MMAP
MMapBuffer::MMapBuffer(int fd, offset_t offset, zsize_t size):
Buffer(size),
_offset(0)
Buffer Buffer::makeBuffer(zsize_t size)
{
offset_t pa_offset(offset.v & ~(sysconf(_SC_PAGE_SIZE) - 1));
_offset = offset-pa_offset;
#if defined(__APPLE__) || defined(__OpenBSD__)
#define MAP_FLAGS MAP_PRIVATE
#elif defined(__FreeBSD__)
#define MAP_FLAGS MAP_PRIVATE|MAP_PREFAULT_READ
#else
#define MAP_FLAGS MAP_PRIVATE|MAP_POPULATE
#endif
#if !MMAP_SUPPORT_64
if(pa_offset.v >= INT32_MAX) {
throw MMapException();
}
#endif
_data = (char*)mmap(NULL, size.v + _offset.v, PROT_READ, MAP_FLAGS, fd, pa_offset.v);
if (_data == MAP_FAILED )
{
std::ostringstream s;
s << "Cannot mmap size " << size.v << " at off " << offset.v << " : " << strerror(errno);
throw std::runtime_error(s.str());
}
#undef MAP_FLAGS
return Buffer(DataPtr(new char[size.v], std::default_delete<char[]>()), size);
}

MMapBuffer::~MMapBuffer()
Buffer::Buffer(const DataPtr& data, zsize_t size)
: m_size(size),
m_data(data)
{
munmap(_data, size_.v + _offset.v);
ASSERT(m_size.v, <, SIZE_MAX);
}

const char*
MMapBuffer::dataImpl(offset_t offset) const
{
offset += _offset;
return _data + offset.v;
Buffer::data(offset_t offset) const {
ASSERT(offset.v, <=, m_size.v);
return m_data.get() + offset.v;
}

// Mutable access to the byte located `offset` bytes after the start of the
// buffer. `offset` may equal the buffer size, which yields the
// one-past-the-end position.
char*
Buffer::data(offset_t offset) {
  ASSERT(offset.v, <=, m_size.v);
  const char* const position = m_data.get() + offset.v;
  // Dropping constness is acceptable here: the only way to get a non const
  // Buffer is to use the factory allocating the memory for us.
  return const_cast<char*>(position);
}

#endif // ENABLE_USE_MMAP

} //zim
88 changes: 22 additions & 66 deletions src/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,89 +29,45 @@
#include "zim_types.h"
#include "endian_tools.h"
#include "debug.h"
#include <zim/blob.h>

namespace zim {

class Buffer : public std::enable_shared_from_this<Buffer> {
public:
explicit Buffer(zsize_t size)
: size_(size)
{
ASSERT(size_.v, <, SIZE_MAX);
};
class Buffer {
public: // types
typedef std::shared_ptr<const char> DataPtr;

Buffer(const Buffer& ) = delete;
void operator=(const Buffer& ) = delete;
public: // functions
static const Buffer makeBuffer(const char* data, zsize_t size);
static const Buffer makeBuffer(const DataPtr& data, zsize_t size);
Comment on lines +41 to +42
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const qualifiers on return types offer no real protection if we talk about return-by-value of a type with reference semantics

static Buffer makeBuffer(zsize_t size);

virtual ~Buffer() {};
const char* data(offset_t offset=offset_t(0)) const {
ASSERT(offset.v, <=, size_.v);
return dataImpl(offset);
}
const char* data(offset_t offset=offset_t(0)) const;
char* data(offset_t offset=offset_t(0));

char at(offset_t offset) const {
return *(data(offset));
return *(data(offset));
}
zsize_t size() const { return size_; }
std::shared_ptr<const Buffer> sub_buffer(offset_t offset, zsize_t size) const;
zsize_t size() const { return m_size; }
const Buffer sub_buffer(offset_t offset, zsize_t size) const;

template<typename T>
T as(offset_t offset) const {
ASSERT(offset.v, <, size_.v);
ASSERT(offset.v+sizeof(T), <=, size_.v);
ASSERT(offset.v, <, m_size.v);
ASSERT(offset.v+sizeof(T), <=, m_size.v);
return fromLittleEndian<T>(data(offset));
}
mgautierfr marked this conversation as resolved.
Show resolved Hide resolved

protected:
virtual const char* dataImpl(offset_t offset) const = 0;

protected:
const zsize_t size_;
};


class MemoryViewBuffer : public Buffer {
public:
MemoryViewBuffer(const char* buffer, zsize_t size);
operator Blob() const { return Blob(m_data, m_size.v); }

protected:
const char* dataImpl(offset_t offset) const;
private: // functions
Buffer(const DataPtr& data, zsize_t size);

protected:
const char* const _data;
private: // data
zsize_t m_size;
DataPtr m_data;
};

class MemoryBuffer : public Buffer {
public:
explicit MemoryBuffer(zsize_t size);
MemoryBuffer(std::unique_ptr<char[]> buffer, zsize_t size);

char* buf() { return _data.get(); }

protected:
const char* dataImpl(offset_t offset) const;

private:
const std::unique_ptr<char[]> _data;
};


#ifdef ENABLE_USE_MMAP
class MMapException : std::exception {};

class MMapBuffer : public Buffer {
public:
MMapBuffer(int fd, offset_t offset, zsize_t size);
~MMapBuffer();

const char* dataImpl(offset_t offset) const;

private:
offset_t _offset;
char* _data;
};
#endif

};
} // zim namespace

#endif //ZIM_BUFFER_H_
11 changes: 6 additions & 5 deletions src/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ zsize_t read_size(const Reader* reader, bool isExtended, offset_t offset)
return _read_size<uint32_t>(reader, offset);
}

std::shared_ptr<const Buffer>
const Buffer
getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp)
{
zsize_t uncompressed_size(0);
Expand All @@ -80,7 +80,8 @@ getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp)
default:
throw std::logic_error("compressions should not be something else than zimcompLzma, zimComZip or zimcompZstd.");
}
return std::make_shared<MemoryBuffer>(std::move(uncompressed_data), uncompressed_size);
auto shared_data = std::shared_ptr<const char>(uncompressed_data.release(), std::default_delete<char[]>());
return Buffer::makeBuffer(shared_data, uncompressed_size);
}

std::unique_ptr<const Reader>
Expand Down Expand Up @@ -161,7 +162,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp
offset_t current = offset_t(sizeof(OFFSET_TYPE));
while (--n_offset)
{
OFFSET_TYPE new_offset = buffer->as<OFFSET_TYPE>(current);
OFFSET_TYPE new_offset = buffer.as<OFFSET_TYPE>(current);
ASSERT(new_offset, >=, offset);
ASSERT(new_offset, <=, reader->size().v);

Expand All @@ -187,7 +188,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp
return Blob();
}
auto buffer = reader->get_buffer(offsets[blob_index_type(n)], blobSize);
return Blob(buffer);
return buffer;
} else {
return Blob();
}
Expand All @@ -206,7 +207,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp
}
offset += offsets[blob_index_type(n)];
auto buffer = reader->get_buffer(offset, size);
return Blob(buffer);
return buffer;
} else {
return Blob();
}
Expand Down
Loading