From bd6b21024ca28c9a1eb296840167e70bd229cb0e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 14 Sep 2020 15:38:57 +0200 Subject: [PATCH 01/29] Introduce SharedBuffer. The `SharedBuffer` name is pretty badly chosen, but the plan is to make it replace `Buffer` so the name is only temporary. --- src/buffer.cpp | 31 +++++++++++++++++++++++++++++++ src/buffer.h | 16 ++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/src/buffer.cpp b/src/buffer.cpp index 955206aab..51d2475a0 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -61,6 +61,37 @@ std::shared_ptr Buffer::sub_buffer(offset_t offset, zsize_t size) return std::make_shared(shared_from_this(), offset, size); } +//////////////////////////////////////////////////////////////////////////////// +// SharedBufferBuffer +//////////////////////////////////////////////////////////////////////////////// + +namespace +{ + +struct NoDelete +{ + template void operator()(T*) {} +}; + +} // unnamed namespace + + +SharedBuffer::SharedBuffer(const char* data, zsize_t size) + : Buffer(size), + m_data(data, NoDelete()) +{} + +SharedBuffer::SharedBuffer(const DataPtr& data, zsize_t size) + : Buffer(size), + m_data(data) +{} + +const char* +SharedBuffer::dataImpl(offset_t offset) const { + return m_data.get() + offset.v; +} + + //////////////////////////////////////////////////////////////////////////////// // MemoryViewBuffer //////////////////////////////////////////////////////////////////////////////// diff --git a/src/buffer.h b/src/buffer.h index 0bc28f8b0..3833d60a2 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -70,6 +70,22 @@ class Buffer : public std::enable_shared_from_this { }; +class SharedBuffer : public Buffer { + public: // types + typedef std::shared_ptr DataPtr; + + public: // functions + SharedBuffer(const char* data, zsize_t size); + SharedBuffer(const DataPtr& data, zsize_t size); + + protected: + const char* dataImpl(offset_t offset) const; + + private: //data + DataPtr m_data; +}; + + class MemoryViewBuffer : public Buffer { public: MemoryViewBuffer(const char* buffer, zsize_t size); From da2121895cb535781d1d795ec61011a288f5f9b0 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 9 Sep 2020 19:21:45 +0400 Subject: [PATCH 02/29] Dropped MemoryViewBuffer --- src/buffer.cpp | 14 -------------- src/buffer.h | 11 ----------- src/fileimpl.cpp | 2 +- 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/src/buffer.cpp b/src/buffer.cpp index 51d2475a0..5bd84ff81 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -92,20 +92,6 @@ SharedBuffer::dataImpl(offset_t offset) const { } -//////////////////////////////////////////////////////////////////////////////// -// MemoryViewBuffer -//////////////////////////////////////////////////////////////////////////////// - -MemoryViewBuffer::MemoryViewBuffer(const char* buffer, zsize_t size) - : Buffer(size) - , _data(buffer) -{} - -const char* -MemoryViewBuffer::dataImpl(offset_t offset) const { - return _data + offset.v; -} - //////////////////////////////////////////////////////////////////////////////// // MemoryBuffer //////////////////////////////////////////////////////////////////////////////// diff --git a/src/buffer.h b/src/buffer.h index 3833d60a2..36bdc952e 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -86,17 +86,6 @@ class SharedBuffer : public Buffer { }; -class MemoryViewBuffer : public Buffer { - public: - MemoryViewBuffer(const char* buffer, zsize_t size); - - protected: - const char* dataImpl(offset_t offset) const; - - protected: - const char* const _data; -}; - class MemoryBuffer : public Buffer { public: explicit MemoryBuffer(zsize_t size); diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index f413fa97c..3b8f4505e 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -314,7 +314,7 @@ offset_t readOffset(const Reader& reader, size_t idx) while (true) { bufferDirentZone.reserve(size_type(bufferSize)); zimReader->read(bufferDirentZone.data(), indexOffset, bufferSize); - const MemoryViewBuffer direntBuffer(bufferDirentZone.data(), bufferSize); + const SharedBuffer direntBuffer(bufferDirentZone.data(), bufferSize); try { dirent = std::make_shared(direntBuffer); } catch (InvalidSize&) { From 2767c04bc7dac57cedabf9b06a3cd1547390327c Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 9 Sep 2020 19:25:21 +0400 Subject: [PATCH 03/29] Dropped MemoryBuffer --- src/buffer.cpp | 24 +++++------------------- src/buffer.h | 16 +--------------- src/cluster.cpp | 3 ++- src/file_reader.cpp | 4 ++-- test/cluster.cpp | 4 ++-- test/dirent.cpp | 7 +++---- test/header.cpp | 4 ++-- 7 files changed, 17 insertions(+), 45 deletions(-) diff --git a/src/buffer.cpp b/src/buffer.cpp index 5bd84ff81..23e24c154 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -86,31 +86,17 @@ SharedBuffer::SharedBuffer(const DataPtr& data, zsize_t size) m_data(data) {} +SharedBuffer::SharedBuffer(zsize_t size) + : Buffer(size), + m_data(new char[size.v], std::default_delete()) +{} + const char* SharedBuffer::dataImpl(offset_t offset) const { return m_data.get() + offset.v; } -//////////////////////////////////////////////////////////////////////////////// -// MemoryBuffer -//////////////////////////////////////////////////////////////////////////////// - -MemoryBuffer::MemoryBuffer(zsize_t size) - : Buffer(size) - , _data(new char[size.v]) -{} - -MemoryBuffer::MemoryBuffer(std::unique_ptr buffer, zsize_t size) - : Buffer(size) - , _data(std::move(buffer)) -{} - -const char* -MemoryBuffer::dataImpl(offset_t offset) const { - return _data.get() + offset.v; -} - //////////////////////////////////////////////////////////////////////////////// // MMapBuffer //////////////////////////////////////////////////////////////////////////////// diff --git a/src/buffer.h b/src/buffer.h index 36bdc952e..9035bb31e 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -77,6 +77,7 @@ class SharedBuffer : public Buffer { public: // functions SharedBuffer(const char* data, zsize_t size); SharedBuffer(const DataPtr& data, zsize_t size); + SharedBuffer(zsize_t size); protected: const char* dataImpl(offset_t offset) const; @@ -86,21 +87,6 @@ class SharedBuffer : public Buffer { }; -class MemoryBuffer : public Buffer { - public: - explicit MemoryBuffer(zsize_t size); - MemoryBuffer(std::unique_ptr buffer, zsize_t size); - - char* buf() { return _data.get(); } - - protected: - const char* dataImpl(offset_t offset) const; - - private: - const std::unique_ptr _data; -}; - - #ifdef ENABLE_USE_MMAP class MMapException : std::exception {}; diff --git a/src/cluster.cpp b/src/cluster.cpp index 1e2c26bf4..a4991bb69 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -80,7 +80,8 @@ getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp) default: throw std::logic_error("compressions should not be something else than zimcompLzma, zimComZip or zimcompZstd."); } - return std::make_shared(std::move(uncompressed_data), uncompressed_size); + auto shared_data = std::shared_ptr(uncompressed_data.release(), std::default_delete()); + return std::make_shared(shared_data, uncompressed_size); } std::unique_ptr diff --git a/src/file_reader.cpp b/src/file_reader.cpp index 0936ae92c..a3e2ec0c1 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -144,8 +144,8 @@ std::shared_ptr FileReader::get_buffer(offset_t offset, zsize_t si // The range is several part, or we are on Windows. // We will have to do some memory copies :/ // [TODO] Use Windows equivalent for mmap. - auto ret_buffer = std::make_shared(size); - read(ret_buffer->buf(), offset, size); + auto ret_buffer = std::make_shared(size); + read(const_cast(ret_buffer->data()), offset, size); return ret_buffer; } } diff --git a/test/cluster.cpp b/test/cluster.cpp index c7899fd27..67fb873b8 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -70,9 +70,9 @@ std::shared_ptr write_to_buffer(zim::writer::Cluster& cluster) cluster.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = std::make_shared(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, buf->buf(), size) == -1) + if (read(tmp_fd, const_cast(buf->data()), size) == -1) throw std::runtime_error("Cannot read"); return buf; } diff --git a/test/dirent.cpp b/test/dirent.cpp index a26be8452..7ff6e0a7a 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -45,17 +45,16 @@ namespace using zim::unittests::TempFile; -std::unique_ptr write_to_buffer(zim::writer::Dirent& dirent) +std::shared_ptr write_to_buffer(zim::writer::Dirent& dirent) { const TempFile tmpFile("test_dirent"); const auto tmp_fd = tmpFile.fd(); dirent.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - typedef zim::MemoryBuffer BufType; - std::unique_ptr buf(new BufType(zim::zsize_t(size))); + auto buf = std::make_shared(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, buf->buf(), size) == -1) + if (read(tmp_fd, const_cast(buf->data()), size) == -1) throw std::runtime_error("Cannot read"); return buf; diff --git a/test/header.cpp b/test/header.cpp index d475b8a94..5f8bfa8fc 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -50,9 +50,9 @@ std::shared_ptr write_to_buffer(zim::Fileheader &header) header.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = std::make_shared(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, buf->buf(), size) == -1) + if (read(tmp_fd, const_cast(buf->data()), size) == -1) throw std::runtime_error("Cannot read"); return buf; } From 269c9427a46fc4621542a6a0c08e3ea2da9721ea Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 9 Sep 2020 20:02:21 +0400 Subject: [PATCH 04/29] Dropped MMapBuffer --- src/buffer.cpp | 48 -------------------------------------- src/buffer.h | 17 -------------- src/file_reader.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 66 deletions(-) diff --git a/src/buffer.cpp b/src/buffer.cpp index 23e24c154..bb692275d 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -96,52 +96,4 @@ SharedBuffer::dataImpl(offset_t offset) const { return m_data.get() + offset.v; } - -//////////////////////////////////////////////////////////////////////////////// -// MMapBuffer -//////////////////////////////////////////////////////////////////////////////// - -#ifdef ENABLE_USE_MMAP -MMapBuffer::MMapBuffer(int fd, offset_t offset, zsize_t size): - Buffer(size), - _offset(0) -{ - offset_t pa_offset(offset.v & ~(sysconf(_SC_PAGE_SIZE) - 1)); - _offset = offset-pa_offset; -#if defined(__APPLE__) || defined(__OpenBSD__) - #define MAP_FLAGS MAP_PRIVATE -#elif defined(__FreeBSD__) - #define MAP_FLAGS MAP_PRIVATE|MAP_PREFAULT_READ -#else - #define MAP_FLAGS MAP_PRIVATE|MAP_POPULATE -#endif -#if !MMAP_SUPPORT_64 - if(pa_offset.v >= INT32_MAX) { - throw MMapException(); - } -#endif - _data = (char*)mmap(NULL, size.v + _offset.v, PROT_READ, MAP_FLAGS, fd, pa_offset.v); - if (_data == MAP_FAILED ) - { - std::ostringstream s; - s << "Cannot mmap size " << size.v << " at off " << offset.v << " : " << strerror(errno); - throw std::runtime_error(s.str()); - } -#undef MAP_FLAGS -} - -MMapBuffer::~MMapBuffer() -{ - munmap(_data, size_.v + _offset.v); -} - -const char* -MMapBuffer::dataImpl(offset_t offset) const -{ - offset += _offset; - return _data + offset.v; -} - -#endif // ENABLE_USE_MMAP - } //zim diff --git a/src/buffer.h b/src/buffer.h index 9035bb31e..4e98f809d 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -86,23 +86,6 @@ class SharedBuffer : public Buffer { DataPtr m_data; }; - -#ifdef ENABLE_USE_MMAP -class MMapException : std::exception {}; - -class MMapBuffer : public Buffer { - public: - MMapBuffer(int fd, offset_t offset, zsize_t size); - ~MMapBuffer(); - - const char* dataImpl(offset_t offset) const; - - private: - offset_t _offset; - char* _data; -}; -#endif - }; #endif //ZIM_BUFFER_H_ diff --git a/src/file_reader.cpp b/src/file_reader.cpp index a3e2ec0c1..af6027bd6 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -32,6 +32,11 @@ #include +#ifndef _WIN32 +# include +# include +#endif + #if defined(_MSC_VER) # include # include @@ -120,6 +125,56 @@ void FileReader::read(char* dest, offset_t offset, zsize_t size) const { ASSERT(size.v, ==, 0U); } +#ifdef ENABLE_USE_MMAP +namespace +{ + +class MMapException : std::exception {}; + +char* +mmapReadOnly(int fd, offset_type offset, size_type size) +{ +#if defined(__APPLE__) || defined(__OpenBSD__) + const auto MAP_FLAGS = MAP_PRIVATE; +#elif defined(__FreeBSD__) + const auto MAP_FLAGS = MAP_PRIVATE|MAP_PREFAULT_READ; +#else + const auto MAP_FLAGS = MAP_PRIVATE|MAP_POPULATE; +#endif + + const auto p = (char*)mmap(NULL, size, PROT_READ, MAP_FLAGS, fd, offset); + if (p == MAP_FAILED ) + { + std::ostringstream s; + s << "Cannot mmap size " << size << " at off " << offset + << " : " << strerror(errno); + throw std::runtime_error(s.str()); + } + return p; +} + +SharedBuffer::DataPtr +makeMmappedBuffer(int fd, offset_t offset, zsize_t size) +{ + const offset_type pageAlignedOffset(offset.v & ~(sysconf(_SC_PAGE_SIZE) - 1)); + const size_t alignmentAdjustment = offset.v - pageAlignedOffset; + size += alignmentAdjustment; + +#if !MMAP_SUPPORT_64 + if(pageAlignedOffset >= INT32_MAX) { + throw MMapException(); + } +#endif + char* const mmappedAddress = mmapReadOnly(fd, pageAlignedOffset, size.v); + const auto munmapDeleter = [mmappedAddress, size](char* ) { + munmap(mmappedAddress, size.v); + }; + + return SharedBuffer::DataPtr(mmappedAddress+alignmentAdjustment, munmapDeleter); +} + +} // unnamed namespace +#endif // ENABLE_USE_MMAP std::shared_ptr FileReader::get_buffer(offset_t offset, zsize_t size) const { ASSERT(size, <=, _size); @@ -137,7 +192,7 @@ std::shared_ptr FileReader::get_buffer(offset_t offset, zsize_t si auto local_offset = offset + _offset - range.min; ASSERT(size, <=, part->size()); int fd = part->fhandle().getNativeHandle(); - return std::make_shared(fd, local_offset, size); + return std::make_shared(makeMmappedBuffer(fd, local_offset, size), size); } catch(MMapException& e) #endif { From fe9754f53f3e444bed539c9215f455355da45fea Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 14 Sep 2020 16:45:10 +0200 Subject: [PATCH 05/29] Remove SharedBuffer and make Buffer the only class to contain data. `Buffer` is no more a abstract class but the real class containing the data. This is mostly a (size aware) wrapper around a `shared_ptr`. --- src/buffer.cpp | 61 ++++++++++++++------------------------------- src/buffer.h | 50 +++++++++++-------------------------- src/cluster.cpp | 2 +- src/file_reader.cpp | 8 +++--- src/fileimpl.cpp | 2 +- test/cluster.cpp | 2 +- test/dirent.cpp | 2 +- test/header.cpp | 2 +- 8 files changed, 42 insertions(+), 87 deletions(-) diff --git a/src/buffer.cpp b/src/buffer.cpp index bb692275d..84424240a 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -34,37 +34,6 @@ namespace zim { -namespace { - -class SubBuffer : public Buffer { - public: - SubBuffer(const std::shared_ptr src, offset_t offset, zsize_t size) - : Buffer(size), - _data(src, src->data(offset)) - { - ASSERT(offset.v, <=, src->size().v); - ASSERT(offset.v+size.v, <=, src->size().v); - } - - const char* dataImpl(offset_t offset) const { - return _data.get() + offset.v; - } - - private: - const std::shared_ptr _data; -}; - -} // unnamed namespace - -std::shared_ptr Buffer::sub_buffer(offset_t offset, zsize_t size) const -{ - return std::make_shared(shared_from_this(), offset, size); -} - -//////////////////////////////////////////////////////////////////////////////// -// SharedBufferBuffer -//////////////////////////////////////////////////////////////////////////////// - namespace { @@ -75,24 +44,32 @@ struct NoDelete } // unnamed namespace +std::shared_ptr Buffer::sub_buffer(offset_t offset, zsize_t size) const +{ + ASSERT(offset.v, <=, m_size.v); + ASSERT(offset.v+size.v, <=, m_size.v); + auto sub_data = DataPtr(m_data, data(offset)); + return std::make_shared(sub_data, size); +} -SharedBuffer::SharedBuffer(const char* data, zsize_t size) - : Buffer(size), - m_data(data, NoDelete()) -{} - -SharedBuffer::SharedBuffer(const DataPtr& data, zsize_t size) - : Buffer(size), +Buffer::Buffer(const DataPtr& data, zsize_t size) + : m_size(size), m_data(data) +{ + ASSERT(m_size.v, <, SIZE_MAX); +} + +Buffer::Buffer(const char* data, zsize_t size) + : Buffer(DataPtr(data, NoDelete()), size) {} -SharedBuffer::SharedBuffer(zsize_t size) - : Buffer(size), - m_data(new char[size.v], std::default_delete()) +Buffer::Buffer(zsize_t size) + : Buffer(DataPtr(new char[size.v], std::default_delete()), size) {} const char* -SharedBuffer::dataImpl(offset_t offset) const { +Buffer::data(offset_t offset) const { + ASSERT(offset.v, <=, m_size.v); return m_data.get() + offset.v; } diff --git a/src/buffer.h b/src/buffer.h index 4e98f809d..3405bb61a 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -33,59 +33,37 @@ namespace zim { class Buffer : public std::enable_shared_from_this { - public: - explicit Buffer(zsize_t size) - : size_(size) - { - ASSERT(size_.v, <, SIZE_MAX); - }; + public: // types + typedef std::shared_ptr DataPtr; + + public: // functions + explicit Buffer(const char* data, zsize_t size); + explicit Buffer(const DataPtr& data, zsize_t size); + explicit Buffer(zsize_t size); Buffer(const Buffer& ) = delete; void operator=(const Buffer& ) = delete; - virtual ~Buffer() {}; - const char* data(offset_t offset=offset_t(0)) const { - ASSERT(offset.v, <=, size_.v); - return dataImpl(offset); - } + const char* data(offset_t offset=offset_t(0)) const; char at(offset_t offset) const { - return *(data(offset)); + return *(data(offset)); } - zsize_t size() const { return size_; } + zsize_t size() const { return m_size; } std::shared_ptr sub_buffer(offset_t offset, zsize_t size) const; template T as(offset_t offset) const { - ASSERT(offset.v, <, size_.v); - ASSERT(offset.v+sizeof(T), <=, size_.v); + ASSERT(offset.v, <, m_size.v); + ASSERT(offset.v+sizeof(T), <=, m_size.v); return fromLittleEndian(data(offset)); } - protected: - virtual const char* dataImpl(offset_t offset) const = 0; - - protected: - const zsize_t size_; -}; - - -class SharedBuffer : public Buffer { - public: // types - typedef std::shared_ptr DataPtr; - - public: // functions - SharedBuffer(const char* data, zsize_t size); - SharedBuffer(const DataPtr& data, zsize_t size); - SharedBuffer(zsize_t size); - - protected: - const char* dataImpl(offset_t offset) const; - private: //data + const zsize_t m_size; DataPtr m_data; }; -}; +} // zim namespace #endif //ZIM_BUFFER_H_ diff --git a/src/cluster.cpp b/src/cluster.cpp index a4991bb69..f4ea9a28b 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -81,7 +81,7 @@ getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp) throw std::logic_error("compressions should not be something else than zimcompLzma, zimComZip or zimcompZstd."); } auto shared_data = std::shared_ptr(uncompressed_data.release(), std::default_delete()); - return std::make_shared(shared_data, uncompressed_size); + return std::make_shared(shared_data, uncompressed_size); } std::unique_ptr diff --git a/src/file_reader.cpp b/src/file_reader.cpp index af6027bd6..fc3130963 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -153,7 +153,7 @@ mmapReadOnly(int fd, offset_type offset, size_type size) return p; } -SharedBuffer::DataPtr +Buffer::DataPtr makeMmappedBuffer(int fd, offset_t offset, zsize_t size) { const offset_type pageAlignedOffset(offset.v & ~(sysconf(_SC_PAGE_SIZE) - 1)); @@ -170,7 +170,7 @@ makeMmappedBuffer(int fd, offset_t offset, zsize_t size) munmap(mmappedAddress, size.v); }; - return SharedBuffer::DataPtr(mmappedAddress+alignmentAdjustment, munmapDeleter); + return Buffer::DataPtr(mmappedAddress+alignmentAdjustment, munmapDeleter); } } // unnamed namespace @@ -192,14 +192,14 @@ std::shared_ptr FileReader::get_buffer(offset_t offset, zsize_t si auto local_offset = offset + _offset - range.min; ASSERT(size, <=, part->size()); int fd = part->fhandle().getNativeHandle(); - return std::make_shared(makeMmappedBuffer(fd, local_offset, size), size); + return std::make_shared(makeMmappedBuffer(fd, local_offset, size), size); } catch(MMapException& e) #endif { // The range is several part, or we are on Windows. // We will have to do some memory copies :/ // [TODO] Use Windows equivalent for mmap. - auto ret_buffer = std::make_shared(size); + auto ret_buffer = std::make_shared(size); read(const_cast(ret_buffer->data()), offset, size); return ret_buffer; } diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 3b8f4505e..176df182a 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -314,7 +314,7 @@ offset_t readOffset(const Reader& reader, size_t idx) while (true) { bufferDirentZone.reserve(size_type(bufferSize)); zimReader->read(bufferDirentZone.data(), indexOffset, bufferSize); - const SharedBuffer direntBuffer(bufferDirentZone.data(), bufferSize); + const Buffer direntBuffer(bufferDirentZone.data(), bufferSize); try { dirent = std::make_shared(direntBuffer); } catch (InvalidSize&) { diff --git a/test/cluster.cpp b/test/cluster.cpp index 67fb873b8..f46067a9d 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -70,7 +70,7 @@ std::shared_ptr write_to_buffer(zim::writer::Cluster& cluster) cluster.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = std::make_shared(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); if (read(tmp_fd, const_cast(buf->data()), size) == -1) throw std::runtime_error("Cannot read"); diff --git a/test/dirent.cpp b/test/dirent.cpp index 7ff6e0a7a..c54011057 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -52,7 +52,7 @@ std::shared_ptr write_to_buffer(zim::writer::Dirent& dirent) dirent.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = std::make_shared(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); if (read(tmp_fd, const_cast(buf->data()), size) == -1) throw std::runtime_error("Cannot read"); diff --git a/test/header.cpp b/test/header.cpp index 5f8bfa8fc..fcd4d4a1f 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -50,7 +50,7 @@ std::shared_ptr write_to_buffer(zim::Fileheader &header) header.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = std::make_shared(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); if (read(tmp_fd, const_cast(buf->data()), size) == -1) throw std::runtime_error("Cannot read"); From 6382f686d72ec78d9539f78c1a3dc2d0e01f662a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 14 Sep 2020 17:16:56 +0200 Subject: [PATCH 06/29] Blob do not depend of Buffer. We don't need to keep a reference to the `Buffer` to keep the data alive. We can simply use the internal `shared_ptr`. --- include/zim/blob.h | 21 ++++++++++++--------- src/blob.cpp | 20 +++++++++++++++----- src/buffer.h | 3 +++ src/cluster.cpp | 4 ++-- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/include/zim/blob.h b/include/zim/blob.h index 928394e8d..46d324be0 100644 --- a/include/zim/blob.h +++ b/include/zim/blob.h @@ -29,22 +29,25 @@ namespace zim { - class Buffer; class Blob { - const char* _data; - size_type _size; - std::shared_ptr _buffer; + public: // types + using DataPtr = std::shared_ptr; - public: + public: // functions Blob(); Blob(const char* data, size_type size); - Blob(std::shared_ptr buffer); + Blob(const DataPtr& buffer, size_type size); - operator std::string() const { return std::string(_data, _size); } - const char* data() const { return _data; } - const char* end() const { return _data + _size; } + operator std::string() const { return std::string(_data.get(), _size); } + const char* data() const { return _data.get(); } + const char* end() const { return _data.get() + _size; } size_type size() const { return _size; } + + private: + DataPtr _data; + size_type _size; + }; inline std::ostream& operator<< (std::ostream& out, const Blob& blob) diff --git a/src/blob.cpp b/src/blob.cpp index fe5b82f90..18c24d142 100644 --- a/src/blob.cpp +++ b/src/blob.cpp @@ -24,23 +24,33 @@ namespace zim { +namespace +{ + +struct NoDelete +{ + template void operator()(T*) {} +}; + +} // unnamed namespace + + Blob::Blob() : _data(0), _size(0) {} Blob::Blob(const char* data, size_type size) - : _data(data), + : _data(DataPtr(data, NoDelete())), _size(size) { ASSERT(size, <, SIZE_MAX); ASSERT(data, <, (void*)(SIZE_MAX-size)); } -Blob::Blob(std::shared_ptr buffer) - : _data(buffer->data()), - _size(size_type(buffer->size())), - _buffer(buffer) +Blob::Blob(const DataPtr& buffer, size_type size) + : _data(buffer), + _size(size) {} diff --git a/src/buffer.h b/src/buffer.h index 3405bb61a..b80e73f15 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -29,6 +29,7 @@ #include "zim_types.h" #include "endian_tools.h" #include "debug.h" +#include namespace zim { @@ -59,6 +60,8 @@ class Buffer : public std::enable_shared_from_this { return fromLittleEndian(data(offset)); } + operator Blob() const { return Blob(m_data, m_size.v); } + private: //data const zsize_t m_size; DataPtr m_data; diff --git a/src/cluster.cpp b/src/cluster.cpp index f4ea9a28b..b69bcd65e 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -188,7 +188,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp return Blob(); } auto buffer = reader->get_buffer(offsets[blob_index_type(n)], blobSize); - return Blob(buffer); + return *buffer; } else { return Blob(); } @@ -207,7 +207,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp } offset += offsets[blob_index_type(n)]; auto buffer = reader->get_buffer(offset, size); - return Blob(buffer); + return *buffer; } else { return Blob(); } From 2a025ec09adfcc076ca31acc1b71aeee471d7dcc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 14 Sep 2020 17:31:06 +0200 Subject: [PATCH 07/29] Do not use external shared_ptr to keep buffer memory alive. As we now use a internal `shared_ptr` to properly free the buffer memory (and make `Blob` do the same instead of using a shared_ptr), we can directly use a `Buffer` instead of a `shared_ptr` --- include/zim/fileheader.h | 2 +- src/buffer.cpp | 36 ++++++++++++++++++++++++++---------- src/buffer.h | 21 +++++++++++---------- src/cluster.cpp | 10 +++++----- src/file_reader.cpp | 28 +++++++++++++--------------- src/file_reader.h | 8 ++++---- src/fileheader.cpp | 28 ++++++++++++++-------------- src/fileimpl.cpp | 36 ++++++++++++++++++------------------ src/reader.h | 8 ++++---- test/cluster.cpp | 8 ++++---- test/dirent.cpp | 16 ++++++++-------- test/header.cpp | 6 +++--- 12 files changed, 111 insertions(+), 96 deletions(-) diff --git a/include/zim/fileheader.h b/include/zim/fileheader.h index 4d678098e..019b52b90 100644 --- a/include/zim/fileheader.h +++ b/include/zim/fileheader.h @@ -72,7 +72,7 @@ namespace zim {} void write(int out_fd) const; - void read(std::shared_ptr buffer); + void read(const Buffer& buffer); // Do some sanity check, raise a ZimFileFormateError is // something is wrong. diff --git a/src/buffer.cpp b/src/buffer.cpp index 84424240a..a089ea1cd 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -44,12 +44,27 @@ struct NoDelete } // unnamed namespace -std::shared_ptr Buffer::sub_buffer(offset_t offset, zsize_t size) const +const Buffer Buffer::sub_buffer(offset_t offset, zsize_t size) const { ASSERT(offset.v, <=, m_size.v); ASSERT(offset.v+size.v, <=, m_size.v); auto sub_data = DataPtr(m_data, data(offset)); - return std::make_shared(sub_data, size); + return Buffer(sub_data, size); +} + +const Buffer Buffer::makeBuffer(const DataPtr& data, zsize_t size) +{ + return Buffer(data, size); +} + +const Buffer Buffer::makeBuffer(const char* data, zsize_t size) +{ + return Buffer(DataPtr(data, NoDelete()), size); +} + +Buffer Buffer::makeBuffer(zsize_t size) +{ + return Buffer(DataPtr(new char[size.v], std::default_delete()), size); } Buffer::Buffer(const DataPtr& data, zsize_t size) @@ -59,18 +74,19 @@ Buffer::Buffer(const DataPtr& data, zsize_t size) ASSERT(m_size.v, <, SIZE_MAX); } -Buffer::Buffer(const char* data, zsize_t size) - : Buffer(DataPtr(data, NoDelete()), size) -{} - -Buffer::Buffer(zsize_t size) - : Buffer(DataPtr(new char[size.v], std::default_delete()), size) -{} - const char* Buffer::data(offset_t offset) const { ASSERT(offset.v, <=, m_size.v); return m_data.get() + offset.v; } +char* +Buffer::data(offset_t offset) { + ASSERT(offset.v, <=, m_size.v); + // We know we can do this cast as the only way to get a non const Buffer is + // to use the factory allocating the memory for us. + return const_cast(m_data.get() + offset.v); +} + + } //zim diff --git a/src/buffer.h b/src/buffer.h index b80e73f15..53db43d48 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -33,25 +33,23 @@ namespace zim { -class Buffer : public std::enable_shared_from_this { +class Buffer { public: // types typedef std::shared_ptr DataPtr; public: // functions - explicit Buffer(const char* data, zsize_t size); - explicit Buffer(const DataPtr& data, zsize_t size); - explicit Buffer(zsize_t size); - - Buffer(const Buffer& ) = delete; - void operator=(const Buffer& ) = delete; + static const Buffer makeBuffer(const char* data, zsize_t size); + static const Buffer makeBuffer(const DataPtr& data, zsize_t size); + static Buffer makeBuffer(zsize_t size); const char* data(offset_t offset=offset_t(0)) const; + char* data(offset_t offset=offset_t(0)); char at(offset_t offset) const { return *(data(offset)); } zsize_t size() const { return m_size; } - std::shared_ptr sub_buffer(offset_t offset, zsize_t size) const; + const Buffer sub_buffer(offset_t offset, zsize_t size) const; template T as(offset_t offset) const { @@ -62,8 +60,11 @@ class Buffer : public std::enable_shared_from_this { operator Blob() const { return Blob(m_data, m_size.v); } - private: //data - const zsize_t m_size; + private: // functions + Buffer(const DataPtr& data, zsize_t size); + + private: // data + zsize_t m_size; DataPtr m_data; }; diff --git a/src/cluster.cpp b/src/cluster.cpp index b69bcd65e..dd300e991 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -58,7 +58,7 @@ zsize_t read_size(const Reader* reader, bool isExtended, offset_t offset) return _read_size(reader, offset); } -std::shared_ptr +const Buffer getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp) { zsize_t uncompressed_size(0); @@ -81,7 +81,7 @@ getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp) throw std::logic_error("compressions should not be something else than zimcompLzma, zimComZip or zimcompZstd."); } auto shared_data = std::shared_ptr(uncompressed_data.release(), std::default_delete()); - return std::make_shared(shared_data, uncompressed_size); + return Buffer::makeBuffer(shared_data, uncompressed_size); } std::unique_ptr @@ -162,7 +162,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp offset_t current = offset_t(sizeof(OFFSET_TYPE)); while (--n_offset) { - OFFSET_TYPE new_offset = buffer->as(current); + OFFSET_TYPE new_offset = buffer.as(current); ASSERT(new_offset, >=, offset); ASSERT(new_offset, <=, reader->size().v); @@ -188,7 +188,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp return Blob(); } auto buffer = reader->get_buffer(offsets[blob_index_type(n)], blobSize); - return *buffer; + return buffer; } else { return Blob(); } @@ -207,7 +207,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp } offset += offsets[blob_index_type(n)]; auto buffer = reader->get_buffer(offset, size); - return *buffer; + return buffer; } else { return Blob(); } diff --git a/src/file_reader.cpp b/src/file_reader.cpp index fc3130963..8dd4e7ce3 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -176,7 +176,7 @@ makeMmappedBuffer(int fd, offset_t offset, zsize_t size) } // unnamed namespace #endif // ENABLE_USE_MMAP -std::shared_ptr FileReader::get_buffer(offset_t offset, zsize_t size) const { +const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const { ASSERT(size, <=, _size); #ifdef ENABLE_USE_MMAP try { @@ -192,15 +192,15 @@ std::shared_ptr FileReader::get_buffer(offset_t offset, zsize_t si auto local_offset = offset + _offset - range.min; ASSERT(size, <=, part->size()); int fd = part->fhandle().getNativeHandle(); - return std::make_shared(makeMmappedBuffer(fd, local_offset, size), size); + return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size); } catch(MMapException& e) #endif { // The range is several part, or we are on Windows. // We will have to do some memory copies :/ // [TODO] Use Windows equivalent for mmap. - auto ret_buffer = std::make_shared(size); - read(const_cast(ret_buffer->data()), offset, size); + auto ret_buffer = Buffer::makeBuffer(size); + read(const_cast(ret_buffer.data()), offset, size); return ret_buffer; } } @@ -221,45 +221,43 @@ std::unique_ptr FileReader::sub_reader(offset_t offset, zsize_t si //BufferReader::BufferReader(std::shared_ptr source) // : source(source) {} -std::shared_ptr BufferReader::get_buffer(offset_t offset, zsize_t size) const +const Buffer BufferReader::get_buffer(offset_t offset, zsize_t size) const { - return source->sub_buffer(offset, size); + return source.sub_buffer(offset, size); } std::unique_ptr BufferReader::sub_reader(offset_t offset, zsize_t size) const { - //auto source_addr = source->data(0); auto sub_buff = get_buffer(offset, size); - //auto buff_addr = sub_buff->data(0); std::unique_ptr sub_read(new BufferReader(sub_buff)); return sub_read; } zsize_t BufferReader::size() const { - return source->size(); + return source.size(); } offset_t BufferReader::offset() const { - return offset_t((offset_type)(static_cast(source->data(offset_t(0))))); + return offset_t((offset_type)(static_cast(source.data(offset_t(0))))); } void BufferReader::read(char* dest, offset_t offset, zsize_t size) const { - ASSERT(offset.v, <, source->size().v); - ASSERT(offset+offset_t(size.v), <=, offset_t(source->size().v)); + ASSERT(offset.v, <, source.size().v); + ASSERT(offset+offset_t(size.v), <=, offset_t(source.size().v)); if (! size ) { return; } - memcpy(dest, source->data(offset), size.v); + memcpy(dest, source.data(offset), size.v); } char BufferReader::read(offset_t offset) const { - ASSERT(offset.v, <, source->size().v); + ASSERT(offset.v, <, source.size().v); char dest; - dest = *source->data(offset); + dest = *source.data(offset); return dest; } diff --git a/src/file_reader.h b/src/file_reader.h index 65d4aaaff..94656bdb0 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -36,7 +36,7 @@ class FileReader : public Reader { char read(offset_t offset) const; void read(char* dest, offset_t offset, zsize_t size) const; - std::shared_ptr get_buffer(offset_t offset, zsize_t size) const; + const Buffer get_buffer(offset_t offset, zsize_t size) const; std::unique_ptr sub_reader(offset_t offest, zsize_t size) const; @@ -51,7 +51,7 @@ class FileReader : public Reader { class BufferReader : public Reader { public: - BufferReader(std::shared_ptr source) + BufferReader(const Buffer& source) : source(source) {} virtual ~BufferReader() {}; @@ -60,11 +60,11 @@ class BufferReader : public Reader { void read(char* dest, offset_t offset, zsize_t size) const; char read(offset_t offset) const; - std::shared_ptr get_buffer(offset_t offset, zsize_t size) const; + const Buffer get_buffer(offset_t offset, zsize_t size) const; std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; private: - std::shared_ptr source; + const Buffer source; }; }; diff --git a/src/fileheader.cpp b/src/fileheader.cpp index 0e4899009..1bb465874 100644 --- a/src/fileheader.cpp +++ b/src/fileheader.cpp @@ -62,9 +62,9 @@ namespace zim _write(out_fd, header, Fileheader::size); } - void Fileheader::read(std::shared_ptr buffer) + void Fileheader::read(const Buffer& buffer) { - uint32_t magicNumber = buffer->as(offset_t(0)); + uint32_t magicNumber = buffer.as(offset_t(0)); if (magicNumber != Fileheader::zimMagic) { log_error("invalid magic number " << magicNumber << " found - " @@ -72,7 +72,7 @@ namespace zim throw ZimFileFormatError("Invalid magic number"); } - uint16_t major_version = buffer->as(offset_t(4)); + uint16_t major_version = buffer.as(offset_t(4)); if (major_version != zimClassicMajorVersion && major_version != zimExtendedMajorVersion) { log_error("invalid zimfile major version " << major_version << " found - " @@ -81,21 +81,21 @@ namespace zim } setMajorVersion(major_version); - setMinorVersion(buffer->as(offset_t(6))); + setMinorVersion(buffer.as(offset_t(6))); Uuid uuid; - std::copy(buffer->data(offset_t(8)), buffer->data(offset_t(24)), uuid.data); + std::copy(buffer.data(offset_t(8)), buffer.data(offset_t(24)), uuid.data); setUuid(uuid); - setArticleCount(buffer->as(offset_t(24))); - setClusterCount(buffer->as(offset_t(28))); - setUrlPtrPos(buffer->as(offset_t(32))); - setTitleIdxPos(buffer->as(offset_t(40))); - setClusterPtrPos(buffer->as(offset_t(48))); - setMimeListPos(buffer->as(offset_t(56))); - setMainPage(buffer->as(offset_t(64))); - setLayoutPage(buffer->as(offset_t(68))); - setChecksumPos(buffer->as(offset_t(72))); + setArticleCount(buffer.as(offset_t(24))); + setClusterCount(buffer.as(offset_t(28))); + setUrlPtrPos(buffer.as(offset_t(32))); + setTitleIdxPos(buffer.as(offset_t(40))); + setClusterPtrPos(buffer.as(offset_t(48))); + setMimeListPos(buffer.as(offset_t(56))); + setMainPage(buffer.as(offset_t(64))); + setLayoutPage(buffer.as(offset_t(68))); + setChecksumPos(buffer.as(offset_t(72))); sanity_check(); } diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 176df182a..a92c756cc 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -151,7 +151,7 @@ offset_t readOffset(const Reader& reader, size_t idx) offset_t current = offset_t(0); while (current.v < size.v) { - offset_type len = strlen(buffer->data(current)); + offset_type len = strlen(buffer.data(current)); if (len == 0) { break; @@ -161,7 +161,7 @@ offset_t readOffset(const Reader& reader, size_t idx) throw(ZimFileFormatError("Error getting mimelists.")); } - std::string mimeType(buffer->data(current), len); + std::string mimeType(buffer.data(current), len); mimeTypes.push_back(mimeType); current += (len + 1); @@ -314,7 +314,7 @@ offset_t readOffset(const Reader& reader, size_t idx) while (true) { bufferDirentZone.reserve(size_type(bufferSize)); zimReader->read(bufferDirentZone.data(), indexOffset, bufferSize); - const Buffer direntBuffer(bufferDirentZone.data(), bufferSize); + auto direntBuffer = Buffer::makeBuffer(bufferDirentZone.data(), bufferSize); try { dirent = std::make_shared(direntBuffer); } catch (InvalidSize&) { @@ -461,27 +461,27 @@ offset_t readOffset(const Reader& reader, size_t idx) if (!header.hasChecksum()) return std::string(); - std::shared_ptr chksum; try { - chksum = zimReader->get_buffer(offset_t(header.getChecksumPos()), zsize_t(16)); + auto chksum = zimReader->get_buffer(offset_t(header.getChecksumPos()), zsize_t(16)); + + char hexdigest[33]; + hexdigest[32] = '\0'; + static const char hex[] = "0123456789abcdef"; + char* p = hexdigest; + for (int i = 0; i < 16; ++i) + { + uint8_t v = chksum.at(offset_t(i)); + *p++ = hex[v >> 4]; + *p++ = hex[v & 0xf]; + } + log_debug("chksum=" << hexdigest); + return hexdigest; } catch (...) { log_warn("error reading checksum"); return std::string(); } - char hexdigest[33]; - hexdigest[32] = '\0'; - static const char hex[] = "0123456789abcdef"; - char* p = hexdigest; - for (int i = 0; i < 16; ++i) - { - uint8_t v = chksum->at(offset_t(i)); - *p++ = hex[v >> 4]; - *p++ = hex[v & 0xf]; - } - log_debug("chksum=" << hexdigest); - return hexdigest; } bool FileImpl::verify() @@ -519,7 +519,7 @@ offset_t readOffset(const Reader& reader, size_t idx) auto chksumFile = zimReader->get_buffer(offset_t(header.getChecksumPos()), zsize_t(16)); zim_MD5Final(chksumCalc, &md5ctx); - if (std::memcmp(chksumFile->data(), chksumCalc, 16) != 0) + if (std::memcmp(chksumFile.data(), chksumCalc, 16) != 0) { return false; } diff --git a/src/reader.h b/src/reader.h index 0ed47142a..7269b7029 100644 --- a/src/reader.h +++ b/src/reader.h @@ -26,9 +26,9 @@ #include "endian_tools.h" #include "debug.h" -namespace zim { +#include "buffer.h" -class Buffer; +namespace zim { class Reader { public: @@ -47,8 +47,8 @@ class Reader { } virtual char read(offset_t offset) const = 0; - virtual std::shared_ptr get_buffer(offset_t offset, zsize_t size) const = 0; - std::shared_ptr get_buffer(offset_t offset) const { + virtual const Buffer get_buffer(offset_t offset, zsize_t size) const = 0; + const Buffer get_buffer(offset_t offset) const { return get_buffer(offset, zsize_t(size().v-offset.v)); } virtual std::unique_ptr sub_reader(offset_t offset, zsize_t size) const = 0; diff --git a/test/cluster.cpp b/test/cluster.cpp index f46067a9d..7ce08d292 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -62,7 +62,7 @@ namespace using zim::unittests::TempFile; -std::shared_ptr write_to_buffer(zim::writer::Cluster& cluster) +zim::Buffer write_to_buffer(zim::writer::Cluster& cluster) { const TempFile tmpFile("test_cluster"); cluster.close(); @@ -70,9 +70,9 @@ std::shared_ptr write_to_buffer(zim::writer::Cluster& cluster) cluster.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, const_cast(buf->data()), size) == -1) + if (read(tmp_fd, buf.data(), size) == -1) throw std::runtime_error("Cannot read"); return buf; } @@ -239,7 +239,7 @@ TEST(ClusterTest, read_write_extended_cluster) std::string blob2("abcdefghijklmnopqrstuvwxyz"); zim::size_type bigger_than_4g = 1024LL*1024LL*1024LL*4LL+1024LL; - std::shared_ptr buffer; + auto buffer = zim::Buffer::makeBuffer(nullptr, zim::zsize_t(0)); { char* blob3 = nullptr; try { diff --git a/test/dirent.cpp b/test/dirent.cpp index c54011057..5888572a5 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -45,16 +45,16 @@ namespace using zim::unittests::TempFile; -std::shared_ptr write_to_buffer(zim::writer::Dirent& dirent) +const zim::Buffer write_to_buffer(zim::writer::Dirent& dirent) { const TempFile tmpFile("test_dirent"); const auto tmp_fd = tmpFile.fd(); dirent.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, const_cast(buf->data()), size) == -1) + if (read(tmp_fd, buf.data(), size) == -1) throw std::runtime_error("Cannot read"); return buf; @@ -108,7 +108,7 @@ TEST(DirentTest, read_write_article_dirent) ASSERT_EQ(dirent.getVersion(), 0U); auto buffer = write_to_buffer(dirent); - zim::Dirent dirent2(*buffer); + zim::Dirent dirent2(buffer); ASSERT_TRUE(!dirent2.isRedirect()); ASSERT_EQ(dirent2.getNamespace(), 'A'); @@ -133,7 +133,7 @@ TEST(DirentTest, read_write_article_dirent_unicode) ASSERT_EQ(dirent.getBlobNumber().v, 1234U); auto buffer = write_to_buffer(dirent); - zim::Dirent dirent2(*buffer); + zim::Dirent dirent2(buffer); ASSERT_TRUE(!dirent2.isRedirect()); ASSERT_EQ(dirent2.getNamespace(), 'A'); @@ -158,7 +158,7 @@ TEST(DirentTest, read_write_redirect_dirent) ASSERT_EQ(dirent.getRedirectIndex().v, 321U); auto buffer = write_to_buffer(dirent); - zim::Dirent dirent2(*buffer); + zim::Dirent dirent2(buffer); ASSERT_TRUE(dirent2.isRedirect()); ASSERT_EQ(dirent2.getNamespace(), 'A'); @@ -180,7 +180,7 @@ TEST(DirentTest, read_write_linktarget_dirent) ASSERT_EQ(dirent.getUrl(), "Bar"); auto buffer = write_to_buffer(dirent); - zim::Dirent dirent2(*buffer); + zim::Dirent dirent2(buffer); ASSERT_TRUE(!dirent2.isRedirect()); ASSERT_TRUE(dirent2.isLinktarget()); @@ -203,7 +203,7 @@ TEST(DirentTest, read_write_deleted_dirent) ASSERT_EQ(dirent.getUrl(), "Bar"); auto buffer = write_to_buffer(dirent); - zim::Dirent dirent2(*buffer); + zim::Dirent dirent2(buffer); ASSERT_TRUE(!dirent2.isRedirect()); ASSERT_TRUE(!dirent2.isLinktarget()); diff --git a/test/header.cpp b/test/header.cpp index fcd4d4a1f..19b490695 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -43,16 +43,16 @@ namespace using zim::unittests::TempFile; -std::shared_ptr write_to_buffer(zim::Fileheader &header) +zim::Buffer write_to_buffer(zim::Fileheader &header) { const TempFile tmpFile("test_header"); const auto tmp_fd = tmpFile.fd(); header.write(tmp_fd); auto size = lseek(tmp_fd, 0, SEEK_END); - auto buf = std::make_shared(zim::zsize_t(size)); + auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, const_cast(buf->data()), size) == -1) + if (read(tmp_fd, buf.data(), size) == -1) throw std::runtime_error("Cannot read"); return buf; } From f19fd257b2936c10159e9661d38b71c8ac90b66c Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 28 Aug 2020 19:27:47 +0400 Subject: [PATCH 08/29] Introduced zim::IDataStream --- src/idatastream.cpp | 33 ++++++++++++ src/idatastream.h | 116 +++++++++++++++++++++++++++++++++++++++++++ src/meson.build | 1 + test/idatastream.cpp | 58 ++++++++++++++++++++++ test/meson.build | 3 +- 5 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 src/idatastream.cpp create mode 100644 src/idatastream.h create mode 100644 test/idatastream.cpp diff --git a/src/idatastream.cpp b/src/idatastream.cpp new file mode 100644 index 000000000..aff0b95ac --- /dev/null +++ b/src/idatastream.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "idatastream.h" + +namespace zim +{ + +IDataStream::Blob +IDataStream::readBlobImpl(size_t size) +{ + std::shared_ptr buf(new char[size], std::default_delete()); + readImpl(buf.get(), size); + return Blob(buf, size); +} + +} // namespace zim diff --git a/src/idatastream.h b/src/idatastream.h new file mode 100644 index 000000000..368ffffef --- /dev/null +++ b/src/idatastream.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_IDATASTREAM_H +#define ZIM_IDATASTREAM_H + +#include +#include + +#include "endian_tools.h" + +namespace zim +{ + +// IDataStream is a simple interface for sequential iteration over a stream +// of values of built-in/primitive types and/or opaque binary objects (blobs). +// An example usage: +// +// void foo(IDataStream& s) +// { +// const uint32_t n = s.read(); +// for(uint32_t i=0; i < n; ++i) +// { +// const uint16_t blobSize = s.read(); +// IDataStream::Blob blob = s.readBlob(blobSize); +// bar(blob, blobSize); +// } +// } +// +class IDataStream +{ +public: // types + class Blob + { + private: // types + typedef std::shared_ptr DataPtr; + + public: // functions + Blob(const DataPtr& data, size_t size) : data_(data) , size_(size) {} + + const char* data() const { return data_.get(); } + size_t size() const { return size_; } + + private: // data + DataPtr data_; + size_t size_; + }; + +public: // functions + virtual ~IDataStream() {} + + // Reads a value of the said type from the stream + // + // For best portability this function should be used with types of known + // bit-width (int32_t, uint16_t, etc) rather than builtin types with + // unknown bit-width (int, unsigned, etc). + template T read(); + + // Reads a blob of the specified size from the stream + Blob readBlob(size_t size); + +private: // virtual methods + // Reads exactly 'nbytes' bytes into the provided buffer 'buf' + // (which must be at least that big). Throws an exception if + // more bytes are requested than can be retrieved. + virtual void readImpl(void* buf, size_t nbytes) = 0; + + // By default a blob is returned as an independent object owning + // its own buffer. However, the function readBlobImpl() can be + // overriden so that it returns a blob referring to arbitrary + // pre-existing memory. + virtual Blob readBlobImpl(size_t size); +}; + +//////////////////////////////////////////////////////////////////////////////// +// Implementation of IDataStream +//////////////////////////////////////////////////////////////////////////////// + +// XXX: Assuming that opaque binary data retrieved via 'readImpl()' +// XXX: is encoded in little-endian form. +template +inline T +IDataStream::read() +{ + const size_t N = sizeof(T); + char buf[N]; + readImpl(&buf, N); + return fromLittleEndian(buf); // XXX: This handles only integral types +} + +inline +IDataStream::Blob +IDataStream::readBlob(size_t size) +{ + return readBlobImpl(size); +} + +} // namespace zim + +#endif // ZIM_IDATASTREAM_H diff --git a/src/meson.build b/src/meson.build index 4d3ca342b..8a2f9627c 100644 --- a/src/meson.build +++ b/src/meson.build @@ -26,6 +26,7 @@ common_sources = [ 'levenshtein.cpp', 'tools.cpp', 'compression.cpp', + 'idatastream.cpp', 'writer/creator.cpp', 'writer/article.cpp', 'writer/cluster.cpp', diff --git a/test/idatastream.cpp b/test/idatastream.cpp new file mode 100644 index 000000000..d6aa4025b --- /dev/null +++ b/test/idatastream.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "idatastream.h" + +#include "gtest/gtest.h" + +namespace +{ + +using zim::IDataStream; + +// Implement the IDataStream interface in the simplest way +class InfiniteZeroStream : public IDataStream +{ + void readImpl(void* buf, size_t nbytes) { memset(buf, 0, nbytes); } +}; + +// ... and test that it compiles and works as intended + +TEST(IDataStream, read) +{ + InfiniteZeroStream izs; + IDataStream& ids = izs; + EXPECT_EQ(0, ids.read()); + EXPECT_EQ(0L, ids.read()); + + // zim::fromLittleEndian() handles only integer types + // EXPECT_EQ(0.0, ids.read()); +} + +TEST(IDataStream, readBlob) +{ + const size_t N = 16; + const char zerobuf[N] = {0}; + InfiniteZeroStream izs; + IDataStream& ids = izs; + const IDataStream::Blob blob = ids.readBlob(N); + EXPECT_EQ(0, memcmp(blob.data(), zerobuf, N)); +} + +} // unnamed namespace diff --git a/test/meson.build b/test/meson.build index 4f88eae77..762ae97c2 100644 --- a/test/meson.build +++ b/test/meson.build @@ -11,7 +11,8 @@ tests = [ 'iterator', 'find', 'compression', - 'impl_find' + 'impl_find', + 'idatastream' ] if gtest_dep.found() and not meson.is_cross_build() From 86ef980d81636d328f9132f46b9914459135d90e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 14 Sep 2020 18:06:47 +0200 Subject: [PATCH 09/29] IStreamReader allow to get a reader. - Rename `IDataStream` to `IStreamReader` - `IStreamReader` allow to get a (sub)reader instead of a `IDataStream::Blob`. While it may seem a bit odd, it allows streamReader to delay the actual read when we really want the data. --- src/{idatastream.cpp => istreamreader.cpp} | 13 +++--- src/{idatastream.h => istreamreader.h} | 47 ++++----------------- src/meson.build | 2 +- test/{idatastream.cpp => istreamreader.cpp} | 25 ++++++----- test/meson.build | 2 +- 5 files changed, 32 insertions(+), 57 deletions(-) rename src/{idatastream.cpp => istreamreader.cpp} (76%) rename src/{idatastream.h => istreamreader.h} (72%) rename test/{idatastream.cpp => istreamreader.cpp} (66%) diff --git a/src/idatastream.cpp b/src/istreamreader.cpp similarity index 76% rename from src/idatastream.cpp rename to src/istreamreader.cpp index aff0b95ac..889cc901b 100644 --- a/src/idatastream.cpp +++ b/src/istreamreader.cpp @@ -17,17 +17,18 @@ * */ -#include "idatastream.h" +#include "istreamreader.h" +#include "file_reader.h" namespace zim { -IDataStream::Blob -IDataStream::readBlobImpl(size_t size) +std::unique_ptr +IStreamReader::sub_reader(zsize_t size) { - std::shared_ptr buf(new char[size], std::default_delete()); - readImpl(buf.get(), size); - return Blob(buf, size); + auto buffer = Buffer::makeBuffer(size); + readImpl(buffer.data(), size); + return std::unique_ptr(new BufferReader(buffer)); } } // namespace zim diff --git a/src/idatastream.h b/src/istreamreader.h similarity index 72% rename from src/idatastream.h rename to src/istreamreader.h index 368ffffef..74055a225 100644 --- a/src/idatastream.h +++ b/src/istreamreader.h @@ -24,6 +24,7 @@ #include #include "endian_tools.h" +#include "reader.h" namespace zim { @@ -43,27 +44,10 @@ namespace zim // } // } // -class IDataStream +class IStreamReader { -public: // types - class Blob - { - private: // types - typedef std::shared_ptr DataPtr; - - public: // functions - Blob(const DataPtr& data, size_t size) : data_(data) , size_(size) {} - - const char* data() const { return data_.get(); } - size_t size() const { return size_; } - - private: // data - DataPtr data_; - size_t size_; - }; - public: // functions - virtual ~IDataStream() {} + virtual ~IStreamReader() = default; // Reads a value of the said type from the stream // @@ -73,19 +57,13 @@ class IDataStream template T read(); // Reads a blob of the specified size from the stream - Blob readBlob(size_t size); + virtual std::unique_ptr sub_reader(zsize_t size); private: // virtual methods // Reads exactly 'nbytes' bytes into the provided buffer 'buf' // (which must be at least that big). Throws an exception if // more bytes are requested than can be retrieved. - virtual void readImpl(void* buf, size_t nbytes) = 0; - - // By default a blob is returned as an independent object owning - // its own buffer. However, the function readBlobImpl() can be - // overriden so that it returns a blob referring to arbitrary - // pre-existing memory. - virtual Blob readBlobImpl(size_t size); + virtual void readImpl(char* buf, zsize_t nbytes) = 0; }; //////////////////////////////////////////////////////////////////////////////// @@ -96,21 +74,14 @@ class IDataStream // XXX: is encoded in little-endian form. template inline T -IDataStream::read() +IStreamReader::read() { - const size_t N = sizeof(T); - char buf[N]; - readImpl(&buf, N); + const zsize_t N(sizeof(T)); + char buf[N.v]; + readImpl(buf, N); return fromLittleEndian(buf); // XXX: This handles only integral types } -inline -IDataStream::Blob -IDataStream::readBlob(size_t size) -{ - return readBlobImpl(size); -} - } // namespace zim #endif // ZIM_IDATASTREAM_H diff --git a/src/meson.build b/src/meson.build index 8a2f9627c..28caaf74e 100644 --- a/src/meson.build +++ b/src/meson.build @@ -26,7 +26,7 @@ common_sources = [ 'levenshtein.cpp', 'tools.cpp', 'compression.cpp', - 'idatastream.cpp', + 'istreamreader.cpp', 'writer/creator.cpp', 'writer/article.cpp', 'writer/cluster.cpp', diff --git a/test/idatastream.cpp b/test/istreamreader.cpp similarity index 66% rename from test/idatastream.cpp rename to test/istreamreader.cpp index d6aa4025b..50786c831 100644 --- a/test/idatastream.cpp +++ b/test/istreamreader.cpp @@ -17,27 +17,27 @@ * */ -#include "idatastream.h" +#include "istreamreader.h" #include "gtest/gtest.h" namespace { -using zim::IDataStream; +using zim::IStreamReader; -// Implement the IDataStream interface in the simplest way -class InfiniteZeroStream : public IDataStream +// Implement the IStreamReader interface in the simplest way +class InfiniteZeroStream : public IStreamReader { - void readImpl(void* buf, size_t nbytes) { memset(buf, 0, nbytes); } + void readImpl(char* buf, zim::zsize_t nbytes) { memset(buf, 0, nbytes.v); } }; // ... and test that it compiles and works as intended -TEST(IDataStream, read) +TEST(IStreamReader, read) { InfiniteZeroStream izs; - IDataStream& ids = izs; + IStreamReader& ids = izs; EXPECT_EQ(0, ids.read()); EXPECT_EQ(0L, ids.read()); @@ -45,14 +45,17 @@ TEST(IDataStream, read) // EXPECT_EQ(0.0, ids.read()); } -TEST(IDataStream, readBlob) +TEST(IStreamReader, sub_reader) { const size_t N = 16; const char zerobuf[N] = {0}; InfiniteZeroStream izs; - IDataStream& ids = izs; - const IDataStream::Blob blob = ids.readBlob(N); - EXPECT_EQ(0, memcmp(blob.data(), zerobuf, N)); + IStreamReader& ids = izs; + auto subReader = ids.sub_reader(zim::zsize_t(N)); + EXPECT_EQ(subReader->size().v, N); + auto buffer = subReader->get_buffer(zim::offset_t(0), zim::zsize_t(N)); + EXPECT_EQ(buffer.size().v, N); + EXPECT_EQ(0, memcmp(buffer.data(), zerobuf, N)); } } // unnamed namespace diff --git a/test/meson.build b/test/meson.build index 762ae97c2..7e58266cd 100644 --- a/test/meson.build +++ b/test/meson.build @@ -12,7 +12,7 @@ tests = [ 'find', 'compression', 'impl_find', - 'idatastream' + 'istreamreader' ] if gtest_dep.found() and not meson.is_cross_build() From a4ed8320f60e21104a519d0fdcc36255fd425530 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 30 Aug 2020 14:20:53 +0400 Subject: [PATCH 10/29] Enter DecodedDataStream --- src/decodeddatastream.h | 97 +++++++++++++++++++++++++++++++++++ test/decodeddatastream.cpp | 100 +++++++++++++++++++++++++++++++++++++ test/meson.build | 3 +- 3 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 src/decodeddatastream.h create mode 100644 test/decodeddatastream.cpp diff --git a/src/decodeddatastream.h b/src/decodeddatastream.h new file mode 100644 index 000000000..ca8830b10 --- /dev/null +++ b/src/decodeddatastream.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_DECODECDATASTREAM_H +#define ZIM_DECODECDATASTREAM_H + +#include "compression.h" +#include "idatastream.h" + +namespace zim +{ + +template +class DecodedDataStream : public IDataStream +{ +private: // constants + enum { CHUNK_SIZE = 1024 }; + +public: // functions + DecodedDataStream(std::unique_ptr inputData, size_t inputSize) + : encodedDataStream_(std::move(inputData)) + , inputBytesLeft_(inputSize) + , encodedDataChunk_() + { + Decoder::init_stream_decoder(&decoderState_, nullptr); + readNextChunk(); + } + + ~DecodedDataStream() + { + Decoder::stream_end_decode(&decoderState_); + } + +private: // functions + void readNextChunk() + { + const size_t n = std::min(size_t(CHUNK_SIZE), inputBytesLeft_); + encodedDataChunk_ = encodedDataStream_->readBlob(n); + inputBytesLeft_ -= n; + // XXX: ugly C-style cast (casting away constness) on the next line + decoderState_.next_in = (unsigned char*)encodedDataChunk_.data(); + decoderState_.avail_in = encodedDataChunk_.size(); + } + + CompStatus decodeMoreBytes() + { + CompStep step = CompStep::STEP; + if ( decoderState_.avail_in == 0 ) + { + if ( inputBytesLeft_ == 0 ) + step = CompStep::FINISH; + else + readNextChunk(); + } + + return Decoder::stream_run_decode(&decoderState_, step); + } + + void readImpl(void* buf, size_t nbytes) override + { + decoderState_.next_out = (unsigned char*)buf; + decoderState_.avail_out = nbytes; + while ( decoderState_.avail_out != 0 ) + { + decodeMoreBytes(); + } + } + +private: // types + typedef typename Decoder::stream_t DecoderState; + +private: // data + std::unique_ptr encodedDataStream_; + size_t inputBytesLeft_; // count of bytes left in the input stream + DecoderState decoderState_; + IDataStream::Blob encodedDataChunk_; +}; + +} // namespace zim + +#endif // ZIM_DECODECDATASTREAM_H diff --git a/test/decodeddatastream.cpp b/test/decodeddatastream.cpp new file mode 100644 index 000000000..e86f3a0f9 --- /dev/null +++ b/test/decodeddatastream.cpp @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "decodeddatastream.h" +#include "bufdatastream.h" + +#include "gtest/gtest.h" + +namespace +{ + +template +std::string +compress(const std::string& data) +{ + zim::Compressor compressor(data.size()); + compressor.init(const_cast(data.c_str())); + compressor.feed(data.c_str(), data.size()); + zim::zsize_t comp_size; + const auto comp_data = compressor.get_data(&comp_size); + return std::string(comp_data.get(), comp_size.v); +} + +std::string operator*(const std::string& s, unsigned N) +{ + std::string result; + for (unsigned i=0; i +class DecodedDataStreamTest : public testing::Test { + protected: + typedef T CompressionInfo; +}; + +using CompressionTypes = ::testing::Types< + LZMA_INFO, + ZSTD_INFO +#if defined(ENABLE_ZLIB) + ,ZIP_INFO +#endif +>; + +TYPED_TEST_CASE(DecodedDataStreamTest, CompressionTypes); + +TYPED_TEST(DecodedDataStreamTest, justCompressedData) { + typedef typename TestFixture::CompressionInfo CompressionInfo; + + const int N = 10; + const std::string s("DecodedDataStream should work correctly"); + const std::string compData = compress(s*N); + + std::unique_ptr bds(new zim::BufDataStream(compData.data(), compData.size())); + zim::DecodedDataStream dds(std::move(bds), compData.size()); + for (int i=0; i(s*N); + const std::string inputData = compData + std::string(10, '\0'); + + std::unique_ptr bds(new zim::BufDataStream(inputData.data(), inputData.size())); + zim::DecodedDataStream dds(std::move(bds), inputData.size()); + for (int i=0; i Date: Tue, 15 Sep 2020 10:15:30 +0200 Subject: [PATCH 11/29] Adapt DecoderStreamReader to wrap a Reader instead of a InputStream. --- ...odeddatastream.h => decoderstreamreader.h} | 53 ++++++++++--------- ...datastream.cpp => decoderstreamreader.cpp} | 42 ++++++++------- test/meson.build | 2 +- 3 files changed, 52 insertions(+), 45 deletions(-) rename src/{decodeddatastream.h => decoderstreamreader.h} (53%) rename test/{decodeddatastream.cpp => decoderstreamreader.cpp} (55%) diff --git a/src/decodeddatastream.h b/src/decoderstreamreader.h similarity index 53% rename from src/decodeddatastream.h rename to src/decoderstreamreader.h index ca8830b10..e151b9d25 100644 --- a/src/decodeddatastream.h +++ b/src/decoderstreamreader.h @@ -21,62 +21,64 @@ #define ZIM_DECODECDATASTREAM_H #include "compression.h" -#include "idatastream.h" +#include "istreamreader.h" namespace zim { template -class DecodedDataStream : public IDataStream +class DecoderStreamReader : public IStreamReader { private: // constants enum { CHUNK_SIZE = 1024 }; public: // functions - DecodedDataStream(std::unique_ptr inputData, size_t inputSize) - : encodedDataStream_(std::move(inputData)) - , inputBytesLeft_(inputSize) - , encodedDataChunk_() + DecoderStreamReader(std::shared_ptr inputReader) + : m_encodedDataReader(inputReader), + m_currentInputOffset(0), + m_inputBytesLeft(inputReader->size()), + m_encodedDataChunk(Buffer::makeBuffer(zsize_t(CHUNK_SIZE))) { - Decoder::init_stream_decoder(&decoderState_, nullptr); + Decoder::init_stream_decoder(&m_decoderState, nullptr); readNextChunk(); } - ~DecodedDataStream() + ~DecoderStreamReader() { - Decoder::stream_end_decode(&decoderState_); + Decoder::stream_end_decode(&m_decoderState); } private: // functions void readNextChunk() { - const size_t n = std::min(size_t(CHUNK_SIZE), inputBytesLeft_); - encodedDataChunk_ = encodedDataStream_->readBlob(n); - inputBytesLeft_ -= n; + const auto n = std::min(zsize_t(CHUNK_SIZE), m_inputBytesLeft); + m_encodedDataChunk = m_encodedDataReader->get_buffer(m_currentInputOffset, n); + m_currentInputOffset += n; + m_inputBytesLeft -= n; // XXX: ugly C-style cast (casting away constness) on the next line - decoderState_.next_in = (unsigned char*)encodedDataChunk_.data(); - decoderState_.avail_in = encodedDataChunk_.size(); + m_decoderState.next_in = (unsigned char*)m_encodedDataChunk.data(); + m_decoderState.avail_in = m_encodedDataChunk.size().v; } CompStatus decodeMoreBytes() { CompStep step = CompStep::STEP; - if ( decoderState_.avail_in == 0 ) + if ( m_decoderState.avail_in == 0 ) { - if ( inputBytesLeft_ == 0 ) + if ( m_inputBytesLeft.v == 0 ) step = CompStep::FINISH; else readNextChunk(); } - return Decoder::stream_run_decode(&decoderState_, step); + return Decoder::stream_run_decode(&m_decoderState, step); } - void readImpl(void* buf, size_t nbytes) override + void readImpl(char* buf, zsize_t nbytes) override { - decoderState_.next_out = (unsigned char*)buf; - decoderState_.avail_out = nbytes; - while ( decoderState_.avail_out != 0 ) + m_decoderState.next_out = (unsigned char*)buf; + m_decoderState.avail_out = nbytes.v; + while ( m_decoderState.avail_out != 0 ) { decodeMoreBytes(); } @@ -86,10 +88,11 @@ class DecodedDataStream : public IDataStream typedef typename Decoder::stream_t DecoderState; private: // data - std::unique_ptr encodedDataStream_; - size_t inputBytesLeft_; // count of bytes left in the input stream - DecoderState decoderState_; - IDataStream::Blob encodedDataChunk_; + std::shared_ptr m_encodedDataReader; + offset_t m_currentInputOffset; + zsize_t m_inputBytesLeft; // count of bytes left in the input stream + DecoderState m_decoderState; + Buffer m_encodedDataChunk; }; } // namespace zim diff --git a/test/decodeddatastream.cpp b/test/decoderstreamreader.cpp similarity index 55% rename from test/decodeddatastream.cpp rename to test/decoderstreamreader.cpp index e86f3a0f9..3d64ee47c 100644 --- a/test/decodeddatastream.cpp +++ b/test/decoderstreamreader.cpp @@ -17,8 +17,7 @@ * */ -#include "decodeddatastream.h" -#include "bufdatastream.h" +#include "decoderstreamreader.h" #include "gtest/gtest.h" @@ -45,13 +44,13 @@ std::string operator*(const std::string& s, unsigned N) return result; } -std::string toString(const zim::IDataStream::Blob& blob) +std::string toString(const zim::Buffer& buffer) { - return std::string(blob.data(), blob.size()); + return std::string(buffer.data(), buffer.size().v); } template -class DecodedDataStreamTest : public testing::Test { +class DecoderStreamReaderTest : public testing::Test { protected: typedef T CompressionInfo; }; @@ -64,36 +63,41 @@ using CompressionTypes = ::testing::Types< #endif >; -TYPED_TEST_CASE(DecodedDataStreamTest, CompressionTypes); +TYPED_TEST_CASE(DecoderStreamReaderTest, CompressionTypes); -TYPED_TEST(DecodedDataStreamTest, justCompressedData) { +TYPED_TEST(DecoderStreamReaderTest, justCompressedData) { typedef typename TestFixture::CompressionInfo CompressionInfo; const int N = 10; - const std::string s("DecodedDataStream should work correctly"); - const std::string compData = compress(s*N); + const std::string s("DecoderStreamReader should work correctly"); + const std::string compDataStr = compress(s*N); + auto compData = zim::Buffer::makeBuffer(compDataStr.data(), zim::zsize_t(compDataStr.size())); - std::unique_ptr bds(new zim::BufDataStream(compData.data(), compData.size())); - zim::DecodedDataStream dds(std::move(bds), compData.size()); + auto compReader = std::make_shared(compData); + zim::DecoderStreamReader dds(compReader); for (int i=0; iget_buffer(zim::offset_t(0), zim::zsize_t(s.size())))) << "i: " << i; } } -TYPED_TEST(DecodedDataStreamTest, compressedDataFollowedByGarbage) { +TYPED_TEST(DecoderStreamReaderTest, compressedDataFollowedByGarbage) { typedef typename TestFixture::CompressionInfo CompressionInfo; const int N = 10; - const std::string s("DecodedDataStream should work correctly"); - const std::string compData = compress(s*N); - const std::string inputData = compData + std::string(10, '\0'); + const std::string s("DecoderStreamReader should work correctly"); + std::string compDataStr = compress(s*N); + compDataStr += std::string(10, '\0'); - std::unique_ptr bds(new zim::BufDataStream(inputData.data(), inputData.size())); - zim::DecodedDataStream dds(std::move(bds), inputData.size()); + auto compData = zim::Buffer::makeBuffer(compDataStr.data(), zim::zsize_t(compDataStr.size())); + auto compReader = std::make_shared(compData); + + zim::DecoderStreamReader dds(compReader); for (int i=0; iget_buffer(zim::offset_t(0), zim::zsize_t(s.size())))) << "i: " << i; } } diff --git a/test/meson.build b/test/meson.build index 3a8adc2a3..6e0d743ed 100644 --- a/test/meson.build +++ b/test/meson.build @@ -13,7 +13,7 @@ tests = [ 'compression', 'impl_find', 'istreamreader', - 'decodeddatastream' + 'decoderstreamreader' ] if gtest_dep.found() and not meson.is_cross_build() From 227df3966a712b6c32a6cf29bb5e52af31818adc Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 29 Aug 2020 13:58:16 +0400 Subject: [PATCH 12/29] zim::ReaderDataStreamWrapper --- src/readerdatastreamwrapper.h | 52 ++++++++++++++++++++++++++++ test/meson.build | 3 +- test/readerdatastreamwrapper.cpp | 58 ++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 src/readerdatastreamwrapper.h create mode 100644 test/readerdatastreamwrapper.cpp diff --git a/src/readerdatastreamwrapper.h b/src/readerdatastreamwrapper.h new file mode 100644 index 000000000..85aeb7395 --- /dev/null +++ b/src/readerdatastreamwrapper.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_READERDATASTREAMWRAPPER_H +#define ZIM_READERDATASTREAMWRAPPER_H + +#include "idatastream.h" +#include "reader.h" +#include "debug.h" + +namespace zim +{ + +class ReaderDataStreamWrapper : public IDataStream +{ +public: // functions + explicit ReaderDataStreamWrapper(const zim::Reader* reader) + : reader_(*reader) + , readerPos_(0) + {} + + void readImpl(void* buf, size_t nbytes) override + { + ASSERT(readerPos_.v + nbytes, <=, reader_.size().v); + reader_.read(static_cast(buf), readerPos_, zsize_t(nbytes)); + readerPos_ += nbytes; + } + +private: // data + const Reader& reader_; + offset_t readerPos_; +}; + +} // namespace zim + +#endif // ZIM_READERDATASTREAMWRAPPER_H diff --git a/test/meson.build b/test/meson.build index 6e0d743ed..6eb91f795 100644 --- a/test/meson.build +++ b/test/meson.build @@ -13,7 +13,8 @@ tests = [ 'compression', 'impl_find', 'istreamreader', - 'decoderstreamreader' + 'decoderstreamreader', + 'readerdatastreamwrapper' ] if gtest_dep.found() and not meson.is_cross_build() diff --git a/test/readerdatastreamwrapper.cpp b/test/readerdatastreamwrapper.cpp new file mode 100644 index 000000000..91fa6389f --- /dev/null +++ b/test/readerdatastreamwrapper.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "readerdatastreamwrapper.h" +#include "buffer.h" +#include "file_reader.h" + +#include "gtest/gtest.h" + +namespace +{ + +using namespace zim; + +std::shared_ptr memoryViewBuffer(const char* str, size_t size) +{ + return std::make_shared(str, zsize_t(size)); +} + +std::string toString(const IDataStream::Blob& blob) +{ + return std::string(blob.data(), blob.size()); +} + +TEST(ReaderDataStreamWrapper, shouldJustWork) +{ + char data[] = "abcdefghijklmnopqrstuvwxyz"; + toLittleEndian(uint32_t(1234), data); + toLittleEndian(int64_t(-987654321), data+18); + + const BufferReader bufReader(memoryViewBuffer(data, sizeof(data))); + const Reader* readerPtr = &bufReader; + + ReaderDataStreamWrapper rdsw(readerPtr); + + ASSERT_EQ(1234, rdsw.read()); + ASSERT_EQ("efgh", toString(rdsw.readBlob(4))); + ASSERT_EQ("ijklmnopqr", toString(rdsw.readBlob(10))); + ASSERT_EQ(-987654321, rdsw.read()); +} + +} // unnamed namespace From 9c469f83130439930b84142b266cbbe01ded8da4 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 15 Sep 2020 10:35:51 +0200 Subject: [PATCH 13/29] Adapt RawStreamReader to wrap a reader. --- ...rdatastreamwrapper.h => rawstreamreader.h} | 33 +++++++++++-------- test/meson.build | 2 +- ...astreamwrapper.cpp => rawstreamreader.cpp} | 26 +++++++-------- 3 files changed, 32 insertions(+), 29 deletions(-) rename src/{readerdatastreamwrapper.h => rawstreamreader.h} (60%) rename test/{readerdatastreamwrapper.cpp => rawstreamreader.cpp} (63%) diff --git a/src/readerdatastreamwrapper.h b/src/rawstreamreader.h similarity index 60% rename from src/readerdatastreamwrapper.h rename to src/rawstreamreader.h index 85aeb7395..20b1d0182 100644 --- a/src/readerdatastreamwrapper.h +++ b/src/rawstreamreader.h @@ -17,34 +17,41 @@ * */ -#ifndef ZIM_READERDATASTREAMWRAPPER_H -#define ZIM_READERDATASTREAMWRAPPER_H +#ifndef ZIM_RAWSTREAMREADER_H +#define ZIM_RAWSTREAMREADER_H -#include "idatastream.h" +#include "istreamreader.h" #include "reader.h" #include "debug.h" namespace zim { -class ReaderDataStreamWrapper : public IDataStream +class RawStreamReader : public IStreamReader { public: // functions - explicit ReaderDataStreamWrapper(const zim::Reader* reader) - : reader_(*reader) - , readerPos_(0) + explicit RawStreamReader(std::shared_ptr reader) + : m_reader(reader), + m_readerPos(0) {} - void readImpl(void* buf, size_t nbytes) override + void readImpl(char* buf, zsize_t nbytes) override { - ASSERT(readerPos_.v + nbytes, <=, reader_.size().v); - reader_.read(static_cast(buf), readerPos_, zsize_t(nbytes)); - readerPos_ += nbytes; + m_reader->read(static_cast(buf), m_readerPos, zsize_t(nbytes)); + m_readerPos += nbytes; } + std::unique_ptr sub_reader(zsize_t nbytes) override + { + auto reader = m_reader->sub_reader(m_readerPos, nbytes); + m_readerPos += nbytes; + return reader; + } + + private: // data - const Reader& reader_; - offset_t readerPos_; + std::shared_ptr m_reader; + offset_t m_readerPos; }; } // namespace zim diff --git a/test/meson.build b/test/meson.build index 6eb91f795..87662a878 100644 --- a/test/meson.build +++ b/test/meson.build @@ -14,7 +14,7 @@ tests = [ 'impl_find', 'istreamreader', 'decoderstreamreader', - 'readerdatastreamwrapper' + 'rawstreamreader' ] if gtest_dep.found() and not meson.is_cross_build() diff --git a/test/readerdatastreamwrapper.cpp b/test/rawstreamreader.cpp similarity index 63% rename from test/readerdatastreamwrapper.cpp rename to test/rawstreamreader.cpp index 91fa6389f..a79268912 100644 --- a/test/readerdatastreamwrapper.cpp +++ b/test/rawstreamreader.cpp @@ -17,7 +17,7 @@ * */ -#include "readerdatastreamwrapper.h" +#include "rawstreamreader.h" #include "buffer.h" #include "file_reader.h" @@ -28,14 +28,9 @@ namespace using namespace zim; -std::shared_ptr memoryViewBuffer(const char* str, size_t size) +std::string toString(const Buffer& buffer) { - return std::make_shared(str, zsize_t(size)); -} - -std::string toString(const IDataStream::Blob& blob) -{ - return std::string(blob.data(), blob.size()); + return std::string(buffer.data(), buffer.size().v); } TEST(ReaderDataStreamWrapper, shouldJustWork) @@ -44,15 +39,16 @@ TEST(ReaderDataStreamWrapper, shouldJustWork) toLittleEndian(uint32_t(1234), data); toLittleEndian(int64_t(-987654321), data+18); - const BufferReader bufReader(memoryViewBuffer(data, sizeof(data))); - const Reader* readerPtr = &bufReader; + auto reader = std::make_shared(Buffer::makeBuffer(data, zsize_t(sizeof(data)))); - ReaderDataStreamWrapper rdsw(readerPtr); + RawStreamReader rdr(reader); - ASSERT_EQ(1234, rdsw.read()); - ASSERT_EQ("efgh", toString(rdsw.readBlob(4))); - ASSERT_EQ("ijklmnopqr", toString(rdsw.readBlob(10))); - ASSERT_EQ(-987654321, rdsw.read()); + ASSERT_EQ(1234, rdr.read()); + auto subbuffer = rdr.sub_reader(zsize_t(4))->get_buffer(offset_t(0), zsize_t(4)); + ASSERT_EQ("efgh", toString(subbuffer)); + subbuffer = rdr.sub_reader(zsize_t(10))->get_buffer(offset_t(0), zsize_t(10)); + ASSERT_EQ("ijklmnopqr", toString(subbuffer)); + ASSERT_EQ(-987654321, rdr.read()); } } // unnamed namespace From b8f3eb7df820df74e6be60a9667e9ebb38b2a8f9 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 30 Aug 2020 14:19:26 +0400 Subject: [PATCH 14/29] Enter BufDataStream --- src/bufdatastream.h | 51 ++++++++++++++++++++++++++++++++++++++++++ src/istreamreader.cpp | 39 ++++++++++++++++++++++++++++++++ test/istreamreader.cpp | 37 ++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 src/bufdatastream.h diff --git a/src/bufdatastream.h b/src/bufdatastream.h new file mode 100644 index 000000000..d56ca1811 --- /dev/null +++ b/src/bufdatastream.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_BUFDATASTREAM_H +#define ZIM_BUFDATASTREAM_H + +#include "idatastream.h" +#include "debug.h" + +#include + +namespace zim +{ + +class BufDataStream : public IDataStream +{ +public: // functions + explicit BufDataStream(const char* data, size_t size) + : data_(data) + , size_(size) + {} + +protected: + void readImpl(void* buf, size_t nbytes) override; + Blob readBlobImpl(size_t size) override; + + +private: // data + const char* data_; + size_t size_; +}; + +} // namespace zim + +#endif // ZIM_BUFDATASTREAM_H diff --git a/src/istreamreader.cpp b/src/istreamreader.cpp index 889cc901b..8a5e33e69 100644 --- a/src/istreamreader.cpp +++ b/src/istreamreader.cpp @@ -19,10 +19,15 @@ #include "istreamreader.h" #include "file_reader.h" +#include "bufdatastream.h" namespace zim { +//////////////////////////////////////////////////////////////////////////////// +// IDataStream +//////////////////////////////////////////////////////////////////////////////// + std::unique_ptr IStreamReader::sub_reader(zsize_t size) { @@ -31,4 +36,38 @@ IStreamReader::sub_reader(zsize_t size) return std::unique_ptr(new BufferReader(buffer)); } +//////////////////////////////////////////////////////////////////////////////// +// BufDataStream +//////////////////////////////////////////////////////////////////////////////// + +void +BufDataStream::readImpl(void* buf, size_t nbytes) +{ + ASSERT(nbytes, <=, size_); + memcpy(buf, data_, nbytes); + data_ += nbytes; + size_ -= nbytes; +} + +namespace +{ + +struct NoDelete +{ + template void operator()(T*) {} +}; + +} // unnamed namespace + +IDataStream::Blob +BufDataStream::readBlobImpl(size_t nbytes) +{ + ASSERT(nbytes, <=, size_); + const IDataStream::Blob::DataPtr dataPtr(data_, NoDelete()); + const IDataStream::Blob blob(dataPtr, nbytes); + data_ += nbytes; + size_ -= nbytes; + return blob; +} + } // namespace zim diff --git a/test/istreamreader.cpp b/test/istreamreader.cpp index 50786c831..cf007b32d 100644 --- a/test/istreamreader.cpp +++ b/test/istreamreader.cpp @@ -18,6 +18,8 @@ */ #include "istreamreader.h" +#include "bufdatastream.h" +#include "endian_tools.h" #include "gtest/gtest.h" @@ -26,6 +28,10 @@ namespace using zim::IStreamReader; +//////////////////////////////////////////////////////////////////////////////// +// IDataStream +//////////////////////////////////////////////////////////////////////////////// + // Implement the IStreamReader interface in the simplest way class InfiniteZeroStream : public IStreamReader { @@ -58,4 +64,35 @@ TEST(IStreamReader, sub_reader) EXPECT_EQ(0, memcmp(buffer.data(), zerobuf, N)); } +//////////////////////////////////////////////////////////////////////////////// +// BufDataStream +//////////////////////////////////////////////////////////////////////////////// + +std::string toString(const IDataStream::Blob& blob) +{ + return std::string(blob.data(), blob.size()); +} + +TEST(BufDataStream, shouldJustWork) +{ + char data[] = "abcdefghijklmnopqrstuvwxyz"; + zim::toLittleEndian(uint32_t(1234), data); + zim::toLittleEndian(int64_t(-987654321), data+18); + + zim::BufDataStream bds(data, sizeof(data)); + IDataStream& ids = bds; + + ASSERT_EQ(1234, ids.read()); + + const auto blob1 = ids.readBlob(4); + ASSERT_EQ("efgh", toString(blob1)); + ASSERT_EQ(data + 4, blob1.data()); + + const auto blob2 = ids.readBlob(10); + ASSERT_EQ("ijklmnopqr", toString(blob2)); + ASSERT_EQ(data + 8, blob2.data()); + + ASSERT_EQ(-987654321, ids.read()); +} + } // unnamed namespace From d7960853e772a0ac3ac77b7cb06bb5b44ef1fc0b Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 16 Sep 2020 10:59:27 +0200 Subject: [PATCH 15/29] Adapt BufferStreamer to wrap a `Buffer` instead of raw data. BufferStreamer will not be use "in place" of a IStreamReader and it doesn't share the same methods. So we don't need it to inherint the `IStreamReader`. --- src/bufdatastream.h | 51 --------------------------- src/bufferstreamer.h | 78 +++++++++++++++++++++++++++++++++++++++++ src/istreamreader.cpp | 35 ------------------ test/bufferstreamer.cpp | 64 +++++++++++++++++++++++++++++++++ test/istreamreader.cpp | 34 +----------------- test/meson.build | 3 +- 6 files changed, 145 insertions(+), 120 deletions(-) delete mode 100644 src/bufdatastream.h create mode 100644 src/bufferstreamer.h create mode 100644 test/bufferstreamer.cpp diff --git a/src/bufdatastream.h b/src/bufdatastream.h deleted file mode 100644 index d56ca1811..000000000 --- a/src/bufdatastream.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (C) 2020 Veloman Yunkan - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and - * NON-INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef ZIM_BUFDATASTREAM_H -#define ZIM_BUFDATASTREAM_H - -#include "idatastream.h" -#include "debug.h" - -#include - -namespace zim -{ - -class BufDataStream : public IDataStream -{ -public: // functions - explicit BufDataStream(const char* data, size_t size) - : data_(data) - , size_(size) - {} - -protected: - void readImpl(void* buf, size_t nbytes) override; - Blob readBlobImpl(size_t size) override; - - -private: // data - const char* data_; - size_t size_; -}; - -} // namespace zim - -#endif // ZIM_BUFDATASTREAM_H diff --git a/src/bufferstreamer.h b/src/bufferstreamer.h new file mode 100644 index 000000000..3b3d7752c --- /dev/null +++ b/src/bufferstreamer.h @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_BUFFERSTREAMER_H +#define ZIM_BUFFERSTREAMER_H + +#include "debug.h" + +#include + +namespace zim +{ + +class BufferStreamer +{ +public: // functions + explicit BufferStreamer(const Buffer& buffer, zsize_t size) + : m_buffer(buffer), + m_current(buffer.data()), + m_size(size) + {} + + explicit BufferStreamer(const Buffer& buffer) + : BufferStreamer(buffer, buffer.size()) + {} + + // Reads a value of the said type from the stream + // + // For best portability this function should be used with types of known + // bit-width (int32_t, uint16_t, etc) rather than builtin types with + // unknown bit-width (int, unsigned, etc). + template T read() + { + const size_t N(sizeof(T)); + char buf[N]; + memcpy(buf, m_current, N); + skip(zsize_t(N)); + return fromLittleEndian(buf); // XXX: This handles only integral types + } + + const char* current() const { + return m_current; + } + + zsize_t left() const { + return m_size; + } + + void skip(zsize_t nbBytes) { + m_current += nbBytes.v; + m_size -= nbBytes; + } + +private: // data + const Buffer m_buffer; + const char* m_current; + zsize_t m_size; +}; + +} // namespace zim + +#endif // ZIM_BUFDATASTREAM_H diff --git a/src/istreamreader.cpp b/src/istreamreader.cpp index 8a5e33e69..7c319681f 100644 --- a/src/istreamreader.cpp +++ b/src/istreamreader.cpp @@ -19,7 +19,6 @@ #include "istreamreader.h" #include "file_reader.h" -#include "bufdatastream.h" namespace zim { @@ -36,38 +35,4 @@ IStreamReader::sub_reader(zsize_t size) return std::unique_ptr(new BufferReader(buffer)); } -//////////////////////////////////////////////////////////////////////////////// -// BufDataStream -//////////////////////////////////////////////////////////////////////////////// - -void -BufDataStream::readImpl(void* buf, size_t nbytes) -{ - ASSERT(nbytes, <=, size_); - memcpy(buf, data_, nbytes); - data_ += nbytes; - size_ -= nbytes; -} - -namespace -{ - -struct NoDelete -{ - template void operator()(T*) {} -}; - -} // unnamed namespace - -IDataStream::Blob -BufDataStream::readBlobImpl(size_t nbytes) -{ - ASSERT(nbytes, <=, size_); - const IDataStream::Blob::DataPtr dataPtr(data_, NoDelete()); - const IDataStream::Blob blob(dataPtr, nbytes); - data_ += nbytes; - size_ -= nbytes; - return blob; -} - } // namespace zim diff --git a/test/bufferstreamer.cpp b/test/bufferstreamer.cpp new file mode 100644 index 000000000..0b63a8b95 --- /dev/null +++ b/test/bufferstreamer.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "buffer.h" +#include "bufferstreamer.h" +#include "endian_tools.h" + +#include "gtest/gtest.h" + +namespace +{ + +using namespace zim; + +//////////////////////////////////////////////////////////////////////////////// +// BufferStreamer +//////////////////////////////////////////////////////////////////////////////// + +std::string toString(const Buffer& buffer) +{ + return std::string(buffer.data(), buffer.size().v); +} + +TEST(BufferStreamer, shouldJustWork) +{ + char data[] = "abcdefghijklmnopqrstuvwxyz"; + zim::toLittleEndian(uint32_t(1234), data); + zim::toLittleEndian(int64_t(-987654321), data+18); + + auto buffer = Buffer::makeBuffer(data, zsize_t(sizeof(data))); + zim::BufferStreamer bds(buffer, zsize_t(sizeof(data))); + + ASSERT_EQ(1234, bds.read()); + + ASSERT_EQ(data + 4, bds.current()); + const auto blob1 = std::string(bds.current(), 4); + bds.skip(zsize_t(4)); + ASSERT_EQ("efgh", blob1); + + ASSERT_EQ(data + 8, bds.current()); + const auto blob2 = std::string(bds.current(), 10); + bds.skip(zsize_t(10)); + ASSERT_EQ("ijklmnopqr", blob2); + + ASSERT_EQ(-987654321, bds.read()); +} + +} // unnamed namespace diff --git a/test/istreamreader.cpp b/test/istreamreader.cpp index cf007b32d..2a913c629 100644 --- a/test/istreamreader.cpp +++ b/test/istreamreader.cpp @@ -18,7 +18,6 @@ */ #include "istreamreader.h" -#include "bufdatastream.h" #include "endian_tools.h" #include "gtest/gtest.h" @@ -26,7 +25,7 @@ namespace { -using zim::IStreamReader; +using namespace zim; //////////////////////////////////////////////////////////////////////////////// // IDataStream @@ -64,35 +63,4 @@ TEST(IStreamReader, sub_reader) EXPECT_EQ(0, memcmp(buffer.data(), zerobuf, N)); } -//////////////////////////////////////////////////////////////////////////////// -// BufDataStream -//////////////////////////////////////////////////////////////////////////////// - -std::string toString(const IDataStream::Blob& blob) -{ - return std::string(blob.data(), blob.size()); -} - -TEST(BufDataStream, shouldJustWork) -{ - char data[] = "abcdefghijklmnopqrstuvwxyz"; - zim::toLittleEndian(uint32_t(1234), data); - zim::toLittleEndian(int64_t(-987654321), data+18); - - zim::BufDataStream bds(data, sizeof(data)); - IDataStream& ids = bds; - - ASSERT_EQ(1234, ids.read()); - - const auto blob1 = ids.readBlob(4); - ASSERT_EQ("efgh", toString(blob1)); - ASSERT_EQ(data + 4, blob1.data()); - - const auto blob2 = ids.readBlob(10); - ASSERT_EQ("ijklmnopqr", toString(blob2)); - ASSERT_EQ(data + 8, blob2.data()); - - ASSERT_EQ(-987654321, ids.read()); -} - } // unnamed namespace diff --git a/test/meson.build b/test/meson.build index 87662a878..421479fee 100644 --- a/test/meson.build +++ b/test/meson.build @@ -14,7 +14,8 @@ tests = [ 'impl_find', 'istreamreader', 'decoderstreamreader', - 'rawstreamreader' + 'rawstreamreader', + 'bufferstreamer' ] if gtest_dep.found() and not meson.is_cross_build() From 480780af1dfef451d1e1433fd94d74b2130325c7 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 29 Aug 2020 23:45:23 +0400 Subject: [PATCH 16/29] Got rid of read_size() in cluster.cpp --- src/cluster.cpp | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/src/cluster.cpp b/src/cluster.cpp index dd300e991..c9f7a46da 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -41,23 +41,6 @@ namespace zim namespace { -template -zsize_t _read_size(const Reader* reader, offset_t offset) -{ - OFFSET_TYPE blob_offset = reader->read_uint(offset); - auto off = offset+offset_t(blob_offset-sizeof(OFFSET_TYPE)); - auto s = reader->read_uint(off); - return zsize_t(s); -} - -zsize_t read_size(const Reader* reader, bool isExtended, offset_t offset) -{ - if (isExtended) - return _read_size(reader, offset); - else - return _read_size(reader, offset); -} - const Buffer getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp) { @@ -95,9 +78,8 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp case zimcompDefault: case zimcompNone: { - auto size = read_size(&zimReader, *extended, offset + offset_t(1)); // No compression, just a sub_reader - return zimReader.sub_reader(offset+offset_t(1), size); + return zimReader.sub_reader(offset+offset_t(1)); } break; case zimcompLzma: @@ -137,7 +119,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp } else { startOffset = read_header(); } - reader = reader->sub_reader(startOffset); + reader = reader->sub_reader(startOffset, zsize_t(offsets.back().v)); auto d1 = reader->offset(); ASSERT(d+startOffset, ==, d1); } @@ -147,8 +129,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp offset_t Cluster::read_header() { // read first offset, which specifies, how many offsets we need to read - OFFSET_TYPE offset; - offset = reader->read_uint(offset_t(0)); + OFFSET_TYPE offset = reader->read_uint(offset_t(0)); size_t n_offset = offset / sizeof(OFFSET_TYPE); const offset_t data_address(offset); @@ -170,7 +151,6 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp offsets.push_back(offset_t(offset - data_address.v)); current += sizeof(OFFSET_TYPE); } - ASSERT(offset, ==, reader->size().v); return data_address; } From 1b5f8e7907b3614ae2af71678d2fc7ec240c1d5d Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 15 Sep 2020 17:38:40 +0200 Subject: [PATCH 17/29] Make the Cluster use `IStreamReader`. The cluster now internally use a `IStreamReader` to read the cluster data. As we are reading the data sequentially, we have to store the previous blob's data in a vector. As we don't want to read/mmap the data of the blob we won't read, we don't store a buffer but a reader. Depending of the `IStreamReader`, the stored readers may be : - A BufferReader (on the decompressed data). We have no choice here, we need to store the data if we don't want to decompress the data twice. - A FileReader (on uncompressed data). This is a lightweight object with just a offset to the data to read. The data will be read only when we really want to access the blob. --- src/cluster.cpp | 127 ++++++++++++++++++++---------------------------- src/cluster.h | 32 ++++++------ 2 files changed, 72 insertions(+), 87 deletions(-) diff --git a/src/cluster.cpp b/src/cluster.cpp index c9f7a46da..9136ca1d8 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -22,6 +22,9 @@ #include #include "file_reader.h" #include "endian_tools.h" +#include "bufferstreamer.h" +#include "decoderstreamreader.h" +#include "rawstreamreader.h" #include #include #include @@ -41,55 +44,28 @@ namespace zim namespace { -const Buffer -getClusterBuffer(const Reader& zimReader, offset_t offset, CompressionType comp) -{ - zsize_t uncompressed_size(0); - std::unique_ptr uncompressed_data; - switch (comp) { - case zimcompLzma: - uncompressed_data = uncompress(&zimReader, offset, &uncompressed_size); - break; - case zimcompZip: -#if defined(ENABLE_ZLIB) - uncompressed_data = uncompress(&zimReader, offset, &uncompressed_size); -#else - throw std::runtime_error("zlib not enabled in this library"); -#endif - break; - case zimcompZstd: - uncompressed_data = uncompress(&zimReader, offset, &uncompressed_size); - break; - default: - throw std::logic_error("compressions should not be something else than zimcompLzma, zimComZip or zimcompZstd."); - } - auto shared_data = std::shared_ptr(uncompressed_data.release(), std::default_delete()); - return Buffer::makeBuffer(shared_data, uncompressed_size); -} - -std::unique_ptr +std::unique_ptr getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp, bool* extended) { uint8_t clusterInfo = zimReader.read(offset); *comp = static_cast(clusterInfo & 0x0F); *extended = clusterInfo & 0x10; + auto subReader = std::shared_ptr(zimReader.sub_reader(offset+offset_t(1))); switch (*comp) { case zimcompDefault: case zimcompNone: - { - // No compression, just a sub_reader - return zimReader.sub_reader(offset+offset_t(1)); - } - break; + return std::unique_ptr(new RawStreamReader(subReader)); case zimcompLzma: - case zimcompZip: + return std::unique_ptr(new DecoderStreamReader(subReader)); case zimcompZstd: - { - auto buffer = getClusterBuffer(zimReader, offset+offset_t(1), *comp); - return std::unique_ptr(new BufferReader(buffer)); - } - break; + return std::unique_ptr(new DecoderStreamReader(subReader)); + case zimcompZip: +#if defined(ENABLE_ZLIB) + return std::unique_ptr(new DecoderStreamReader(subReader)); +#else + throw std::runtime_error("zlib not enabled in this library"); +#endif case zimcompBzip2: throw std::runtime_error("bzip2 not enabled in this library"); default: @@ -103,72 +79,78 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp { CompressionType comp; bool extended; - std::shared_ptr reader = getClusterReader(zimReader, clusterOffset, &comp, &extended); - return std::make_shared(reader, comp, extended); + auto reader = getClusterReader(zimReader, clusterOffset, &comp, &extended); + return std::make_shared(std::move(reader), comp, extended); } - Cluster::Cluster(std::shared_ptr reader_, CompressionType comp, bool isExtended) + Cluster::Cluster(std::unique_ptr reader_, CompressionType comp, bool isExtended) : compression(comp), isExtended(isExtended), - reader(reader_), - startOffset(0) + m_reader(std::move(reader_)) { - auto d = reader->offset(); if (isExtended) { - startOffset = read_header(); + read_header(); } else { - startOffset = read_header(); + read_header(); } - reader = reader->sub_reader(startOffset, zsize_t(offsets.back().v)); - auto d1 = reader->offset(); - ASSERT(d+startOffset, ==, d1); } /* This return the number of char read */ template - offset_t Cluster::read_header() + void Cluster::read_header() { // read first offset, which specifies, how many offsets we need to read - OFFSET_TYPE offset = reader->read_uint(offset_t(0)); + OFFSET_TYPE offset = m_reader->read(); size_t n_offset = offset / sizeof(OFFSET_TYPE); const offset_t data_address(offset); // read offsets - offsets.clear(); - offsets.reserve(n_offset); - offsets.push_back(offset_t(0)); - - auto buffer = reader->get_buffer(offset_t(0), zsize_t(offset)); - offset_t current = offset_t(sizeof(OFFSET_TYPE)); + m_blobOffsets.clear(); + m_blobOffsets.reserve(n_offset); + m_blobOffsets.push_back(offset_t(offset)); + + // Get the whole offsets data to avoid to many (system) call. + auto bufferSize = zsize_t(offset-sizeof(OFFSET_TYPE)); + auto buffer = m_reader->sub_reader(bufferSize)->get_buffer(offset_t(0), bufferSize); + auto seqReader = BufferStreamer(buffer, bufferSize); while (--n_offset) { - OFFSET_TYPE new_offset = buffer.as(current); + OFFSET_TYPE new_offset = seqReader.read(); ASSERT(new_offset, >=, offset); - ASSERT(new_offset, <=, reader->size().v); + m_blobOffsets.push_back(offset_t(new_offset)); offset = new_offset; - offsets.push_back(offset_t(offset - data_address.v)); - current += sizeof(OFFSET_TYPE); } - return data_address; } zsize_t Cluster::getBlobSize(blob_index_t n) const { - if (blob_index_type(n)+1 >= offsets.size()) throw ZimFileFormatError("blob index out of range"); - return zsize_t(offsets[blob_index_type(n)+1].v - offsets[blob_index_type(n)].v); + if (blob_index_type(n)+1 >= m_blobOffsets.size()) { + throw ZimFileFormatError("blob index out of range"); + } + return zsize_t(m_blobOffsets[blob_index_type(n)+1].v - m_blobOffsets[blob_index_type(n)].v); } - Blob Cluster::getBlob(blob_index_t n) const + const Reader& Cluster::getReader(blob_index_t n) const { - if (n < count()) { - auto blobSize = getBlobSize(n); + std::lock_guard lock(m_readerAccessMutex); + for(blob_index_type current(m_blobReaders.size()); current<=n.v; ++current) { + auto blobSize = getBlobSize(blob_index_t(current)); if (blobSize.v > SIZE_MAX) { - return Blob(); + m_blobReaders.push_back(std::unique_ptr(new BufferReader(Buffer::makeBuffer(zsize_t(0))))); + } else { + m_blobReaders.push_back(m_reader->sub_reader(blobSize)); } - auto buffer = reader->get_buffer(offsets[blob_index_type(n)], blobSize); - return buffer; + } + return *m_blobReaders[blob_index_type(n)]; + } + + Blob Cluster::getBlob(blob_index_t n) const + { + if (n < count()) { + const auto blobSize = getBlobSize(n); + return getReader(n).get_buffer(offset_t(0), blobSize); } else { return Blob(); } @@ -177,6 +159,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp Blob Cluster::getBlob(blob_index_t n, offset_t offset, zsize_t size) const { if (n < count()) { + const auto blobSize = getBlobSize(n); if ( offset.v > blobSize.v ) { return Blob(); @@ -185,9 +168,7 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp if (size.v > SIZE_MAX) { return Blob(); } - offset += offsets[blob_index_type(n)]; - auto buffer = reader->get_buffer(offset, size); - return buffer; + return getReader(n).get_buffer(offset, size); } else { return Blob(); } diff --git a/src/cluster.h b/src/cluster.h index cde3065a2..c26589130 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -27,6 +27,7 @@ #include #include #include +#include #include "zim_types.h" #include "zim/error.h" @@ -35,40 +36,43 @@ namespace zim { class Blob; class Reader; + class IStreamReader; class Cluster : public std::enable_shared_from_this { - typedef std::vector Offsets; + typedef std::vector BlobOffsets; + typedef std::vector> BlobReaders; public: const CompressionType compression; const bool isExtended; private: - std::shared_ptr reader; + std::unique_ptr m_reader; - // offset of the first blob of this cluster relative to the beginning - // of the (uncompressed) cluster data - offset_t startOffset; + // offsets of the blob boundaries relative to the start of the cluster data + // (*after* the first byte (clusterInfo)) + // For a cluster with N blobs, this collection contains N+1 entries. + // The start of the first blob and the end of the last blob are included. + BlobOffsets m_blobOffsets; + + mutable std::mutex m_readerAccessMutex; + mutable BlobReaders m_blobReaders; - // offsets of the blob boundaries relative to the start of the first - // blob in this cluster. For a cluster with N blobs, this collection - // contains N+1 entries - the start of the first blob (which is always - // 0) and the end of the last blob are also included. - Offsets offsets; template - offset_t read_header(); + void read_header(); + const Reader& getReader(blob_index_t n) const; public: - Cluster(std::shared_ptr reader, CompressionType comp, bool isExtended); + Cluster(std::unique_ptr reader, CompressionType comp, bool isExtended); CompressionType getCompression() const { return compression; } bool isCompressed() const { return compression != zimcompDefault && compression != zimcompNone; } - blob_index_t count() const { return blob_index_t(offsets.size() - 1); } + blob_index_t count() const { return blob_index_t(m_blobOffsets.size() - 1); } zsize_t getBlobSize(blob_index_t n) const; - offset_t getBlobOffset(blob_index_t n) const { return startOffset + offsets[blob_index_type(n)]; } + offset_t getBlobOffset(blob_index_t n) const { return m_blobOffsets[blob_index_type(n)]; } Blob getBlob(blob_index_t n) const; Blob getBlob(blob_index_t n, offset_t offset, zsize_t size) const; From 76c60b4e338a0c429619a49b90dd75fcea178e7f Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 15 Sep 2020 18:59:48 +0200 Subject: [PATCH 18/29] Make `Dirent` use BufferStreamer. --- src/dirent.cpp | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/src/dirent.cpp b/src/dirent.cpp index 32c08042d..3d4427653 100644 --- a/src/dirent.cpp +++ b/src/dirent.cpp @@ -20,6 +20,7 @@ #include "_dirent.h" #include #include "buffer.h" +#include "bufferstreamer.h" #include "endian_tools.h" #include "log.h" #include @@ -40,22 +41,20 @@ namespace zim Dirent::Dirent(const Buffer& bufferRef) : Dirent() { - const Buffer* const buffer = &bufferRef; - uint16_t mimeType = buffer->as(offset_t(0)); + BufferStreamer reader(bufferRef); + uint16_t mimeType = reader.read(); bool redirect = (mimeType == Dirent::redirectMimeType); bool linktarget = (mimeType == Dirent::linktargetMimeType); bool deleted = (mimeType == Dirent::deletedMimeType); - uint8_t extraLen = buffer->data()[2]; - char ns = buffer->data()[3]; - uint32_t version = buffer->as(offset_t(4)); + uint8_t extraLen = reader.read(); + char ns = reader.read(); + uint32_t version = reader.read(); setVersion(version); - offset_t current = offset_t(8); if (redirect) { - article_index_t redirectIndex(buffer->as(current)); - current += sizeof(article_index_t); + article_index_t redirectIndex(reader.read()); log_debug("redirectIndex=" << redirectIndex); @@ -70,10 +69,8 @@ namespace zim { log_debug("read article entry"); - uint32_t clusterNumber = buffer->as(current); - current += sizeof(uint32_t); - uint32_t blobNumber = buffer->as(current); - current += sizeof(uint32_t); + uint32_t clusterNumber = reader.read(); + uint32_t blobNumber = reader.read(); log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber); @@ -86,30 +83,30 @@ namespace zim log_debug("read url, title and parameters"); - offset_type url_size = strnlen( - buffer->data(current), - buffer->size().v - current.v - extraLen + size_type url_size = strnlen( + reader.current(), + reader.left().v - extraLen ); - if (current.v + url_size >= buffer->size().v) { + if (url_size >= reader.left().v) { throw(InvalidSize()); } - url = std::string(buffer->data(current), url_size); - current += url_size + 1; + url = std::string(reader.current(), url_size); + reader.skip(zsize_t(url_size+1)); - offset_type title_size = strnlen( - buffer->data(current), - buffer->size().v - current.v - extraLen + size_type title_size = strnlen( + reader.current(), + reader.left().v - extraLen ); - if (current.v + title_size >= buffer->size().v) { + if (title_size >= reader.left().v) { throw(InvalidSize()); } - title = std::string(buffer->data(current), title_size); - current += title_size + 1; + title = std::string(reader.current(), title_size); + reader.skip(zsize_t(title_size+1)); - if (current.v + extraLen > buffer->size().v) { + if (extraLen > reader.left().v) { throw(InvalidSize()); } - parameter = std::string(buffer->data(current), extraLen); + parameter = std::string(reader.current(), extraLen); setUrl(ns, url); setTitle(title); From 9d358d4a11a6ce3af7e820f1ec135c287d2d591b Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 15 Sep 2020 19:00:13 +0200 Subject: [PATCH 19/29] Make FileHeader use `BufferStreamer`. --- include/zim/fileheader.h | 4 ++-- src/fileheader.cpp | 33 +++++++++++++++++++-------------- src/fileimpl.cpp | 2 +- test/header.cpp | 3 ++- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/zim/fileheader.h b/include/zim/fileheader.h index 019b52b90..002d201b2 100644 --- a/include/zim/fileheader.h +++ b/include/zim/fileheader.h @@ -33,7 +33,7 @@ namespace zim { - class Buffer; + class Reader; class Fileheader { public: @@ -72,7 +72,7 @@ namespace zim {} void write(int out_fd) const; - void read(const Buffer& buffer); + void read(const Reader& reader); // Do some sanity check, raise a ZimFileFormateError is // something is wrong. diff --git a/src/fileheader.cpp b/src/fileheader.cpp index 1bb465874..a24131219 100644 --- a/src/fileheader.cpp +++ b/src/fileheader.cpp @@ -23,6 +23,8 @@ #include #include "log.h" #include "endian_tools.h" +#include "reader.h" +#include "bufferstreamer.h" #include "buffer.h" #ifdef _WIN32 # include "io.h" @@ -62,9 +64,11 @@ namespace zim _write(out_fd, header, Fileheader::size); } - void Fileheader::read(const Buffer& buffer) + void Fileheader::read(const Reader& reader) { - uint32_t magicNumber = buffer.as(offset_t(0)); + auto buffer = reader.get_buffer(offset_t(0), zsize_t(Fileheader::size)); + auto seqReader = BufferStreamer(buffer); + uint32_t magicNumber = seqReader.read(); if (magicNumber != Fileheader::zimMagic) { log_error("invalid magic number " << magicNumber << " found - " @@ -72,7 +76,7 @@ namespace zim throw ZimFileFormatError("Invalid magic number"); } - uint16_t major_version = buffer.as(offset_t(4)); + uint16_t major_version = seqReader.read(); if (major_version != zimClassicMajorVersion && major_version != zimExtendedMajorVersion) { log_error("invalid zimfile major version " << major_version << " found - " @@ -81,21 +85,22 @@ namespace zim } setMajorVersion(major_version); - setMinorVersion(buffer.as(offset_t(6))); + setMinorVersion(seqReader.read()); Uuid uuid; - std::copy(buffer.data(offset_t(8)), buffer.data(offset_t(24)), uuid.data); + std::copy(seqReader.current(), seqReader.current()+16, uuid.data); + seqReader.skip(zsize_t(16)); setUuid(uuid); - setArticleCount(buffer.as(offset_t(24))); - setClusterCount(buffer.as(offset_t(28))); - setUrlPtrPos(buffer.as(offset_t(32))); - setTitleIdxPos(buffer.as(offset_t(40))); - setClusterPtrPos(buffer.as(offset_t(48))); - setMimeListPos(buffer.as(offset_t(56))); - setMainPage(buffer.as(offset_t(64))); - setLayoutPage(buffer.as(offset_t(68))); - setChecksumPos(buffer.as(offset_t(72))); + setArticleCount(seqReader.read()); + setClusterCount(seqReader.read()); + setUrlPtrPos(seqReader.read()); + setTitleIdxPos(seqReader.read()); + setClusterPtrPos(seqReader.read()); + setMimeListPos(seqReader.read()); + setMainPage(seqReader.read()); + setLayoutPage(seqReader.read()); + setChecksumPos(seqReader.read()); sanity_check(); } diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index a92c756cc..5bdf07d9c 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -78,7 +78,7 @@ offset_t readOffset(const Reader& reader, size_t idx) throw ZimFileFormatError("zim-file is too small to contain a header"); } try { - header.read(zimReader->get_buffer(offset_t(0), zsize_t(Fileheader::size))); + header.read(*zimReader); } catch (ZimFileFormatError& e) { throw e; } catch (...) { diff --git a/test/header.cpp b/test/header.cpp index 19b490695..2407421d5 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -35,6 +35,7 @@ #include #include "../src/buffer.h" +#include "../src/file_reader.h" #include "tempfile.h" @@ -82,7 +83,7 @@ TEST(HeaderTest, read_write_header) auto buffer = write_to_buffer(header); zim::Fileheader header2; - header2.read(buffer); + header2.read(zim::BufferReader(buffer)); ASSERT_EQ(header2.getUuid(), "1234567890abcdef"); ASSERT_EQ(header2.getArticleCount(), 4711U); From 8b83dc1808542ea54d1c27bbe8fa95794d5d5a6b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 8 Sep 2020 14:50:10 +0400 Subject: [PATCH 20/29] Faster Blob/Buffer constructor for non-owned data case --- src/blob.cpp | 9 +++++++-- src/buffer.cpp | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/blob.cpp b/src/blob.cpp index 18c24d142..db3ada263 100644 --- a/src/blob.cpp +++ b/src/blob.cpp @@ -32,16 +32,21 @@ struct NoDelete template void operator()(T*) {} }; +// This shared_ptr is used as a source object for the std::shared_ptr +// aliasing constructor (with the purpose of avoiding the control block +// allocation) for the case when the referred data must not be deleted. +static Blob::DataPtr nonOwnedDataPtr((char*)nullptr, NoDelete()); + } // unnamed namespace Blob::Blob() - : _data(0), + : _data(nonOwnedDataPtr), _size(0) {} Blob::Blob(const char* data, size_type size) - : _data(DataPtr(data, NoDelete())), + : _data(nonOwnedDataPtr, data), _size(size) { ASSERT(size, <, SIZE_MAX); diff --git a/src/buffer.cpp b/src/buffer.cpp index a089ea1cd..93d395faf 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -42,6 +42,11 @@ struct NoDelete template void operator()(T*) {} }; +// This shared_ptr is used as a source object for the std::shared_ptr +// aliasing constructor (with the purpose of avoiding the control block +// allocation) for the case when the referred data must not be deleted. +static Buffer::DataPtr nonOwnedDataPtr((char*)nullptr, NoDelete()); + } // unnamed namespace const Buffer Buffer::sub_buffer(offset_t offset, zsize_t size) const @@ -59,7 +64,7 @@ const Buffer Buffer::makeBuffer(const DataPtr& data, zsize_t size) const Buffer Buffer::makeBuffer(const char* data, zsize_t size) { - return Buffer(DataPtr(data, NoDelete()), size); + return Buffer(DataPtr(nonOwnedDataPtr, data), size); } Buffer Buffer::makeBuffer(zsize_t size) From 04c402090c75d13f60ba49b6fcaaf4ee101f06bf Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 17 Sep 2020 11:33:02 +0200 Subject: [PATCH 21/29] fixup! Adapt BufferStreamer to wrap a `Buffer` instead of raw data. --- src/bufferstreamer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bufferstreamer.h b/src/bufferstreamer.h index 3b3d7752c..ff447d97e 100644 --- a/src/bufferstreamer.h +++ b/src/bufferstreamer.h @@ -30,7 +30,7 @@ namespace zim class BufferStreamer { public: // functions - explicit BufferStreamer(const Buffer& buffer, zsize_t size) + BufferStreamer(const Buffer& buffer, zsize_t size) : m_buffer(buffer), m_current(buffer.data()), m_size(size) From 39533c78e088e1e8e8b673cf2cae76787701444c Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 17 Sep 2020 11:34:29 +0200 Subject: [PATCH 22/29] fixup! Adapt DecoderStreamReader to wrap a Reader instead of a InputStream. --- src/decoderstreamreader.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/decoderstreamreader.h b/src/decoderstreamreader.h index e151b9d25..99bdd3ca6 100644 --- a/src/decoderstreamreader.h +++ b/src/decoderstreamreader.h @@ -17,8 +17,8 @@ * */ -#ifndef ZIM_DECODECDATASTREAM_H -#define ZIM_DECODECDATASTREAM_H +#ifndef ZIM_DECODERSTREAMREADER_H +#define ZIM_DECODERSTREAMREADER_H #include "compression.h" #include "istreamreader.h" @@ -97,4 +97,4 @@ class DecoderStreamReader : public IStreamReader } // namespace zim -#endif // ZIM_DECODECDATASTREAM_H +#endif // ZIM_DECODERSTREAMREADER_H From 04843d955855a2d0769f29b74c2d46f036104d3d Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 17 Sep 2020 11:35:34 +0200 Subject: [PATCH 23/29] fixup! Adapt RawStreamReader to wrap a reader --- src/rawstreamreader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rawstreamreader.h b/src/rawstreamreader.h index 20b1d0182..aced150c1 100644 --- a/src/rawstreamreader.h +++ b/src/rawstreamreader.h @@ -37,7 +37,7 @@ class RawStreamReader : public IStreamReader void readImpl(char* buf, zsize_t nbytes) override { - m_reader->read(static_cast(buf), m_readerPos, zsize_t(nbytes)); + m_reader->read(buf, m_readerPos, zsize_t(nbytes)); m_readerPos += nbytes; } From 4672b1946b161fd7683a97b7254cdbfcc88d60a1 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 17 Sep 2020 14:08:04 +0200 Subject: [PATCH 24/29] Move `BufferReader` to its own file. --- src/buffer_reader.cpp | 70 ++++++++++++++++++++++++++++++++++++ src/buffer_reader.h | 47 ++++++++++++++++++++++++ src/cluster.cpp | 2 +- src/file_reader.cpp | 45 ----------------------- src/file_reader.h | 18 ---------- src/fileimpl.cpp | 2 +- src/istreamreader.cpp | 2 +- src/meson.build | 1 + test/cluster.cpp | 2 +- test/decoderstreamreader.cpp | 1 + test/header.cpp | 2 +- test/rawstreamreader.cpp | 2 +- 12 files changed, 125 insertions(+), 69 deletions(-) create mode 100644 src/buffer_reader.cpp create mode 100644 src/buffer_reader.h diff --git a/src/buffer_reader.cpp b/src/buffer_reader.cpp new file mode 100644 index 000000000..7d1e6ead8 --- /dev/null +++ b/src/buffer_reader.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2017 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "buffer_reader.h" +#include "buffer.h" + +#include + +namespace zim { + +const Buffer BufferReader::get_buffer(offset_t offset, zsize_t size) const +{ + return source.sub_buffer(offset, size); +} + +std::unique_ptr BufferReader::sub_reader(offset_t offset, zsize_t size) const +{ + auto sub_buff = get_buffer(offset, size); + std::unique_ptr sub_read(new BufferReader(sub_buff)); + return sub_read; +} + +zsize_t BufferReader::size() const +{ + return source.size(); +} + +offset_t BufferReader::offset() const +{ + return offset_t((offset_type)(static_cast(source.data(offset_t(0))))); +} + + +void BufferReader::read(char* dest, offset_t offset, zsize_t size) const { + ASSERT(offset.v, <, source.size().v); + ASSERT(offset+offset_t(size.v), <=, offset_t(source.size().v)); + if (! size ) { + return; + } + memcpy(dest, source.data(offset), size.v); +} + + +char BufferReader::read(offset_t offset) const { + ASSERT(offset.v, <, source.size().v); + char dest; + dest = *source.data(offset); + return dest; +} + + +} // zim diff --git a/src/buffer_reader.h b/src/buffer_reader.h new file mode 100644 index 000000000..938aecc0b --- /dev/null +++ b/src/buffer_reader.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2017 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_BUFFER_READER_H_ +#define ZIM_BUFFER_READER_H_ + +#include "reader.h" + +namespace zim { + +class BufferReader : public Reader { + public: + BufferReader(const Buffer& source) + : source(source) {} + virtual ~BufferReader() {}; + + zsize_t size() const; + offset_t offset() const; + + void read(char* dest, offset_t offset, zsize_t size) const; + char read(offset_t offset) const; + const Buffer get_buffer(offset_t offset, zsize_t size) const; + std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; + + private: + const Buffer source; +}; + +}; + +#endif // ZIM_BUFFER_READER_H_ diff --git a/src/cluster.cpp b/src/cluster.cpp index 9136ca1d8..56de356c4 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -20,7 +20,7 @@ #include "cluster.h" #include #include -#include "file_reader.h" +#include "buffer_reader.h" #include "endian_tools.h" #include "bufferstreamer.h" #include "decoderstreamreader.h" diff --git a/src/file_reader.cpp b/src/file_reader.cpp index 8dd4e7ce3..1d31759f1 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -217,49 +217,4 @@ std::unique_ptr FileReader::sub_reader(offset_t offset, zsize_t si return std::unique_ptr(new FileReader(source, _offset+offset, size)); } - -//BufferReader::BufferReader(std::shared_ptr source) -// : source(source) {} - -const Buffer BufferReader::get_buffer(offset_t offset, zsize_t size) const -{ - return source.sub_buffer(offset, size); -} - -std::unique_ptr BufferReader::sub_reader(offset_t offset, zsize_t size) const -{ - auto sub_buff = get_buffer(offset, size); - std::unique_ptr sub_read(new BufferReader(sub_buff)); - return sub_read; -} - -zsize_t BufferReader::size() const -{ - return source.size(); -} - -offset_t BufferReader::offset() const -{ - return offset_t((offset_type)(static_cast(source.data(offset_t(0))))); -} - - -void BufferReader::read(char* dest, offset_t offset, zsize_t size) const { - ASSERT(offset.v, <, source.size().v); - ASSERT(offset+offset_t(size.v), <=, offset_t(source.size().v)); - if (! size ) { - return; - } - memcpy(dest, source.data(offset), size.v); -} - - -char BufferReader::read(offset_t offset) const { - ASSERT(offset.v, <, source.size().v); - char dest; - dest = *source.data(offset); - return dest; -} - - } // zim diff --git a/src/file_reader.h b/src/file_reader.h index 94656bdb0..dc0fed819 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -49,24 +49,6 @@ class FileReader : public Reader { zsize_t _size; }; -class BufferReader : public Reader { - public: - BufferReader(const Buffer& source) - : source(source) {} - virtual ~BufferReader() {}; - - zsize_t size() const; - offset_t offset() const; - - void read(char* dest, offset_t offset, zsize_t size) const; - char read(offset_t offset) const; - const Buffer get_buffer(offset_t offset, zsize_t size) const; - std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; - - private: - const Buffer source; -}; - }; #endif // ZIM_FILE_READER_H_ diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 5bdf07d9c..eacdd944f 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -21,7 +21,7 @@ #include #include "_dirent.h" #include "file_compound.h" -#include "file_reader.h" +#include "buffer_reader.h" #include #include #include diff --git a/src/istreamreader.cpp b/src/istreamreader.cpp index 7c319681f..9352bd5a4 100644 --- a/src/istreamreader.cpp +++ b/src/istreamreader.cpp @@ -18,7 +18,7 @@ */ #include "istreamreader.h" -#include "file_reader.h" +#include "buffer_reader.h" namespace zim { diff --git a/src/meson.build b/src/meson.build index 28caaf74e..57d963614 100644 --- a/src/meson.build +++ b/src/meson.build @@ -9,6 +9,7 @@ common_sources = [ # 'config.h', 'article.cpp', 'cluster.cpp', + 'buffer_reader.cpp', 'dirent.cpp', 'envvalue.cpp', 'file.cpp', diff --git a/test/cluster.cpp b/test/cluster.cpp index 7ce08d292..621b0acb7 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -50,7 +50,7 @@ #include "../src/cluster.h" #include "../src/file_part.h" #include "../src/file_compound.h" -#include "../src/file_reader.h" +#include "../src/buffer_reader.h" #include "../src/writer/cluster.h" #include "../src/endian_tools.h" #include "../src/config.h" diff --git a/test/decoderstreamreader.cpp b/test/decoderstreamreader.cpp index 3d64ee47c..417552a57 100644 --- a/test/decoderstreamreader.cpp +++ b/test/decoderstreamreader.cpp @@ -18,6 +18,7 @@ */ #include "decoderstreamreader.h" +#include "buffer_reader.h" #include "gtest/gtest.h" diff --git a/test/header.cpp b/test/header.cpp index 2407421d5..686a4b62f 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -35,7 +35,7 @@ #include #include "../src/buffer.h" -#include "../src/file_reader.h" +#include "../src/buffer_reader.h" #include "tempfile.h" diff --git a/test/rawstreamreader.cpp b/test/rawstreamreader.cpp index a79268912..ec1bdb41b 100644 --- a/test/rawstreamreader.cpp +++ b/test/rawstreamreader.cpp @@ -19,7 +19,7 @@ #include "rawstreamreader.h" #include "buffer.h" -#include "file_reader.h" +#include "buffer_reader.h" #include "gtest/gtest.h" From b3e64fed46fb3b8666e747ece61d45fd643000ff Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 17 Sep 2020 14:13:20 +0200 Subject: [PATCH 25/29] Remove `Buffer.as` method. --- src/buffer.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/buffer.h b/src/buffer.h index 53db43d48..299396e6a 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -50,14 +50,6 @@ class Buffer { } zsize_t size() const { return m_size; } const Buffer sub_buffer(offset_t offset, zsize_t size) const; - - template - T as(offset_t offset) const { - ASSERT(offset.v, <, m_size.v); - ASSERT(offset.v+sizeof(T), <=, m_size.v); - return fromLittleEndian(data(offset)); - } - operator Blob() const { return Blob(m_data, m_size.v); } private: // functions From 8a816f23c147cc8ab8382a0fdb700c5d8384b2b7 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 23 Sep 2020 14:01:37 +0200 Subject: [PATCH 26/29] fixup! Do not use external shared_ptr to keep buffer memo --- src/buffer.cpp | 9 --------- src/buffer.h | 1 - src/istreamreader.cpp | 2 +- test/cluster.cpp | 2 +- test/dirent.cpp | 2 +- test/header.cpp | 2 +- 6 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/buffer.cpp b/src/buffer.cpp index 93d395faf..692971125 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -85,13 +85,4 @@ Buffer::data(offset_t offset) const { return m_data.get() + offset.v; } -char* -Buffer::data(offset_t offset) { - ASSERT(offset.v, <=, m_size.v); - // We know we can do this cast as the only way to get a non const Buffer is - // to use the factory allocating the memory for us. - return const_cast(m_data.get() + offset.v); -} - - } //zim diff --git a/src/buffer.h b/src/buffer.h index 299396e6a..c9851dbd2 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -43,7 +43,6 @@ class Buffer { static Buffer makeBuffer(zsize_t size); const char* data(offset_t offset=offset_t(0)) const; - char* data(offset_t offset=offset_t(0)); char at(offset_t offset) const { return *(data(offset)); diff --git a/src/istreamreader.cpp b/src/istreamreader.cpp index 9352bd5a4..17e4b7b60 100644 --- a/src/istreamreader.cpp +++ b/src/istreamreader.cpp @@ -31,7 +31,7 @@ std::unique_ptr IStreamReader::sub_reader(zsize_t size) { auto buffer = Buffer::makeBuffer(size); - readImpl(buffer.data(), size); + readImpl(const_cast(buffer.data()), size); return std::unique_ptr(new BufferReader(buffer)); } diff --git a/test/cluster.cpp b/test/cluster.cpp index 621b0acb7..26cfb4a0c 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -72,7 +72,7 @@ zim::Buffer write_to_buffer(zim::writer::Cluster& cluster) auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, buf.data(), size) == -1) + if (read(tmp_fd, const_cast(buf.data()), size) == -1) throw std::runtime_error("Cannot read"); return buf; } diff --git a/test/dirent.cpp b/test/dirent.cpp index 5888572a5..b190f8fec 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -54,7 +54,7 @@ const zim::Buffer write_to_buffer(zim::writer::Dirent& dirent) auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, buf.data(), size) == -1) + if (read(tmp_fd, const_cast(buf.data()), size) == -1) throw std::runtime_error("Cannot read"); return buf; diff --git a/test/header.cpp b/test/header.cpp index 686a4b62f..a73f3d90f 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -53,7 +53,7 @@ zim::Buffer write_to_buffer(zim::Fileheader &header) auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, buf.data(), size) == -1) + if (read(tmp_fd, const_cast(buf.data()), size) == -1) throw std::runtime_error("Cannot read"); return buf; } From 12218e2c30b41ba12d3ed5f5ad234f105052f308 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 23 Sep 2020 14:07:56 +0200 Subject: [PATCH 27/29] Rename tempfile.(cpp|h) to tools.(cpp|h) --- test/cluster.cpp | 2 +- test/dirent.cpp | 2 +- test/header.cpp | 2 +- test/meson.build | 2 +- test/{tempfile.cpp => tools.cpp} | 2 +- test/{tempfile.h => tools.h} | 6 +++--- test/zimfile.cpp | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) rename test/{tempfile.cpp => tools.cpp} (98%) rename test/{tempfile.h => tools.h} (96%) diff --git a/test/cluster.cpp b/test/cluster.cpp index 26cfb4a0c..a3b437d51 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -55,7 +55,7 @@ #include "../src/endian_tools.h" #include "../src/config.h" -#include "tempfile.h" +#include "tools.h" namespace { diff --git a/test/dirent.cpp b/test/dirent.cpp index b190f8fec..293b0977f 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -38,7 +38,7 @@ #include "../src/_dirent.h" #include "../src/writer/_dirent.h" -#include "tempfile.h" +#include "tools.h" namespace { diff --git a/test/header.cpp b/test/header.cpp index a73f3d90f..e786121cb 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -37,7 +37,7 @@ #include "../src/buffer.h" #include "../src/buffer_reader.h" -#include "tempfile.h" +#include "tools.h" namespace { diff --git a/test/meson.build b/test/meson.build index 421479fee..a9c571280 100644 --- a/test/meson.build +++ b/test/meson.build @@ -20,7 +20,7 @@ tests = [ if gtest_dep.found() and not meson.is_cross_build() foreach test_name : tests - test_exe = executable(test_name, [test_name+'.cpp', 'tempfile.cpp'], + test_exe = executable(test_name, [test_name+'.cpp', 'tools.cpp'], implicit_include_directories: false, include_directories : [include_directory, src_directory], link_with : libzim, diff --git a/test/tempfile.cpp b/test/tools.cpp similarity index 98% rename from test/tempfile.cpp rename to test/tools.cpp index 14ca625ef..b1d630c06 100644 --- a/test/tempfile.cpp +++ b/test/tools.cpp @@ -17,7 +17,7 @@ * */ -#include "tempfile.h" +#include "tools.h" #include diff --git a/test/tempfile.h b/test/tools.h similarity index 96% rename from test/tempfile.h rename to test/tools.h index a0b9c6444..10ba0400a 100644 --- a/test/tempfile.h +++ b/test/tools.h @@ -17,8 +17,8 @@ * */ -#ifndef ZIM_TEST_TEMPFILE_H -#define ZIM_TEST_TEMPFILE_H +#ifndef ZIM_TEST_TOOLS_H +#define ZIM_TEST_TOOLS_H #include @@ -72,4 +72,4 @@ class TempFile } // namespace zim -#endif // ZIM_TEST_TEMPFILE_H +#endif // ZIM_TEST_TOOLS_H diff --git a/test/zimfile.cpp b/test/zimfile.cpp index 752212421..ddcdc3afe 100644 --- a/test/zimfile.cpp +++ b/test/zimfile.cpp @@ -20,7 +20,7 @@ #include #include -#include "tempfile.h" +#include "tools.h" #include "../src/fs.h" #include "gtest/gtest.h" From f5e682d7f4707090cedb594bc1ba28c909900bdf Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 23 Sep 2020 14:16:15 +0200 Subject: [PATCH 28/29] Move `write_to_buffer` test function to a generic helper function. This avoid code duplication in tests. --- test/cluster.cpp | 22 +++++++--------------- test/dirent.cpp | 16 +--------------- test/header.cpp | 15 +-------------- test/tools.h | 19 +++++++++++++++++++ 4 files changed, 28 insertions(+), 44 deletions(-) diff --git a/test/cluster.cpp b/test/cluster.cpp index a3b437d51..35bf7ebb1 100644 --- a/test/cluster.cpp +++ b/test/cluster.cpp @@ -61,21 +61,7 @@ namespace { using zim::unittests::TempFile; - -zim::Buffer write_to_buffer(zim::writer::Cluster& cluster) -{ - const TempFile tmpFile("test_cluster"); - cluster.close(); - const auto tmp_fd = tmpFile.fd(); - cluster.write(tmp_fd); - auto size = lseek(tmp_fd, 0, SEEK_END); - - auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); - lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, const_cast(buf.data()), size) == -1) - throw std::runtime_error("Cannot read"); - return buf; -} +using zim::unittests::write_to_buffer; TEST(ClusterTest, create_cluster) { @@ -109,6 +95,7 @@ TEST(ClusterTest, read_write_cluster) cluster.addData(blob1.data(), zim::zsize_t(blob1.size())); cluster.addData(blob2.data(), zim::zsize_t(blob2.size())); + cluster.close(); auto buffer = write_to_buffer(cluster); const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0)); zim::Cluster& cluster2 = *cluster2shptr; @@ -128,6 +115,7 @@ TEST(ClusterTest, read_write_empty) cluster.addData(0, zim::zsize_t(0)); cluster.addData(0, zim::zsize_t(0)); + cluster.close(); auto buffer = write_to_buffer(cluster); const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0)); zim::Cluster& cluster2 = *cluster2shptr; @@ -152,6 +140,7 @@ TEST(ClusterTest, read_write_clusterZ) cluster.addData(blob1.data(), zim::zsize_t(blob1.size())); cluster.addData(blob2.data(), zim::zsize_t(blob2.size())); + cluster.close(); auto buffer = write_to_buffer(cluster); const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0)); zim::Cluster& cluster2 = *cluster2shptr; @@ -180,6 +169,7 @@ TEST(ClusterTest, read_write_clusterLzma) cluster.addData(blob1.data(), zim::zsize_t(blob1.size())); cluster.addData(blob2.data(), zim::zsize_t(blob2.size())); + cluster.close(); auto buffer = write_to_buffer(cluster); const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0)); zim::Cluster& cluster2 = *cluster2shptr; @@ -206,6 +196,7 @@ TEST(ClusterTest, read_write_clusterZstd) cluster.addData(blob1.data(), zim::zsize_t(blob1.size())); cluster.addData(blob2.data(), zim::zsize_t(blob2.size())); + cluster.close(); auto buffer = write_to_buffer(cluster); const auto cluster2shptr = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0)); zim::Cluster& cluster2 = *cluster2shptr; @@ -268,6 +259,7 @@ TEST(ClusterTest, read_write_extended_cluster) delete[] blob3; // MEM = 4GiB + cluster.close(); buffer = write_to_buffer(cluster); } } diff --git a/test/dirent.cpp b/test/dirent.cpp index 293b0977f..9adfab396 100644 --- a/test/dirent.cpp +++ b/test/dirent.cpp @@ -44,21 +44,7 @@ namespace { using zim::unittests::TempFile; - -const zim::Buffer write_to_buffer(zim::writer::Dirent& dirent) -{ - const TempFile tmpFile("test_dirent"); - const auto tmp_fd = tmpFile.fd(); - dirent.write(tmp_fd); - auto size = lseek(tmp_fd, 0, SEEK_END); - - auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); - lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, const_cast(buf.data()), size) == -1) - throw std::runtime_error("Cannot read"); - - return buf; -} +using zim::unittests::write_to_buffer; size_t writenDirentSize(const zim::writer::Dirent& dirent) { diff --git a/test/header.cpp b/test/header.cpp index e786121cb..814c2e120 100644 --- a/test/header.cpp +++ b/test/header.cpp @@ -43,20 +43,7 @@ namespace { using zim::unittests::TempFile; - -zim::Buffer write_to_buffer(zim::Fileheader &header) -{ - const TempFile tmpFile("test_header"); - const auto tmp_fd = tmpFile.fd(); - header.write(tmp_fd); - auto size = lseek(tmp_fd, 0, SEEK_END); - - auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); - lseek(tmp_fd, 0, SEEK_SET); - if (read(tmp_fd, const_cast(buf.data()), size) == -1) - throw std::runtime_error("Cannot read"); - return buf; -} +using zim::unittests::write_to_buffer; TEST(HeaderTest, read_write_header) { diff --git a/test/tools.h b/test/tools.h index 10ba0400a..1c0139c29 100644 --- a/test/tools.h +++ b/test/tools.h @@ -20,7 +20,11 @@ #ifndef ZIM_TEST_TOOLS_H #define ZIM_TEST_TOOLS_H +#include "../src/buffer.h" + #include +#include +#include namespace zim { @@ -68,6 +72,21 @@ class TempFile std::string path() const { return path_; } }; +template +zim::Buffer write_to_buffer(const T& object) +{ + const TempFile tmpFile("test_temp_file"); + const auto tmp_fd = tmpFile.fd(); + object.write(tmp_fd); + auto size = lseek(tmp_fd, 0, SEEK_END); + + auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size)); + lseek(tmp_fd, 0, SEEK_SET); + if (read(tmp_fd, const_cast(buf.data()), size) == -1) + throw std::runtime_error("Cannot read"); + return buf; +} + } // namespace unittests } // namespace zim From 004afcb2b2b201ea538c0de88972f7484f470b4d Mon Sep 17 00:00:00 2001 From: Kelson Date: Wed, 23 Sep 2020 19:10:12 +0200 Subject: [PATCH 29/29] Remove a few useless empty lines --- include/zim/blob.h | 1 - src/cluster.cpp | 1 - src/fileimpl.cpp | 1 - 3 files changed, 3 deletions(-) diff --git a/include/zim/blob.h b/include/zim/blob.h index 46d324be0..88e35efd3 100644 --- a/include/zim/blob.h +++ b/include/zim/blob.h @@ -47,7 +47,6 @@ namespace zim private: DataPtr _data; size_type _size; - }; inline std::ostream& operator<< (std::ostream& out, const Blob& blob) diff --git a/src/cluster.cpp b/src/cluster.cpp index 56de356c4..5ba912059 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -159,7 +159,6 @@ getClusterReader(const Reader& zimReader, offset_t offset, CompressionType* comp Blob Cluster::getBlob(blob_index_t n, offset_t offset, zsize_t size) const { if (n < count()) { - const auto blobSize = getBlobSize(n); if ( offset.v > blobSize.v ) { return Blob(); diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index eacdd944f..2497db816 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -481,7 +481,6 @@ offset_t readOffset(const Reader& reader, size_t idx) log_warn("error reading checksum"); return std::string(); } - } bool FileImpl::verify()