-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improved python module architecture and added algos.
* Split core and algos * Introduced a new API * Added zstd, brotli, snappy, lzham, implode and bzip * Fixed brotli files extension * Added parameters for all the algos * Some improvements in packaging. * Improved testing.
- Loading branch information
Showing
40 changed files
with
797 additions
and
77 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
��� |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
�-�gj:��i:xB2�dӽ�ּB������Ԁ |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
meta: | ||
id: test_implode_ascii | ||
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.implode(4096, 1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
meta: | ||
id: test_implode_binary | ||
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.implode(4096, 0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ meta: | |
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.lzma_raw | ||
process: kaitai.compress.lzma(2, 9, "raw") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ meta: | |
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.lzma_xz | ||
process: kaitai.compress.lzma(2, 9, "xz") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
meta: | ||
id: test_snappy | ||
seq: | ||
- id: body | ||
size-eos: true | ||
process: kaitai.compress.snappy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,36 +1,53 @@ | ||
#!/usr/bin/env python3 | ||
|
||
from glob import glob | ||
from os.path import basename | ||
from pathlib import Path | ||
import re | ||
import unittest | ||
|
||
from test_lz4 import TestLz4 | ||
from test_lzma_lzma import TestLzmaLzma | ||
from test_lzma_raw import TestLzmaRaw | ||
from test_lzma_xz import TestLzmaXz | ||
from test_zlib import TestZlib | ||
from test_snappy import TestSnappy | ||
from test_brotli import TestBrotli | ||
from test_zstd import TestZstd | ||
from test_implode_binary import TestImplodeBinary | ||
from test_implode_ascii import TestImplodeAscii | ||
|
||
# Legacy smoke-test script: for every reference file matching
# uncompressed/*.dat, parse each compressed counterpart and print whether the
# decoded body equals the reference bytes.
for uncompressed_fn in glob('uncompressed/*.dat'):
    # Escape the dot so that only a literal ".dat" suffix is stripped.
    name = re.sub(r'\.dat$', '', basename(uncompressed_fn))
    print(name)

    # The context manager closes the handle even if read() raises.
    with open(uncompressed_fn, 'rb') as f:
        uncompressed_data = f.read()

    # (generated parser class, compressed-file extension) pairs.
    algs = [
        (TestLz4, 'lz4'),
        (TestLzmaLzma, 'lzma'),
        # (TestLzmaRaw, 'lzma_raw'),  # requires filters= to be set
        (TestLzmaXz, 'xz'),
        (TestZlib, 'zlib'),
        (TestBrotli, 'brotli'),
    ]

    for test_class, ext in algs:
        obj = test_class.from_file('compressed/%s.%s' % (name, ext))
        print(obj.body == uncompressed_data)
# Locations of the sample data, resolved from this file's own location so the
# tests can be started from any working directory.
cwd = Path(".").absolute()
this_dir = Path(__file__).absolute().parent
try:
    # Prefer a path relative to the working directory (shorter in test output).
    this_dir = this_dir.relative_to(cwd)
except ValueError:
    # This file does not live under the working directory: keep the absolute
    # path instead of failing at import time.
    pass
compressed_dir = this_dir / "compressed"
uncompressed_dir = this_dir / "uncompressed"
|
||
|
||
class SimpleTests(unittest.TestCase):
    """Checks that every compressed sample parses back to its reference bytes."""

    def testCompressors(self):
        # One pass per reference file found in the "uncompressed" directory.
        for reference_path in uncompressed_dir.glob("*.dat"):
            name = reference_path.stem
            print(name)

            expected = reference_path.read_bytes()

            # Generated parser class paired with the extension of the
            # compressed sample it is expected to read.
            cases = (
                (TestLz4, "lz4"),
                (TestLzmaLzma, "lzma"),
                # (TestLzmaRaw, 'lzma_raw'),  # requires filters= to be set
                (TestLzmaXz, "xz"),
                (TestZlib, "zlib"),
                (TestSnappy, "snappy"),
                (TestBrotli, "br"),
                (TestZstd, "zst"),
                (TestImplodeBinary, "binary.implode"),
                (TestImplodeAscii, "ascii.implode"),
            )

            for parser_cls, extension in cases:
                compressed_fn = compressed_dir / (name + "." + extension)
                # subTest reports each (parser, file) pair separately instead
                # of stopping at the first mismatch.
                with self.subTest(test_class=parser_cls, file=compressed_fn):
                    parsed = parser_cls.from_file(str(compressed_fn))
                    self.assertEqual(parsed.body, expected)
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
__pycache__ | ||
*.pyc | ||
*.pyo | ||
/build | ||
/dist | ||
/*.egg-info |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
from .lz4 import Lz4 | ||
from .zlib import Zlib | ||
from .lzma_raw import LzmaRaw | ||
from .lzma_lzma import LzmaLzma | ||
from .lzma_xz import LzmaXz | ||
from .brotli import Brotli | ||
from .core import * | ||
from .algorithms.zlib import Zlib | ||
from .algorithms.lzma import Lzma | ||
from .algorithms.lz4 import Lz4 | ||
from .algorithms.brotli import Brotli | ||
from .algorithms.zstd import Zstd | ||
from .algorithms.bz2 import Bz2 | ||
from .algorithms.snappy import Snappy | ||
from .algorithms.implode import Implode |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Brotli(KaitaiCompressor):
    """Brotli processor.

    The ``brotli`` module is imported when the first instance is created and
    cached on the class. Keyword arguments for compression and decompression
    are prepared once in ``__init__`` and reused on every call.
    """

    __slots__ = ("compressorParams", "decompressorParams")
    brotli = None

    def __init__(self, level: typing.Optional[int] = None, mode: typing.Optional[str] = "generic", log_window_size: typing.Optional[int] = None, log_block_size: typing.Optional[int] = None, dictionary: typing.Optional[bytes] = None) -> None:  # pylint:disable=redefined-builtin,too-many-arguments,too-many-locals,unused-argument
        super().__init__()
        owner = self.__class__
        if owner.brotli is None:
            import brotli  # pylint:disable=import-outside-toplevel

            owner.brotli = brotli

        # A textual mode such as "generic" is resolved to the library's
        # MODE_<NAME> constant; any other non-None value is passed through.
        if isinstance(mode, str):
            mode = getattr(owner.brotli, "MODE_" + mode.upper())

        candidates = (
            ("mode", mode),
            ("quality", level),
            ("lgwin", log_window_size),
            ("lgblock", log_block_size),
            ("dictionary", dictionary),
        )
        # Only options that were actually supplied are forwarded.
        self.compressorParams = {key: value for key, value in candidates if value is not None}
        self.decompressorParams = {} if dictionary is None else {"dictionary": dictionary}

    # new API
    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` (converted to ``bytes`` first) and wrap the result."""
        decoded = self.__class__.brotli.decompress(bytes(data), **self.decompressorParams)
        return ProcessorContextStub(decoded)

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` with the stored compressor options and wrap the result."""
        encoded = self.__class__.brotli.compress(data, **self.compressorParams)
        return ProcessorContextStub(encoded)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import bz2 | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Bz2(KaitaiCompressor):
    """bzip2 processor built on the standard-library ``bz2`` module."""

    __slots__ = ("level",)

    def __init__(self, level: int = 9, *args, **kwargs) -> None:  # pylint:disable=unused-argument
        """Store the compression level; extra arguments are accepted and ignored."""
        super().__init__()
        self.level = level

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` with a fresh ``BZ2Decompressor`` and wrap the result."""
        unpacked = bz2.BZ2Decompressor().decompress(data)
        return ProcessorContextStub(unpacked)

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` at the stored level and wrap the result."""
        packer = bz2.BZ2Compressor(self.level)
        head = packer.compress(data)
        # flush() emits whatever the compressor is still buffering.
        tail = packer.flush()
        return ProcessorContextStub(head + tail)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Implode(KaitaiCompressor):
    """PKWare implode format"""

    __slots__ = ("dictionarySize", "compressionType")

    def __init__(self, dictionarySize: int = 4096, compressionType: int = 0, *args, **kwargs) -> None:  # pylint:disable=unused-argument
        """Store the dictionary size and compression type.

        When ``pklib_base`` can be imported, ``compressionType`` is converted
        to its ``CompressionType`` enum (by lower-cased name for strings, by
        value otherwise). When it cannot, the value is stored as given.
        """
        super().__init__()

        try:
            from pklib_base import CompressionType
        except ImportError:
            CompressionType = None

        if CompressionType is not None:
            if isinstance(compressionType, str):
                compressionType = CompressionType[compressionType.lower()]
            else:
                compressionType = CompressionType(compressionType)

        self.dictionarySize = dictionarySize
        self.compressionType = compressionType

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` with ``pkblast`` and wrap element 1 of its result."""
        import pkblast

        result = pkblast.decompressBytesWholeToBytes(data)
        return ProcessorContextStub(result[1])

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` with ``pkimplode`` using the stored settings."""
        from pkimplode import compressBytesChunkedToBytes

        packed = compressBytesChunkedToBytes(
            data,
            compressionType=self.compressionType,
            dictionarySize=self.dictionarySize,
        )
        return ProcessorContextStub(packed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import typing | ||
from enum import IntEnum | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class LRZip(KaitaiCompressor):
    """lrzip processor.

    The ``lrzip`` module and the ``Algos`` enum derived from it are loaded when
    the first instance is created and cached on the class.
    """

    __slots__ = ("algo",)

    lrzip = None
    Algos = None

    @classmethod
    def initLib(cls):
        """Import ``lrzip`` and build ``Algos`` from its compress-mode constants.

        Every module attribute named ``LRZIP_MODE_COMPRESS_<X>`` becomes an
        enum member called ``<x>`` (lower-cased), ordered by constant value.
        """
        import lrzip

        # A classmethod receives ``cls``; there is no ``self`` in scope here.
        cls.lrzip = lrzip
        prefix = "LRZIP_MODE_COMPRESS_"
        members = sorted(
            ((k[len(prefix):].lower(), getattr(lrzip, k)) for k in dir(lrzip) if k.startswith(prefix)),
            key=lambda item: item[1],
        )
        cls.Algos = IntEnum("A", members)

    def __init__(self, algo: typing.Union[int, str] = "none", *args, **kwargs) -> None:  # pylint:disable=unused-argument
        """Resolve ``algo`` to an ``Algos`` member.

        Strings are looked up by lower-cased member name, anything else by
        value. Extra arguments are accepted and ignored.
        """
        if self.__class__.lrzip is None:
            self.__class__.initLib()
        if isinstance(algo, str):
            algo = self.__class__.Algos[algo.lower()]
        else:
            algo = self.__class__.Algos(algo)
        self.algo = algo
        super().__init__()

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` with ``lrzip`` and wrap the result."""
        return ProcessorContextStub(self.__class__.lrzip.decompress(data))

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` with ``lrzip`` in the stored mode and wrap the result."""
        return ProcessorContextStub(self.__class__.lrzip.compress(data, compressMode=self.algo))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import typing | ||
|
||
from ..core import KaitaiCompressor, ProcessorContextStub | ||
|
||
# pylint:disable=arguments-differ | ||
|
||
|
||
class Lz4(KaitaiCompressor):
    """LZ4 frame-format processor.

    ``lz4.frame`` is imported when the first instance is created and cached on
    the class. Compressor options are assembled once in ``__init__``.
    """

    __slots__ = ("compressorParams",)
    lz4Frame = None

    def __init__(self, block_size: typing.Optional[int] = None, should_link_blocks: bool = True, compression_level: typing.Optional[int] = None, frame_checksum: bool = False, block_checksum: bool = False, *args, **kwargs) -> None:  # pylint:disable=unused-argument,too-many-arguments
        """Prepare the keyword arguments later passed to ``LZ4FrameCompressor``.

        ``compression_level`` and ``block_size`` default to the library's
        ``COMPRESSIONLEVEL_MAX`` and ``BLOCKSIZE_MAX4MB`` when left as ``None``.
        Extra arguments are accepted and ignored.
        """
        super().__init__()
        if self.__class__.lz4Frame is None:
            import lz4.frame  # pylint:disable=import-outside-toplevel

            self.__class__.lz4Frame = lz4.frame

        if compression_level is None:
            compression_level = self.__class__.lz4Frame.COMPRESSIONLEVEL_MAX
        if block_size is None:
            block_size = self.__class__.lz4Frame.BLOCKSIZE_MAX4MB
        self.compressorParams = {
            "block_size": block_size,
            "block_linked": should_link_blocks,
            "compression_level": compression_level,
            "content_checksum": frame_checksum,
            "block_checksum": block_checksum,
            "return_bytearray": False
        }

    def process(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Decompress ``data`` with a fresh ``LZ4FrameDecompressor`` and wrap the result."""
        obj = self.__class__.lz4Frame.LZ4FrameDecompressor(return_bytearray=False)
        return ProcessorContextStub(obj.decompress(data))

    def unprocess(self, data: typing.Union[bytes, bytearray]) -> ProcessorContextStub:
        """Compress ``data`` into one frame (header + body + trailer) and wrap it."""
        obj = self.__class__.lz4Frame.LZ4FrameCompressor(**self.compressorParams)
        return ProcessorContextStub(obj.begin(len(data)) + obj.compress(data) + obj.flush())

    def extract_args(self, data: typing.Union[bytes, bytearray]):
        """Read frame information from ``data`` and return it as a tuple.

        The tuple follows the order of the constructor's positional
        parameters: block size, block linking, compression level, content
        checksum, block checksum. The compression level is ``None`` when the
        frame information does not provide one.
        """
        res = self.__class__.lz4Frame.get_frame_info(data)
        # python-lz4 documents this key as "block_linked"; "linker" is kept as
        # a fallback so the lookup this method used before still works.
        linked = res["block_linked"] if "block_linked" in res else res["linker"]
        # NOTE(review): the documented frame info has no "compression_level"
        # entry, so None is returned rather than raising KeyError — confirm.
        level = res.get("compression_level")
        # NOTE(review): "block_size" looks like a byte count while __init__
        # takes a BLOCKSIZE_* identifier ("block_size_id"?) — confirm.
        return (res["block_size"], linked, level, res["content_checksum"], res["block_checksum"])
Oops, something went wrong.