Skip to content

Commit

Permalink
gzip2
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Jan 4, 2021
1 parent cabdaa0 commit 3678944
Show file tree
Hide file tree
Showing 3 changed files with 382 additions and 168 deletions.
204 changes: 36 additions & 168 deletions dfvfs/file_io/gzip_file_io.py
Original file line number Diff line number Diff line change
@@ -1,160 +1,67 @@
# -*- coding: utf-8 -*-
"""The gzip file-like object."""

import collections
import os

from dfvfs.file_io import file_io
from dfvfs.file_io import file_object_io
from dfvfs.lib import errors
from dfvfs.lib import gzipfile
from dfvfs.resolver import resolver


class GzipFile(file_io.FileIO):
"""File input/output (IO) object of a gzip file.
The gzip file format is defined in RFC1952: http://www.zlib.org/rfc-gzip.html
Attributes:
uncompressed_data_size (int): total size of the decompressed data stored
in the gzip file.
"""

def __init__(self, resolver_context, path_spec):
"""Initializes a file input/output (IO) object.
Args:
resolver_context (Context): resolver context.
path_spec (PathSpec): a path specification.
"""
super(GzipFile, self).__init__(resolver_context, path_spec)
self._compressed_data_size = -1
self._current_offset = 0
self._gzip_file_object = None
self._members_by_end_offset = collections.OrderedDict()

self.uncompressed_data_size = 0
class GzipFile(file_object_io.FileObjectIO):
"""File input/output (IO) object of a gzip file."""

@property
def original_filenames(self):
"""list(str): The original filenames stored in the gzip file."""
return [member.original_filename
for member in self._members_by_end_offset.values()]
def comments(self):
"""list(str): comments in the gzip file."""
return [member.comment for member in self._file_object.members]

@property
def modification_times(self):
"""list(int): The modification times stored in the gzip file."""
return [member.modification_time
for member in self._members_by_end_offset.values()]
"""list(int): modification times stored in the gzip file."""
return [member.modification_time for member in self._file_object.members]

@property
def operating_systems(self):
"""list(int): The operating system values stored in the gzip file."""
return [member.operating_system
for member in self._members_by_end_offset.values()]
def original_filenames(self):
"""list(str): original filenames stored in the gzip file."""
return [member.original_filename for member in self._file_object.members]

@property
def comments(self):
"""list(str): The comments in the gzip file."""
return [member.comment
for member in self._members_by_end_offset.values()]

def _GetMemberForOffset(self, offset):
"""Finds the member whose data includes the provided offset.
Args:
offset (int): offset in the uncompressed data to find the
containing member for.
Returns:
gzipfile.GzipMember: gzip file member or None if not available.
Raises:
ValueError: if the provided offset is outside of the bounds of the
uncompressed data.
"""
if offset < 0 or offset >= self.uncompressed_data_size:
raise ValueError('Offset {0:d} is larger than file size {1:d}.'.format(
offset, self.uncompressed_data_size))

for end_offset, member in self._members_by_end_offset.items():
if offset < end_offset:
return member

return None

def seek(self, offset, whence=os.SEEK_SET):
"""Seeks to an offset within the file-like object.
Args:
offset (int): offset to seek to.
whence (Optional[int]): value that indicates whether offset is an absolute
or relative position within the file.
Raises:
IOError: if the seek failed or the file has not been opened.
OSError: if the seek failed or the file has not been opened.
"""
if not self._gzip_file_object:
raise IOError('Not opened.')

if whence == os.SEEK_CUR:
offset += self._current_offset
elif whence == os.SEEK_END:
offset += self.uncompressed_data_size
elif whence != os.SEEK_SET:
raise IOError('Unsupported whence.')

if offset < 0:
raise IOError('Invalid offset value less than zero.')

self._current_offset = offset
def operating_systems(self):
"""list(int): operating system values stored in the gzip file."""
return [member.operating_system for member in self._file_object.members]

def read(self, size=None):
"""Reads a byte string from the gzip file at the current offset.
@property
def uncompressed_data_size(self):
"""int: uncompressed data size."""
return self._file_object.uncompressed_data_size

The function will read a byte string up to the specified size or
all of the remaining data if no size was specified.
def _OpenFileObject(self, path_spec):
"""Opens the file-like object defined by path specification.
Args:
size (Optional[int]): number of bytes to read, where None is all
remaining data.
path_spec (PathSpec): path specification.
Returns:
bytes: data read.
gzipfile.GzipCompressedStream: gzip file-like object.
Raises:
IOError: if the read failed.
OSError: if the read failed.
PathSpecError: if the path specification is incorrect.
"""
data = b''
while ((size and len(data) < size) and
self._current_offset < self.uncompressed_data_size):
member = self._GetMemberForOffset(self._current_offset)
member_offset = self._current_offset - member.uncompressed_data_offset

data_read = member.ReadAtOffset(member_offset, size)
if not data_read:
break

self._current_offset += len(data_read)
data = b''.join([data, data_read])
if not path_spec.HasParent():
raise errors.PathSpecError(
'Unsupported path specification without parent.')

return data
file_object = resolver.Resolver.OpenFileObject(
path_spec.parent, resolver_context=self._resolver_context)

def get_offset(self):
"""Retrieves the current offset into the file-like object.
gzip_compressed_stream = gzipfile.GzipCompressedStream()
gzip_compressed_stream.Open(file_object)

Returns:
int: current offset into the file-like object.
return gzip_compressed_stream

Raises:
IOError: if the file-like object has not been opened.
OSError: if the file-like object has not been opened.
"""
if not self._gzip_file_object:
raise IOError('Not opened.')
return self._current_offset
# Note that the following functions do not follow the style guide
# because they are part of the file-like object interface.
# pylint: disable=invalid-name

def get_size(self):
"""Retrieves the size of the file-like object.
Expand All @@ -166,46 +73,7 @@ def get_size(self):
IOError: if the file-like object has not been opened.
OSError: if the file-like object has not been opened.
"""
if not self._gzip_file_object:
if not self._is_open:
raise IOError('Not opened.')
return self.uncompressed_data_size

def _Close(self):
"""Closes the file-like object."""
self._members_by_end_offset = []
if self._gzip_file_object:
self._gzip_file_object.close()

def _Open(self, mode='rb'):
"""Opens the file-like object defined by path specification.
Args:
mode (Optional[str]): file access mode.
Raises:
AccessError: if the access to open the file was denied.
IOError: if the file-like object could not be opened.
OSError: if the file-like object could not be opened.
PathSpecError: if the path specification is incorrect.
"""
if not self._path_spec.HasParent():
raise errors.PathSpecError(
'Unsupported path specification without parent.')

self._gzip_file_object = resolver.Resolver.OpenFileObject(
self._path_spec.parent, resolver_context=self._resolver_context)
file_size = self._gzip_file_object.get_size()

self._gzip_file_object.seek(0, os.SEEK_SET)

uncompressed_data_offset = 0
next_member_offset = 0

while next_member_offset < file_size:
member = gzipfile.GzipMember(
self._gzip_file_object, next_member_offset, uncompressed_data_offset)
uncompressed_data_offset = (
uncompressed_data_offset + member.uncompressed_data_size)
self._members_by_end_offset[uncompressed_data_offset] = member
self.uncompressed_data_size += member.uncompressed_data_size
next_member_offset = member.member_end_offset
return self._file_object.uncompressed_data_size
Loading

0 comments on commit 3678944

Please sign in to comment.