Skip to content

Commit

Permalink
Merge pull request #10 from heikomuller/dev0.1.3
Browse files Browse the repository at this point in the history
Dev0.1.3
  • Loading branch information
heikomuller authored Oct 5, 2020
2 parents 0bf8f2b + 7c285c7 commit f9cce73
Show file tree
Hide file tree
Showing 27 changed files with 963 additions and 151 deletions.
11 changes: 8 additions & 3 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
# Data Frame History Store - Changelog

### 0.1.0 - 05-06-2020
### 0.1.0 - 2020-05-06

* Initial version. Support for snapshot archives in main memory and on the file system.


### 0.1.1 - 06-16-2020
### 0.1.1 - 2020-06-16

* Allow different types of input documents (e.g., CSV files or JSON)
* External merge-sort for large CSV files.
* Add managers for maintaining sets of archives


### 0.1.2 - 06-25-2020
### 0.1.2 - 2020-06-25

* Proper handling of date/time objects by the default archive reader and writer
* Optional arguments for JSON encoder and decoder for persistent archives
* Add encoder and decoder information to archive manager metadata
* Simple command-line interface for persistent archive manager


### 0.1.3 - 2020-10-05

* Add archive manager that maintains descriptors in a relational database (\#8)
2 changes: 1 addition & 1 deletion histore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# file LICENSE for full license details.

from histore.archive.base import Archive, PersistentArchive # noqa: F401
from histore.archive.manager.fs import PersistentArchiveManager # noqa: F401
from histore.archive.manager.persist import PersistentArchiveManager # noqa: F401, E501
16 changes: 9 additions & 7 deletions histore/archive/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,7 @@ class histore.document.schema.Column.
origin = last_snapshot.version
# Get a modified snapshot list where the last entry represents the
# new snapshot.
snapshots = self.snapshots().append(
valid_time=valid_time,
description=description
)
version = snapshots.last_snapshot().version
version = self.snapshots().next_version()
# Merge the new snapshot schema with the current archive schema.
schema, matched_columns, unchanged_columns = self.schema().merge(
columns=doc.columns,
Expand Down Expand Up @@ -220,9 +216,15 @@ class histore.document.schema.Column.
# for cleanup of temporary files.
doc.close()
# Commit all changes to the associated archive store.
self.store.commit(schema=schema, writer=writer, snapshots=snapshots)
snapshot = self.store.commit(
schema=schema,
writer=writer,
version=version,
valid_time=valid_time,
description=description
)
# Return descriptor for the created snapshot.
return snapshots.last_snapshot()
return snapshot

def diff(self, original_version, new_version):
"""Get provenance information representing the difference between two
Expand Down
32 changes: 21 additions & 11 deletions histore/archive/manager/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
"""Abstract class for managers that maintain a set of archives."""

from abc import ABCMeta, abstractmethod
from typing import Dict, List, Optional, Union

from histore.archive.base import Archive
from histore.archive.manager.descriptor import ArchiveDescriptor


class ArchiveManager(metaclass=ABCMeta): # pragma: no cover
Expand All @@ -17,7 +21,7 @@ class ArchiveManager(metaclass=ABCMeta): # pragma: no cover
maintained in an archive descriptor.
"""
@abstractmethod
def archives(self):
def archives(self) -> Dict[str, ArchiveDescriptor]:
"""Get dictionary of archive descriptors. The returned dictionary maps
archive identifiers to their descriptors.
Expand All @@ -27,7 +31,7 @@ def archives(self):
"""
raise NotImplementedError()

def contains(self, identifier):
def contains(self, identifier: str) -> bool:
"""Returns True if an archive with the given identifier exists.
Parameters
Expand All @@ -43,10 +47,12 @@ def contains(self, identifier):

@abstractmethod
def create(
self, name=None, description=None, primary_key=None, encoder=None,
decoder=None
):
"""Create a new archive object.
self, name: Optional[str] = None, description: Optional[str] = None,
primary_key: Optional[Union[List[str], str]] = None,
encoder: Optional[str] = None, decoder: Optional[str] = None
) -> ArchiveDescriptor:
"""Create a new archive object. Raises a ValueError if an archive with
the given name exists.
Parameters
----------
Expand All @@ -67,11 +73,15 @@ def create(
Returns
-------
histore.archive.manager.descriptor.ArchiveDescriptor
Raises
------
ValueError
"""
raise NotImplementedError()

@abstractmethod
def delete(self, identifier):
def delete(self, identifier: str):
"""Delete the archive with the given identifier.
Parameters
Expand All @@ -82,7 +92,7 @@ def delete(self, identifier):
raise NotImplementedError()

@abstractmethod
def get(self, identifier):
def get(self, identifier: str) -> Archive:
"""Get the archive that is associated with the given identifier. Raises
a ValueError if the identifier is unknown.
Expand All @@ -101,7 +111,7 @@ def get(self, identifier):
"""
raise NotImplementedError()

def get_by_name(self, name):
def get_by_name(self, name: str) -> ArchiveDescriptor:
"""Get descriptor for the archive with the given name. If no archive
with that name exists, None is returned.
Expand All @@ -119,7 +129,7 @@ def get_by_name(self, name):
return archive
return None

def list(self):
def list(self) -> List[ArchiveDescriptor]:
"""Get the list of descriptors for the maintained archives.
Returns
Expand All @@ -129,7 +139,7 @@ def list(self):
return list(self.archives().values())

@abstractmethod
def rename(self, identifier, name):
def rename(self, identifier: str, name: str):
"""Rename the specified archive. Raises a ValueError if the identifier
is unknown or if an archive with the given name exists.
Expand Down
6 changes: 6 additions & 0 deletions histore/archive/manager/db/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# This file is part of the History Store (histore).
#
# Copyright (C) 2018-2020 New York University.
#
# The History Store (histore) is released under the Revised BSD License. See
# file LICENSE for full license details.
Loading

0 comments on commit f9cce73

Please sign in to comment.