diff --git a/changelog.md b/changelog.md index 3927e01..8adfdc4 100644 --- a/changelog.md +++ b/changelog.md @@ -8,3 +8,11 @@ ### 0.1.1 - 2020-02-03 * Use `appdirs.user_cache_dir` as parent directory for the default target directory for downloaded files (\#5). + + +### 0.2.0 - 2020-02-16 + +* Repository index loader for different data sources. It is now possible to load the repository index from an Url, a locak file, or directly from a given dictionary. +* Support loading index files in Json or YAML format. +* Add package information and timestamp for downloaded datasets. +* Add optional value transformers to `distinct()` and `mapping()` methods of the `DatasetHandle` (\#4) diff --git a/docs/examples/Usage Example.ipynb b/docs/examples/Usage Example.ipynb index 9d22784..6e6ab26 100644 --- a/docs/examples/Usage Example.ipynb +++ b/docs/examples/Usage Example.ipynb @@ -31,9 +31,9 @@ "source": [ "# Create an instance of the local data store with default settings.\n", "\n", - "from refdata.store import LocalStore\n", + "from refdata.store import RefStore\n", "\n", - "refstore = LocalStore()" + "refstore = RefStore()" ] }, { @@ -89,51 +89,19 @@ "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "('a023b7d5233a4d35a15a11b2ec8b9cfa',\n", - " {'id': 'restcountries.eu',\n", - " 'name': 'REST Countries',\n", - " 'description': 'Information about countries in the world available from the restcountries.eu project.',\n", - " 'url': 'https://raw.githubusercontent.com/VIDA-NYU/openclean-reference-data/master/data/restcountries.eu.json',\n", - " 'checksum': '5893ebfad649533ac82a0b030a24efdd519f95a8b030a5ac9c7df37e85aad005',\n", - " 'webpage': 'https://restcountries.eu/',\n", - " 'schema': [{'id': 'name',\n", - " 'name': 'Name',\n", - " 'description': 'Country name',\n", - " 'dtype': 'text'},\n", - " {'id': 'alpha2Code',\n", - " 'name': 'Country Code (2-letters)',\n", - " 'description': 'ISO 3166-1 2-letter country code',\n", - " 'dtype': 'text'},\n", - " {'id': 'alpha3Code',\n", - " 'name': 'Country Code (3-letters)',\n", - " 'description': 'ISO 3166-1 3-letter country code',\n", - " 'dtype': 'text'},\n", - " {'id': 'capital',\n", - " 'name': 'Capital',\n", - " 'description': 'Capital city',\n", - " 'dtype': 'text'},\n", - " {'id': 'region',\n", - " 'name': 'Region',\n", - " 'description': 'World region',\n", - " 'dtype': 'text'},\n", - " {'id': 'subregion',\n", - " 'name': 'Sub-Region',\n", - " 'description': 'Sub-region within the country region',\n", - " 'dtype': 'text'}],\n", - " 'format': {'type': 'json', 'parameters': {}}})" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "downloaded dataset restcountries.eu (size 316025 bytes).\n" + ] } ], "source": [ "# Download the restcountries dataset\n", "\n", - "refstore.download('restcountries.eu')" + "dataset = refstore.download('restcountries.eu')\n", + "\n", + "print('downloaded dataset {} (size {} bytes).'.format(dataset.identifier, dataset.filesize))" ] }, { @@ -148,8 +116,8 @@ "text": [ "Downloaded datasets:\n", "\n", - "> REST Countries (id=restcountries.eu)\n", - "> Cities in the U.S. (id=encyclopaedia_britannica:us_cities)\n" + "> Cities in the U.S. 
(id=encyclopaedia_britannica:us_cities)\n", + "> REST Countries (id=restcountries.eu)\n" ] } ], @@ -188,7 +156,7 @@ "# in the restcountries dataset.\n", "\n", "print('Columns:\\n')\n", - "for col in refstore.open('restcountries.eu').columns:\n", + "for col in refstore.load('restcountries.eu').columns:\n", " print(' {} (id={})'.format(col.name, col.identifier))" ] }, @@ -261,7 +229,7 @@ "\n", "import json\n", "\n", - "print(json.dumps(refstore.open('restcountries.eu').to_dict(), indent=4))" + "print(json.dumps(refstore.load('restcountries.eu').to_dict(), indent=4))" ] }, { @@ -274,7 +242,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[]\n" + "encyclopaedia_britannica:us_cities\n" ] } ], @@ -283,7 +251,8 @@ "\n", "refstore.remove('restcountries.eu')\n", "\n", - "print(refstore.list())" + "for dataset in refstore.list():\n", + " print(dataset.identifier)" ] }, { @@ -386,7 +355,7 @@ "# which will download the datast if it is no in the local\n", "# store.\n", "\n", - "dataset = refstore.open('encyclopaedia_britannica:us_cities', auto_download=True)\n", + "dataset = refstore.load('encyclopaedia_britannica:us_cities', auto_download=True)\n", "# Alternative shortcut:\n", "# refstore.distinct(key='encyclopaedia_britannica:us_cities', columns='state')\n", "\n", @@ -412,7 +381,7 @@ { "data": { "text/plain": [ - "'Canberra'" + "'CANBERRA'" ] }, "execution_count": 9, @@ -423,13 +392,14 @@ "source": [ "# Get a lookup table (dictionary) that maps the\n", "# ISO 3166-1 3-letter country code to the country's\n", - "# captital city\n", + "# captital city. Convert values from both attributes\n", + "# to upper case before adding them to the mapping.\n", "\n", - "dataset = refstore.open('restcountries.eu', auto_download=True)\n", + "dataset = refstore.load('restcountries.eu', auto_download=True)\n", "# Alternative shortcut:\n", "# refstore.mapping(key='restcountries.eu', lhs='alpha3Code', rhs='capital')\n", "\n", - "mapping = dataset.mapping(lhs='alpha3Code', rhs='capital')\n", + "mapping = dataset.mapping(lhs='alpha3Code', rhs='capital', transformer=str.upper)\n", "\n", "mapping['AUS']" ] @@ -529,11 +499,11 @@ "# Get data frame with country name, 3-letter country code,\n", "# and capital city.\n", "\n", - "dataset = refstore.open('restcountries.eu', auto_download=True)\n", + "dataset = refstore.load('restcountries.eu', auto_download=True)\n", "# Alternative shortcut:\n", "# refstore.load('restcountries.eu', ['name', 'alpha3Code', 'capital'])\n", "\n", - "df = dataset.data_frame(['name', 'alpha3Code', 'capital'])\n", + "df = dataset.df(['name', 'alpha3Code', 'capital'])\n", "\n", "df.head()" ] diff --git a/refdata/cli/repo.py b/refdata/cli/repo.py index 1279a33..3e95c77 100644 --- a/refdata/cli/repo.py +++ b/refdata/cli/repo.py @@ -8,8 +8,11 @@ """Commands that interact with a repository index.""" import click +import tableprint as tp -from refdata.repo import RepositoryManager, validate +from refdata.repo.loader import DictLoader, UrlLoader +from refdata.repo.manager import RepositoryManager +from refdata.repo.schema import validate import refdata.cli.util as util @@ -26,9 +29,24 @@ def cli_repo(): @click.option('-i', '--index', required=False, help='Repository index file') def list_repository(index): """List repository index content.""" - # Read the index of given. - doc = util.read_index(index) if index is not None else None - util.print_datasets(RepositoryManager(doc=doc).find()) + # Read the index from the optional file or Url. 
By default, the index that + # is specified in the environment is loaded. + loader = DictLoader(util.read_index(index)) if index is not None else UrlLoader() + datasets = RepositoryManager(doc=loader.load()).find() + headers = ['Identifier', 'Name', 'Description'] + data = list() + # Maintain the maximum with for each columns. + widths = [len(h) + 1 for h in headers] + # Sort datasets by name before output. + for dataset in sorted(datasets, key=lambda d: d.name): + desc = dataset.description if dataset.description is not None else '' + row = [dataset.identifier, dataset.name, desc] + for i in range(len(row)): + w = len(row[i]) + 1 + if w > widths[i]: + widths[i] = w + data.append(row) + tp.table(data, headers=headers, width=widths, style='grid', out=util.TPrinter()) @cli_repo.command(name='show') @@ -37,9 +55,10 @@ def list_repository(index): @click.argument('key') def show_dataset(index, raw, key): """Show dataset descriptor from repository index.""" - # Read the index of given. - doc = util.read_index(index) if index is not None else None - util.print_dataset(dataset=RepositoryManager(doc=doc).get(key), raw=raw) + # Read the index from the optional file or Url. By default, the index that + # is specified in the environment is loaded. + loader = DictLoader(util.read_index(index)) if index is not None else UrlLoader() + util.print_dataset(dataset=RepositoryManager(doc=loader.load()).get(key), raw=raw) @cli_repo.command(name='validate') diff --git a/refdata/cli/store.py b/refdata/cli/store.py index d08cbea..bca7082 100644 --- a/refdata/cli/store.py +++ b/refdata/cli/store.py @@ -7,10 +7,13 @@ """Commands that interact with the local data store.""" +from datasize import DataSize + import click +import tableprint as tp -from refdata.repo import RepositoryManager -from refdata.store.base import LocalStore +from refdata.repo.loader import DictLoader, UrlLoader +from refdata.store.base import RefStore import refdata.cli.util as util @@ -31,8 +34,8 @@ def cli_store(): def download_dataset(basedir, db, index, key): """List local store content.""" # Read the index of given. - doc = util.read_index(index) if index is not None else None - store = LocalStore(basedir=basedir, repo=RepositoryManager(doc=doc), connect_url=db) + loader = DictLoader(util.read_index(index)) if index is not None else UrlLoader() + store = RefStore(basedir=basedir, loader=loader, connect_url=db) store.download(key) @@ -43,9 +46,27 @@ def download_dataset(basedir, db, index, key): def list_datasets(basedir, db, index): """List local store content.""" # Read the index of given. - doc = util.read_index(index) if index is not None else None - store = LocalStore(basedir=basedir, repo=RepositoryManager(doc=doc), connect_url=db) - util.print_datasets(store.list()) + loader = DictLoader(util.read_index(index)) if index is not None else UrlLoader() + store = RefStore(basedir=basedir, loader=loader, connect_url=db) + datasets = store.list() + headers = ['Name', 'Size', 'Downloaded', 'Package'] + data = list() + # Maintain the maximum with for each columns. + widths = [len(h) + 1 for h in headers] + # Sort datasets by name before output. 
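+    # Each row lists the dataset identifier, the downloaded file size in
+    # human-readable form, the download timestamp truncated to seconds, and
+    # the name and version of the package that downloaded the dataset.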
+ for dataset in sorted(datasets, key=lambda d: d.name): + row = [ + dataset.identifier, + '{:.2a}'.format(DataSize(dataset.filesize)), + ' '.join(dataset.created_at.isoformat()[:19].split('T')), + '{} {}'.format(dataset.package_name, dataset.package_version) + ] + for i in range(len(row)): + w = len(row[i]) + 1 + if w > widths[i]: + widths[i] = w + data.append(row) + tp.table(data, headers=headers, width=widths, style='grid', out=util.TPrinter()) @cli_store.command(name='remove') @@ -61,8 +82,8 @@ def remove_dataset(basedir, db, index, force, key): msg = "Do you really want to remove dataset '{}'".format(key) click.confirm(msg, default=True, abort=True) # Read the index of given. - doc = util.read_index(index) if index is not None else None - store = LocalStore(basedir=basedir, repo=RepositoryManager(doc=doc), connect_url=db) + loader = DictLoader(util.read_index(index)) if index is not None else UrlLoader() + store = RefStore(basedir=basedir, loader=loader, connect_url=db) store.remove(key) @@ -75,6 +96,6 @@ def remove_dataset(basedir, db, index, force, key): def show_dataset(basedir, db, index, raw, key): """Show descriptor for downloaded dataset.""" # Read the index of given. - doc = util.read_index(index) if index is not None else None - store = LocalStore(basedir=basedir, repo=RepositoryManager(doc=doc), connect_url=db) - util.print_dataset(dataset=store.open(key), raw=raw) + loader = DictLoader(util.read_index(index)) if index is not None else UrlLoader() + store = RefStore(basedir=basedir, loader=loader, connect_url=db) + util.print_dataset(dataset=store.load(key), raw=raw) diff --git a/refdata/cli/util.py b/refdata/cli/util.py index 3064e3e..f1e7df0 100644 --- a/refdata/cli/util.py +++ b/refdata/cli/util.py @@ -9,41 +9,22 @@ line interface. """ -from typing import Dict, List +from typing import Dict import click import json -import os from refdata.base import DatasetDescriptor -from refdata.repo import download_index +from refdata.repo.loader import FileLoader, UrlLoader -def print_datasets(datasets: List[DatasetDescriptor]): - """Print a listing of datasets to the console. +class TPrinter: + """Wrapper around `click.echo` for table printing.""" + def write(self, s): + click.echo(s) - Outputs the identifier, name and description for each dataset in the given - list. Datasets are sorted by their name. - - Parameters - ---------- - datasets: list of refdata.base.DatasetDescriptor - List of dataset descriptors. - """ - # Compute maximal length of values for the dataset identifier, name and - # description. The length values are used to align the output. - id_len = max([len(d.identifier) for d in datasets] + [10]) - name_len = max([len(d.name) for d in datasets] + [4]) - desc_len = max([len(d.description) for d in datasets if d.description is not None] + [11]) - # Create the output template with all values left aligned. - template = '{:<' + str(id_len) + '} | {:<' + str(name_len) + '} | {:<' + str(desc_len) + '}' - click.echo() - click.echo(template.format('Identifier', 'Name', 'Description')) - click.echo(template.format('-' * id_len, '-' * name_len, '-' * desc_len)) - # Sort datasets by name before output. 
- for dataset in sorted(datasets, key=lambda d: d.name): - desc = dataset.description if dataset.description is not None else '' - click.echo(template.format(dataset.identifier, dataset.name, desc)) + def flush(self): + pass def print_dataset(dataset: DatasetDescriptor, raw: bool): @@ -88,8 +69,11 @@ def print_dataset(dataset: DatasetDescriptor, raw: bool): def read_index(filename: str) -> Dict: - """Read a repository index file. The filename may either reference a file - on the local file system or is expected to be an Url. + """Read a repository index file. + + The filename may either reference a file on the local file system or is + expected to be an Url. Attempts to read a file first and then load the + Url if an error occured while loading the file. Parameters ---------- @@ -101,8 +85,7 @@ def read_index(filename: str) -> Dict: dict """ try: - with open(filename, 'r') as f: - return json.load(f) - except OSError as ex: - print(ex) - return download_index(url=filename) + return FileLoader(filename).load() + except (IOError, OSError): + pass + return UrlLoader(url=filename).load() diff --git a/refdata/config.py b/refdata/config.py index 9252475..c129778 100644 --- a/refdata/config.py +++ b/refdata/config.py @@ -51,7 +51,7 @@ def AUTO_DOWNLOAD() -> bool: def BASEDIR() -> str: """Get the current value for the environment variable REFDATA_BASEDIR. - If the value is not set (missing or empty) the folder `.refdata` in the + If the value is not set (missing or empty) the folder `refdata` in the OS-specific data cache directory for the current user is used as the default. diff --git a/refdata/dataset/__init__.py b/refdata/dataset/__init__.py new file mode 100644 index 0000000..35d3926 --- /dev/null +++ b/refdata/dataset/__init__.py @@ -0,0 +1,5 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the diff --git a/refdata/store/dataset.py b/refdata/dataset/base.py similarity index 61% rename from refdata/store/dataset.py rename to refdata/dataset/base.py index 9977008..971a7ef 100644 --- a/refdata/store/dataset.py +++ b/refdata/dataset/base.py @@ -5,14 +5,22 @@ # refdata is free software; you can redistribute it and/or modify it under the # terms of the MIT License; see LICENSE file for more details. -from typing import Dict, List, Optional, Set, Union +"""Base classes for handles that provide access to datasets that have been +downloaded to the local data store. +""" +from dateutil.parser import isoparse +from typing import Callable, Dict, IO, List, Optional, Set, Tuple, Union + +import datetime import gzip +import os import pandas as pd from refdata.base import DatasetDescriptor -from refdata.loader import CSVLoader, JsonLoader -from refdata.loader.consumer import DataConsumer, DataFrameGenerator, DistinctSetGenerator, MappingGenerator +from refdata.dataset.consumer import DataConsumer, DataFrameGenerator, DistinctSetGenerator, MappingGenerator +from refdata.dataset.csv_loader import CSVLoader +from refdata.dataset.json_loader import JsonLoader import refdata.error as err @@ -21,19 +29,34 @@ class DatasetHandle(DatasetDescriptor): """Handle for a dataset in the local data store. Provides the functionality to read data in different formats from the downloaded data file. 
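+    In addition to the descriptor, the handle provides the name and version
+    of the package that downloaded the dataset and the download timestamp.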
""" - def __init__(self, doc: Dict, datafile: str): + def __init__( + self, descriptor: Dict, package_name: str, package_version: str, + created_at: datetime.datetime, datafile: str + ): """Initialize the descriptor information and the path to the downloaded - data file. This will also create an instance of the dataset loader that - is dependent on the dataset format. + data file. + + This will also create an instance of the dataset loader that is used + for reading the data file dependent on the dataset format. Parameters ---------- - doc: dict + descriptor: dict Dictionary serialization for the dataset descriptor. + package_name: string + Name of the package that downloaded the dataset. + package_version: string + Version information for the package that downloaded the dataset. + created_at: str + Timestamp (in UTC) when the dataset was downloaded. datafile: string Path to the downloaded file. """ - super(DatasetHandle, self).__init__(doc=doc) + super(DatasetHandle, self).__init__(doc=descriptor) + self.package_name = package_name + self.package_version = package_version + # Convert the timestamp from UTC to local time. + self.created_at = isoparse(created_at).astimezone() self.datafile = datafile # Create the format-dependent instance of the dataset loader. parameters = self.format @@ -47,7 +70,7 @@ def __init__(self, doc: Dict, datafile: str): else: raise err.InvalidFormatError("unknown format '{}'".format(parameters.format_type)) - def data_frame(self, columns: Optional[List[str]] = None) -> pd.DataFrame: + def df(self, columns: Optional[List[str]] = None) -> pd.DataFrame: """Load dataset as a pandas data frame. This is a shortcut to load all (or a given selection of) columns in @@ -70,7 +93,10 @@ def data_frame(self, columns: Optional[List[str]] = None) -> pd.DataFrame: consumer = DataFrameGenerator(columns=columns) return self.load(columns=columns, consumer=consumer).to_df() - def distinct(self, columns: Optional[Union[str, List[str]]] = None) -> Set: + def distinct( + self, columns: Optional[Union[str, List[str]]] = None, + transformer: Optional[Callable] = None + ) -> Set: """Get the set of distinct values from the specified column(s) in the dataset. @@ -81,11 +107,18 @@ def distinct(self, columns: Optional[Union[str, List[str]]] = None) -> Set: If more than one column is specified the elements in the returned set are tuples of values. + If the optional transformer is given it will be evaluated on the individual + values that are extracted from the columns before adding them to the set + of unique values. + Parameters ---------- columns: string or list of string, default=None Column identifier defining the values that are added to the generated set of distinct values. + transformer: callable, default=None + Optional transformer function that is evaluated on column values + before adding them to the set of distinct values. Returns ------- @@ -96,7 +129,12 @@ def distinct(self, columns: Optional[Union[str, List[str]]] = None) -> Set: columns = columns if columns is not None else [c.identifier for c in self.columns] # Ensure that columns are a list. 
columns = columns if isinstance(columns, list) else [columns] - return self.load(columns=columns, consumer=DistinctSetGenerator()).to_set() + consumer = DistinctSetGenerator(transformer=transformer) + return self.load(columns=columns, consumer=consumer).to_set() + + @property + def filesize(self) -> int: + return os.stat(self.datafile).st_size def load(self, columns: List[str], consumer: DataConsumer) -> DataConsumer: """Load data for the specified columns from the downloaded dataset @@ -113,12 +151,12 @@ def load(self, columns: List[str], consumer: DataConsumer) -> DataConsumer: columns: list of string Column identifier defining the content and the schema of the returned data. - consumer: refdata.loader.consumer.DataConsumer + consumer: refdata.dataset.consumer.DataConsumer Consumer for data rows that are being read. Returns ------- - refdata.loader.consumer.DataConsumer + refdata.dataset.consumer.DataConsumer """ # Open the file depending on whether it is compressed or not. By now, # we only support gzip compression. @@ -135,6 +173,7 @@ def load(self, columns: List[str], consumer: DataConsumer) -> DataConsumer: def mapping( self, lhs: Union[str, List[str]], rhs: Union[str, List[str]], + transformer: Optional[Union[Callable, Tuple[Callable, Callable]]] = None, ignore_equal: Optional[bool] = True ) -> Dict: """Generate a mapping from values in dataset rows. @@ -146,6 +185,12 @@ def mapping( This is a shortcut to load column values using the mapping generator as the data consumer. + It the optional transformer is given it is evaluated on column values + before adding them to the mapping. If a single callable is given, that + function is evalauated on the lhs and rhs columns. If a 2-tuple of + callables is given, the first function is evalauted on lhs columns and + the second function on rhs. columns. + Parameters ---------- lhs: string or list of string @@ -154,6 +199,9 @@ def mapping( rhs: string or list of string Columns defining the source of values for the right-hand side of the mapping. + transformer: callable or tuple of callable, default=None + Optional transformer function(s) that are evaluated on the values + for lhs and rhs columns before adding them to the mapping. ignore_equal: bool, default=True Exclude mappings from a value to itself from the created mapping. @@ -164,5 +212,23 @@ def mapping( # Ensure that lhs and rhs are lists. lhs = lhs if isinstance(lhs, list) else [lhs] rhs = rhs if isinstance(rhs, list) else [rhs] - consumer = MappingGenerator(split_at=len(lhs), ignore_equal=ignore_equal) + consumer = MappingGenerator( + split_at=len(lhs), + transformer=transformer, + ignore_equal=ignore_equal + ) return self.load(columns=lhs + rhs, consumer=consumer).to_mapping() + + def open(self) -> IO: + """Open the downloaded data file for the dataset. + + Returns + ------- + file-like object + """ + # Open the file depending on whether it is compressed or not. By now, + # we only support gzip compression. 
+ if self.compression == 'gzip': + return gzip.open(self.datafile, 'rt') + else: + return open(self.datafile, 'rt') diff --git a/refdata/loader/consumer.py b/refdata/dataset/consumer.py similarity index 80% rename from refdata/loader/consumer.py rename to refdata/dataset/consumer.py index 5651230..e6595f7 100644 --- a/refdata/loader/consumer.py +++ b/refdata/dataset/consumer.py @@ -10,7 +10,7 @@ """ from abc import ABCMeta, abstractmethod -from typing import Any, Dict, List, Optional, Set +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union import pandas as pd @@ -114,9 +114,18 @@ class DistinctSetGenerator(DataConsumer): If the loader is reading multiple columns, a tuple of values is generated for each row before adding it to the set of distinct values. """ - def __init__(self): - """Initialize the empty set of distinct values.""" + def __init__(self, transformer: Optional[Callable] = None): + """Initialize the empty set of distinct values and the optional value + transformer. + + Parameters + ---------- + transformer: callable, default=None + Optional transformer function that is evaluated on column values + before adding them to the set of distinct values. + """ self.values = set() + self.transformer = transformer def consume(self, row: List): """Add the given row to the internal set of distinct values. @@ -132,7 +141,7 @@ def consume(self, row: List): List of column values for row in a dataset that is being read by a dataset loader. """ - self.values.add(to_value(row)) + self.values.add(to_value(row=row, transformer=self.transformer)) def to_set(self) -> Set: """Get the set of distinct values that has been created by the consumer @@ -154,7 +163,11 @@ class MappingGenerator(DataConsumer): mapping. If either side of the mapping involves multiple columns, a tuple of values for these columns is added to the mapping. """ - def __init__(self, split_at: int, ignore_equal: Optional[bool] = True): + def __init__( + self, split_at: int, + transformer: Optional[Union[Callable, Tuple[Callable, Callable]]] = None, + ignore_equal: Optional[bool] = True + ): """Initialize the dictionary for the mapping and the column index that separates the values in the left-hand side of the mapping from those in the right-hand side. @@ -168,11 +181,15 @@ def __init__(self, split_at: int, ignore_equal: Optional[bool] = True): split_at: int Columns index position at which rows are divided into left-hand side and right-hand side of the mapping. + transformer: callable or tuple of callable, default=None + Optional transformer function(s) that are evaluated on the values + for lhs and rhs columns before adding them to the mapping. ignore_equal: bool, default=True Exclude mappings from a value to itself from the created mapping. """ self.mapping = dict() self.split_at = split_at + self.transformer = transformer self.ignore_equal = ignore_equal def consume(self, row: List): @@ -192,8 +209,16 @@ def consume(self, row: List): List of column values for row in a dataset that is being read by a dataset loader. """ - lhs = to_value(row[:self.split_at]) - rhs = to_value(row[self.split_at:]) + # Set transformers for lhs and rhs columns. 
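+        # A 2-tuple of callables provides separate transformers for the
+        # left-hand side and right-hand side columns; a single callable is
+        # applied to both sides.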
+ transform_lhs, transform_rhs = None, None + if self.transformer is not None: + if isinstance(self.transformer, tuple): + transform_lhs, transform_rhs = self.transformer + else: + transform_lhs = self.transformer + transform_rhs = self.transformer + lhs = to_value(row[:self.split_at], transformer=transform_lhs) + rhs = to_value(row[self.split_at:], transformer=transform_rhs) # Ignore the row id lhs and rhs are equal and ignore_equal flag is True. if lhs == rhs and self.ignore_equal: return @@ -219,10 +244,13 @@ def to_mapping(self) -> Dict: # -- Helper Functions --------------------------------------------------------- -def to_value(row: List) -> Any: - """Convert a given list of values into a scalar value or tuple. If the given - list contains a single element that element is returned. Otherwise, a tuple - of the values in the list is returned. +def to_value(row: List, transformer: Optional[Callable] = None) -> Any: + """Convert a given list of values into a scalar value or tuple. + + If the given list contains a single element that element is returned. + Otherwise, a tuple of the values in the list is returned. + + The optional tranformer is applied to all list values individually. Parameters ---------- @@ -233,4 +261,5 @@ def to_value(row: List) -> Any: ------- any """ + row = row if transformer is None else list(map(transformer, row)) return row[0] if len(row) == 1 else tuple(row) diff --git a/refdata/loader/csv_loader.py b/refdata/dataset/csv_loader.py similarity index 94% rename from refdata/loader/csv_loader.py rename to refdata/dataset/csv_loader.py index d6772aa..3f93aaa 100644 --- a/refdata/loader/csv_loader.py +++ b/refdata/dataset/csv_loader.py @@ -11,8 +11,8 @@ import csv -from refdata.loader.base import DatasetLoader -from refdata.loader.consumer import DataConsumer +from refdata.dataset.loader import DatasetLoader +from refdata.dataset.consumer import DataConsumer from refdata.base import FormatDescriptor @@ -65,12 +65,12 @@ def read(self, file: IO, columns: List[str], consumer: DataConsumer) -> DataCons Open file object. columns: list of string Identifier of columns that are contained in the output. - consumer: refdata.loader.consumer.DataConsumer + consumer: refdata.dataset.consumer.DataConsumer Consumer for data rows that are being read. Returns ------- - refdata.loader.consumer.DataConsumer + refdata.dataset.consumer.DataConsumer """ reader = csv.reader(file, delimiter=self.delim) # Skip the first row the it contains the dataset header. diff --git a/refdata/loader/json_loader.py b/refdata/dataset/json_loader.py similarity index 97% rename from refdata/loader/json_loader.py rename to refdata/dataset/json_loader.py index 45fc871..34cbd70 100644 --- a/refdata/loader/json_loader.py +++ b/refdata/dataset/json_loader.py @@ -12,8 +12,8 @@ import json from refdata.base import FormatDescriptor -from refdata.loader.consumer import DataConsumer -from refdata.loader.base import DatasetLoader +from refdata.dataset.consumer import DataConsumer +from refdata.dataset.loader import DatasetLoader class JsonLoader(DatasetLoader): @@ -68,7 +68,7 @@ def read(self, file: IO, columns: List[str], consumer: DataConsumer) -> DataCons columns: list of string Column identifier defining the content and the schema of the returned data. - consumer: refdata.loader.consumer.DataConsumer + consumer: refdata.dataset.consumer.DataConsumer Consumer for data rows that are being read. 
Returns diff --git a/refdata/loader/base.py b/refdata/dataset/loader.py similarity index 92% rename from refdata/loader/base.py rename to refdata/dataset/loader.py index 089561c..02fd4e8 100644 --- a/refdata/loader/base.py +++ b/refdata/dataset/loader.py @@ -13,7 +13,7 @@ from abc import ABCMeta, abstractmethod from typing import IO, List -from refdata.loader.consumer import DataConsumer +from refdata.dataset.consumer import DataConsumer class DatasetLoader(metaclass=ABCMeta): @@ -44,11 +44,11 @@ def read(self, file: IO, columns: List[str], consumer: DataConsumer) -> DataCons columns: list of string Column identifier defining the content and the schema of the returned data. - consumer: refdata.loader.consumer.DataConsumer + consumer: refdata.dataset.consumer.DataConsumer Consumer for data rows that are being read. Returns ------- - refdata.loader.consumer.DataConsumer + refdata.dataset.consumer.DataConsumer """ raise NotImplementedError() # pragma: no cover diff --git a/refdata/db.py b/refdata/db.py index 02af7bd..9ec8c79 100644 --- a/refdata/db.py +++ b/refdata/db.py @@ -12,6 +12,8 @@ import json import uuid +from datetime import datetime +from dateutil.tz import UTC from sqlalchemy import Column, String, create_engine from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import sessionmaker @@ -54,12 +56,25 @@ def process_result_value(self, value, dialect): return json.loads(value) +def local_time() -> str: + """Get the current time as a string in ISO format. + + Returns + ------- + string + """ + return datetime.now(UTC).isoformat() + + class Dataset(Base): """Descriptor for dataset that has been downloaded to the local data store. Each dataset has two identifier, (i) the identifier that is part of the dataset descriptor (`key`), and (ii) an internal identifier (`dataset_id`). Users will reference datasets by their key. The internal dataset identifier specifies the subfolder under which the dataset files are stored. + + With each downloaded dataset we maintain a reference to the package that + created the local store instance an initiated the download. """ # -- Schema --------------------------------------------------------------- __tablename__ = 'dataset' @@ -67,6 +82,9 @@ class Dataset(Base): dataset_id = Column(String(32), default=DATASET_ID, primary_key=True) key = Column(String(1024), nullable=False, unique=True) descriptor = Column(JsonObject, nullable=False) + package_name = Column(String(256), nullable=False) + package_version = Column(String(256), nullable=False) + created_at = Column(String(32), default=local_time, nullable=False) # -- Database Object ---------------------------------------------------------- diff --git a/refdata/loader/__init__.py b/refdata/repo/__init__.py similarity index 67% rename from refdata/loader/__init__.py rename to refdata/repo/__init__.py index ea27325..cbacb87 100644 --- a/refdata/loader/__init__.py +++ b/refdata/repo/__init__.py @@ -5,5 +5,5 @@ # refdata is free software; you can redistribute it and/or modify it under the # terms of the MIT License; see LICENSE file for more details. 
-from refdata.loader.csv_loader import CSVLoader -from refdata.loader.json_loader import JsonLoader +from refdata.repo.manager import RepositoryManager # noqa: F401 +from refdata.repo.schema import validate # noqa: F401 diff --git a/refdata/repo/loader.py b/refdata/repo/loader.py new file mode 100644 index 0000000..e4d9eba --- /dev/null +++ b/refdata/repo/loader.py @@ -0,0 +1,233 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""The repository loader serves two purposes. First, it provides an interface +for loading (or creating) repository indexes from different sources (e.g., from +an Url or a file on the local file system) and (if required) in different +formats than the official repository index schema. Second, it supports deferred +loading of the dataset index. Deferred loading is used by the local data store +to defer loading of the associated repository index until it is first being +accesed. + +This module provides default implementations for loading a repository index +in the default schema from an Url, a local file, or from a dictionary. It also +provides implementations for the loader of a federated index file and the +loader for the default federated index. +""" + + +from abc import ABCMeta, abstractmethod +from typing import Dict, Optional + +import json +import requests +import yaml + +import refdata.config as config + + +"""Definition of valid file format identifier.""" +FORMAT_JSON = 'json' +FORMAT_YAML = 'yaml' +FILE_FORMATS = [FORMAT_JSON, FORMAT_YAML] + + +class RepositoryIndexLoader(metaclass=ABCMeta): + """Interface for the repository index loader. + + Provides a single method `load` that is expected to return a dictionary + containing a list of dataset descriptors that adheres to the `RepositoryIndex` + structure defined in the repository schema. + + Different implementations will (a) read the data from different sources, + and (b) transform the read data if necessary into the expected format. + """ + @abstractmethod + def load(self) -> Dict: + """Load a repository index from a data source. + + Returns a dictionary that adheres to the `RepositoryIndex` structure + defined in the repository schema. + + Returns + ------- + dict + """ + raise NotImplementedError() # pragma: no cover + + +# -- Helper Functions --------------------------------------------------------- + +class DictLoader(RepositoryIndexLoader): + """Repository index loader that is a wrapper around a dictionary containing + a list of serialized file descriptors. Loading the index will simply return + the wrapped dictionary. + """ + def __init__(self, doc: Dict): + """Initialize the dictionary containing the serialized repository index. + + Parameters + ---------- + doc: dict + Dictionary containing a single element `datasets` with a list of + serialized dataset descriptors. + """ + self.doc = doc + + def load(self) -> Dict: + """Loading the repository index will return the dictionary that was + given when the object was instantiated. + + Returns + ------- + dict + """ + return self.doc + + +class FileLoader(RepositoryIndexLoader): + """Load repository index from a file on the local file system. + + Supports loading files in Json or YAML format. The file format is specified + as as an optional argument with file types being identified as `json` or + `yaml`. 
If the file format argument is not given an attempt is made to + guess the format from the file suffix. The default format is `json`. + + The file loader currently does not follow references to federated + repositories that may be listed in optional `repositories` element of the + loaded index file. + """ + def __init__(self, filename: str, ftype: Optional[str] = None): + """Initialize the path to the file containing the repository index and + the optional file format identifier. + + Raises a ValueError if an invalid file format is given. Valid format + identifier are `json` or `yaml`. + + Parameters + ---------- + filename: string + Path to file on the file system. + ftype: string, default='json' + Identifier for the file format. + """ + self.filename = filename + self.ftype = get_file_format(ftype=ftype, filename=filename) + + def load(self) -> Dict: + """Read repository index from file. + + Returns + ------- + dict + """ + with open(self.filename, 'r') as f: + if self.ftype == FORMAT_YAML: + return yaml.load(f.read(), Loader=yaml.FullLoader) + else: + return json.load(f) + + +class UrlLoader(RepositoryIndexLoader): + """Repository index loader that reads data from a given Url. + + Uses the Url that is specified in the environment variable *REFDATA_URL* if + no Url is given when the class is initialized. + + Supports loading files in Json or YAML format. The file format is specified + as as an optional argument with file types being identified as `json` or + `yaml`. If the file format argument is not given an attempt is made to + guess the format from the file suffix. The default format is `json`. + + The UrlLoader recursively follows links to federated repositories in the + optional `repositories` list for a read index file. + """ + def __init__(self, url: Optional[str] = None, ftype: Optional[str] = None): + """Initialize the Url for the repository index file and the file format. + + Uses the Url that is specified in the environment variable *REFDATA_URL* + as default. + + Raises a ValueError if an invalid file format is given. Valid format + identifier are `json` or `yaml`. + + Parameters + ---------- + url: string, default=None + Url pointing to the repository index document. + ftype: string, default=None + Identifier for the file format. + """ + self.url = url if url is not None else config.URL() + self.ftype = get_file_format(ftype=ftype, filename=self.url) + + def load(self) -> Dict: + """Download the repository index file from the given Url. + + Recursively follwos references to other repositories in the optional + `repositories` list of the downloaded index file. + + Returns + ------- + dict + """ + # Load the index file. Raises an error if the HTTP request is not + # successful. + r = requests.get(self.url) + r.raise_for_status() + # Load response body depending on the specified file format type. + if self.ftype == FORMAT_YAML: + body = yaml.load(r.content, Loader=yaml.FullLoader) + else: + body = r.json() + # Create the result containing only the dataset descriptors. + datasets = body.get('datasets', list()) + # Recursively read additional federated repositories that are specified + # in the 'repositories' list and add their datasets to the returned + # result. 
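+        # Note that nested repositories are followed without any cycle
+        # detection or duplicate elimination.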
+ for url in body.get('repositories', list()): + for obj in UrlLoader(url=url).load().get('datasets'): + datasets.append(obj) + return {'datasets': datasets} + + +# -- Helper Functions --------------------------------------------------------- + +def get_file_format(ftype: str, filename: str) -> str: + """Get the file format identifier. + + If the `ftype` is given it is verified that a valid file format identifier + is specified. Valid format identifier are `json` and `yaml`. If the identifier + is valid it is returned. Otherwise, a ValueError is raised. + + If the format identifier is not given, an attempt is made to *guess* the + format from the suffix of the given file name or Url. Files ending with + `.json` are assumed to be in Json format and files ending in `.yml` or + `.yaml` are assumed to be in YAML format. Files that do not match either + of these suffixes are assumed to be in Json format. + + Parameters + ---------- + ftype: string + Identifier for the file format. The giben value may be None. + filename: string + Path to file on the file system or Url. + + Returns + ------- + string + """ + if ftype is not None: + if ftype not in FILE_FORMATS: + raise ValueError("unknown file format '{}'".format(ftype)) + return ftype + if '.' in filename: + # Get the file suffix. + suffix = filename.lower().split('.')[-1] + if suffix in ['yml', 'yaml']: + return FORMAT_YAML + return FORMAT_JSON diff --git a/refdata/repo.py b/refdata/repo/manager.py similarity index 55% rename from refdata/repo.py rename to refdata/repo/manager.py index ab7cda0..f24221d 100644 --- a/refdata/repo.py +++ b/refdata/repo/manager.py @@ -9,18 +9,10 @@ available for download in the Reference Data Repository. """ -from jsonschema import Draft7Validator, RefResolver from typing import Dict, List, Optional, Set, Union -import importlib.resources as pkg_resources -import os -import requests -import yaml - from refdata.base import DatasetDescriptor -import refdata.config as config - class RepositoryManager: """The repository manager provides the functionality for querying a @@ -28,10 +20,8 @@ class RepositoryManager: By default, the index that the environment variable REFDATA_URL or its default value points to is read. """ - def __init__(self, doc: Optional[Dict] = None): - """Initialize the index of dataset descriptors. If no data is provided - it is read from the value that the environment variable REFDATA_URL - points to. + def __init__(self, doc: Dict): + """Initialize the index of dataset descriptors. Parameters ---------- @@ -39,19 +29,11 @@ def __init__(self, doc: Optional[Dict] = None): Dictionary containing the dataset index. This dictionary is expected to follow the `RepositoryIndex` schema. """ - # Read the default index if no data was given. - doc = doc if doc is not None else download_index(url=config.URL()) # Create dataset index for entries in the read document. self.datasets = dict() for obj in doc.get('datasets', list()): ds = DatasetDescriptor(obj) self.datasets[ds.identifier] = ds - # Read additional repositories that may be specified in the main - # document. - for url in doc.get('repositories', list()): - for obj in download_index(url=url).get('datasets', list()): - ds = DatasetDescriptor(obj) - self.datasets[ds.identifier] = ds def find(self, filter: Optional[Union[str, List[str], Set[str]]] = None) -> List[DatasetDescriptor]: """Query the dataset index. 
The filter is a single tag or a list of @@ -98,55 +80,3 @@ def get(self, key: str) -> DatasetDescriptor: refdata.base.DatasetDescriptor """ return self.datasets.get(key) - - -# -- Helper Functions --------------------------------------------------------- - -def download_index(url: str) -> Dict: - """Download the repository index file from the given Url. - - Parameters - ---------- - url: string - Url pointing to the repository index document. - - Returns - ------- - dict - """ - r = requests.get(url) - r.raise_for_status() - return r.json() - - -"""Create schema validator for the repository index file.""" -# Make sure that the path to the schema file is a valid URI. Otherwise, errors -# occur (at least on MS Windows environments). Changed based on: -# https://github.com/Julian/jsonschema/issues/398#issuecomment-385130094 -schemafile = 'file:///{}'.format(os.path.abspath(os.path.join(__file__, 'schema.yaml'))) -schema = yaml.safe_load(pkg_resources.open_text(__package__, 'schema.yaml')) -resolver = RefResolver(schemafile, schema) - - -def validate(doc: Dict) -> Draft7Validator: - """Validate the schema for a repository index document. - - The given document is a dictionary containing the repository index. An - error is raised if the referenced document does not satisfy the defined - repository index schema. - - - Parameters - ---------- - doc: dict - Repository index document. - - Raises - ------ - jsonschema.exceptions.ValidationError - """ - validator = Draft7Validator( - schema=schema['definitions']['RepositoryIndex'], - resolver=resolver - ) - validator.validate(doc) diff --git a/refdata/repo/schema.py b/refdata/repo/schema.py new file mode 100644 index 0000000..c8013a1 --- /dev/null +++ b/refdata/repo/schema.py @@ -0,0 +1,53 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Schema validator for the repository index file. + +The validator is inteded as a tool for developers and data publishers to +validate their data indexes before publishing them. The validator is currently +not used to validate the schema of an index structure that is passed to a +repository manager. +""" + +from jsonschema import Draft7Validator, RefResolver +from typing import Dict + +import importlib.resources as pkg_resources +import os +import yaml + + +# Make sure that the path to the schema file is a valid URI. Otherwise, errors +# occur (at least on MS Windows environments). Changed based on: +# https://github.com/Julian/jsonschema/issues/398#issuecomment-385130094 +schemafile = 'file:///{}'.format(os.path.abspath(os.path.join(__file__, 'schema.yaml'))) +schema = yaml.safe_load(pkg_resources.open_text(__package__, 'schema.yaml')) +resolver = RefResolver(schemafile, schema) + + +def validate(doc: Dict) -> Draft7Validator: + """Validate the schema for a repository index document. + + The given document is a dictionary containing the repository index. An + error is raised if the referenced document does not satisfy the defined + repository index schema. + + + Parameters + ---------- + doc: dict + Repository index document. 
+ + Raises + ------ + jsonschema.exceptions.ValidationError + """ + validator = Draft7Validator( + schema=schema['definitions']['RepositoryIndex'], + resolver=resolver + ) + validator.validate(doc) diff --git a/refdata/schema.yaml b/refdata/repo/schema.yaml similarity index 100% rename from refdata/schema.yaml rename to refdata/repo/schema.yaml diff --git a/refdata/store/__init__.py b/refdata/store/__init__.py index 7ac7a27..3cbdedd 100644 --- a/refdata/store/__init__.py +++ b/refdata/store/__init__.py @@ -5,4 +5,4 @@ # refdata is free software; you can redistribute it and/or modify it under the # terms of the MIT License; see LICENSE file for more details. -from refdata.store.base import LocalStore, download_file # noqa: F401 +from refdata.store.base import LocalStore, RefStore, download_file # noqa: F401 diff --git a/refdata/store/base.py b/refdata/store/base.py index fe1525f..68d4c8a 100644 --- a/refdata/store/base.py +++ b/refdata/store/base.py @@ -11,15 +11,16 @@ from pooch.core import stream_download from pooch.downloaders import choose_downloader -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import IO, List, Optional import os -import pandas as pd from refdata.base import DatasetDescriptor -from refdata.store.dataset import DatasetHandle +from refdata.dataset.base import DatasetHandle from refdata.db import Dataset, DATASET_ID, DB, SessionScope -from refdata.repo import RepositoryManager +from refdata.repo.loader import RepositoryIndexLoader, UrlLoader +from refdata.repo.manager import RepositoryManager +from refdata.version import __version__ import refdata.config as config import refdata.error as err @@ -28,14 +29,15 @@ class LocalStore: """The local dataset store maintains downloaded datasets on the file system. All datasets are maintained in subfolders of a base directory. By default, - the base directory is in the users home directory under `.refdata`. + the base directory is in the users cache directory under the package name. Information about downloaded datasets is maintaind in an SQLite database `refdata.db` that is created in the base directory. The data file for each downloaded dataset is maintained in a separate subfolder. """ def __init__( - self, basedir: Optional[str] = None, repo: Optional[RepositoryManager] = None, + self, package_name: str, package_version: str, + basedir: Optional[str] = None, loader: Optional[RepositoryIndexLoader] = None, auto_download: Optional[bool] = None, connect_url: Optional[str] = None ): """Initialize the base directory on the file system where downloaded @@ -45,13 +47,22 @@ def __init__( Parameters ---------- + package_name: string + Name of the package that created the instance of the local store. + This name is used to associated downloaded datasets in the local + database with the packages that downloaded them. + package_version: string + Version information for the package that created the local store + instance. basedir: string, default=None Path to the directory for downloaded datasets. By default, the directory that is specified in the environment variable REFDATA_BASEDIR - is used or $HOME/.refdata if the environment variable is not set. - repo: refdata.repo.RepositoryManager, default=None - Repository manager that is used to access dataset metadata for - downloading datasets. + is used. If the environment variable is not set an directory under + the OS-specific users cache data directory is used. + loader: refdata.repo.loader.RepositoryIndexLoader, default=None + Loader for a dataset repository index. 
the loaded index is used to + create an instance of the repository manager that is associated with + the local data store for downloading datasets. auto_download: bool, default=None If auto download is enabled (True) datasets are downloaded automatically when they are first accessed via `.open()`. If this option is not @@ -65,12 +76,16 @@ def __init__( value is given the default SQLite database is used. If the respective database file does not exist a new database will be created. """ + self.package_name = package_name + self.package_version = package_version # Create the base directory if it does not exist. self.basedir = basedir if basedir else config.BASEDIR() os.makedirs(self.basedir, exist_ok=True) - # Set the repository manager. If none was given the default manager will - # be used when it is first accessed. - self.repo = repo + # Set the repository loader. The repository manager will be instantiated + # when it is first accessed. If no loader is given the default dataset + # index will be loaded for the associated repository manager instance. + self.loader = loader if loader is not None else UrlLoader() + self.repo = None # Set the auto download option. Read REFDATA_AUTODOWNLOAD if not no # argument value was given. The default is False. self.auto_download = auto_download if auto_download is not None else config.AUTO_DOWNLOAD() @@ -101,35 +116,12 @@ def _datafile(self, dataset_id: str) -> str: """ return os.path.abspath(os.path.join(self.basedir, '{}.{}'.format(dataset_id, 'dat'))) - def distinct( - self, key: str, columns: Optional[Union[str, List[str]]] = None, - auto_download: Optional[bool] = None - ) -> Set: - """Shortcut to get the set of distinct values in one or more columns - for a downloaded dataset with the given identifier. - - Parameters - ---------- - key: string - External unique dataset identifier. - columns: list of string, default=None - Column identifier defining the content and returned distinct value - set. - auto_download: bool, default=None - Override the class global auto download flag. - - Returns - ------- - set - """ - dataset = self.open(key=key, auto_download=auto_download) - return dataset.distinct(columns=columns) + def download(self, key: str) -> DatasetHandle: + """Download the dataset with the given (external) identifier. - def download(self, key: str) -> Tuple[str, Dict]: - """Download the dataset with the given (external) identifier. If no - dataset with that given key exists an error is raised. If the - dataset had been downloaded before the existing data file is - downloaded again. + Returns the handle for the downloaded dataset. If no dataset with that + given key exists an error is raised. If the dataset had been downloaded + before the data file is downloaded again. Returns the internal identifier and the descriptor (serialization) for the downloaded dataset. @@ -141,15 +133,15 @@ def download(self, key: str) -> Tuple[str, Dict]: Returns ------- - string, dict + refdata.dataset.base.DatasetHandle Raises ------ refdata.error.UnknownDatasetError """ # Get the dataset descriptor from the repository. - ds = self.repository().get(key=key) - if ds is None: + descriptor = self.repository().get(key=key) + if descriptor is None: raise err.UnknownDatasetError(key=key) # Get the internal dataset identifier if the dataset had been # downloaded before. 
If the dataset had not been downloaded an new @@ -166,7 +158,7 @@ def download(self, key: str) -> Tuple[str, Dict]: # will raise an error if the checksum for the downloaded file does not # match the expected checksum from the repository index. dst = self._datafile(dataset_id) - download_file(dataset=ds, dst=dst) + download_file(dataset=descriptor, dst=dst) # Create entry for the downloaded dataset if it was downloaded for # the first time. if not ds_exists: @@ -174,10 +166,12 @@ def download(self, key: str) -> Tuple[str, Dict]: dataset = Dataset( dataset_id=dataset_id, key=key, - descriptor=ds.to_dict() + descriptor=descriptor.to_dict(), + package_name=self.package_name, + package_version=self.package_version ) session.add(dataset) - return dataset_id, ds.to_dict() + return self.load(key=key) def _get(self, session: SessionScope, key: str) -> Dataset: """Get the database object for the dataset with the given key. If @@ -194,120 +188,98 @@ def _get(self, session: SessionScope, key: str) -> Dataset: """ return session.query(Dataset).filter(Dataset.key == key).one_or_none() - def list(self) -> List[DatasetDescriptor]: + def list(self) -> List[DatasetHandle]: """Get the descriptors for all datasets that have been downloaded and are available from the local dataset store. Returns ------- - list of refdata.base.DatasetDescriptor + list of refdata.dataset.base.DatasetHandle """ with self.db.session() as session: datasets = session.query(Dataset).all() - return [DatasetDescriptor(ds.descriptor) for ds in datasets] - - def load( - self, key: str, columns: Optional[List[str]] = None, - auto_download: Optional[bool] = None - ) -> pd.DataFrame: - """Load the dataset with the given identifier as a pandas data frame. + return [ + DatasetHandle( + descriptor=d.descriptor, + package_name=d.package_name, + package_version=d.package_version, + created_at=d.created_at, + datafile=self._datafile(d.dataset_id) + ) for d in datasets + ] + + def load(self, key: str, auto_download: Optional[bool] = None) -> DatasetHandle: + """Get handle for the specified dataset. + + If the dataset does not exist in the local store it will be downloaded + if the `auto_download` flag argument is True or if the class global + `auto_download` flag is True. Note that the `auto_download` argument + will override the class global one. - This is a shortcut to open the dataset with the given identifier (and - optionally download it first) and then reading data from the downloaded - file into a data frame. + If the dataset is not available in the local store (and not automatically + downloaded) an error is raised. Parameters ---------- key: string External unique dataset identifier. - columns: list of string, default=None - Column identifier defining the content and the schema of the - returned data frame. auto_download: bool, default=None Override the class global auto download flag. Returns ------- - pd.DataFrame - """ - dataset = self.open(key=key, auto_download=auto_download) - return dataset.data_frame(columns=columns) + refdata.dataset.base.DatasetHandle - def mapping( - self, key: str, lhs: Union[str, List[str]], rhs: Union[str, List[str]], - ignore_equal: Optional[bool] = True, auto_download: Optional[bool] = None - ) -> Dict: - """Generate a mapping from values in dataset rows. - - This is a shortcut to open the dataset with the given identifier (and - optionally download it first) and the generae a mapping from the - downloaded dataset for the given columns. 
- - Parameters - ---------- - key: string - External unique dataset identifier. - lhs: string or list of string - Columns defining the source of values for the left-hand side of the - mapping. - rhs: string or list of string - Columns defining the source of values for the right-hand side of the - mapping. - ignore_equal: bool, default=True - Exclude mappings from a value to itself from the created mapping. - auto_download: bool, default=None - Override the class global auto download flag. - - Returns - ------- - set + Raises + ------ + refdata.error.NotDownloadedError """ - dataset = self.open(key=key, auto_download=auto_download) - return dataset.mapping(lhs=lhs, rhs=rhs, ignore_equal=ignore_equal) + # Return the dataset handle if the dataset has been downloaded before. + with self.db.session() as session: + dataset = self._get(session=session, key=key) + if dataset is not None: + return DatasetHandle( + descriptor=dataset.descriptor, + package_name=dataset.package_name, + package_version=dataset.package_version, + created_at=dataset.created_at, + datafile=self._datafile(dataset.dataset_id) + ) + # Attempt to download if it does not exist in the local store and either + # of the given auto_download flag or the class global auto_download is + # True. Raises error if dataset has not been downloaded and + # auto_download is False. + download = auto_download if auto_download is not None else self.auto_download + if download: + return self.download(key=key) + else: + raise err.NotDownloadedError(key=key) - def open(self, key: str, auto_download: Optional[bool] = None) -> DatasetHandle: - """Get handle for the specified dataset. If the dataset does not exist - in the local store it will be downloaded if the given auto_download - flag is True or if the class global auto_download flag is True. Note - that the auto_download argument will override the class global one. + def open( + self, key: str, columns: Optional[List[str]] = None, + auto_download: Optional[bool] = None + ) -> IO: + """Open the dataset with the given identifier for reading. - If the dataset is not available in the local store (and not automatically - downloaded) an error is raised. + Returns a file-like object to read the dataset content. This is a + shortcut to open the dataset with the given identifier (and optionally + download it first). Parameters ---------- key: string External unique dataset identifier. + columns: list of string, default=None + Column identifier defining the content and the schema of the + returned data frame. auto_download: bool, default=None Override the class global auto download flag. Returns ------- - refdata.dataset.DatasetHandle - - Raises - ------ - refdata.error.NotDownloadedError + file-like object """ - # Get the identifier and descriptor for the dataset. Raises error - # if dataset has not been downloaded and auto_download is False. - dataset_id, descriptor = None, None - with self.db.session() as session: - dataset = self._get(session=session, key=key) - if dataset is not None: - dataset_id = dataset.dataset_id - descriptor = dataset.descriptor - # Attempt to download if it does not exist in the local store and either - # of the given auto_download flag or the class global auto_download is - # True. - if dataset_id is None: - download = auto_download if auto_download is not None else self.auto_download - if download: - dataset_id, descriptor = self.download(key=key) - else: - raise err.NotDownloadedError(key=key) - # Return handle for the dataset. 
-        return DatasetHandle(doc=descriptor, datafile=self._datafile(dataset_id))
+        return self.load(key=key, auto_download=auto_download).open()
 
     def remove(self, key: str) -> bool:
         """Remove the dataset with the given (external) identifier from the
@@ -345,10 +317,55 @@ def repository(self) -> RepositoryManager:
         # given when the store was created and this is the first access to
         # the manager.
         if self.repo is None:
-            self.repo = RepositoryManager()
+            self.repo = RepositoryManager(doc=self.loader.load())
         return self.repo
 
 
+class RefStore(LocalStore):
+    """Default local store for the refdata package. Uses the module name and
+    package version to set the respective properties of the created local store
+    instance.
+    """
+    def __init__(
+        self, basedir: Optional[str] = None, loader: Optional[RepositoryIndexLoader] = None,
+        auto_download: Optional[bool] = None, connect_url: Optional[str] = None
+    ):
+        """Initialize the store properties.
+
+        Parameters
+        ----------
+        basedir: string, default=None
+            Path to the directory for downloaded datasets. By default, the
+            directory that is specified in the environment variable REFDATA_BASEDIR
+            is used. If the environment variable is not set, a directory under
+            the OS-specific user cache directory is used.
+        loader: refdata.repo.loader.RepositoryIndexLoader, default=None
+            Loader for a dataset repository index. The loaded index is used to
+            create an instance of the repository manager that is associated with
+            the local data store for downloading datasets.
+        auto_download: bool, default=None
+            If auto download is enabled (True) datasets are downloaded automatically
+            when they are first accessed via `.open()`. If this option is not
+            enabled and an attempt is made to open a dataset that has not yet
+            been downloaded to the local file system, an error is raised. If this
+            argument is not given, the value of the environment variable
+            REFDATA_AUTODOWNLOAD is used, or False if the variable is not set.
+        connect_url: string, default=None
+            SQLAlchemy database connect Url string. If a value is given it is
+            assumed that the database exists and has been initialized. If no
+            value is given the default SQLite database is used. If the respective
+            database file does not exist a new database will be created.
+        """
+        super(RefStore, self).__init__(
+            package_name=__name__.split('.')[0],
+            package_version=__version__,
+            basedir=basedir,
+            loader=loader,
+            auto_download=auto_download,
+            connect_url=connect_url
+        )
+
+
 # -- Helper Functions ---------------------------------------------------------
 
 
 def download_file(dataset: DatasetDescriptor, dst: str):
diff --git a/refdata/version.py b/refdata/version.py
index 2110efc..e4442e7 100644
--- a/refdata/version.py
+++ b/refdata/version.py
@@ -6,4 +6,4 @@
 # terms of the MIT License; see LICENSE file for more details.
"""Information about the current version of the refdata package.""" -__version__ = '0.1.1' +__version__ = '0.2.0' diff --git a/requirements.txt b/requirements.txt index ecf96c6..b8770d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,12 @@ future appdirs>=1.4.4 pandas>=1.0.0 +python-dateutil +datasize>=1.0.0 pyyaml>=5.1 jsonschema SQLAlchemy>=1.3.18 pooch>=1.3.0 requests Click>=7.0.0 +tableprint diff --git a/setup.py b/setup.py index 7614174..19f2043 100644 --- a/setup.py +++ b/setup.py @@ -17,12 +17,15 @@ 'future', 'appdirs>=1.4.4', 'pandas>=1.0.0', + 'python-dateutil', + 'datasize>=1.0.0', 'pyyaml>=5.1', 'jsonschema', 'SQLAlchemy>=1.3.18', 'pooch>=1.3.0', 'requests', - 'Click>=7.0.0' + 'Click>=7.0.0', + 'tableprint' ] diff --git a/tests/.files/index.yaml b/tests/.files/index.yaml new file mode 100644 index 0000000..93f1150 --- /dev/null +++ b/tests/.files/index.yaml @@ -0,0 +1,56 @@ +datasets: +- checksum: 8d4c77b84cbe8c6683bbfa9f58c8268455f820b98289b51955dcef87b1d48d60 + compression: gzip + description: Names of cities in the U.S. from the Encyclopaedia Britannica. + format: + parameters: + delim: "\t" + type: csv + id: cities + name: Cities in the U.S. + schema: + - description: City Name + dtype: text + id: city + name: City + - description: U.S. State Name + dtype: text + id: state + name: State + url: http://cities.tsv.gz + webpage: https://www.britannica.com/topic/list-of-cities-and-towns-in-the-United-States-2023068 +- checksum: 889c264f2ac4629b4998aa8b8b1d4de45890c39c10e24cfd8a017e9924e805c7 + description: Information about countries in the world available from the restcountries.eu + project. + format: + parameters: {} + type: json + id: countries + name: REST Countries + schema: + - description: Country name + dtype: text + id: name + name: Name + - description: ISO 3166-1 2-letter country code + dtype: text + id: alpha2Code + name: Country Code (2-letters) + - description: ISO 3166-1 3-letter country code + dtype: text + id: alpha3Code + name: Country Code (3-letters) + - description: Capital city + dtype: text + id: capital + name: Capital + - description: World region + dtype: text + id: region + name: Region + - description: Sub-region within the country region + dtype: text + id: subregion + name: Sub-Region + url: http://countries.json + webpage: https://restcountries.eu/ diff --git a/tests/loader/test_consumer.py b/tests/dataset/test_consumer.py similarity index 86% rename from tests/loader/test_consumer.py rename to tests/dataset/test_consumer.py index 4837603..6e2cf7d 100644 --- a/tests/loader/test_consumer.py +++ b/tests/dataset/test_consumer.py @@ -9,7 +9,8 @@ import pytest -from refdata.loader.consumer import DataFrameGenerator, DistinctSetGenerator, MappingGenerator +from refdata.dataset.consumer import DataFrameGenerator, DistinctSetGenerator, MappingGenerator +from refdata.dataset.consumer import to_value # List of rows for test purposes. 
@@ -78,3 +79,10 @@ def test_mapping_generator(): assert mapping[('alice', 'smith')] == 23 assert mapping[('alice', 'jones')] == 25 assert mapping[('bob', 'jackson')] == 24 + + +def test_value_transformer(): + """Test transformation of list values with optional transformer function.""" + values = ['A', 'B'] + assert to_value(row=values) == ('A', 'B') + assert to_value(row=values, transformer=str.lower) == ('a', 'b') diff --git a/tests/loader/test_csv_loader.py b/tests/dataset/test_csv_loader.py similarity index 93% rename from tests/loader/test_csv_loader.py rename to tests/dataset/test_csv_loader.py index 324b31b..6c95b32 100644 --- a/tests/loader/test_csv_loader.py +++ b/tests/dataset/test_csv_loader.py @@ -12,8 +12,8 @@ import pytest from refdata.base import FormatDescriptor -from refdata.loader.consumer import DataCollector -from refdata.loader.csv_loader import CSVLoader +from refdata.dataset.consumer import DataCollector +from refdata.dataset.csv_loader import CSVLoader @pytest.mark.parametrize( diff --git a/tests/dataset/test_dataset_handle.py b/tests/dataset/test_dataset_handle.py new file mode 100644 index 0000000..aface7f --- /dev/null +++ b/tests/dataset/test_dataset_handle.py @@ -0,0 +1,39 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Unit tests for the dataset handle.""" + +import pytest + +from refdata.dataset.base import DatasetHandle +from refdata.db import local_time + +import refdata.error as err + + +def test_format_error(): + """Ensure that the proper error is raised when initializing a dataset + handle with an invalid format identifier. 
+ """ + doc = { + 'id': '0000', + 'url': 'countries.json', + "checksum": "889c264f2ac4629b4998aa8b8b1d4de45890c39c10e24cfd8a017e9924e805c7", + "schema": [{"id": "name"}, {"id": "alpha2Code"}], + "format": { + "type": "unknown", + "parameters": {} + } + } + with pytest.raises(err.InvalidFormatError): + DatasetHandle( + descriptor=doc, + package_name='test', + package_version='0', + created_at=local_time(), + datafile='/dev/null' + ) diff --git a/tests/loader/test_json_loader.py b/tests/dataset/test_json_loader.py similarity index 87% rename from tests/loader/test_json_loader.py rename to tests/dataset/test_json_loader.py index 432e1ae..6294ae5 100644 --- a/tests/loader/test_json_loader.py +++ b/tests/dataset/test_json_loader.py @@ -9,9 +9,10 @@ import pytest -from refdata.loader.consumer import DataCollector -from refdata.loader.json_loader import JQuery -from refdata.store.dataset import DatasetHandle +from refdata.dataset.base import DatasetHandle +from refdata.dataset.consumer import DataCollector +from refdata.dataset.json_loader import JQuery +from refdata.db import local_time # -- Loader ------------------------------------------------------------------- @@ -60,7 +61,13 @@ def test_json_loader(parameters, columns, first_row, countries_file, mock_response): descriptor = dict(DESCRIPTOR) descriptor['format'] = parameters - dataset = DatasetHandle(doc=descriptor, datafile=countries_file) + dataset = DatasetHandle( + descriptor=descriptor, + package_name='test', + package_version='0', + created_at=local_time(), + datafile=countries_file + ) data = dataset.load(columns=columns, consumer=DataCollector()).data assert len(data) == 2 assert data[0] == first_row diff --git a/tests/repo/test_repo_loader.py b/tests/repo/test_repo_loader.py new file mode 100644 index 0000000..20ea090 --- /dev/null +++ b/tests/repo/test_repo_loader.py @@ -0,0 +1,73 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. 
+ +"""Unit tests for the repository index loader.""" + +import os +import pytest + +from refdata.repo.loader import DictLoader, FileLoader, UrlLoader +from refdata.repo.loader import FORMAT_JSON, FORMAT_YAML, get_file_format + + +"""Path to index files in the local test file directory.""" +DIR = os.path.dirname(os.path.realpath(__file__)) +DATA_DIR = os.path.join(DIR, '../.files') +JSON_FILE = os.path.join(DATA_DIR, 'index.json') +YAML_FILE = os.path.join(DATA_DIR, 'index.yaml') + +"""Mocked Urls.""" +JSON_URL = 'http://index.json' +YAML_URL = 'http://index.yaml' + + +def test_dictionary_loader(): + """Test the dictionary index loader.""" + doc = {'datasets': []} + assert DictLoader(doc=doc).load() == doc + + +@pytest.mark.parametrize( + 'filename,ftype', + [(YAML_FILE, None), (YAML_FILE, FORMAT_YAML), (JSON_FILE, None), (JSON_FILE, FORMAT_JSON)] +) +def test_file_loader(filename, ftype): + """Test loading the repository index from files in different formats.""" + doc = FileLoader(filename=filename, ftype=ftype).load() + assert len(doc['datasets']) == 2 + + +@pytest.mark.parametrize( + 'ftype,filename,result', + [ + (FORMAT_JSON, 'index.yaml', FORMAT_JSON), + (FORMAT_YAML, 'index.json', FORMAT_YAML), + (None, 'index.json', FORMAT_JSON), + (None, 'index.yml', FORMAT_YAML), + (None, 'index.json.yaml', FORMAT_YAML), + (None, 'index_json_yaml', FORMAT_JSON) + ] +) +def test_guess_file_format(ftype, filename, result): + """Test various cases for guessing the index file format.""" + assert get_file_format(ftype=ftype, filename=filename) == result + + +def test_invalid_file_format(): + """Test error case where an invalid file format identifier is given.""" + with pytest.raises(ValueError): + get_file_format(ftype='unknown', filename='index.json') + + +@pytest.mark.parametrize( + 'url,ftype', + [(YAML_URL, None), (YAML_URL, FORMAT_YAML), (JSON_URL, None), (JSON_URL, FORMAT_JSON)] +) +def test_url_loader(url, ftype, mock_response): + """Test loading the repository index from a Url in different formats.""" + doc = UrlLoader(url=url, ftype=ftype).load() + assert len(doc['datasets']) == 2 diff --git a/tests/repo/test_repo_manager.py b/tests/repo/test_repo_manager.py new file mode 100644 index 0000000..7636fbc --- /dev/null +++ b/tests/repo/test_repo_manager.py @@ -0,0 +1,28 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Unit tests for the dataset repository.""" + +from refdata.repo.loader import UrlLoader +from refdata.repo.manager import RepositoryManager + + +def test_get_dataset(mock_response): + """Test getting a dataset from the default repository.""" + # Will attempt to download the default repository. The mocked response will + # return the content of the `index.json` file in the test files directory. 
+ repo = RepositoryManager(doc=UrlLoader().load()) + assert repo.get(key='DS1').identifier == 'DS1' + assert repo.get(key='UNKNOWN') is None + + +def test_read_linked_index(mock_response): + """Test reading a federated repository index.""" + repo = RepositoryManager(doc=UrlLoader(url='multi-index.json').load()) + assert len(repo.find()) == 3 + assert repo.get('us_cities') is not None + assert repo.get('cities') is not None diff --git a/tests/test_data_repo.py b/tests/repo/test_validate_index.py similarity index 57% rename from tests/test_data_repo.py rename to tests/repo/test_validate_index.py index 93fd911..e0e053a 100644 --- a/tests/test_data_repo.py +++ b/tests/repo/test_validate_index.py @@ -5,20 +5,13 @@ # refdata is free software; you can redistribute it and/or modify it under the # terms of the MIT License; see LICENSE file for more details. -"""Unit tests for the dataset repository.""" +"""Unit tests for the repository index schema validator.""" from jsonschema.exceptions import ValidationError import pytest -from refdata.repo import RepositoryManager, download_index, validate - - -def test_get_dataset(mock_response): - """Test getting a dataset from the test repository.""" - repo = RepositoryManager() - assert repo.get(key='DS1').identifier == 'DS1' - assert repo.get(key='UNKNOWN') is None +from refdata.repo.schema import validate @pytest.mark.parametrize( @@ -55,15 +48,40 @@ def test_get_dataset(mock_response): } ] ) -def test_invalid_repository_index(doc, mock_response): +def test_invalid_repository_index(doc): """Test error for invalid repository index documents.""" with pytest.raises(ValidationError): validate(doc) -def test_read_linked_index(mock_response): - """Test validating a 'downloaded' repository index document.""" - repo = RepositoryManager(doc=download_index(url='multi-index.json')) - assert len(repo.find()) == 3 - assert repo.get('us_cities') is not None - assert repo.get('cities') is not None +@pytest.mark.parametrize( + 'doc', + [ + { + 'datasets': [ + { + 'id': '0000', + 'url': 'xyz.com', + 'checksum': '0', + 'schema': [{'id': 'C1'}], + 'format': {'type': 'csv', 'parameters': {}} + } + ] + }, + { + 'datasets': [ + { + 'id': '0000', + 'url': 'xyz.com', + 'checksum': '0', + 'schema': [{'id': 'C1'}], + 'format': {'type': 'csv', 'parameters': {}} + } + ], + 'repositories': ['abc.org'] + } + ] +) +def test_valid_repository_index(doc): + """Test correct validation for valid repository index documents.""" + validate(doc) diff --git a/tests/store/conftest.py b/tests/store/conftest.py index 5ae6ce4..e11c95e 100644 --- a/tests/store/conftest.py +++ b/tests/store/conftest.py @@ -9,14 +9,17 @@ import pytest -from refdata.repo import RepositoryManager, download_index -from refdata.store import LocalStore +from refdata.repo.loader import UrlLoader +from refdata.store.base import LocalStore +from refdata.version import __version__ @pytest.fixture def store(mock_response, tmpdir): return LocalStore( + package_name='refdata_test', + package_version=__version__, basedir=tmpdir, - repo=RepositoryManager(doc=download_index('index.json')), + loader=UrlLoader(url='index.json'), auto_download=False ) diff --git a/tests/store/test_dataset_handle.py b/tests/store/test_dataset_handle.py deleted file mode 100644 index ab6e399..0000000 --- a/tests/store/test_dataset_handle.py +++ /dev/null @@ -1,68 +0,0 @@ -# This file is part of the Reference Data Repository (refdata). -# -# Copyright (C) 2021 New York University. 
-# -# refdata is free software; you can redistribute it and/or modify it under the -# terms of the MIT License; see LICENSE file for more details. - -"""Unit tests for the dataset handle.""" - -import pytest - -from refdata.store.dataset import DatasetHandle - -import refdata.error as err - - -def test_format_error(): - """Ensure that the proper error is raised when initializing a dataset - handle with an invalid format identifier. - """ - doc = { - 'id': '0000', - 'url': 'countries.json', - "checksum": "889c264f2ac4629b4998aa8b8b1d4de45890c39c10e24cfd8a017e9924e805c7", - "schema": [{"id": "name"}, {"id": "alpha2Code"}], - "format": { - "type": "unknown", - "parameters": {} - } - } - with pytest.raises(err.InvalidFormatError): - DatasetHandle(doc=doc, datafile='/dev/null') - - -def test_load_distinct(store): - """Test downloading the U.S. cities test dataset and getting a list of - distinct state names via the local data store. - """ - values = store.distinct(key='cities', columns='state', auto_download=True) - assert len(values) == 1 - assert 'Alabama' in values - values = store.distinct(key='cities', auto_download=False) - assert len(values) == 7 - - -def test_load_data_frame(store): - """Test downloading and loading the U.S. cities test dataset via the local - data store. - """ - df = store.load(key='cities', auto_download=True) - assert df.shape == (7, 2) - assert list(df.columns) == ['city', 'state'] - df = store.load(key='cities', columns=['city']) - assert df.shape == (7, 1) - assert list(df.columns) == ['city'] - - -def test_load_mapping(store): - """Test downloading the U.S. cities test dataset and getting a mapping of - values for columns from the downloaded dataset. - """ - mapping = store.mapping(key='cities', lhs='city', rhs='state', auto_download=True) - assert len(mapping) == 7 - assert mapping['Troy'] == 'Alabama' - values = store.mapping(key='cities', lhs='city', rhs=['city']) - assert len(values) == 0 - values = store.mapping(key='cities', lhs='city', rhs=['city'], ignore_equal=False) - assert len(values) == 7 diff --git a/tests/store/test_dataset_load.py b/tests/store/test_dataset_load.py new file mode 100644 index 0000000..92764b8 --- /dev/null +++ b/tests/store/test_dataset_load.py @@ -0,0 +1,63 @@ +# This file is part of the Reference Data Repository (refdata). +# +# Copyright (C) 2021 New York University. +# +# refdata is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Unit tests for creating different data objects for downloaded datasets.""" + + +def test_load_distinct(store): + """Test downloading the U.S. cities test dataset and getting a list of + distinct state names via the local data store. + """ + values = store.load(key='cities', auto_download=True).distinct(columns='state') + assert len(values) == 1 + assert 'Alabama' in values + values = store.load(key='cities').distinct(columns='state', transformer=str.lower) + assert len(values) == 1 + assert 'alabama' in values + values = store.load(key='cities').distinct() + assert len(values) == 7 + + +def test_load_data_frame(store): + """Test downloading and loading the U.S. cities test dataset via the local + data store. 
+    """
+    df = store.load(key='cities', auto_download=True).df()
+    assert df.shape == (7, 2)
+    assert list(df.columns) == ['city', 'state']
+    df = store.load(key='cities').df(columns=['city'])
+    assert df.shape == (7, 1)
+    assert list(df.columns) == ['city']
+
+
+def test_load_mapping(store):
+    """Test downloading the U.S. cities test dataset and getting a mapping of
+    values for columns from the downloaded dataset.
+    """
+    mapping = store.load(key='cities', auto_download=True).mapping(lhs='city', rhs='state')
+    assert len(mapping) == 7
+    assert mapping['Troy'] == 'Alabama'
+    dataset = store.load(key='cities')
+    mapping = dataset.mapping(lhs='city', rhs=['city'])
+    assert len(mapping) == 0
+    mapping = dataset.mapping(lhs='city', rhs=['city'], ignore_equal=False)
+    assert len(mapping) == 7
+    mapping = dataset.mapping(lhs='city', rhs='state', transformer=str.lower)
+    assert len(mapping) == 7
+    assert mapping['troy'] == 'alabama'
+    mapping = dataset.mapping(lhs='city', rhs='state', transformer=(str.lower, str.upper))
+    assert len(mapping) == 7
+    assert mapping['troy'] == 'ALABAMA'
+
+
+def test_read_dataset(store):
+    """Test reading the content of a downloaded dataset file."""
+    with store.open(key='cities', auto_download=True) as f:
+        linecount = 0
+        for line in f:
+            linecount += 1
+    assert linecount == 8
diff --git a/tests/store/test_local_store.py b/tests/store/test_local_store.py
index 024877e..cda7da3 100644
--- a/tests/store/test_local_store.py
+++ b/tests/store/test_local_store.py
@@ -7,26 +7,27 @@
 
 """Unit tests for the local datastore."""
 
+import json
 import os
 
 import pytest
 
 from refdata.db import Dataset
-from refdata.store import LocalStore, download_file
+from refdata.store import LocalStore, RefStore, download_file
+from refdata.version import __version__
 
+import refdata
 import refdata.error as err
 
 
 def test_download_dataset(store):
     """Test downloading datasets to the local store."""
-    dataset_id, descriptor = store.download(key='cities')
-    assert dataset_id is not None
-    assert descriptor['id'] == 'cities'
-    assert os.path.isfile(store._datafile(dataset_id))
+    dataset = store.download(key='cities')
+    assert dataset.identifier == 'cities'
+    assert os.path.isfile(dataset.datafile)
     # No issue downloading the dataset again.
-    dataset_id, descriptor = store.download(key='cities')
-    assert dataset_id is not None
-    assert descriptor['id'] == 'cities'
-    assert os.path.isfile(store._datafile(dataset_id))
+    dataset = store.download(key='cities')
+    assert dataset.identifier == 'cities'
+    assert os.path.isfile(dataset.datafile)
     # Error when downloading unknown file.
     with pytest.raises(err.UnknownDatasetError):
         store.download(key='unknown')
@@ -53,27 +54,58 @@ def test_listing_dataset_in_local_store(store):
     assert 'countries' in datasets
 
 
+def test_load_dataset(store):
+    """Test opening a downloaded dataset."""
+    store.download(key='cities')
+    assert store.load('cities').identifier == 'cities'
+    # Error when opening a dataset that has not been downloaded and is not
+    # downloaded automatically.
+    with pytest.raises(err.NotDownloadedError):
+        store.load('countries')
+    with pytest.raises(err.NotDownloadedError):
+        store.load('countries', auto_download=False)
+    # The dataset can be opened if the auto_download flag is True.
+    assert store.load('countries', auto_download=True).identifier == 'countries'
+
+
 def test_local_store_init(tmpdir):
     """Test different scenarios for database creation when initializing the
     local store.
     """
     # First without connection url and no existing database.
     basedir = os.path.join(tmpdir, 'test')
-    store = LocalStore(basedir=basedir)
+    store = LocalStore(package_name='test', package_version='test.1', basedir=basedir)
+    assert store.package_name == 'test'
+    assert store.package_version == 'test.1'
     assert os.path.join(basedir, 'refdata.db')
     # A second call should not re-create the database. To validate this we
     # create a new dataset and ensure that after re-creating the store that
     # the database is not empty.
     with store.db.session() as session:
-        session.add(Dataset(key='my_key', descriptor={'id': 'my_key'}))
-    store = LocalStore(basedir=basedir)
+        session.add(
+            Dataset(
+                key='my_key',
+                descriptor={'id': 'my_key'},
+                package_name='test',
+                package_version='1'
+            )
+        )
+    store = RefStore(basedir=basedir)
+    assert store.package_name == refdata.__name__.split('.')[0]
+    assert store.package_version == __version__
     with store.db.session() as session:
         datasets = session.query(Dataset).all()
         assert len(datasets) == 1
+        ds = datasets[0]
+        assert ds.key == 'my_key'
+        assert ds.descriptor == {'id': 'my_key'}
+        assert ds.package_name == 'test'
+        assert ds.package_version == '1'
+        assert ds.created_at is not None
     # Create the store with a connection string that points to the created
     # database.
     dbfile = os.path.join(basedir, 'refdata.db')
-    store = LocalStore(basedir=basedir, connect_url='sqlite:///{}'.format(dbfile))
+    store = RefStore(basedir=basedir, connect_url='sqlite:///{}'.format(dbfile))
     with store.db.session() as session:
         datasets = session.query(Dataset).all()
         assert len(datasets) == 1
@@ -84,7 +116,7 @@ def test_local_store_repo_manager(mock_response, tmpdir):
     """
     # First without connection url and no existing database.
     basedir = os.path.join(tmpdir, 'test')
-    store = LocalStore(basedir=basedir)
+    store = RefStore(basedir=basedir)
     # Ensure that the default test repository was created.
     assert len(store.repository().find()) == 3
     # Hack to ensure that the manager is created only once.
@@ -92,18 +124,11 @@
     assert len(store.repository().find()) == 0
 
 
-def test_open_dataset(store):
-    """Test opening a downloaded dataset."""
-    store.download(key='cities')
-    assert store.open('cities').identifier == 'cities'
-    # Error when opening a dataset that has not been downloaded and is not
-    # downloaded automatically.
-    with pytest.raises(err.NotDownloadedError):
-        store.open('countries')
-    with pytest.raises(err.NotDownloadedError):
-        store.open('countries', auto_download=False)
-    # The dataset can be opened if the auto_download flag is True.
- assert store.open('countries', auto_download=True).identifier == 'countries' +def test_read_dataset(store): + """Test reading a dataset using the open() method.""" + with store.open(key='countries', auto_download=True) as f: + doc = json.load(f) + assert len(doc) == 2 def test_remove_dataset(store): diff --git a/tests/test_data_query.py b/tests/test_data_query.py index 577573b..c37a73e 100644 --- a/tests/test_data_query.py +++ b/tests/test_data_query.py @@ -10,7 +10,8 @@ import pytest from refdata.base import DatasetDescriptor -from refdata.repo import RepositoryManager +from refdata.repo.loader import UrlLoader +from refdata.repo.manager import RepositoryManager """Dataset for single dataset match tests.""" @@ -66,6 +67,6 @@ def test_dataset_query(query, result): ) def test_repository_query(query, result, mock_response): """Test querying the text repository.""" - repo = RepositoryManager() + repo = RepositoryManager(doc=UrlLoader().load()) datasets = [ds.identifier for ds in repo.find(filter=query)] assert datasets == result diff --git a/tests/test_database.py b/tests/test_database.py index 450685e..6bc33c2 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -31,13 +31,34 @@ def test_database_session(): db.init() # -- Tests ---------------------------------------------------------------- with db.session() as session: - session.add(Dataset(key='my_key', descriptor={'id': 'my_key'})) + session.add( + Dataset( + key='my_key', + descriptor={'id': 'my_key'}, + package_name='test', + package_version='1' + ) + ) with db.session() as session: - session.add(Dataset(key='a_key', descriptor={'id': 'a_key'})) + session.add( + Dataset( + key='a_key', + descriptor={'id': 'a_key'}, + package_name='test', + package_version='1' + ) + ) with db.session() as session: datasets = session.query(Dataset).all() assert len(datasets) == 2 # Error when creating an entry with duplicate key. with pytest.raises(IntegrityError): with db.session() as session: - session.add(Dataset(key='my_key', descriptor={'id': 'my_key'})) + session.add( + Dataset( + key='my_key', + descriptor={'id': 'my_key'}, + package_name='test', + package_version='1' + ) + ) diff --git a/tests/test_version.py b/tests/test_version.py deleted file mode 100644 index 3cdbf8f..0000000 --- a/tests/test_version.py +++ /dev/null @@ -1,15 +0,0 @@ -# This file is part of the Reference Data Repository (refdata). -# -# Copyright (C) 2021 New York University. -# -# refdata is free software; you can redistribute it and/or modify it under the -# terms of the MIT License; see LICENSE file for more details. - -"""Unit tests for version information.""" - -from refdata.version import __version__ - - -def test_package_version(): - """Test accessing package version information (for completion).""" - assert __version__ is not None
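
For orientation, a minimal usage sketch of the store API as it looks after this patch. This is an illustration only, assuming the `restcountries.eu` dataset is available from the default repository index; it is not part of the diff itself.

from refdata.store import RefStore

# Create a local store with default settings.
refstore = RefStore()

# Get a handle for the dataset, downloading it on first access.
dataset = refstore.load('restcountries.eu', auto_download=True)

# Data frame with a subset of the dataset columns.
df = dataset.df(columns=['name', 'alpha3Code', 'capital'])

# Distinct values and lookup tables, with optional value transformers.
regions = dataset.distinct(columns='region', transformer=str.lower)
capitals = dataset.mapping(lhs='alpha3Code', rhs='capital', transformer=str.upper)

# Read the raw downloaded file via the store's open() shortcut.
with refstore.open(key='restcountries.eu', auto_download=True) as f:
    content = f.read()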