Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for datapackage object #23

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 38 additions & 25 deletions pandas_datapackage_reader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,28 @@
import requests
import pandas as pd

import logging

try:
import frictionless
except ImportError:
logging.warning("Frictionless-py package missing: cannot import from datapackage object.")

from ._version import get_versions

__version__ = get_versions()["version"]
del get_versions


def read_datapackage(url_or_path, resource_name=None):
def read_datapackage(dp, resource_name=None):
"""
Read tabular CSV files from Data Packages into DataFrames.

Parameters:
-----------
path_or_url: string
dp: string, PathLike or DataPackage object
Local path or URL of a Data Package. For GitHub URLs the repository can
be used.
be used. You can also use a frictionless.package.Package object.
resource_name: string or list of strings
Name or names of resources to read. Lists of strings are used to
request multiple resources.
Expand All @@ -42,30 +49,36 @@ def read_datapackage(url_or_path, resource_name=None):
argument for more information on when a Dict of DataFrames is returned.

"""
url_or_path = str(url_or_path) # Allows using PosixPath
if url_or_path.startswith("https://github.com/") and not url_or_path.endswith(
"/datapackage.json"
):
username_project = url_or_path.split("https://github.com/")[1]
if username_project.endswith("/"):
username_project = username_project[:-1]
url_or_path = (
"https://raw.githubusercontent.com/"
+ username_project
+ "/master/datapackage.json"
)
elif not url_or_path.endswith("datapackage.json"):
url_or_path = os.path.join(url_or_path, "datapackage.json")

if url_or_path.startswith("http"):
r = requests.get(url_or_path)
if r.status_code == 200:
metadata = json.loads(r.text)
if isinstance(dp, (str, os.PathLike)):
url_or_path = str(dp) # Allows using PosixPath
if url_or_path.startswith("https://github.com/") and not url_or_path.endswith(
"/datapackage.json"
):
username_project = url_or_path.split("https://github.com/")[1]
if username_project.endswith("/"):
username_project = username_project[:-1]
url_or_path = (
"https://raw.githubusercontent.com/"
+ username_project
+ "/master/datapackage.json"
)
elif not url_or_path.endswith("datapackage.json"):
url_or_path = os.path.join(url_or_path, "datapackage.json")

if url_or_path.startswith("http"):
r = requests.get(url_or_path)
if r.status_code == 200:
metadata = json.loads(r.text)
else:
r.raise_for_status()
else:
r.raise_for_status()
with open(url_or_path, "r") as f:
metadata = json.load(f)
elif isinstance(dp, frictionless.package.Package):
url_or_path = "datapackage.json"
metadata = dp.to_dict()
else:
with open(url_or_path, "r") as f:
metadata = json.load(f)
logging.error("Format not recognized. Parameter dp accepts string, PathLike or DataPackage object.")

if type(resource_name) is str:
resource_name = [resource_name]
Expand Down