Sharepoint list connector #795

Closed · wants to merge 4 commits
1 change: 1 addition & 0 deletions .github/workflows/build.yml
@@ -73,6 +73,7 @@ jobs:
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
black .
git fetch --all
git checkout $GITHUB_HEAD_REF
git commit -am "🎨 Format Python code with Black"
git push
9 changes: 8 additions & 1 deletion CHANGELOG.md
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
- Added `validate_df` task to task_utils.
- Added `SharepointList` source class.
- Added `SharepointListToDF` task class.
- Added `SharepointListToADLS` flow class.
- Added tests for `SharepointList`.
- Added `get_nested_dict` to `utils.py`.
- Added `validate_df` task to `SharepointToADLS` class.

### Fixed

### Changed
@@ -618,4 +625,4 @@ specified in the `SUPERMETRICS_DEFAULT_USER` secret
- Moved from poetry to pip

### Fixed
- Fix `AzureBlobStorage`'s `to_storage()` method is missing the final upload blob part
- Fix `AzureBlobStorage`'s `to_storage()` method is missing the final upload blob part
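
A note for reviewers: below is a minimal sketch of how the new `SharepointList` source might be wired up. The credential layout and the `get_connection(site_url=...)` call mirror the integration tests further down in this diff; every value is a placeholder, not a working credential.

```python
from viadot.sources import SharepointList

# Certificate-based credential layout, mirroring the shape used in
# tests/integration/test_sharepoint.py below; all values are placeholders.
credentials = {
    "SHAREPOINT_CERT": {
        "TENANT": "<tenant-id>",
        "CLIENT_ID": "<client-id>",
        "SCOPES": "https://<org>.sharepoint.com/",
        "THUMBPRINT": "<certificate-thumbprint>",
        "PRIVATE_KEY": "<private-key>",
    }
}

spl = SharepointList(credentials=credentials)
# Builds the client context (`ctx`) used by subsequent list queries.
spl.get_connection(site_url="https://<org>.sharepoint.com/")
```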
9 changes: 4 additions & 5 deletions tests/integration/flows/test_bigquery_to_adls.py
@@ -1,15 +1,14 @@
import os
from unittest import mock

import pandas as pd
import pendulum
import pytest
from unittest import mock
import pandas as pd

from prefect.tasks.secrets import PrefectSecret
from viadot.flows import BigQueryToADLS
from viadot.tasks import AzureDataLakeRemove

from viadot.exceptions import ValidationError
from viadot.flows import BigQueryToADLS
from viadot.tasks import AzureDataLakeRemove

ADLS_DIR_PATH = "raw/tests/"
ADLS_FILE_NAME = str(pendulum.now("utc")) + ".parquet"
@@ -1,6 +1,6 @@
from viadot.config import local_config
from viadot.flows import CloudForCustomersReportToADLS
from viadot.exceptions import ValidationError
from viadot.flows import CloudForCustomersReportToADLS


def test_cloud_for_customers_report_to_adls():
2 changes: 1 addition & 1 deletion tests/integration/flows/test_customer_gauge_to_adls.py
@@ -4,8 +4,8 @@
import pandas as pd
import pytest

from viadot.flows import CustomerGaugeToADLS
from viadot.exceptions import ValidationError
from viadot.flows import CustomerGaugeToADLS

DATA = {
    "user_name": ["Jane", "Bob"],
2 changes: 1 addition & 1 deletion tests/integration/flows/test_hubspot_to_adls.py
@@ -5,8 +5,8 @@
import pandas as pd
import pytest

from viadot.flows import HubspotToADLS
from viadot.exceptions import ValidationError
from viadot.flows import HubspotToADLS

DATA = {
    "id": {"0": "820306930"},
2 changes: 1 addition & 1 deletion tests/integration/flows/test_mediatool_to_adls.py
@@ -4,8 +4,8 @@
import pandas as pd
import pytest

from viadot.flows import MediatoolToADLS
from viadot.exceptions import ValidationError
from viadot.flows import MediatoolToADLS

DATA = {"country": ["DK", "DE"], "sales": [3, 4]}
ADLS_FILE_NAME = "test_mediatool.parquet"
1 change: 1 addition & 0 deletions tests/integration/flows/test_mysql_to_adls.py
@@ -1,4 +1,5 @@
from unittest import mock

from viadot.flows.mysql_to_adls import MySqlToADLS

query = """SELECT * FROM `example-views`.`sales`"""
2 changes: 1 addition & 1 deletion tests/integration/flows/test_salesforce_to_adls.py
@@ -2,9 +2,9 @@

from prefect.tasks.secrets import PrefectSecret

from viadot.exceptions import ValidationError
from viadot.flows import SalesforceToADLS
from viadot.tasks import AzureDataLakeRemove
from viadot.exceptions import ValidationError

ADLS_FILE_NAME = "test_salesforce.parquet"
ADLS_DIR_PATH = "raw/tests/"
2 changes: 1 addition & 1 deletion tests/integration/flows/test_sap_bw_to_adls.py
@@ -4,8 +4,8 @@
import pandas as pd
import pytest

from viadot.flows import SAPBWToADLS
from viadot.exceptions import ValidationError
from viadot.flows import SAPBWToADLS

DATA = {
    "[0CALMONTH].[LEVEL01].[DESCRIPTION]": ["January 2023"],
2 changes: 1 addition & 1 deletion tests/integration/flows/test_sap_rfc_to_adls.py
@@ -1,8 +1,8 @@
from viadot.config import local_config
from viadot.exceptions import ValidationError
from viadot.flows import SAPRFCToADLS
from viadot.sources import AzureDataLake
from viadot.tasks import AzureDataLakeRemove
from viadot.exceptions import ValidationError

try:
    import pyrfc
2 changes: 1 addition & 1 deletion tests/integration/flows/test_supermetrics_to_adls.py
@@ -4,8 +4,8 @@
import pytest
from prefect.storage import Local

from viadot.flows import SupermetricsToADLS
from viadot.exceptions import ValidationError
from viadot.flows import SupermetricsToADLS

CWD = os.getcwd()
adls_dir_path = "raw/tests/supermetrics"
2 changes: 1 addition & 1 deletion tests/integration/flows/test_vidclub_to_adls.py
@@ -4,8 +4,8 @@
import pandas as pd
import pytest

from viadot.flows import VidClubToADLS
from viadot.exceptions import ValidationError
from viadot.flows import VidClubToADLS

DATA = {"col1": ["aaa", "bbb", "ccc"], "col2": [11, 22, 33]}
ADLS_FILE_NAME = "test_vid_club.parquet"
214 changes: 212 additions & 2 deletions tests/integration/test_sharepoint.py
@@ -1,12 +1,14 @@
import os
import re
from copy import deepcopy

import pandas as pd
import pytest
from prefect.tasks.secrets import PrefectSecret

from viadot.config import local_config
from viadot.exceptions import CredentialError
from viadot.sources import Sharepoint
from viadot.sources import Sharepoint, SharepointList
from viadot.task_utils import df_get_data_types_task
from viadot.tasks.sharepoint import SharepointToDF

@@ -18,7 +20,7 @@ def get_url() -> str:
    Returns:
        str: File URL.
    """
    return local_config["SHAREPOINT"].get("url")
    return local_config["SHAREPOINT"].get("file_url")


@pytest.fixture(scope="session")
@@ -163,3 +165,211 @@ def test_get_data_types(file_name):
    dtypes = dtypes_map.values()

    assert "String" in dtypes


# Test get_connection with invalid credentials: authentication fails, so the
# instance never gets a `ctx` attribute.
def test_get_connection():
    site_url = "https://velux.sharepoint.com/"
    credentials = {
        "SHAREPOINT_CERT": {
            "TENANT": "xxx",
            "CLIENT_ID": "123",
            "SCOPES": "https://velux.sharepoint.com/",
            "THUMBPRINT": "xyz",
            "PRIVATE_KEY": "private",
        }
    }

    spl = SharepointList(credentials=credentials)
    with pytest.raises(
        AttributeError, match="'SharepointList' object has no attribute 'ctx'"
    ):
        spl.get_connection(site_url=site_url)


@pytest.fixture(scope="session")
def sharepoint_list():
"""
Fixture for creating a Sharepoint class instance.
The class instance can be used within a test functions to interact with Sharepoint.
"""
spl = SharepointList()
yield spl


def test_valid_filters(sharepoint_list):
    filters = {
        "filter1": {"dtype": "int", "operator1": "<", "value1": 10},
        "filter2": {"dtype": "str", "operator1": "==", "value1": "value"},
    }
    result = sharepoint_list.check_filters(filters)
    assert result is True


def test_invalid_dtype(sharepoint_list):
    filters = {
        "filter1": {"dtype": "list", "operator1": ">", "value1": 10},
    }
    with pytest.raises(ValueError, match="dtype not allowed!"):
        sharepoint_list.check_filters(filters)


def test_missing_operator1(sharepoint_list):
    filters = {
        "filter1": {"dtype": "int", "value1": 10},
    }
    with pytest.raises(ValueError, match="Operator1 is missing!"):
        sharepoint_list.check_filters(filters)


def test_invalid_operator1(sharepoint_list):
    filters = {
        "filter1": {"dtype": "int", "operator1": "*", "value1": 10},
    }
    with pytest.raises(ValueError, match="Operator type not allowed!"):
        sharepoint_list.check_filters(filters)


def test_missing_value1(sharepoint_list):
    filters = {
        "filter1": {"dtype": "int", "operator1": ">", "value1": None},
    }
    with pytest.raises(ValueError, match="Value for operator1 is missing!"):
        sharepoint_list.check_filters(filters)


def test_missing_operators_conjuction(sharepoint_list):
    filters = {
        "filter1": {
            "dtype": "int",
            "operator1": ">",
            "value1": 10,
            "operator2": "<",
            "value2": 20,
        },
    }
    with pytest.raises(ValueError, match="Operators for conjuction is missing!"):
        sharepoint_list.check_filters(filters)


def test_invalid_operators_conjuction(sharepoint_list):
    filters = {
        "filter1": {
            "dtype": "int",
            "operator1": ">",
            "value1": 10,
            "operator2": "<",
            "value2": 20,
            "operators_conjuction": "!",
        },
    }
    with pytest.raises(ValueError, match="Operators for conjuction not allowed!"):
        sharepoint_list.check_filters(filters)


def test_invalid_filters_conjuction(sharepoint_list):
    filters = {
        "filter1": {
            "dtype": "int",
            "operator1": ">",
            "value1": 10,
            "filters_conjuction": "!",
        },
    }
    with pytest.raises(
        ValueError, match="Filters operators for conjuction not allowed!"
    ):
        sharepoint_list.check_filters(filters)
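
Reviewer note: taken together, the eight tests above pin down the validation contract of `check_filters`. Below is a minimal sketch consistent with those assertions, not necessarily the PR's exact implementation. The allowed-dtype set is an assumption (the tests only prove that `"list"` is rejected), and the misspelled "conjuction" is kept deliberately so the messages match the strings the tests assert.

```python
ALLOWED_DTYPES = ["int", "float", "str", "date", "datetime"]  # assumed; tests only show "list" rejected
ALLOWED_OPERATORS = ["<", ">", "<=", ">=", "==", "!="]
ALLOWED_CONJUNCTIONS = ["&", "|"]


def check_filters(filters: dict) -> bool:
    """Validate a filters dict, raising ValueError on the first violation."""
    for _, spec in filters.items():
        if spec.get("dtype") not in ALLOWED_DTYPES:
            raise ValueError("dtype not allowed!")
        if "operator1" not in spec:
            raise ValueError("Operator1 is missing!")
        if spec["operator1"] not in ALLOWED_OPERATORS:
            raise ValueError("Operator type not allowed!")
        if spec.get("value1") is None:
            raise ValueError("Value for operator1 is missing!")
        if "operator2" in spec:
            # A second comparison needs a logical operator joining it to the first.
            if "operators_conjuction" not in spec:
                raise ValueError("Operators for conjuction is missing!")
            if spec["operators_conjuction"] not in ALLOWED_CONJUNCTIONS:
                raise ValueError("Operators for conjuction not allowed!")
        if "filters_conjuction" in spec:
            if spec["filters_conjuction"] not in ALLOWED_CONJUNCTIONS:
                raise ValueError("Filters operators for conjuction not allowed!")
    return True
```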


def test_valid_mapping(sharepoint_list):
    filters = {
        "filter1": {
            "operator1": ">",
            "operator2": "<=",
            "operators_conjuction": "&",
            "filters_conjuction": "|",
        },
        "filter2": {"operator1": "==", "operator2": "!=", "operators_conjuction": "|"},
    }
    expected_result = {
        "filter1": {
            "operator1": "gt",
            "operator2": "le",
            "operators_conjuction": "and",
            "filters_conjuction": "or",
        },
        "filter2": {"operator1": "eq", "operator2": "ne", "operators_conjuction": "or"},
    }
    result = sharepoint_list.operators_mapping(deepcopy(filters))
    assert result == expected_result
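
Reviewer note: the expected mapping above implies a simple translation table (`>` to `gt`, `<=` to `le`, `&` to `and`, `|` to `or`, and so on). A sketch of `operators_mapping` consistent with this test and with the error messages asserted in the next two tests follows; it is a plausible reconstruction, not the PR's verbatim code.

```python
def operators_mapping(filters: dict) -> dict:
    """Translate comparison and logical operators to their OData-style names."""
    comparison = {"<": "lt", ">": "gt", "<=": "le", ">=": "ge", "==": "eq", "!=": "ne"}
    logical = {"&": "and", "|": "or"}

    for _, spec in filters.items():
        for key in ("operator1", "operator2"):
            if key in spec:
                op = spec[key]
                if op not in comparison:
                    raise ValueError(
                        f"This comparison operator: {op} is not allowed. "
                        "Please read the function documentation for details!"
                    )
                spec[key] = comparison[op]
        for key in ("operators_conjuction", "filters_conjuction"):
            if key in spec:
                op = spec[key]
                if op not in logical:
                    raise ValueError(
                        f"This conjuction(logical) operator: {op} is not allowed. "
                        "Please read the function documentation for details!"
                    )
                spec[key] = logical[op]
    return filters
```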


def test_invalid_comparison_operator(sharepoint_list):
    filters = {
        "filter1": {
            "operator1": "*",
            "operator2": "<=",
            "operators_conjuction": "&",
            "filters_conjuction": "|",
        },
    }
    error_message = "This comparison operator: * is not allowed. Please read the function documentation for details!"
    with pytest.raises(ValueError, match=re.escape(error_message)):
        sharepoint_list.operators_mapping(deepcopy(filters))


def test_invalid_logical_operator(sharepoint_list):
    filters = {
        "filter1": {
            "operator1": ">",
            "operator2": "<=",
            "operators_conjuction": "!",
            "filters_conjuction": "|",
        },
    }
    error_message = "This conjuction(logical) operator: ! is not allowed. Please read the function documentation for details!"
    with pytest.raises(ValueError, match=re.escape(error_message)):
        sharepoint_list.operators_mapping(deepcopy(filters))


def test_single_filter_datetime_api(sharepoint_list):
    filters = {
        "date_column": {"dtype": "datetime", "operator1": ">", "value1": "2023-01-01"}
    }
    result = sharepoint_list.make_filter_for_api(filters)
    expected_result = "date_column gt datetime'2023-01-01T00:00:00' "
    assert result == expected_result


def test_multiple_filters_api(sharepoint_list):
    filters = {
        "int_column": {
            "dtype": "int",
            "operator1": ">=",
            "value1": 10,
            "operator2": "<",
            "value2": 20,
        },
        "str_column": {"dtype": "str", "operator1": "==", "value1": "example"},
    }
    result = sharepoint_list.make_filter_for_api(filters)
    expected_result = "int_column ge '10'int_column lt '20'str_column eq 'example'"
    assert result == expected_result
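
Reviewer note: the two expected strings above show how `make_filter_for_api` must serialize each clause: date and datetime values become OData `datetime'...T00:00:00'` literals, all other values are single-quoted, and clauses are concatenated exactly as written (notably with no logical conjunction between them). A sketch reproducing both expected outputs, reusing the `operators_mapping` sketch from earlier:

```python
from copy import deepcopy


def make_filter_for_api(filters: dict) -> str:
    """Build an OData-style filter string from a filters dict."""
    filters = operators_mapping(deepcopy(filters))  # '>=' becomes 'ge', etc.
    filter_text = ""
    for column, spec in filters.items():
        for op_key, val_key in (("operator1", "value1"), ("operator2", "value2")):
            if op_key not in spec:
                continue
            if spec["dtype"] in ("date", "datetime"):
                # Dates expand to midnight datetime literals, with a trailing space.
                filter_text += f"{column} {spec[op_key]} datetime'{spec[val_key]}T00:00:00' "
            else:
                filter_text += f"{column} {spec[op_key]} '{spec[val_key]}'"
    return filter_text
```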


def test_single_df_filter(sharepoint_list):
    filters = {"column1": {"operator1": ">", "value1": 10}}
    result = sharepoint_list.make_filter_for_df(filters)
    expected_result = "df.loc[(df.column1 > '10')]"
    assert result == expected_result


def test_multiple_df_filters(sharepoint_list):
    filters = {
        "column1": {"operator1": ">", "value1": 10, "filters_conjuction": "&"},
        "column2": {"operator1": "<", "value1": 20},
    }
    result = sharepoint_list.make_filter_for_df(filters)
    expected_result = "df.loc[(df.column1 > '10')&(df.column2 < '20')]"
    assert result == expected_result
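
Reviewer note: unlike the API variant, these expected strings keep the raw Python operators and target a frame literally named `df`, so `make_filter_for_df` evidently returns a `df.loc[...]` expression string, presumably eval'd later against the fetched data. A sketch matching both tests:

```python
def make_filter_for_df(filters: dict) -> str:
    """Build a `df.loc[...]` expression string from a filters dict."""
    expression = "df.loc["
    for column, spec in filters.items():
        # Values are single-quoted even for ints, matching the expected strings.
        expression += f"(df.{column} {spec['operator1']} '{spec['value1']}')"
        if "filters_conjuction" in spec:
            expression += spec["filters_conjuction"]
    return expression + "]"
```

Returning an expression string couples the filter to a variable named `df` at eval time; a boolean mask or callable would be a safer design, but the string form is what these tests lock in.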
2 changes: 1 addition & 1 deletion tests/unit/test_task_utils.py
@@ -19,8 +19,8 @@
    df_to_parquet,
    dtypes_to_json_task,
    union_dfs_task,
    write_to_json,
    validate_df,
    write_to_json,
)

