Build with Poetry #1

Merged
merged 19 commits, Aug 13, 2024
Binary file added .coverage
Binary file not shown.
11 changes: 11 additions & 0 deletions .flake8
@@ -0,0 +1,11 @@
[flake8]
ignore = E203, E266, W503, BLK100, W291, I004
max-line-length = 120
max-complexity = 15

[isort]
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 120
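The `[isort]` block above selects the "vertical hanging indent" style. As a quick illustration (stdlib imports, purely for demonstration), an import line that exceeds `line_length` would be wrapped under these settings like this:

```python
# With multi_line_output = 3 (vertical hanging indent), use_parentheses = true,
# and include_trailing_comma = true, isort wraps an over-long import as:
from os.path import (
    abspath,
    basename,
    dirname,
    join,
)
```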
56 changes: 56 additions & 0 deletions .github/workflows/mega-linter.yml
@@ -0,0 +1,56 @@
---
# MegaLinter GitHub Action configuration file
# More info at https://megalinter.github.io
name: MegaLinter
on: # yamllint disable-line rule:truthy
push: # Comment this line to trigger action only on pull-requests (not recommended if you don't pay for GH Actions)
permissions: read-all
env: # Comment env block if you do not want to apply fixes
APPLY_FIXES: all # When active, APPLY_FIXES must also be defined as environment variable (in github/workflows/mega-linter.yml or other CI tool)
DISABLE_LINTERS: SPELL_CSPELL,COPYPASTE_JSCPD,MAKEFILE_CHECKMAKE,PYTHON_BANDIT,PYTHON_MYPY,PYTHON_PYRIGHT,PYTHON_PYLINT,REPOSITORY_GRYPE,REPOSITORY_SECRETLINT,REPOSITORY_TRIVY,REPOSITORY_TRUFFLEHOG,REPOSITORY_CHECKOV
MARKDOWN_MARKDOWNLINT_FILTER_REGEX_EXCLUDE: "tests/example.*ME\\.md" # Exclude example markdown files from markdownlint
concurrency:
group: ${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
build:
name: MegaLinter
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
issues: write
pull-requests: write
steps:
# Git Checkout
- name: Checkout Code
uses: actions/checkout@v4
with:
token: ${{ secrets.GITHUB_TOKEN }}
fetch-depth: 0 # If you use VALIDATE_ALL_CODEBASE = true, you can remove this line to improve performances

# MegaLinter
- name: MegaLinter
id: ml
# You can override MegaLinter flavor used to have faster performances
# More info at https://megalinter.github.io/flavors/
uses: oxsecurity/megalinter/flavors/[email protected]
env:
# All available variables are described in documentation
# https://megalinter.github.io/configuration/
VALIDATE_ALL_CODEBASE: true
# VALIDATE_ALL_CODEBASE: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} # Validates all source when push on main, else just the git diff with main. Override with true if you always want to lint all sources
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# ADD YOUR CUSTOM ENV VARIABLES HERE OR DEFINE THEM IN A FILE .mega-linter.yml AT THE ROOT OF YOUR REPOSITORY
DISABLE: COPYPASTE,SPELL # Disable copy-paste and spell checks (remove this line to re-enable them)

# Upload MegaLinter artifacts
- name: Archive production artifacts
uses: actions/upload-artifact@v4
if: true
with:
name: MegaLinter reports
path: |
megalinter-reports
mega-linter.log
52 changes: 52 additions & 0 deletions .github/workflows/python-package.yml
@@ -0,0 +1,52 @@
---
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: Python package
on: # yamllint disable-line rule:truthy
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
permissions: read-all

jobs:
build:
permissions:
contents: read
id-token: write
issues: write
pull-requests: write
strategy:
fail-fast: false
matrix:
python-version: ["3.12"]
poetry-version: ["1.8.3"]
os: [ubuntu-latest, macos-latest] # , windows-latest
runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Run poetry image
uses: abatilo/actions-poetry@v2
with:
poetry-version: ${{ matrix.poetry-version }}
- name: Install dependencies
run: |
poetry --version
poetry install
- name: Test with pytest
run: |
make test TEST_OS=${{ matrix.os }}
- name: Get Coverage Report
uses: orgoro/[email protected]
with:
coverageFile: coverage.xml
token: ${{ secrets.GITHUB_TOKEN }}
thresholdAll: 0.0
thresholdNew: 0.0
thresholdModified: 0.0
if: github.event_name == 'pull_request'
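The `strategy.matrix` block above expands into one job per combination of its axes — here one Python version, one Poetry version, and two operating systems, so two jobs. The expansion can be sketched as a cross product:

```python
# GitHub Actions expands a build matrix as the cross product of its axes.
from itertools import product

python_versions = ["3.12"]
poetry_versions = ["1.8.3"]
oses = ["ubuntu-latest", "macos-latest"]  # windows-latest is commented out

jobs = list(product(python_versions, poetry_versions, oses))
# two jobs: (3.12, 1.8.3, ubuntu-latest) and (3.12, 1.8.3, macos-latest)
```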
1 change: 1 addition & 0 deletions .gitignore
@@ -8,3 +8,4 @@ dist/

# Environment info
.env
athena_federation/.DS_Store
3 changes: 2 additions & 1 deletion .vscode/settings.json
@@ -3,5 +3,6 @@
"python.pythonPath": ".venv/bin/python",
"python.analysis.extraPaths": [
"src.*"
]
],
"python.analysis.typeCheckingMode": "basic"
}
15 changes: 8 additions & 7 deletions Dockerfile
@@ -4,21 +4,22 @@ FROM python:3.8-slim AS build
WORKDIR /app

# Get ready to build
RUN pip install build
RUN pip install --no-cache-dir poetry==1.8.3

# Now copy the app over and build a wheel
COPY src /app/src
COPY pyproject.toml setup.cfg /app/
RUN python -m build
COPY src /app/src/
COPY pyproject.toml /app/
RUN poetry build  # produces dist/athena_federation-*-py3-none-any.whl for the next stage

## Now use the compiled wheel in our lambda function
FROM amazon/aws-lambda-python:3.8.2021.12.09.15 AS lambda
FROM amazon/aws-lambda-python:3.12.0 AS lambda

ENV TARGET_BUCKET=replace_me

COPY --from=build /app/dist/unoffical_athena_federation_sdk-*-py3-none-any.whl /
RUN pip install /unoffical_athena_federation_sdk-*-py3-none-any.whl
COPY --from=build /app/dist/athena_federation-*-py3-none-any.whl /
RUN pip install --no-cache-dir /athena_federation-*-py3-none-any.whl

WORKDIR /app
COPY example/ ./
RUN ls ./

36 changes: 31 additions & 5 deletions Makefile
@@ -1,6 +1,32 @@
build:
python3 -m build
upload:
python3 -m twine upload dist/*
.PHONY: install update test lint clean build publish all

.PHONY: build upload
all: test

# Install project dependencies
install:
poetry install

# Update project dependencies
update:
poetry update

# Run project tests
test:
poetry run pytest --cov-report xml --cov=athena_federation

# Format with black, then lint with flake8
lint:
poetry run black .
poetry run flake8

# Clean up generated files
clean:
rm -rf dist build

# Build project distribution
build: lint
poetry build

# Publish project to PyPI
publish: build
poetry publish
30 changes: 13 additions & 17 deletions README.md
@@ -15,13 +15,14 @@ You can see an example implementation that [queries Google Sheets using Athena](
- Partitions are not supported, so Athena will not parallelize the query using partitions.

## Example Implementations

- [Athena data source connector for Minio](https://github.com/Proximie/athena-connector-for-minio/)

## Local Development

- Ensure you've installed the `build` module and the SDK dependencies.

```
```shell
pip install build
pip install -r requirements.txt
```
@@ -32,12 +33,12 @@ pip install -r requirements.txt
python -m build
```

This will create a file in `dist/`: `dist/unoffical_athena_federation_sdk-0.0.0-py3-none-any.whl`
This will create a file in `dist/`: `dist/athena_federation-0.1.0-py3-none-any.whl`

Copy that file to your example repo and you can include it in your `requirements.txt` like so:

```
unoffical-athena-federation-sdk @ file:///unoffical_athena_federation_sdk-0.0.0-py3-none-any.whl
```text
athena-federation @ file:///athena_federation-0.1.0-py3-none-any.whl
```

## Validating your connector
@@ -75,7 +76,7 @@ curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d

💁 _Please note these are manual instructions until a [serverless application](https://aws.amazon.com/serverless/serverlessrepo/) can be built._

0. First, let's define some variables we need throughout.
1. First, let's define some variables we need throughout.

```shell
export SPILL_BUCKET=<BUCKET_NAME>
@@ -90,21 +91,21 @@ export IMAGE_TAG=v0.0.1
aws s3 mb s3://${SPILL_BUCKET}
```

2. Create an ECR repository for this image
1. Create an ECR repository for this image

```shell
aws ecr create-repository --repository-name athena_example --image-scanning-configuration scanOnPush=true
```

3. Push tag the image with the repo name and push it up
1. Tag the image with the repo name and push it up

```shell
docker tag local/athena-python-example ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/athena_example:${IMAGE_TAG}
aws ecr get-login-password | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com
docker push ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/athena_example:${IMAGE_TAG}
```

4. Create an IAM role that will allow your Lambda function to execute
1. Create an IAM role that will allow your Lambda function to execute

_Note the `Arn` of the role that's returned_

@@ -117,7 +118,7 @@ aws iam attach-role-policy \
--policy-arn arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
```

5. Grant the IAM role access to your S3 bucket
1. Grant the IAM role access to your S3 bucket

```shell
aws iam create-policy --policy-name athena-example-s3-access --policy-document '{
@@ -144,8 +145,7 @@ aws iam attach-role-policy \
--policy-arn arn:aws:iam::${AWS_ACCOUNT_ID}:policy/athena-example-s3-access
```


6. Now create your function pointing to the created repository image
1. Now create your function pointing to the created repository image

```shell
aws lambda create-function \
Expand All @@ -158,15 +158,11 @@ aws lambda create-function \
--package-type Image
```

## Connect with Athena!
## Connect with Athena

1. Choose "Data sources" on the top navigation bar in the Athena console and then click "Connect data source"

![](docs/athena_connect.png)

2. Choose the Lambda function you just created and click `Connect`!

![](docs/athena_connect_lambda.png)
1. Choose the Lambda function you just created and click `Connect`!

## Updating the Lambda function

10 changes: 10 additions & 0 deletions athena_federation/__init__.py
@@ -0,0 +1,10 @@
"""Version number of our project"""

__version__ = "0.0.3"

# Import the SDK
from .athena_data_source import AthenaDataSource # noqa: F401
from .lambda_handler import AthenaLambdaHandler # noqa: F401
from .models import * # noqa: F401,F403
from .sdk import AthenaFederationSDK # noqa: F401
from .utils import AthenaSDKUtils # noqa: F401
@@ -1,8 +1,9 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Mapping, Union, Generator
from typing import Any, Dict, Generator, List, Mapping, Union

import pyarrow as pa


class AthenaDataSource(ABC):
"""
AthenaDataCatalog is a class that makes it easy to build a custom data connector for Athena using Python.
@@ -11,14 +12,15 @@ class AthenaDataSource(ABC):
Once defined, it is then passed to `AthenaLambdaHandler`. That class handles the
majority of encoding your responses in the format necessary for the Athena SDK.
"""

def __init__(self) -> None:
self._data_source_type = "athena_python_sdk"

@property
def data_source_type(self):
"""Get the data source type. Only used for PingRequest and debugging."""
return self._data_source_type

@abstractmethod
def databases(self) -> List[str]:
"""
@@ -46,7 +48,7 @@ def columns(self, database_name: str, table_name: str) -> List[str]:
If you want to be more specific, override the `table_schema` method instead.
"""
return []

@abstractmethod
def schema(self, database_name: str, table_name: str) -> pa.Schema:
"""
Expand All @@ -58,7 +60,7 @@ def schema(self, database_name: str, table_name: str) -> pa.Schema:
return pa.schema(
[(col, pa.string()) for col in self.columns(database_name, table_name)]
)

def splits(self, database_name: str, table_name: str) -> List[Dict]:
"""
Return a list of splits for the given table.
Expand All @@ -71,9 +73,11 @@ def splits(self, database_name: str, table_name: str) -> List[Dict]:
you can use the default implementation, which generates a single split.
"""
return []

@abstractmethod
def records(self, database_name: str, table_name: str, split: Mapping[str,str]) -> Union[Dict[str,List[Any]], Generator[Dict[str,List[Any]],None,None]]:
def records(
self, database_name: str, table_name: str, split: Mapping[str, str]
) -> Union[Dict[str, List[Any]], Generator[Dict[str, List[Any]], None, None]]:
"""
Return a dictionary of records for the given table and split.

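As a rough illustration of the contract in the `records` signature above — not code from the SDK itself, and with hypothetical table names and data — a chunked implementation can yield column-oriented dictionaries:

```python
# Sketch of a chunked records() implementation. Per the signature above, it may
# return a single dict of column -> list-of-values, or a generator of such
# dicts; every column within one chunk must have the same number of rows.
from typing import Any, Dict, Generator, List, Mapping

def records(
    database_name: str, table_name: str, split: Mapping[str, str]
) -> Generator[Dict[str, List[Any]], None, None]:
    # Hypothetical fixture data, yielded in two column-oriented chunks.
    yield {"id": [1, 2], "name": ["alpha", "beta"]}
    yield {"id": [3], "name": ["gamma"]}

chunks = list(records("mydb", "mytable", {}))
```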
@@ -1,4 +1,4 @@
from typing import Dict, List
from typing import Dict

import pyarrow as pa
from smart_open import open
@@ -57,4 +57,4 @@ def all_records(self):
record_batches = [b for b in reader]
one_chunk_table = pa.Table.from_batches(record_batches).combine_chunks()
batches = one_chunk_table.to_batches(max_chunksize=None)
return batches[0]
return batches[0]
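The `all_records` hunk above concatenates Arrow record batches and merges them into a single batch via `combine_chunks`. A pure-Python sketch of the same idea — a hypothetical stand-in, without pyarrow — treats each chunk as a dict of column lists and concatenates column-wise:

```python
# Conceptual stand-in for pa.Table.from_batches(...).combine_chunks():
# merge column-oriented chunks into one chunk by concatenating each column.
from typing import Any, Dict, List

def combine_chunks(chunks: List[Dict[str, List[Any]]]) -> Dict[str, List[Any]]:
    combined: Dict[str, List[Any]] = {}
    for chunk in chunks:
        for col, values in chunk.items():
            combined.setdefault(col, []).extend(values)
    return combined

batches = [{"id": [1, 2]}, {"id": [3]}]
merged = combine_chunks(batches)  # {"id": [1, 2, 3]}
```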