diff --git a/audits/.gitkeep b/audits/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/example.env b/example.env index e6984d0..8e80a10 100644 --- a/example.env +++ b/example.env @@ -1,5 +1,10 @@ SECRETS_YAML='mysay: - - {url: "...", "username": "...", "password": "..."} + - url: "https://.../api/v2" + username: "..." + password: "..." + agencylookups: + - { agency: "...", parent-id: ###, project-tag-list: "..." } + - { agency: "...", parent-id: ###, project-tag-list: "..." } citizenspace: - {url: "..."}' MYSQL_PWD='...' diff --git a/justfile b/justfile index 410df93..08cf205 100644 --- a/justfile +++ b/justfile @@ -22,7 +22,8 @@ mysql-svc: minikube # SQLMesh ui for local dev dev: mysql-svc - @just mysql sqlmesh -e exit || just mysql -e 'create database sqlmesh; SET GLOBAL pxc_strict_mode=PERMISSIVE;' + @just mysql sqlmesh -e exit || just mysql -e 'create database sqlmesh;' + @just mysql -e 'SET GLOBAL pxc_strict_mode=PERMISSIVE;' uv run sqlmesh ui # Build and test container (run dev first to make sure db exists) @@ -35,7 +36,6 @@ test: mysql-svc -e MYSQL_DUCKDB_PATH='{{env('MYSQL_DUCKDB_PATH')}}' \ harvest-consultations \ sqlmesh plan --auto-apply --run --verbose - trivy image harvest-consultations # skaffold configured with env and minikube [positional-arguments] diff --git a/macros/.gitkeep b/macros/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/models/citizenspace_view.sql b/models/citizenspace_view.sql index bee5b75..55e59b3 100644 --- a/models/citizenspace_view.sql +++ b/models/citizenspace_view.sql @@ -9,6 +9,7 @@ SELECT overview AS description, status, department AS agency, + NULL AS tags, 'Western Australia' AS region, url, startdate::DATE AS publishdate, diff --git a/models/consultations_table.sql b/models/consultations_table.sql index e0368cf..9e600d7 100644 --- a/models/consultations_table.sql +++ b/models/consultations_table.sql @@ -3,8 +3,9 @@ MODEL ( kind FULL ); -SELECT - * -FROM citizenspace.view; +SELECT 
def _match_agency(row: dict, lookups: list):
    """Return the agency of the first lookup that matches ``row``, else ``None``.

    A lookup matches when any of its attribute values is a case-insensitive
    substring of the string form of the same-named attribute on ``row``.
    Lookups are tried in order, so earlier entries take precedence.
    """
    for lookup in lookups:
        agency = lookup.get("agency")
        if agency is None:
            # Nothing to assign from this entry; skip instead of failing.
            continue
        for key, value in lookup.items():
            # "agency" is the label being assigned, not a criterion to match
            # on; attributes absent from the project cannot match either.
            if key == "agency" or row.get(key) is None:
                continue
            if str(value).lower() in str(row[key]).lower():
                return agency
    return None


def load(config: dict) -> pd.DataFrame:
    """Fetch the projects of one MySay site and tag each with an agency.

    Args:
        config: Connection settings holding ``url``, ``username`` and
            ``password``. May also hold ``agencylookups``: a list of mappings,
            each with an ``agency`` label plus attribute name/value pairs to
            match against a project (see ``example.env`` for an example).

    Returns:
        One row per project, with the ``attributes`` mapping flattened into
        the row, ``url`` taken from ``links["self"]`` and ``agency`` set from
        the first matching lookup (``None`` when nothing matches). An empty
        frame is returned when the API calls or the row processing fail.

    Raises:
        KeyError: If ``url``, ``username`` or ``password`` is missing.
    """
    url, username, password = config["url"], config["username"], config["password"]
    # An `agencylookups:` key left empty in YAML arrives as None.
    lookups = config.get("agencylookups") or []
    try:
        auth_token = requests.post(
            f"{url}/tokens",
            json={"data": {"attributes": {"login": username, "password": password}}},
        ).json()["data"]["attributes"]["token"]
        projects = requests.get(
            f"{url}/projects",
            params={"per_page": 10000},
            headers={"Authorization": f"Bearer {auth_token}"},
        ).json()["data"]
        for row in projects:
            row.update(row.pop("attributes"))
            row["url"] = row["links"].pop("self")
            row["agency"] = _match_agency(row, lookups)
    except Exception as e:
        # Best-effort harvest: report the failure and return nothing rather
        # than a half-transformed list whose rows have inconsistent columns.
        print(e)
        return pd.DataFrame([])
    return pd.DataFrame(projects)
**kwargs: Any) -> pd.DataFrame: diff --git a/models/mysay_view.sql b/models/mysay_view.sql index 0851227..51bc150 100644 --- a/models/mysay_view.sql +++ b/models/mysay_view.sql @@ -4,9 +4,14 @@ MODEL ( ); SELECT - id AS ConsultationIdentifier, - CAST(attributes ->> '$.name' AS TEXT) AS ConsultationTitle, - CAST(links ->> '$.self' AS TEXT) AS ConsultationUrl, - CAST(attributes ->> '$.description' AS TEXT) AS ConsultationShortDescription, - 'Current' AS syncstate + 'mysay' AS source, + name, + description, + state AS status, + agency, + ARRAY_TO_STRING("project-tag-list", ',') AS tags, + 'Western Australia' AS region, + url, + "published-at"::DATE AS publishdate, + NULL::DATE AS expirydate FROM mysay.api \ No newline at end of file diff --git a/seeds/.gitkeep b/seeds/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/.gitkeep b/tests/.gitkeep deleted file mode 100644 index e69de29..0000000