Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up files/directories, add auto-formatting #11

Merged
merged 2 commits into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: Lint
on:
pull_request:
paths: ['**/*.py']
jobs:
lint:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
- uses: psf/black@stable
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
name: Check YAML Consistency
name: Check Output Consistency
'on':
pull_request:
paths:
- '**/*.yaml'
jobs:
check_yaml:
check_output:
runs-on: ubuntu-latest
steps:
- name: Checkout code
Expand All @@ -17,5 +17,5 @@ jobs:
run: pip install -r requirements.txt
- name: Run export and check consistency
run: |
python export.py
python scripts/export.py
git diff --exit-code
5 changes: 2 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
- name: Validate YAML
run: |
python --version
python validate_yaml.py
python scripts/validate_yaml.py

if_merged:
if: github.event.pull_request.merged == true
Expand All @@ -49,8 +49,7 @@ jobs:
cache: 'pip' # caching pip dependencies
- run: |
pip install -r requirements.txt
python validate_yaml.py
python generate_model.py
python scripts/validate_yaml.py
- uses: mikeal/publish-to-github-action@master
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
16 changes: 13 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
repos:
- repo: local
hooks:
- id: validate-yaml
name: Validate YAML against the schema
language: python
entry: python scripts/validate_yaml.py
pass_filenames: false
additional_dependencies: [pyyaml, jsonschema]
- id: export-yaml
name: Export YAML to CSV and JSON
language: python
entry: python export.py
additional_dependencies: ['pyyaml']

entry: python scripts/export.py
pass_filenames: false
additional_dependencies: [pyyaml]
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Output formats, such as CSV and JSON are in the `generated/` directory.

Schemas for the YAML data can be found in the `schemas` directory, along with descriptions for each field.
These schemas are in [JSON Schema](https://json-schema.org/) format, but represented in YAML for simplicity.
The `validate_yaml.py` script validates all brands and companies using the schemas.
The `scripts/validate_yaml.py` script validates all brands and companies using the schemas.

## Useful Resources & Links

Expand Down
1 change: 0 additions & 1 deletion generate_model.py

This file was deleted.

Empty file removed generated/placeholder.txt
Empty file.
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
black == 23.12.1
jsonschema == 4.20.0
pre-commit == 3.6.0
pyyaml == 6.0.1
77 changes: 42 additions & 35 deletions export.py → scripts/export.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,32 @@

import yaml
import csv
import json
import csv
import os


def read_yaml(file_path):
    """Load and return the parsed contents of a YAML file.

    Args:
        file_path: Path to a YAML document.

    Returns:
        The object produced by ``yaml.safe_load`` (typically a dict or list;
        ``None`` for an empty file).
    """
    # safe_load never constructs arbitrary Python objects, unlike yaml.load.
    with open(file_path, "r") as file:
        return yaml.safe_load(file)


def clean_value(value):
    """Normalize a YAML value for CSV output.

    Lists are flattened into a single comma-separated string so they fit in
    one CSV cell; every other value (including None) passes through unchanged.
    """
    if isinstance(value, list):
        return ", ".join(map(str, value))
    return value


def export_to_csv(input_dir, output_csv, schema_file):
schema = read_yaml(schema_file)

with open(output_csv, 'w', newline='') as csvfile:
schema_fields = list(schema['properties'].keys())
if 'stakeholders' in schema_fields:
with open(output_csv, "w", newline="") as csvfile:
schema_fields = list(schema["properties"].keys())
if "stakeholders" in schema_fields:
# Haven't decided how to represent stakeholders in the CSV format, so just remove it for now.
schema_fields.remove('stakeholders')
fieldnames = ['id'] + schema_fields
schema_fields.remove("stakeholders")
fieldnames = ["id"] + schema_fields
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

# Write header based on the schema
Expand All @@ -31,59 +35,62 @@ def export_to_csv(input_dir, output_csv, schema_file):
for yaml_file in sorted(os.listdir(input_dir)):
if yaml_file.endswith(".yaml"):
yaml_file_path = os.path.join(input_dir, yaml_file)
with open(yaml_file_path, 'r') as file:
with open(yaml_file_path, "r") as file:
data = yaml.safe_load(file)

cleaned_data = {key: clean_value(data.get(key, None)) for key in fieldnames}
cleaned_data['id'] = os.path.splitext(yaml_file)[0]

cleaned_data = {
key: clean_value(data.get(key, None)) for key in fieldnames
}
cleaned_data["id"] = os.path.splitext(yaml_file)[0]

writer.writerow(cleaned_data)

print(f"Converted {yaml_file} to CSV")


def convert_yaml_to_json(directory_path, key):
    """Read every ``.yaml`` file in *directory_path* into one mapping.

    Each file becomes an entry keyed by its basename (without extension); the
    entry is the parsed YAML mapping with an added ``"id"`` field equal to
    that basename.

    Args:
        directory_path: Directory containing ``.yaml`` files.
        key: Top-level key to wrap the result under (e.g. "brands").

    Returns:
        ``{key: {file_id: {"id": file_id, **yaml_contents}, ...}}``
    """
    data = {}
    # Sorted listing gives a deterministic output order (reproducible diffs).
    for file_name in sorted(os.listdir(directory_path)):
        if file_name.endswith(".yaml"):
            file_path = os.path.join(directory_path, file_name)
            with open(file_path, "r") as yaml_file:
                yaml_data = yaml.safe_load(yaml_file)
            file_id = os.path.splitext(file_name)[0]
            data[file_id] = {"id": file_id, **yaml_data}
    return {key: data}


def export_to_json(directory1, directory2, output_json):
    """Combine brand and company YAML directories into a single JSON file.

    Args:
        directory1: Directory of brand YAML files.
        directory2: Directory of company YAML files.
        output_json: Path of the JSON file to write.
    """
    brands_data = convert_yaml_to_json(directory1, "brands")
    companies_data = convert_yaml_to_json(directory2, "companies")

    combined_data = {**brands_data, **companies_data}

    with open(output_json, "w") as json_file:
        json.dump(combined_data, json_file, indent=2)

    # Plain string: the original used an f-string with nothing to interpolate.
    print("Converted data to JSON")


if __name__ == "__main__":
    # Input: one YAML file per brand/company under data/.
    brands_yaml = "data/brands"
    companies_yaml = "data/companies"

    # Outputs: per-collection CSVs plus one combined JSON document.
    brands_csv_file = "output/csv/brands.csv"
    companies_csv_file = "output/csv/companies.csv"
    data_json_file = "output/json/data.json"

    # Schemas drive the CSV column set (see export_to_csv).
    brand_schema = "schemas/brand_schema.yaml"
    company_schema = "schemas/company_schema.yaml"

    export_to_csv(brands_yaml, brands_csv_file, brand_schema)
    export_to_csv(companies_yaml, companies_csv_file, company_schema)
    export_to_json(brands_yaml, companies_yaml, data_json_file)
78 changes: 39 additions & 39 deletions import.py → scripts/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,21 @@ def parent_from_details(details):
return parse_for_parent[1]
return ""


# Custom representer controlling how strings are styled in dumped YAML.
def literal_presenter(dumper, data):
    """Choose a YAML scalar style for the string *data*.

    Rules, in order:
      - literal block style (``|``) for multiline or long (>30 char) strings;
      - double-quoted style for strings containing a space;
      - an empty string is emitted as a YAML null (bare ``key:``);
      - anything else uses the default plain style.
    """
    if "\n" in data or len(data) > 30:
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
    if " " in data:
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style='"')
    # `not data` already covers "" — the original's `or data == ""` was redundant.
    if not data:
        return dumper.represent_scalar("tag:yaml.org,2002:null", "")
    return dumper.represent_scalar("tag:yaml.org,2002:str", data)


def none_representer(dumper, data):
    """Emit Python ``None`` as an empty scalar (bare ``key:``) instead of ``null``."""
    return dumper.represent_scalar("tag:yaml.org,2002:null", "")


# Apply the custom representer
yaml.add_representer(str, literal_presenter)
Expand All @@ -90,18 +93,20 @@ def create_data_model(data, default_level=ALTERNATIVE):
# country = alternative.get('attributes').get('Market', None)
# categories_raw = alternative.get('attributes').get('tags')
# categories = categories.split(",") if categories_raw else [] #split_categories(categories_raw)
brand_name = row.get('attributes').get('name')
reason = row.get('attributes').get('proof')

brand_name = row.get("attributes").get("name")
reason = row.get("attributes").get("proof")
parent_name = parent_from_details(reason)
source_url = row.get('attributes').get('proofUrl')
image_url = row.get('attributes').get('imageUrl')
website = row.get('attributes').get('Website', '')
level = row.get('attributes').get('Level', default_level)
country = row.get('attributes').get('Market', None)
categories_raw = row.get('attributes').get('tags')
categories = categories.split(",") if categories_raw else [] #split_categories(categories_raw)
location = GLOBAL if not country or country == '' else country
source_url = row.get("attributes").get("proofUrl")
image_url = row.get("attributes").get("imageUrl")
website = row.get("attributes").get("Website", "")
level = row.get("attributes").get("Level", default_level)
country = row.get("attributes").get("Market", None)
categories_raw = row.get("attributes").get("tags")
categories = (
categories.split(",") if categories_raw else []
) # split_categories(categories_raw)
location = GLOBAL if not country or country == "" else country

if not brand_name:
continue
Expand All @@ -111,51 +116,46 @@ def create_data_model(data, default_level=ALTERNATIVE):
NAME: parent_name,
LOCATION: location,
LEVEL: level,
DETAILS: {
REASON: reason,
SOURCE: source_url
}
DETAILS: {REASON: reason, SOURCE: source_url},
}
yaml_data[brand_name][PARENTS].append(new_parent)
else:
yaml_data[brand_name] = {
NAME: brand_name,
WEBSITE: website,
IMAGE_URL: image_url,
CATEGORIES: categories,
PARENTS: [{
NAME: parent_name,
LOCATION: location,
LEVEL: level,
DETAILS: {
REASON: reason,
SOURCE: source_url
}
}]
NAME: brand_name,
WEBSITE: website,
IMAGE_URL: image_url,
CATEGORIES: categories,
PARENTS: [
{
NAME: parent_name,
LOCATION: location,
LEVEL: level,
DETAILS: {REASON: reason, SOURCE: source_url},
}
],
}
return yaml_data


def write_yaml(data, file_name):
    """Dump the brand mapping to *file_name* as one YAML document.

    The values of *data* are wrapped under a top-level ``brands`` list.
    ``sort_keys=False`` preserves insertion order so the output matches the
    order the data model was built in.
    """
    data_list = {"brands": list(data.values())}
    with open(file_name, "w", encoding="utf-8") as yaml_file:
        yaml.dump(data_list, yaml_file, default_flow_style=False, sort_keys=False)


def json_to_csv(json_file, output):
    """Convert a scraped JSON dump into the repo's YAML data model.

    NOTE(review): despite the name, this writes YAML (via write_yaml), not
    CSV — renaming to json_to_yaml would be clearer, kept for caller
    compatibility.

    Args:
        json_file: Path to the input JSON file.
        output: Path of the YAML file to write.
    """
    with open(json_file, encoding="utf-8") as fh:
        data = json.load(fh)
    yaml_data = create_data_model(data)
    write_yaml(yaml_data, output)
    # Removed leftover pdb.set_trace(): a forgotten debugger breakpoint that
    # would halt every run waiting for interactive input.


if __name__ == "__main__":
    # Usage: python scripts/import.py <input.json> <output.yaml>
    args = sys.argv[1:]

    file_to_parse = args[0]
    output_yaml = args[1]

    json_to_csv(file_to_parse, output_yaml)


Loading
Loading