Merge pull request #9 from SelmaGuedidi/main

Export data as CSV and JSON
TechForPalestine · Jan 21, 2024 · 07207e3 · 07207e3
2 parents bda1caf + c277bdd
commit 07207e3
Show file tree

Hide file tree

Showing 9 changed files with 266 additions and 19,096 deletions.
diff --git a/.github/workflows/check_yaml_consistency.yml b/.github/workflows/check_yaml_consistency.yml
@@ -0,0 +1,21 @@
+name: Check YAML Consistency  # re-runs the export and fails if tracked files change
+'on':
+  pull_request:
+    paths:
+      - '**/*.yaml'
+jobs:
+  check_yaml:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4  # v2 targets a Node.js runtime GitHub has retired
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'  # quoted so YAML keeps it a string, not the float 3.12
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+      - name: Run export and check consistency
+        run: |
+          python export.py
+          git diff --exit-code
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,9 @@
+repos:
+-   repo: local
+    hooks:
+    -   id: export-yaml
+        name: Export YAML to CSV and JSON
+        language: python
+        entry: python export.py
+        additional_dependencies: ['pyyaml']
+        pass_filenames: false  # export.py reads no arguments; run it once, not once per file batch
diff --git a/export.py b/export.py
@@ -0,0 +1,89 @@
+
+import yaml
+import csv
+import json
+import csv
+import os
+def read_yaml(file_path):
+    """Parse the YAML file at file_path with yaml.safe_load and return the result."""
+    with open(file_path, 'r', encoding='utf-8') as file:  # don't depend on the platform's default encoding
+        return yaml.safe_load(file)
+
+def clean_value(value):
+    """Return list values as one ', '-separated string; pass anything else through unchanged."""
+    if not isinstance(value, list):
+        return value
+    return ', '.join(str(item) for item in value)
+def export_to_csv(input_dir, output_csv, schema_file):
+    """Write one CSV row per ``.yaml`` file in input_dir to output_csv.
+
+    Columns are 'id' (the file name without its extension) followed by the
+    keys of the schema's 'properties' mapping, minus 'stakeholders'. List
+    values are flattened by clean_value; keys absent from a file stay empty.
+    """
+    schema_fields = list(read_yaml(schema_file)['properties'].keys())
+    if 'stakeholders' in schema_fields:
+        # Haven't decided how to represent stakeholders in the CSV format, so just remove it for now.
+        schema_fields.remove('stakeholders')
+    fieldnames = ['id'] + schema_fields
+    # Explicit UTF-8: a locale default such as cp1252 can fail on non-ASCII text.
+    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        writer.writeheader()
+        for yaml_file in sorted(os.listdir(input_dir)):
+            if not yaml_file.endswith(".yaml"):
+                continue
+            with open(os.path.join(input_dir, yaml_file), 'r', encoding='utf-8') as file:
+                # An empty YAML file loads as None; treat it as a record with no fields.
+                data = yaml.safe_load(file) or {}
+            row = {key: clean_value(data.get(key)) for key in fieldnames}
+            row['id'] = os.path.splitext(yaml_file)[0]
+            writer.writerow(row)
+            print(f"Converted {yaml_file} to CSV")
+def convert_yaml_to_json(directory_path, key):
+    """Collect every ``.yaml`` file in directory_path into a dict nested under key.
+
+    Returns {key: {stem: {'id': stem, **fields}}}; stem is the file name without
+    its extension, and files are visited in sorted name order.
+    """
+    data = {}
+    for file_name in sorted(os.listdir(directory_path)):
+        if file_name.endswith(".yaml"):
+            stem = os.path.splitext(file_name)[0]
+            with open(os.path.join(directory_path, file_name), 'r', encoding='utf-8') as yaml_file:
+                # An empty file loads as None, which cannot be **-unpacked; treat it as no fields.
+                data[stem] = {'id': stem, **(yaml.safe_load(yaml_file) or {})}
+    return {key: data}
+
+def export_to_json(directory1, directory2, output_json):
+    """Dump both YAML directories into one JSON file at output_json.
+
+    directory1 is stored under 'brands' and directory2 under 'companies'.
+    """
+    combined = dict(convert_yaml_to_json(directory1, 'brands'))
+    combined.update(convert_yaml_to_json(directory2, 'companies'))
+    with open(output_json, 'w') as out:
+        json.dump(combined, out, indent=2)
+        print("Converted data to JSON")
+
+
+if __name__ == "__main__":
+    # Directories holding the YAML source files.
+    brands_dir = 'data/brands'
+    companies_dir = 'data/companies'
+
+    # One CSV per directory: (input directory, output CSV, schema file).
+    csv_jobs = [
+        (brands_dir, 'output/csv/brands.csv', 'schemas/brand_schema.yaml'),
+        (companies_dir, 'output/csv/companies.csv', 'schemas/company_schema.yaml'),
+    ]
+    for source_dir, csv_path, schema_path in csv_jobs:
+        export_to_csv(source_dir, csv_path, schema_path)
+
+    # A single JSON document combining both directories.
+    export_to_json(brands_dir, companies_dir, 'output/json/data.json')
+
+
+
+
+