From edd4aad71125077d903d61d54224417a6558e054 Mon Sep 17 00:00:00 2001 From: idris <> Date: Sun, 7 Jan 2024 14:12:45 -0500 Subject: [PATCH 1/3] Recenter around brands instead of companies. Consolidate brands with alternative_brands. Add schema validation --- .github/workflows/ci.yml | 4 +- .gitignore | 1 + data/alternative_brands/drinkmate.yaml | 2 - data/{companies => brands}/ahava.yaml | 13 ++-- data/brands/drinkmate.yaml | 5 ++ .../puma-se.yaml => brands/puma.yaml} | 11 ++- data/brands/sabra.yaml | 16 ++++ data/brands/sodastream.yaml | 18 +++++ data/companies/pepsico.yaml | 10 +-- data/companies/strauss-group.yaml | 4 + requirements.txt | 3 +- schemas/brand_schema.yaml | 76 +++++++++++++++++++ schemas/company_schema.yaml | 17 +++++ validate_model.py | 1 - validate_yaml.py | 47 ++++++++++++ 15 files changed, 202 insertions(+), 26 deletions(-) delete mode 100644 data/alternative_brands/drinkmate.yaml rename data/{companies => brands}/ahava.yaml (60%) create mode 100644 data/brands/drinkmate.yaml rename data/{companies/puma-se.yaml => brands/puma.yaml} (75%) create mode 100644 data/brands/sabra.yaml create mode 100644 data/brands/sodastream.yaml create mode 100644 data/companies/strauss-group.yaml create mode 100644 schemas/brand_schema.yaml create mode 100644 schemas/company_schema.yaml delete mode 100644 validate_model.py create mode 100644 validate_yaml.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 18d1e2e..058f18a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: - run: | pip install -r requirements.txt python --version - python validate_model.py + python validate_yaml.py if_merged: if: github.event.pull_request.merged == true @@ -55,7 +55,7 @@ jobs: cache: 'pip' # caching pip dependencies - run: | pip install -r requirements.txt - python validate_model.py + python validate_yaml.py python generate_model.py - uses: mikeal/publish-to-github-action@master env: diff --git a/.gitignore 
b/.gitignore index e43b0f9..673ae00 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .DS_Store +.rtx.toml diff --git a/data/alternative_brands/drinkmate.yaml b/data/alternative_brands/drinkmate.yaml deleted file mode 100644 index 17f62eb..0000000 --- a/data/alternative_brands/drinkmate.yaml +++ /dev/null @@ -1,2 +0,0 @@ -name: "Drinkmate" -website: https://idrinkproducts.com/ \ No newline at end of file diff --git a/data/companies/ahava.yaml b/data/brands/ahava.yaml similarity index 60% rename from data/companies/ahava.yaml rename to data/brands/ahava.yaml index e83dec0..6ee252b 100644 --- a/data/companies/ahava.yaml +++ b/data/brands/ahava.yaml @@ -1,12 +1,13 @@ name: "Ahava" -reason: |- +status: avoid +reasons: [operations_in_settlements, operations_in_israel] +countries: [global] +website: https://www.ahava.com/ +logo_url: https://upload.wikimedia.org/wikipedia/commons/8/81/AhavaLogo.png +description: |- Ahava is an Israeli cosmetics company which operates its main manufacturing plant and showroom in Mitzpe Shalem, an illegal settlement in the West Bank.[^1] The Palestinian BDS National Committee has called for a boycott of Ahava.[^2] [^1]: https://en.wikipedia.org/wiki/Ahava - [^2]: https://bdsmovement.net/Act-Now-Against-These-Companies-Profiting-From-Genocide -brands: - - name: Ahava - slug: ahava - alternatives: [] \ No newline at end of file + [^2]: https://bdsmovement.net/Act-Now-Against-These-Companies-Profiting-From-Genocide \ No newline at end of file diff --git a/data/brands/drinkmate.yaml b/data/brands/drinkmate.yaml new file mode 100644 index 0000000..b5f2ddd --- /dev/null +++ b/data/brands/drinkmate.yaml @@ -0,0 +1,5 @@ +name: "Drinkmate" +status: neutral +countries: [us, ca, gb, eu, au, co, pe, ae, sa, jp, sg, tw] +website: https://idrinkproducts.com/ +description: Drinkmate offers sparkling water and soda makers (also known as soda machines). 
\ No newline at end of file diff --git a/data/companies/puma-se.yaml b/data/brands/puma.yaml similarity index 75% rename from data/companies/puma-se.yaml rename to data/brands/puma.yaml index 91cfecc..9f87db5 100644 --- a/data/companies/puma-se.yaml +++ b/data/brands/puma.yaml @@ -1,11 +1,10 @@ -name: "Puma SE" -reason: |- +name: "Puma" +status: avoid +reasons: [operations_in_israel] +countries: [global] +description: |- The Palestinian BDS National Committee has called for a boycott of Puma[^1] for several reasons, including their sponsorship of the Israeli Football Association (IFA), which includes teams in illegal settlements on occupied Palestinian land. [^1]: https://bdsmovement.net/boycott-puma -brands: - - name: Puma - slug: puma - alternatives: [] \ No newline at end of file diff --git a/data/brands/sabra.yaml b/data/brands/sabra.yaml new file mode 100644 index 0000000..e4f8d17 --- /dev/null +++ b/data/brands/sabra.yaml @@ -0,0 +1,16 @@ +name: "Sabra" +status: avoid +reasons: [operations_in_israel] +categories: [food] +website: https://sabra.com/ +description: |- + Sabra was an Israeli hummus company, and is now a joint venture between Pepsi and Strauss. Strauss is an Israeli food company. +alternatives_text: |- + Most hummus is not Israeli. Any non-Israeli hummus at your local grocery store is preferred over Sabra. 
+stakeholders: + - id: pepsico + type: owner + percent: 50 + - id: strauss-group + type: owner + percent: 50 diff --git a/data/brands/sodastream.yaml b/data/brands/sodastream.yaml new file mode 100644 index 0000000..b9268c9 --- /dev/null +++ b/data/brands/sodastream.yaml @@ -0,0 +1,18 @@ +name: "SodaStream" +status: avoid +reasons: [operations_in_israel] +countries: [global] +website: https://sodastream.com/ +logo_url: https://upload.wikimedia.org/wikipedia/commons/thumb/6/63/Soda_Stream_Logo.svg/2560px-Soda_Stream_Logo.svg.png +description: |- # optional + SodaStream has long been a BDS target due to their operation of factories on stolen land and their racial discrimination against Palestinian workers.[^1] + + [^1]: https://bdsmovement.net/Act-Now-Against-These-Companies-Profiting-From-Genocide +alternatives: [drinkmate] +stakeholders: + - id: pepsico + type: owner + percent: 50 + - id: strauss-group + type: owner + percent: 50 \ No newline at end of file diff --git a/data/companies/pepsico.yaml b/data/companies/pepsico.yaml index 63e4a27..7b927bf 100644 --- a/data/companies/pepsico.yaml +++ b/data/companies/pepsico.yaml @@ -1,5 +1,6 @@ name: "PepsiCo, Inc." 
-reason: |- +status: avoid +description: |- PepsiCo purchased the Israeli company SodaStream in August 2018.[^1] SodaStream has long been a BDS target due to their operation of factories on stolen land and their racial discrimination against Palestinian workers.[^2] @@ -7,10 +8,3 @@ reason: |- [^1]: https://www.nasdaq.com/articles/sodastream-sells-out-price-too-low-2018-08-20 [^2]: https://bdsmovement.net/Act-Now-Against-These-Companies-Profiting-From-Genocide -brands: - - name: SodaStream - slug: sodastream - alternatives: [drinkmate] - - name: Sabra - slug: sabra - alternatives: [] \ No newline at end of file diff --git a/data/companies/strauss-group.yaml b/data/companies/strauss-group.yaml new file mode 100644 index 0000000..23a1e40 --- /dev/null +++ b/data/companies/strauss-group.yaml @@ -0,0 +1,4 @@ +name: "Strauss Group Ltd." +status: avoid +description: |- + Strauss Group is an Israeli manufacturer and marketer of consumer foods. \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8b13789..0999e11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ - +jsonschema == 4.20.0 +pyyaml == 6.0.1 \ No newline at end of file diff --git a/schemas/brand_schema.yaml b/schemas/brand_schema.yaml new file mode 100644 index 0000000..81b8a20 --- /dev/null +++ b/schemas/brand_schema.yaml @@ -0,0 +1,76 @@ +type: object +properties: + name: + type: string + description: + type: string + status: + type: string + enum: + - support + - neutral + - avoid + reasons: + type: array + items: + type: string + enum: + - operations_in_israel + - operations_in_settlements + countries: + type: array + items: + anyOf: + - type: string + enum: [global] + - type: string + pattern: "^[a-z]{2}$" + categories: + type: array + items: + type: string + enum: [food] + website: + type: string + format: uri + pattern: "^https?://" + logo_url: + type: string + format: uri + pattern: "^https?://" + alternatives: + type: array + items: + type: string + 
pattern: "^[a-z][a-z-]+[a-z]$" + alternatives_text: + type: string + stakeholders: + type: array + items: + anyOf: + - type: object + properties: + id: + type: string + pattern: "^[a-z][a-z-]+[a-z]$" + type: + type: string + enum: [owner] + ownership_percent: + type: number + minimum: 10 + maximum: 100 + allOf: + if: + properties: + status: + const: avoid + required: [status] + then: + required: [reasons] +required: + - name + - status + - description +additionalProperties: false \ No newline at end of file diff --git a/schemas/company_schema.yaml b/schemas/company_schema.yaml new file mode 100644 index 0000000..40f64db --- /dev/null +++ b/schemas/company_schema.yaml @@ -0,0 +1,17 @@ +type: object +properties: + name: + type: string + description: + type: string + status: + type: string + enum: + - support + - neutral + - avoid +required: + - name + - status + - description +additionalProperties: false \ No newline at end of file diff --git a/validate_model.py b/validate_model.py deleted file mode 100644 index 8b13789..0000000 --- a/validate_model.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/validate_yaml.py b/validate_yaml.py new file mode 100644 index 0000000..914e8b2 --- /dev/null +++ b/validate_yaml.py @@ -0,0 +1,47 @@ +import os +import glob +import yaml +import logging +import jsonschema +from jsonschema import validate + +def get_filename_only(file_path): + base_name = os.path.basename(file_path) + filename_only, _ = os.path.splitext(base_name) + return filename_only + +def load_yaml(file_path): + with open(file_path, 'r') as file: + return yaml.safe_load(file) + +def validate_with_schema(file_path, schema): + try: + validate(load_yaml(file_path), schema) + return True + except jsonschema.exceptions.ValidationError as ve: + logging.error('Validation error in ' + get_filename_only(file_path)) + logging.error(ve) + return False + +def main(): + global root_path + root_path = os.path.dirname(os.path.realpath(__file__)) + + brand_schema = 
load_yaml(os.path.join(root_path, 'schemas/brand_schema.yaml')) + brand_files = glob.glob(os.path.join(root_path, 'data/brands/') + '*.yaml') + print('Validating', len(brand_files), 'brands') + for file in brand_files: + if not validate_with_schema(file, brand_schema): + exit(1) + print('All brands are valid.') + + company_schema = load_yaml(os.path.join(root_path, 'schemas/company_schema.yaml')) + company_files = glob.glob(os.path.join(root_path, 'data/companies/') + '*.yaml') + print('Validating', len(company_files), 'companies') + for file in company_files: + if not validate_with_schema(file, company_schema): + exit(1) + print('All companies are valid.') + +if __name__ == "__main__": + main() \ No newline at end of file From 93a52e96388f0fc47fd157e599d012dae184ed19 Mon Sep 17 00:00:00 2001 From: idris <> Date: Sun, 7 Jan 2024 14:22:22 -0500 Subject: [PATCH 2/3] Remove unnecesary actions in ci.yml --- .github/workflows/ci.yml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 058f18a..e109fe9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,21 +22,15 @@ jobs: # Steps represent a sequence of tasks that will be executed as part of the job steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 - - # Runs a single command using the runners shell - - name: Run a one-line script - run: echo Hello, world! 
- # Setup and run python script, generates a json file - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.12' cache: 'pip' # caching pip dependencies - - run: | - pip install -r requirements.txt + - run: pip install -r requirements.txt + - name: Validate YAML + run: | python --version python validate_yaml.py From 9e0b1978ea4bb002eb61e6443926ff80c17cfd4b Mon Sep 17 00:00:00 2001 From: idris <> Date: Sun, 7 Jan 2024 14:47:15 -0500 Subject: [PATCH 3/3] Remove schema from readme and add descriptions to the schema yaml files --- README.md | 49 +++++-------------------------------- schemas/brand_schema.yaml | 16 ++++++++++++ schemas/company_schema.yaml | 4 +++ 3 files changed, 26 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index 3a87022..f4eb435 100644 --- a/README.md +++ b/README.md @@ -11,50 +11,13 @@ Sources: - https://pastebin.com/raw/ks9GRE4L +## Data +All data is inputted & stored as YAML files in the `data/` directory. +Output formats, such as CSV and JSON are in the `generated/` directory. 
-## Model - -Standardised data model for each company: - -Example: - -```yaml - - # name: required, brand name -- name: "Brand" - - # website: optional, website of company, useful for alternatives to help users navigate to and shop from - website: - # image_url: optional, link to brand logo image - image_url: |- - https://1000logos.net/wp-content/uploads/2023/03/Whiskas-Logo-2003.png - - # categories/tags: optional, for grouping data, makes it easier to find alternatives - categories: [] - - # parents: required, list of parents, since one brand can have different parent companies in a different country/region - parents: - # name: required, parent company name - - name: "Parent Company" - - # location: required, list of locations where this is the parent company of the brand - location: [global] - - # level: required, boycotting level, one of direct, alternative - level: direct - - # details: optional - boycotting details, relevant for boycott brand, can be empty for alternative - # reason: reason for boycotting (if applicable) - # source_url: evidence for why this brand/parent should be boycotted - details: - reason: |- - **Wilson Partnership** - - **Wilson** has partnered with **Delta Galil Industries**, Ltd. (DELT/Tel Aviv Stock Exchange), the global manufacturer and marketer of branded and private label apparel products for men, women, and children. **Delta Galil Industries** is an **Israeli** textile firm headquartered in **Tel Aviv**, with plants around the world. - source_url: |- - https://deltagalil.com/brands/licensed-brands/default.aspx -``` - +Schemas for the YAML data can be found in the `schemas` directory, along with descriptions for each field. +These schemas are in [JSON Schema](https://json-schema.org/) format, but represented in YAML for simplicity. +The `validate_yaml.py` script validates all brands and companies using the schemas. 
## Useful Resources & Links diff --git a/schemas/brand_schema.yaml b/schemas/brand_schema.yaml index 81b8a20..54bf33c 100644 --- a/schemas/brand_schema.yaml +++ b/schemas/brand_schema.yaml @@ -1,16 +1,21 @@ type: object +description: A brand is something that a consumer may search to know whether or not to purchase a product. properties: name: + description: Name of the brand type: string description: + description: Explanation of why the brand is on this list. Or, if it's not a brand to avoid, just a description of the brand. type: string status: + description: Whether to support or avoid this brand type: string enum: - support - neutral - avoid reasons: + description: A list of reasons for why consumers should avoid this brand. type: array items: type: string @@ -18,6 +23,10 @@ properties: - operations_in_israel - operations_in_settlements countries: + description: |- + A list of countries (ISO alpha-2 country codes) that the brand operates in. + Useful for filtering for brands that are relevant to a specific region. + "global" means it is available in all countries. type: array items: anyOf: @@ -35,17 +44,24 @@ properties: format: uri pattern: "^https?://" logo_url: + description: Logo should be at least 200x200 pixels type: string format: uri pattern: "^https?://" alternatives: + description: List of brands that would be an alternative option to purchasing from this brand. type: array items: type: string pattern: "^[a-z][a-z-]+[a-z]$" alternatives_text: + description: |- + Plain text description of alternatives. + Especially useful when alternatives are difficult to enumerate. type: string stakeholders: + description: |- + If useful, a list of stakeholders, such as companies who own this brand. 
type: array items: anyOf: diff --git a/schemas/company_schema.yaml b/schemas/company_schema.yaml index 40f64db..1148811 100644 --- a/schemas/company_schema.yaml +++ b/schemas/company_schema.yaml @@ -1,11 +1,15 @@ type: object +description: A company is an entity that owns more than one brand, or owns other companies. properties: name: type: string + description: Name of the company description: type: string + description: Description of the company, including why it is on the list status: type: string + description: Whether to support or avoid this company enum: - support - neutral