From ebb7f4f554c5c1b3658ed90c0f27d77338a15d8a Mon Sep 17 00:00:00 2001
From: Luna McNulty
Date: Tue, 2 Jan 2024 19:22:36 -0500
Subject: [PATCH 1/2] =?UTF-8?q?Checklists=20=E2=80=93=20Test=20sorting=20r?=
 =?UTF-8?q?isks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../e2e/integration/apps/checklistsForm.cy.js |  40 ++
 .../checklists/riskSortingChecklist.json      | 137 ++++++
 .../fixtures/checklists/riskSortingRisks.json | 415 ++++++++++++++++++
 .../components/checklists/CheckListForm.js    |   2 +-
 4 files changed, 593 insertions(+), 1 deletion(-)
 create mode 100644 site/gatsby-site/cypress/fixtures/checklists/riskSortingChecklist.json
 create mode 100644 site/gatsby-site/cypress/fixtures/checklists/riskSortingRisks.json

diff --git a/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js b/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js
index 1798f5d6d3..016d518a37 100644
--- a/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js
+++ b/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js
@@ -1,4 +1,6 @@
 import { maybeIt } from '../../../support/utils';
+import riskSortingRisks from '../../../fixtures/checklists/riskSortingRisks.json';
+import riskSortingChecklist from '../../../fixtures/checklists/riskSortingChecklist.json';

 const { gql } = require('@apollo/client');

@@ -59,6 +61,15 @@ describe('Checklists App Form', () => {
     );
   };

+  const interceptFindRisks = (risks) => {
+    cy.conditionalIntercept(
+      '**/graphql',
+      (req) => req.body.operationName == 'findRisks',
+      'findRisks',
+      { data: { risks } }
+    );
+  };
+
   it('Should have read-only access for non-logged-in users', () => {
     interceptFindChecklist(defaultChecklist);

@@ -132,4 +143,33 @@ describe('Checklists App Form', () => {
       cy.visit(url);
     });
   });
+
+  it('Should change sort order of risk items', () => {
+    cy.viewport(1920, 1080);
+
+    withLogin(({ user }) => {
+      interceptFindChecklist({
+        ...riskSortingChecklist.data.checklist,
+        owner_id: user.userId,
+      });
+
+      interceptFindRisks(riskSortingRisks.data.risks);
+
+      cy.visit(url);
+
+      cy.wait(['@findChecklist']);
+
+      cy.wait(['@findRisks']);
+
+      cy.waitForStableDOM();
+
+      cy.contains('Mitigated').click();
+
+      cy.get('details:nth(1)').contains('Distributional Bias').should('exist');
+
+      cy.contains('Minor').click();
+
+      cy.get('details:nth(1)').contains('Dataset Imbalance').should('exist');
+    });
+  });
 });
diff --git a/site/gatsby-site/cypress/fixtures/checklists/riskSortingChecklist.json b/site/gatsby-site/cypress/fixtures/checklists/riskSortingChecklist.json
new file mode 100644
index 0000000000..e9299b1765
--- /dev/null
+++ b/site/gatsby-site/cypress/fixtures/checklists/riskSortingChecklist.json
@@ -0,0 +1,137 @@
{
  "data": {
    "checklist": {
      "__typename": "Checklist",
      "about": "",
      "date_created": "2024-01-02T23:38:07.875Z",
      "date_updated": "2024-01-02T23:38:07.875Z",
      "id": "1b4d984c-84c9-4417-a57f-a04acde37c36",
      "name": "Unspecified System",
      "risks": [
        {
          "__typename": "ChecklistRisk",
          "generated": false,
          "id": "7876ca98-ca8a-4365-8c39-203474c1dc38",
          "likelihood": "",
          "precedents": [
            {
              "__typename": "ChecklistRiskPrecedent",
              "description": "The French digital care company, Nabla, in researching GPT-3’s capabilities for medical documentation, diagnosis support, and treatment recommendation, found its inconsistency and lack of scientific and medical expertise unviable and risky in healthcare applications. This incident has been downgraded to an issue as it does not meet current ingestion criteria.",
              "incident_id": 287,
              "tags": [
                "GMF:Known AI Goal:Question Answering",
                "GMF:Known AI Technology:Transformer",
                "GMF:Known AI Technology:Language Modeling",
                "GMF:Known AI Technology:Distributional Learning",
                "GMF:Known AI Technology Classification Discussion:Distributional Learning: If no training data citing sources is available and/or not enough data from a medical domain were available.",
                "GMF:Potential AI Technical Failure:Limited Dataset",
                "GMF:Potential AI Technical Failure:Problematic Input",
                "GMF:Potential AI Technical Failure:Robustness Failure",
                "GMF:Potential AI Technical Failure:Overfitting",
                "GMF:Potential AI Technical Failure:Underfitting",
                "GMF:Potential AI Technical Failure:Inadequate Sequential Memory",
                "GMF:Potential AI Technical Failure Classification Discussion:Limited Dataset: If no training data citing sources is available and/or not enough data from a medical domain were available.\n\nProblematic Input: If the prompt does not state that references are required.\n\nOverfitting: System does not capture the semantic content of the prompt, but focuses on specific verbage.\n\nUnderfitting: Due to lack of fine-tuning, the model can be considered as having a poor fit for specific medical questions.",
                "GMF:Known AI Technical Failure:Distributional Artifacts"
              ],
              "title": "OpenAI’s GPT-3 Reported as Unviable in Medical Tasks by Healthcare Firm"
            }
          ],
          "risk_notes": "",
          "risk_status": "Mitigated",
          "severity": "Minor",
          "tags": [
            "GMF:Known AI Technical Failure:Distributional Artifacts"
          ],
          "title": "Distributional Artifacts",
          "touched": false
        },
        {
          "__typename": "ChecklistRisk",
          "generated": false,
          "id": "9a1d30d3-b45c-4d7f-a593-8d7dfe78ffb7",
          "likelihood": "",
          "precedents": [
            {
              "__typename": "ChecklistRiskPrecedent",
              "description": "Facebook's automatic language translation software incorrectly translated an Arabic post saying \"Good morning\" into Hebrew saying \"hurt them,\" leading to the arrest of a Palestinian man in Beitar Illit, Israel.",
              "incident_id": 72,
              "tags": [
                "GMF:Known AI Goal:Translation",
                "GMF:Known AI Goal Classification Discussion:Translation: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n",
                "GMF:Known AI Technical Failure:Dataset Imbalance",
                "GMF:Known AI Technical Failure:Distributional Bias",
                "GMF:Known AI Technical Failure Classification Discussion:Dataset Imbalance: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n\n\nDistributional Bias: Biased language in Western / Israeli media texts about Arabs could build false associations and high priors to terrorism and violence.",
                "GMF:Potential AI Technical Failure:Generalization Failure",
                "GMF:Potential AI Technical Failure Classification Discussion:Generalization Failure: Perhaps only one (standard arabic) or a few dialects are supported, and one or more language models is used as fallback for all arabic languages.",
                "GMF:Known AI Technology:Convolutional Neural Network",
                "GMF:Known AI Technology:Recurrent Neural Network",
                "GMF:Known AI Technology:Distributional Learning",
                "GMF:Potential AI Technology:Intermediate modeling",
                "GMF:Potential AI Technology:Classification",
                "GMF:Potential AI Technology:Multimodal Learning",
                "GMF:Potential AI Technology:Image Classification",
                "GMF:Potential AI Technology Classification Discussion:Intermediate modeling: Perhaps intermmediate languages are used (i.e. if no model has been trained to translate X to Y, use X->Z and then Z->Y), which accumulate errors.\n\nClassification: GIven the amount of supported languages for translation, a system must exist to detect the input language and classify amongst supported languages.\n\nMultimodal Learning: If image was also utilized to generate the translation, that would provide additional evidence to the mistranslation.\n\nImage Classification: If multimodal learning is used, perhaps the buldozer was recognized and its extracted keyword contributed to the bias in the NLP domain."
              ],
              "title": "Facebook translates 'good morning' into 'attack them', leading to arrest"
            }
          ],
          "risk_notes": "",
          "risk_status": "Not Mitigated",
          "severity": "Minor",
          "tags": [
            "GMF:Known AI Technical Failure:Dataset Imbalance"
          ],
          "title": "Dataset Imbalance",
          "touched": false
        },
        {
          "__typename": "ChecklistRisk",
          "generated": false,
          "id": "687fe402-3dad-4630-beef-7e423e64e4fd",
          "likelihood": "",
          "precedents": [
            {
              "__typename": "ChecklistRiskPrecedent",
              "description": "Facebook's automatic language translation software incorrectly translated an Arabic post saying \"Good morning\" into Hebrew saying \"hurt them,\" leading to the arrest of a Palestinian man in Beitar Illit, Israel.",
              "incident_id": 72,
              "tags": [
                "GMF:Known AI Goal:Translation",
                "GMF:Known AI Goal Classification Discussion:Translation: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n",
                "GMF:Known AI Technical Failure:Dataset Imbalance",
                "GMF:Known AI Technical Failure:Distributional Bias",
                "GMF:Known AI Technical Failure Classification Discussion:Dataset Imbalance: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n\n\nDistributional Bias: Biased language in Western / Israeli media texts about Arabs could build false associations and high priors to terrorism and violence.",
                "GMF:Potential AI Technical Failure:Generalization Failure",
                "GMF:Potential AI Technical Failure Classification Discussion:Generalization Failure: Perhaps only one (standard arabic) or a few dialects are supported, and one or more language models is used as fallback for all arabic languages.",
                "GMF:Known AI Technology:Convolutional Neural Network",
                "GMF:Known AI Technology:Recurrent Neural Network",
                "GMF:Known AI Technology:Distributional Learning",
                "GMF:Potential AI Technology:Intermediate modeling",
                "GMF:Potential AI Technology:Classification",
                "GMF:Potential AI Technology:Multimodal Learning",
                "GMF:Potential AI Technology:Image Classification",
                "GMF:Potential AI Technology Classification Discussion:Intermediate modeling: Perhaps intermmediate languages are used (i.e. if no model has been trained to translate X to Y, use X->Z and then Z->Y), which accumulate errors.\n\nClassification: GIven the amount of supported languages for translation, a system must exist to detect the input language and classify amongst supported languages.\n\nMultimodal Learning: If image was also utilized to generate the translation, that would provide additional evidence to the mistranslation.\n\nImage Classification: If multimodal learning is used, perhaps the buldozer was recognized and its extracted keyword contributed to the bias in the NLP domain."
              ],
              "title": "Facebook translates 'good morning' into 'attack them', leading to arrest"
            }
          ],
          "risk_notes": "",
          "risk_status": "Mitigated",
          "severity": "Severe",
          "tags": [
            "GMF:Known AI Technical Failure:Distributional Bias"
          ],
          "title": "Distributional Bias",
          "touched": false
        }
      ],
      "tags_goals": [
        "GMF:Known AI Goal:Question Answering"
      ],
      "tags_methods": [
        "GMF:Potential AI Technology:Classification",
        "GMF:Known AI Technology:Language Modeling"
      ],
      "tags_other": []
    }
  }
}
diff --git a/site/gatsby-site/cypress/fixtures/checklists/riskSortingRisks.json b/site/gatsby-site/cypress/fixtures/checklists/riskSortingRisks.json
new file mode 100644
index 0000000000..7bb6f3732c
--- /dev/null
+++ b/site/gatsby-site/cypress/fixtures/checklists/riskSortingRisks.json
@@ -0,0 +1,415 @@
{
  "data": {
    "risks": [
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "There are multiple reports of Amazon Alexa products (Echo, Echo Dot) reacting and acting upon unintended stimulus, usually from television commercials or news reporter's voices.",
            "incident_id": 34,
            "tags": [
              "GMF:Known AI Goal:AI Voice Assistant",
              "GMF:Known AI Technology:Automatic Speech Recognition",
              "GMF:Known AI Technology:Language Modeling",
              "GMF:Known AI Technology:Acoustic Fingerprint",
              "GMF:Known AI Technical Failure:Unsafe Exposure or Access",
              "GMF:Known AI Technical Failure:Misuse",
              "GMF:Potential AI Technical Failure:Unauthorized Data",
              "GMF:Potential AI Technical Failure:Inadequate Anonymization",
              "GMF:Potential AI Technical Failure:Context Misidentification",
              "GMF:Potential AI Technical Failure:Lack of Capability Control",
              "GMF:Potential AI Technical Failure:Underspecification",
              "GMF:Potential AI Technical Failure Classification Discussion:Unauthorized Data: This may apply on an ethical level: presumably the users sign an agreement consenting to passive voice capture.\n\nInadequate Anonymization: This may apply on an ethical level: presumably the users sign an agreement consenting to passive voice capture.\n\nLack of Capability Control: This is relevant if the order went through.\n\nUnderspecification: Speaker diarization / recognition missing."
            ],
            "title": "Amazon Alexa Responding to Environmental Inputs"
          },
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "In Taiwan, a Tesla Model 3 on Autopilot mode whose driver did not pay attention to the road collided with a road repair truck; a road engineer immediately placed crash warnings in front of the Tesla, but soon after got hit and was killed by a BMW when its driver failed to see the sign and crashed into the accident.",
            "incident_id": 221,
            "tags": [
              "GMF:Potential AI Technology:Convolutional Neural Network",
              "GMF:Potential AI Technology:Visual Object Detection",
              "GMF:Potential AI Technology:Classification",
              "GMF:Known AI Technology:Image Segmentation",
              "GMF:Potential AI Technology Classification Discussion:Visual Object Detection: Potentially subtask of segmentation.\n\nClassification: Potentially subtask of segmentation.",
              "GMF:Known AI Technical Failure:Misuse",
              "GMF:Known AI Technical Failure:Generalization Failure",
              "GMF:Known AI Technical Failure Classification Discussion:Misuse: Driver should not use autopilot without supervision.",
              "GMF:Known AI Goal:Autonomous Driving"
            ],
            "title": "A Road Engineer Killed Following a Collision Involving a Tesla on Autopilot"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Misuse"
        ],
        "title": "Misuse"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "Facebook's automatic language translation software incorrectly translated an Arabic post saying \"Good morning\" into Hebrew saying \"hurt them,\" leading to the arrest of a Palestinian man in Beitar Illit, Israel.",
            "incident_id": 72,
            "tags": [
              "GMF:Known AI Goal:Translation",
              "GMF:Known AI Goal Classification Discussion:Translation: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n",
              "GMF:Known AI Technical Failure:Dataset Imbalance",
              "GMF:Known AI Technical Failure:Distributional Bias",
              "GMF:Known AI Technical Failure Classification Discussion:Dataset Imbalance: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n\n\nDistributional Bias: Biased language in Western / Israeli media texts about Arabs could build false associations and high priors to terrorism and violence.",
              "GMF:Potential AI Technical Failure:Generalization Failure",
              "GMF:Potential AI Technical Failure Classification Discussion:Generalization Failure: Perhaps only one (standard arabic) or a few dialects are supported, and one or more language models is used as fallback for all arabic languages.",
              "GMF:Known AI Technology:Convolutional Neural Network",
              "GMF:Known AI Technology:Recurrent Neural Network",
              "GMF:Known AI Technology:Distributional Learning",
              "GMF:Potential AI Technology:Intermediate modeling",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technology:Multimodal Learning",
              "GMF:Potential AI Technology:Image Classification",
              "GMF:Potential AI Technology Classification Discussion:Intermediate modeling: Perhaps intermmediate languages are used (i.e. if no model has been trained to translate X to Y, use X->Z and then Z->Y), which accumulate errors.\n\nClassification: GIven the amount of supported languages for translation, a system must exist to detect the input language and classify amongst supported languages.\n\nMultimodal Learning: If image was also utilized to generate the translation, that would provide additional evidence to the mistranslation.\n\nImage Classification: If multimodal learning is used, perhaps the buldozer was recognized and its extracted keyword contributed to the bias in the NLP domain."
            ],
            "title": "Facebook translates 'good morning' into 'attack them', leading to arrest"
          },
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "A publicly accessible research model that was trained via Reddit threads showed racially biased advice on moral dilemmas, allegedly demonstrating limitations of language-based models trained on moral judgments.",
            "incident_id": 146,
            "tags": [
              "GMF:Known AI Goal:Question Answering",
              "GMF:Known AI Technology:Distributional Learning",
              "GMF:Known AI Technology:Language Modeling",
              "GMF:Potential AI Technology:Transformer",
              "GMF:Known AI Technical Failure:Distributional Bias",
              "GMF:Known AI Technical Failure:Gaming Vulnerability",
              "GMF:Potential AI Technical Failure:Overfitting",
              "GMF:Potential AI Technical Failure:Robustness Failure",
              "GMF:Potential AI Technical Failure:Context Misidentification",
              "GMF:Potential AI Technical Failure:Limited Dataset",
              "GMF:Potential AI Technical Failure Classification Discussion:Limited Dataset: US ethics only, data sourced from two subreddits and a column."
            ],
            "title": "Research Prototype AI, Delphi, Reportedly Gave Racially Biased Answers on Ethics"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Distributional Bias"
        ],
        "title": "Distributional Bias"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "YouTube’s content filtering and recommendation algorithms exposed children to disturbing and inappropriate videos.",
            "incident_id": 1,
            "tags": [
              "GMF:Known AI Goal:Content Recommendation",
              "GMF:Known AI Goal:Content Search",
              "GMF:Known AI Goal:Hate Speech Detection",
              "GMF:Known AI Goal:NSFW Content Detection",
              "GMF:Known AI Technology:Content-based Filtering",
              "GMF:Known AI Technology:Collaborative Filtering",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technology:Ensemble Aggregation",
              "GMF:Potential AI Technology:Distributional Learning",
              "GMF:Potential AI Technology Classification Discussion:Classification: Appropriateness could arise by appropriateness classifiers\n\nEnsemble Aggregation: In cases where \"child-appropriateness\" measure arises from multiple marginal detectors of-related subclasses (e.g. violent, adult, political themes)",
              "GMF:Potential AI Technical Failure:Concept Drift",
              "GMF:Potential AI Technical Failure:Generalization Failure",
              "GMF:Potential AI Technical Failure:Misconfigured Aggregation",
              "GMF:Potential AI Technical Failure:Distributional Bias",
              "GMF:Potential AI Technical Failure:Misaligned Objective",
              "GMF:Potential AI Technical Failure Classification Discussion:Concept Drift: Concept drift in cases where appropriateness evolves and changes with the passage of time and is culturally determined -- e.g. akin to old messed up disney cartoons.\n\nGeneralization Failure: Based on huge dataset size.\n\nMisconfigured Aggregation: In cases where \"child-appropriateness\" measure arises from multiple marginal detectors of-related subclasses (e.g. violent, adult, political themes)\n\nMisaligned Objective: Recommendation training is using child-appropriateness in its objective in a diminished capacity (as a component with a small contribution), or not at all (completely relying in post-hoc reviewing and filtering by other systems and humans).",
              "GMF:Known AI Technical Failure:Tuning Issues",
              "GMF:Known AI Technical Failure:Lack of Adversarial Robustness",
              "GMF:Known AI Technical Failure:Adversarial Data",
              "GMF:Known AI Technical Failure Classification Discussion:Tuning Issues: Default classification, in cases where the poor consideration of child -appropriateness context information does not fall under current subclasses of this classification."
            ],
            "title": "Google’s YouTube Kids App Presents Inappropriate Content"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Tuning Issues"
        ],
        "title": "Tuning Issues"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "YouTube’s content filtering and recommendation algorithms exposed children to disturbing and inappropriate videos.",
            "incident_id": 1,
            "tags": [
              "GMF:Known AI Goal:Content Recommendation",
              "GMF:Known AI Goal:Content Search",
              "GMF:Known AI Goal:Hate Speech Detection",
              "GMF:Known AI Goal:NSFW Content Detection",
              "GMF:Known AI Technology:Content-based Filtering",
              "GMF:Known AI Technology:Collaborative Filtering",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technology:Ensemble Aggregation",
              "GMF:Potential AI Technology:Distributional Learning",
              "GMF:Potential AI Technology Classification Discussion:Classification: Appropriateness could arise by appropriateness classifiers\n\nEnsemble Aggregation: In cases where \"child-appropriateness\" measure arises from multiple marginal detectors of-related subclasses (e.g. violent, adult, political themes)",
              "GMF:Potential AI Technical Failure:Concept Drift",
              "GMF:Potential AI Technical Failure:Generalization Failure",
              "GMF:Potential AI Technical Failure:Misconfigured Aggregation",
              "GMF:Potential AI Technical Failure:Distributional Bias",
              "GMF:Potential AI Technical Failure:Misaligned Objective",
              "GMF:Potential AI Technical Failure Classification Discussion:Concept Drift: Concept drift in cases where appropriateness evolves and changes with the passage of time and is culturally determined -- e.g. akin to old messed up disney cartoons.\n\nGeneralization Failure: Based on huge dataset size.\n\nMisconfigured Aggregation: In cases where \"child-appropriateness\" measure arises from multiple marginal detectors of-related subclasses (e.g. violent, adult, political themes)\n\nMisaligned Objective: Recommendation training is using child-appropriateness in its objective in a diminished capacity (as a component with a small contribution), or not at all (completely relying in post-hoc reviewing and filtering by other systems and humans).",
              "GMF:Known AI Technical Failure:Tuning Issues",
              "GMF:Known AI Technical Failure:Lack of Adversarial Robustness",
              "GMF:Known AI Technical Failure:Adversarial Data",
              "GMF:Known AI Technical Failure Classification Discussion:Tuning Issues: Default classification, in cases where the poor consideration of child -appropriateness context information does not fall under current subclasses of this classification."
            ],
            "title": "Google’s YouTube Kids App Presents Inappropriate Content"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Lack of Adversarial Robustness"
        ],
        "title": "Lack of Adversarial Robustness"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "YouTube’s content filtering and recommendation algorithms exposed children to disturbing and inappropriate videos.",
            "incident_id": 1,
            "tags": [
              "GMF:Known AI Goal:Content Recommendation",
              "GMF:Known AI Goal:Content Search",
              "GMF:Known AI Goal:Hate Speech Detection",
              "GMF:Known AI Goal:NSFW Content Detection",
              "GMF:Known AI Technology:Content-based Filtering",
              "GMF:Known AI Technology:Collaborative Filtering",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technology:Ensemble Aggregation",
              "GMF:Potential AI Technology:Distributional Learning",
              "GMF:Potential AI Technology Classification Discussion:Classification: Appropriateness could arise by appropriateness classifiers\n\nEnsemble Aggregation: In cases where \"child-appropriateness\" measure arises from multiple marginal detectors of-related subclasses (e.g. violent, adult, political themes)",
              "GMF:Potential AI Technical Failure:Concept Drift",
              "GMF:Potential AI Technical Failure:Generalization Failure",
              "GMF:Potential AI Technical Failure:Misconfigured Aggregation",
              "GMF:Potential AI Technical Failure:Distributional Bias",
              "GMF:Potential AI Technical Failure:Misaligned Objective",
              "GMF:Potential AI Technical Failure Classification Discussion:Concept Drift: Concept drift in cases where appropriateness evolves and changes with the passage of time and is culturally determined -- e.g. akin to old messed up disney cartoons.\n\nGeneralization Failure: Based on huge dataset size.\n\nMisconfigured Aggregation: In cases where \"child-appropriateness\" measure arises from multiple marginal detectors of-related subclasses (e.g. violent, adult, political themes)\n\nMisaligned Objective: Recommendation training is using child-appropriateness in its objective in a diminished capacity (as a component with a small contribution), or not at all (completely relying in post-hoc reviewing and filtering by other systems and humans).",
              "GMF:Known AI Technical Failure:Tuning Issues",
              "GMF:Known AI Technical Failure:Lack of Adversarial Robustness",
              "GMF:Known AI Technical Failure:Adversarial Data",
              "GMF:Known AI Technical Failure Classification Discussion:Tuning Issues: Default classification, in cases where the poor consideration of child -appropriateness context information does not fall under current subclasses of this classification."
            ],
            "title": "Google’s YouTube Kids App Presents Inappropriate Content"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Adversarial Data"
        ],
        "title": "Adversarial Data"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "There are multiple reports of Amazon Alexa products (Echo, Echo Dot) reacting and acting upon unintended stimulus, usually from television commercials or news reporter's voices.",
            "incident_id": 34,
            "tags": [
              "GMF:Known AI Goal:AI Voice Assistant",
              "GMF:Known AI Technology:Automatic Speech Recognition",
              "GMF:Known AI Technology:Language Modeling",
              "GMF:Known AI Technology:Acoustic Fingerprint",
              "GMF:Known AI Technical Failure:Unsafe Exposure or Access",
              "GMF:Known AI Technical Failure:Misuse",
              "GMF:Potential AI Technical Failure:Unauthorized Data",
              "GMF:Potential AI Technical Failure:Inadequate Anonymization",
              "GMF:Potential AI Technical Failure:Context Misidentification",
              "GMF:Potential AI Technical Failure:Lack of Capability Control",
              "GMF:Potential AI Technical Failure:Underspecification",
              "GMF:Potential AI Technical Failure Classification Discussion:Unauthorized Data: This may apply on an ethical level: presumably the users sign an agreement consenting to passive voice capture.\n\nInadequate Anonymization: This may apply on an ethical level: presumably the users sign an agreement consenting to passive voice capture.\n\nLack of Capability Control: This is relevant if the order went through.\n\nUnderspecification: Speaker diarization / recognition missing."
            ],
            "title": "Amazon Alexa Responding to Environmental Inputs"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Unsafe Exposure or Access"
        ],
        "title": "Unsafe Exposure or Access"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "Facebook's automatic language translation software incorrectly translated an Arabic post saying \"Good morning\" into Hebrew saying \"hurt them,\" leading to the arrest of a Palestinian man in Beitar Illit, Israel.",
            "incident_id": 72,
            "tags": [
              "GMF:Known AI Goal:Translation",
              "GMF:Known AI Goal Classification Discussion:Translation: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n",
              "GMF:Known AI Technical Failure:Dataset Imbalance",
              "GMF:Known AI Technical Failure:Distributional Bias",
              "GMF:Known AI Technical Failure Classification Discussion:Dataset Imbalance: Presumably the arabic dialect in the text is not represented adequately in the training data, hence the translation performance issues. \n\n\nDistributional Bias: Biased language in Western / Israeli media texts about Arabs could build false associations and high priors to terrorism and violence.",
              "GMF:Potential AI Technical Failure:Generalization Failure",
              "GMF:Potential AI Technical Failure Classification Discussion:Generalization Failure: Perhaps only one (standard arabic) or a few dialects are supported, and one or more language models is used as fallback for all arabic languages.",
              "GMF:Known AI Technology:Convolutional Neural Network",
              "GMF:Known AI Technology:Recurrent Neural Network",
              "GMF:Known AI Technology:Distributional Learning",
              "GMF:Potential AI Technology:Intermediate modeling",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technology:Multimodal Learning",
              "GMF:Potential AI Technology:Image Classification",
              "GMF:Potential AI Technology Classification Discussion:Intermediate modeling: Perhaps intermmediate languages are used (i.e. if no model has been trained to translate X to Y, use X->Z and then Z->Y), which accumulate errors.\n\nClassification: GIven the amount of supported languages for translation, a system must exist to detect the input language and classify amongst supported languages.\n\nMultimodal Learning: If image was also utilized to generate the translation, that would provide additional evidence to the mistranslation.\n\nImage Classification: If multimodal learning is used, perhaps the buldozer was recognized and its extracted keyword contributed to the bias in the NLP domain."
            ],
            "title": "Facebook translates 'good morning' into 'attack them', leading to arrest"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Dataset Imbalance"
        ],
        "title": "Dataset Imbalance"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "A publicly accessible research model that was trained via Reddit threads showed racially biased advice on moral dilemmas, allegedly demonstrating limitations of language-based models trained on moral judgments.",
            "incident_id": 146,
            "tags": [
              "GMF:Known AI Goal:Question Answering",
              "GMF:Known AI Technology:Distributional Learning",
              "GMF:Known AI Technology:Language Modeling",
              "GMF:Potential AI Technology:Transformer",
              "GMF:Known AI Technical Failure:Distributional Bias",
              "GMF:Known AI Technical Failure:Gaming Vulnerability",
              "GMF:Potential AI Technical Failure:Overfitting",
              "GMF:Potential AI Technical Failure:Robustness Failure",
              "GMF:Potential AI Technical Failure:Context Misidentification",
              "GMF:Potential AI Technical Failure:Limited Dataset",
              "GMF:Potential AI Technical Failure Classification Discussion:Limited Dataset: US ethics only, data sourced from two subreddits and a column."
            ],
            "title": "Research Prototype AI, Delphi, Reportedly Gave Racially Biased Answers on Ethics"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Gaming Vulnerability"
        ],
        "title": "Gaming Vulnerability"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "Three make-up artists lost their positions following an algorithmically-assessed video interview by HireVue who reportedly failed to provide adequate explanation of the findings.",
            "incident_id": 192,
            "tags": [
              "GMF:Known AI Goal:Automatic Skill Assessment",
              "GMF:Known AI Technology:Automatic Speech Recognition",
              "GMF:Potential AI Technology:Regression",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technical Failure:Dataset Imbalance",
              "GMF:Potential AI Technical Failure:Context Misidentification",
              "GMF:Potential AI Technical Failure:Inadequate Data Sampling",
              "GMF:Potential AI Technical Failure:Problematic Input",
              "GMF:Known AI Technical Failure:Lack of Explainability",
              "GMF:Known AI Technical Failure:Incomplete Data Attribute Capture",
              "GMF:Known AI Technical Failure Classification Discussion:Incomplete Data Attribute Capture: Makeup skill assessment with verbal descriptions, rather visual media."
            ],
            "title": "Three Make-Up Artists Lost Jobs Following Black-Box Automated Decision by HireVue"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Lack of Explainability"
        ],
        "title": "Lack of Explainability"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "Three make-up artists lost their positions following an algorithmically-assessed video interview by HireVue who reportedly failed to provide adequate explanation of the findings.",
            "incident_id": 192,
            "tags": [
              "GMF:Known AI Goal:Automatic Skill Assessment",
              "GMF:Known AI Technology:Automatic Speech Recognition",
              "GMF:Potential AI Technology:Regression",
              "GMF:Potential AI Technology:Classification",
              "GMF:Potential AI Technical Failure:Dataset Imbalance",
              "GMF:Potential AI Technical Failure:Context Misidentification",
              "GMF:Potential AI Technical Failure:Inadequate Data Sampling",
              "GMF:Potential AI Technical Failure:Problematic Input",
              "GMF:Known AI Technical Failure:Lack of Explainability",
              "GMF:Known AI Technical Failure:Incomplete Data Attribute Capture",
              "GMF:Known AI Technical Failure Classification Discussion:Incomplete Data Attribute Capture: Makeup skill assessment with verbal descriptions, rather visual media."
            ],
            "title": "Three Make-Up Artists Lost Jobs Following Black-Box Automated Decision by HireVue"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Incomplete Data Attribute Capture"
        ],
        "title": "Incomplete Data Attribute Capture"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "In Taiwan, a Tesla Model 3 on Autopilot mode whose driver did not pay attention to the road collided with a road repair truck; a road engineer immediately placed crash warnings in front of the Tesla, but soon after got hit and was killed by a BMW when its driver failed to see the sign and crashed into the accident.",
            "incident_id": 221,
            "tags": [
              "GMF:Potential AI Technology:Convolutional Neural Network",
              "GMF:Potential AI Technology:Visual Object Detection",
              "GMF:Potential AI Technology:Classification",
              "GMF:Known AI Technology:Image Segmentation",
              "GMF:Potential AI Technology Classification Discussion:Visual Object Detection: Potentially subtask of segmentation.\n\nClassification: Potentially subtask of segmentation.",
              "GMF:Known AI Technical Failure:Misuse",
              "GMF:Known AI Technical Failure:Generalization Failure",
              "GMF:Known AI Technical Failure Classification Discussion:Misuse: Driver should not use autopilot without supervision.",
              "GMF:Known AI Goal:Autonomous Driving"
            ],
            "title": "A Road Engineer Killed Following a Collision Involving a Tesla on Autopilot"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Generalization Failure"
        ],
        "title": "Generalization Failure"
      },
      {
        "__typename": "RisksPayloadItem",
        "precedents": [
          {
            "__typename": "RisksPayloadPrecedent",
            "description": "The French digital care company, Nabla, in researching GPT-3’s capabilities for medical documentation, diagnosis support, and treatment recommendation, found its inconsistency and lack of scientific and medical expertise unviable and risky in healthcare applications. This incident has been downgraded to an issue as it does not meet current ingestion criteria.",
            "incident_id": 287,
            "tags": [
              "GMF:Known AI Goal:Question Answering",
              "GMF:Known AI Technology:Transformer",
              "GMF:Known AI Technology:Language Modeling",
              "GMF:Known AI Technology:Distributional Learning",
              "GMF:Known AI Technology Classification Discussion:Distributional Learning: If no training data citing sources is available and/or not enough data from a medical domain were available.",
              "GMF:Potential AI Technical Failure:Limited Dataset",
              "GMF:Potential AI Technical Failure:Problematic Input",
              "GMF:Potential AI Technical Failure:Robustness Failure",
              "GMF:Potential AI Technical Failure:Overfitting",
              "GMF:Potential AI Technical Failure:Underfitting",
              "GMF:Potential AI Technical Failure:Inadequate Sequential Memory",
              "GMF:Potential AI Technical Failure Classification Discussion:Limited Dataset: If no training data citing sources is available and/or not enough data from a medical domain were available.\n\nProblematic Input: If the prompt does not state that references are required.\n\nOverfitting: System does not capture the semantic content of the prompt, but focuses on specific verbage.\n\nUnderfitting: Due to lack of fine-tuning, the model can be considered as having a poor fit for specific medical questions.",
              "GMF:Known AI Technical Failure:Distributional Artifacts"
            ],
            "title": "OpenAI’s GPT-3 Reported as Unviable in Medical Tasks by Healthcare Firm"
          }
        ],
        "tags": [
          "GMF:Known AI Technical Failure:Distributional Artifacts"
        ],
        "title": "Distributional Artifacts"
      }
    ]
  }
}
diff --git a/site/gatsby-site/src/components/checklists/CheckListForm.js b/site/gatsby-site/src/components/checklists/CheckListForm.js
index c276edda20..18fb1f4ad8 100644
--- a/site/gatsby-site/src/components/checklists/CheckListForm.js
+++ b/site/gatsby-site/src/components/checklists/CheckListForm.js
@@ -536,7 +536,7 @@ const searchRisks = async ({
   const risksResponse = await apolloClient.query({
     query: gql`
-      query {
+      query findRisks {
        risks(input: { tags: [${queryTags.map((t) => `"${t}"`).join(', ')}] }) {
          tags
          title

From ea0955c47143df05509a8f4ee289788d12259d51 Mon Sep 17 00:00:00 2001
From: Luna McNulty
Date: Fri, 29 Mar 2024 14:19:22 -0400
Subject: [PATCH 2/2] Update search to match new query

---
 .../cypress/e2e/integration/apps/checklistsForm.cy.js | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js b/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js
index 96555ad6f9..eb5795dcb8 100644
--- a/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js
+++ b/site/gatsby-site/cypress/e2e/integration/apps/checklistsForm.cy.js
@@ -62,12 +62,9 @@ describe('Checklists App Form', () => {
   };

   const interceptFindRisks = (risks) => {
-    cy.conditionalIntercept(
-      '**/graphql',
-      (req) => req.body.operationName == 'findRisks',
-      'findRisks',
-      { data: { risks } }
-    );
+    cy.conditionalIntercept('**/graphql', (req) => req.body.query.includes('GMF'), 'findRisks', {
+      data: { risks },
+    });
   };

   it('Should have read-only access for non-logged-in users', () => {
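
---
Note for reviewers: both revisions of `interceptFindRisks` rely on the project's custom `cy.conditionalIntercept` command, which lives in the Cypress support files rather than in this patch. For readers unfamiliar with it, a minimal sketch of such a helper follows. Only the signature is taken from the call sites above; the body is an assumption for illustration, not the project's actual implementation:

    // Sketch only: assumed shape of a conditionalIntercept-style command.
    // It stubs a request with `response` when `condition` matches, and tags
    // the request with `alias` so tests can cy.wait('@findRisks');
    // non-matching requests fall through to the network untouched.
    Cypress.Commands.add('conditionalIntercept', (url, condition, alias, response) => {
      cy.intercept(url, (req) => {
        if (condition(req)) {
          req.alias = alias;
          req.reply({ body: response });
        }
      });
    });

Under this assumed shape, PATCH 2/2 only swaps the predicate: instead of matching on `req.body.operationName`, it matches any GraphQL request whose query text contains 'GMF', while keeping the same 'findRisks' alias that the test waits on.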