diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f6c3ace..3632b87 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -52,7 +52,7 @@ check spelling and syntax: - changes: - data/*.yaml # Source data was updated - tests/*.py # Any tests changed - - tests/custom_words.txt # Exclusion words updated + - tests/custom_words.txt # Exclusion words updated - conftest.py # Any test fixtures changed validate data: diff --git a/CHANGELOG.md b/CHANGELOG.md index 39c8028..8fd9403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # ATLAS Data Changelog +## [4.4.0]() (2023-04-12) + +Initial mitigations + ## [4.3.0]() (2023-02-28) New case study on prompt injection and adapted new associated techniques from ATT&CK. diff --git a/data/case-studies/AML.CS0009.yaml b/data/case-studies/AML.CS0009.yaml index 592483e..9154233 100644 --- a/data/case-studies/AML.CS0009.yaml +++ b/data/case-studies/AML.CS0009.yaml @@ -45,6 +45,8 @@ case-study-type: incident references: - title: 'AIID - Incident 6: TayBot' url: https://incidentdatabase.ai/cite/6 +- title: 'AVID - Vulnerability: AVID-2022-v013' + url: https://avidml.org/database/avid-2022-v013/ - title: Microsoft BlogPost, "Learning from Tay's introduction" url: https://blogs.microsoft.com/blog/2016/03/25/learning-tays-introduction/ - title: IEEE Article, "In 2016, Microsoft's Racist Chatbot Revealed the Dangers of diff --git a/data/data.yaml b/data/data.yaml index 4a3f3e9..b48c4f5 100644 --- a/data/data.yaml +++ b/data/data.yaml @@ -2,7 +2,7 @@ id: ATLAS name: Adversarial Threat Landscape for AI Systems -version: 4.3.0 +version: 4.4.0 matrices: - !include . diff --git a/data/matrix.yaml b/data/matrix.yaml index 5f4a23c..ee9ae5f 100644 --- a/data/matrix.yaml +++ b/data/matrix.yaml @@ -20,3 +20,4 @@ tactics: data: - !include tactics.yaml - !include techniques.yaml + - !include mitigations.yaml diff --git a/data/mitigations.yaml b/data/mitigations.yaml new file mode 100644 index 0000000..42aeef4 --- /dev/null +++ b/data/mitigations.yaml @@ -0,0 +1,456 @@ +--- +- &limit_info_release + id: AML.M0000 + name: Limit Release of Public Information + object-type: mitigation + tags: + - Model Agnostic + description: > + Limit the public release of technical information about the machine learning stack used in an organization's products or services. + Technical knowledge of how machine learning is used can be leveraged by adversaries to perform targeting and tailor attacks to the target system. + Additionally, consider limiting the release of organizational information - including physical locations, researcher names, and department structures - from which technical details such as machine learning techniques, model architectures, or datasets may be inferred. + techniques: + - id: '{{victim_research.id}}' + use: | + Limit the connection between publicly disclosed approaches and the data, models, and algorithms used in production. + - id: '{{victim_website.id}}' + use: | + Restrict release of technical information on ML-enabled products and organizational information on the teams supporting ML-enabled products. +- &limit_model_release + id: AML.M0001 + name: Limit Model Artifact Release + object-type: mitigation + tags: + - Model Agnostic + - Data Focused + - Model Focused + description: | + Limit public release of technical project details including data, algorithms, model architectures, and model checkpoints that are used in production, or that are representative of those used in production. 
+ techniques: + - id: '{{acquire_ml_artifacts_data.id}}' + use: | + Limiting the release of datasets can reduce an adversary's ability to target production models trained on the same or similar data. + - id: '{{acquire_ml_artifacts_model.id}}' + use: | + Limiting the release of model architectures and checkpoints can reduce an adversary's ability to target those models. + - id: '{{poison_data.id}}' + use: | + Published datasets can be a target for poisoning attacks. +- &passive_output_obfuscation + id: AML.M0002 + name: Passive ML Output Obfuscation + object-type: mitigation + tags: + - Passive + - Model Agnostic + - Model Operations + description: | + Decreasing the fidelity of model outputs provided to the end user can reduce an adversaries ability to extract information about the model and optimize attacks for the model. + techniques: + - id: '{{discover_model_ontology.id}}' + use: | + Suggested approaches: + - Restrict the number of results shown + - Limit specificity of output class ontology + - Use randomized smoothing techniques + - Reduce the precision of numerical outputs + - id: '{{discover_model_family.id}}' + use: | + Suggested approaches: + - Restrict the number of results shown + - Limit specificity of output class ontology + - Use randomized smoothing techniques + - Reduce the precision of numerical outputs + - id: '{{craft_adv_blackbox.id}}' + use: | + Suggested approaches: + - Restrict the number of results shown + - Limit specificity of output class ontology + - Use randomized smoothing techniques + - Reduce the precision of numerical outputs + - id: '{{membership_inference.id}}' + use: | + Suggested approaches: + - Restrict the number of results shown + - Limit specificity of output class ontology + - Use randomized smoothing techniques + - Reduce the precision of numerical outputs + - id: '{{model_inversion.id}}' + use: | + Suggested approaches: + - Restrict the number of results shown + - Limit specificity of output class ontology + - Use randomized smoothing techniques + - Reduce the precision of numerical outputs + - id: '{{extract_model.id}}' + use: | + Suggested approaches: + - Restrict the number of results shown + - Limit specificity of output class ontology + - Use randomized smoothing techniques + - Reduce the precision of numerical outputs +- &model_hardening + id: AML.M0003 + name: Model Hardening + object-type: mitigation + tags: + - Model Focused + - Model Enhancement + - Model Development + description: | + Use techniques to make machine learning models robust to adversarial inputs such as adversarial training or network distillation. + techniques: + - id: '{{evade_model.id}}' + use: | + Hardened models are more difficult to evade. + - id: '{{erode_integrity.id}}' + use: | + Hardened models are less susceptible to integrity attacks. +- &restrict_queries + id: AML.M0004 + name: Restrict Number of ML Model Queries + object-type: mitigation + tags: + - Passive + - Model Agnostic + - Model Operations + description: | + Limit the total number and rate of queries a user can perform. + techniques: + - id: '{{cost_harvesting.id}}' + use: | + Limit the number of queries users can perform in a given interval to hinder an attacker's ability to send computationally expensive inputs + - id: '{{discover_model_ontology.id}}' + use: | + Limit the amount of information an attacker can learn about a model's ontology through API queries. 
+ - id: '{{discover_model_family.id}}' + use: | + Limit the amount of information an attacker can learn about a model's ontology through API queries. + - id: '{{exfiltrate_via_api.id}}' + use: | + Limit the volume of API queries in a given period of time to regulate the amount and fidelity of potentially sensitive information an attacker can learn. + - id: '{{membership_inference.id}}' + use: | + Limit the volume of API queries in a given period of time to regulate the amount and fidelity of potentially sensitive information an attacker can learn. + - id: '{{model_inversion.id}}' + use: | + Limit the volume of API queries in a given period of time to regulate the amount and fidelity of potentially sensitive information an attacker can learn. + - id: '{{extract_model.id}}' + use: | + Limit the volume of API queries in a given period of time to regulate the amount and fidelity of potentially sensitive information an attacker can learn. + - id: '{{craft_adv_blackbox.id}}' + use: | + Limit the number of queries users can perform in a given interval to shrink the attack surface for black-box attacks. + - id: '{{ml_dos.id}}' + use: | + Limit the number of queries users can perform in a given interval to prevent a denial of service. + - id: '{{chaff_data.id}}' + use: | + Limit the number of queries users can perform in a given interval to protect the system from chaff data spam. +- &control_access_rest + id: AML.M0005 + name: Control Access to ML Models and Data at Rest + object-type: mitigation + tags: + - Model Agnostic + - Production + description: | + Establish access controls on internal model registries and limit internal access to production models. Limit access to training data only to approved users. + techniques: + - id: '{{supply_chain_data.id}}' + use: | + Access controls can prevent tampering with ML artifacts and prevent unauthorized copying. + - id: '{{poison_data.id}}' + use: | + Access controls can prevent tampering with ML artifacts and prevent unauthorized copying. + - id: '{{poison_model.id}}' + use: | + Access controls can prevent tampering with ML artifacts and prevent unauthorized copying. + - id: '{{inject_payload.id}}' + use: | + Access controls can prevent tampering with ML artifacts and prevent unauthorized copying. + - id: '{{supply_chain_model.id}}' + use: | + Access controls can prevent tampering with ML artifacts and prevent unauthorized copying. + - id: '{{exfiltrate_via_cyber.id}}' + use: | + Access controls can prevent exfiltration. + - id: '{{ip_theft.id}}' + use: | + Access controls can prevent theft of intellectual property. +- &ensemble_methods + id: AML.M0006 + name: Use Ensemble Methods + object-type: mitigation + tags: + - Model Focused + - Model Enhancement + - Model Development + description: | + Use an ensemble of models for inference to increase robustness to adversarial inputs. Some attacks may effectively evade one model or model family but be ineffective against others. + techniques: + - id: '{{erode_integrity.id}}' + use: | + Using multiple different models increases robustness to attack. + - id: '{{supply_chain_software.id}}' + use: | + Using multiple different models ensures minimal performance loss if security flaw is found in tool for one model or family. + - id: '{{supply_chain_model.id}}' + use: | + Using multiple different models ensures minimal performance loss if security flaw is found in tool for one model or family. + - id: '{{evade_model.id}}' + use: | + Using multiple different models increases robustness to attack. 
+ - id: '{{discover_model_family.id}}' + use: | + Use multiple different models to fool adversaries of which type of model is used and how the model used. +- &sanitize_training_data + id: AML.M0007 + name: Sanitize Training Data + object-type: mitigation + tags: + - Passive + - Active + - Data Focused + - Model Agnostic + - Model Enhancement + - Model Development + - Model Operations + description: | + Detect and remove or remediate poisoned training data. Training data should be sanitized prior to model training and recurrently for an active learning model. + + Implement a filter to limit ingested training data. Establish a content policy that would remove unwanted content such as certain explicit or offensive language from being used. + techniques: + - id: '{{supply_chain_data.id}}' + use: | + Detect and remove or remediate poisoned data to avoid adversarial model drift or backdoor attacks. + - id: '{{poison_data.id}}' + use: | + Detect modification of data and labels which may cause adversarial model drift or backdoor attacks. + - id: '{{poison_model.id}}' + use: | + Prevent attackers from leveraging poisoned datasets to launch backdoor attacks against a model. +- &validate_model + id: AML.M0008 + name: Validate ML Model + object-type: mitigation + tags: + - Active + - Model Focused + - Model Agnostic + - Model Development + description: | + Validate that machine learning models perform as intended by testing for backdoor triggers or adversarial bias. + techniques: + - id: '{{supply_chain_model.id}}' + use: | + Ensure that acquired models do not respond to potential backdoor triggers or adversarial bias. + - id: '{{poison_model.id}}' + use: | + Ensure that trained models do not respond to potential backdoor triggers or adversarial bias. + - id: '{{inject_payload.id}}' + use: | + Ensure that acquired models do not respond to potential backdoor triggers or adversarial bias. +- &multi_modal_sensors + id: AML.M0009 + name: Use Multi-Modal Sensors + object-type: mitigation + tags: + - Model Enhancement + - Model Focused + - Data Focused + - Model Development + description: | + Incorporate multiple sensors to integrate varying perspectives and modalities to avoid a single point of failure susceptible to physical attacks. + techniques: + - id: '{{physical_env.id}}' + use: | + Using a variety of sensors can make it more difficult for an attacker with physical access to compromise and produce malicious results. + - id: '{{evade_model.id}}' + use: | + Using a variety of sensors can make it more difficult for an attacker to compromise and produce malicious results. +- &input_restoration + id: AML.M0010 + name: Input Restoration + object-type: mitigation + tags: + - Passive + - Data Focused + - Model Agnostic + - Model Operations + description: | + Preprocess all inference data to nullify or reverse potential adversarial perturbations. + techniques: + - id: '{{craft_adv_blackbox.id}}' + use: | + Input restoration adds an extra layer of unknowns and randomness when an adversary evaluates the input-output relationship. + - id: '{{evade_model.id}}' + use: | + Preprocessing model inputs can prevent malicious data from going through the machine learning pipeline. + - id: '{{erode_integrity.id}}' + use: | + Preprocessing model inputs can prevent malicious data from going through the machine learning pipeline. 
+- &restrict_lib_loading + id: AML.M0011 + name: Restrict Library Loading + object-type: mitigation + tags: + - Model Agnostic + description: | + Prevent abuse of library loading mechanisms in the operating system and software to load untrusted code by configuring appropriate library loading mechanisms and investigating potential vulnerable software. + + File formats such as pickle files that are commonly used to store machine learning models can contain exploits that allow for loading of malicious libraries. + techniques: + - id: '{{unsafe_ml_artifacts.id}}' + use: | + Restrict library loading by ML artifacts. + ATT&CK-reference: + id: M1044 + url: https://attack.mitre.org/mitigations/M1044/ +- &encrypt_info + id: AML.M0012 + name: Encrypt Sensitive Information + object-type: mitigation + tags: + - Model Agnostic + - Model Operations + description: | + Encrypt sensitive data such as ML models to protect against adversaries attempting to access sensitive data. + ATT&CK-reference: + id: M1041 + url: https://attack.mitre.org/mitigations/M1041/ + techniques: + - id: '{{ml_artifact_collection.id}}' + use: | + Protect machine learning artifacts with encryption. + - id: '{{ip_theft.id}}' + use: | + Protect machine learning artifacts with encryption. + - id: '{{discover_ml_artifacts.id}}' + use: | + Protect machine learning artifacts from adversaries who gather private information to target and improve attacks. +- &code_signing + id: AML.M0013 + name: Code Signing + object-type: mitigation + tags: + - Model Agnostic + - Model Development + - Model Operations + description: | + Enforce binary and application integrity with digital signature verification to prevent untrusted code from executing. Adversaries can embed malicious code in ML software or models. Enforcement of code signing can prevent the compromise of the machine learning supply chain and prevent execution of malicious code. + techniques: + - id: '{{unsafe_ml_artifacts.id}}' + use: | + Prevent execution of ML artifacts that are not properly signed. + - id: '{{supply_chain_software.id}}' + use: | + Enforce properly signed drivers and ML software frameworks. + - id: '{{supply_chain_model.id}}' + use: | + Enforce properly signed model files. + ATT&CK-reference: + id: M1045 + url: https://attack.mitre.org/mitigations/M1045/ +- &verify_ml_artifacts + id: AML.M0014 + name: Verify ML Artifacts + object-type: mitigation + tags: + - Model Focused + - Data Focused + - Model Agnostic + - Model Development + description: | + Verify the cryptographic checksum of all machine learning artifacts to verify that the file was not modified by an attacker. + techniques: + - id: '{{publish_poisoned_data.id}}' + use: | + Determine validity of published data in order to avoid using poisoned data that introduces vulnerabilities. + - id: '{{unsafe_ml_artifacts.id}}' + use: | + Introduce proper checking of signatures to ensure that unsafe ML artifacts will not be executed in the system. + - id: '{{supply_chain.id}}' + use: | + Introduce proper checking of signatures to ensure that unsafe ML artifacts will not be introduced to the system. +- &adv_input_detection + id: AML.M0015 + name: Adversarial Input Detection + object-type: mitigation + tags: + - Active + - Data Focused + - Model Agnostic + - Model Operations + description: > + Detect and block adversarial inputs or atypical queries that deviate from known benign behavior, exhibit behavior patterns observed in previous attacks or that come from potentially malicious IPs. 
+ + Incorporate adversarial detection algorithms into the ML system prior to the ML model. + techniques: + - id: '{{evade_model.id}}' + use: | + Prevent an attacker from introducing adversarial data into the system. + - id: '{{craft_adv_blackbox.id}}' + use: | + Monitor queries and query patterns to the target model, block access if suspicious queries are detected. + - id: '{{ml_dos.id}}' + use: | + Assess queries before inference call or enforce timeout policy for queries which consume excessive resources. + - id: '{{erode_integrity.id}}' + use: | + Incorporate adversarial input detection into the pipeline before inputs reach the model. +- &vuln_scanning + id: AML.M0016 + name: Vulnerability Scanning + object-type: mitigation + tags: + - Active + - Model Agnostic + description: | + Vulnerability scanning is used to find potentially exploitable software vulnerabilities to remediate them. + + File formats such as pickle files that are commonly used to store machine learning models can contain exploits that allow for arbitrary code execution. + techniques: + - id: '{{unsafe_ml_artifacts.id}}' + use: | + Scan ML artifacts for vulnerabilities before execution. + ATT&CK-reference: + id: M1016 + url: https://attack.mitre.org/mitigations/M1016/ +- &distribution_methods + id: AML.M0017 + name: Model Distribution Methods + object-type: mitigation + tags: + - Model Focused + - Model Agnostic + description: | + Deploying ML models to edge devices can increase the attack surface of the system. Consider serving models in the cloud to reduce the level of access the adversary has to the model. + techniques: + - id: '{{full_access.id}}' + use: | + Not distributing the model in software to edge devices, can limit an adversary's ability to gain full access to the model. + - id: '{{craft_adv_whitebox.id}}' + use: | + With full access to the model, an adversary could perform white-box attacks. + - id: '{{supply_chain_model.id}}' + use: | + An adversary could repackage the application with a malicious version of the model. +- &user_training + id: AML.M0018 + name: User Training + object-type: mitigation + tags: + - Model Agnostic + description: | + Educate ML model developers on secure coding practices and ML vulnerabilities. + techniques: + - id: '{{user_execution.id}}' + use: | + Training users to be able to identify attempts at manipulation will make them less susceptible to performing techniques that cause the execution of malicious code. + - id: '{{unsafe_ml_artifacts.id}}' + use: | + Train users to identify attempts of manipulation to prevent them from running unsafe code which when executed could develop unsafe artifacts. These artifacts may have a detrimental effect on the system. + ATT&CK-reference: + id: M1017 + url: https://attack.mitre.org/mitigations/M1017/ diff --git a/dist/ATLAS.yaml b/dist/ATLAS.yaml index 37a80d5..eaf648b 100644 --- a/dist/ATLAS.yaml +++ b/dist/ATLAS.yaml @@ -1,7 +1,7 @@ --- id: ATLAS name: Adversarial Threat Landscape for AI Systems -version: 4.3.0 +version: 4.4.0 matrices: - id: ATLAS name: ATLAS Machine Learning Threat Matrix @@ -1429,6 +1429,593 @@ matrices: ATT&CK-reference: id: T1159 url: https://attack.mitre.org/techniques/T1059/ + mitigations: + - id: AML.M0000 + name: Limit Release of Public Information + object-type: mitigation + tags: + - Model Agnostic + description: 'Limit the public release of technical information about the machine + learning stack used in an organization''s products or services. 
Technical knowledge + of how machine learning is used can be leveraged by adversaries to perform targeting + and tailor attacks to the target system. Additionally, consider limiting the + release of organizational information - including physical locations, researcher + names, and department structures - from which technical details such as machine + learning techniques, model architectures, or datasets may be inferred. + + ' + techniques: + - id: AML.T0000 + use: 'Limit the connection between publicly disclosed approaches and the data, + models, and algorithms used in production. + + ' + - id: AML.T0003 + use: 'Restrict release of technical information on ML-enabled products and organizational + information on the teams supporting ML-enabled products. + + ' + - id: AML.M0001 + name: Limit Model Artifact Release + object-type: mitigation + tags: + - Model Agnostic + - Data Focused + - Model Focused + description: 'Limit public release of technical project details including data, + algorithms, model architectures, and model checkpoints that are used in production, + or that are representative of those used in production. + + ' + techniques: + - id: AML.T0002.000 + use: 'Limiting the release of datasets can reduce an adversary''s ability to + target production models trained on the same or similar data. + + ' + - id: AML.T0002.001 + use: 'Limiting the release of model architectures and checkpoints can reduce + an adversary''s ability to target those models. + + ' + - id: AML.T0020 + use: 'Published datasets can be a target for poisoning attacks. + + ' + - id: AML.M0002 + name: Passive ML Output Obfuscation + object-type: mitigation + tags: + - Passive + - Model Agnostic + - Model Operations + description: 'Decreasing the fidelity of model outputs provided to the end user + can reduce an adversaries ability to extract information about the model and + optimize attacks for the model. 
+ + ' + techniques: + - id: AML.T0013 + use: "Suggested approaches:\n - Restrict the number of results shown\n - Limit\ + \ specificity of output class ontology\n - Use randomized smoothing techniques\n\ + \ - Reduce the precision of numerical outputs\n" + - id: AML.T0014 + use: "Suggested approaches:\n - Restrict the number of results shown\n - Limit\ + \ specificity of output class ontology\n - Use randomized smoothing techniques\n\ + \ - Reduce the precision of numerical outputs\n" + - id: AML.T0043.001 + use: "Suggested approaches:\n - Restrict the number of results shown\n - Limit\ + \ specificity of output class ontology\n - Use randomized smoothing techniques\n\ + \ - Reduce the precision of numerical outputs\n" + - id: AML.T0024.000 + use: "Suggested approaches:\n - Restrict the number of results shown\n - Limit\ + \ specificity of output class ontology\n - Use randomized smoothing techniques\n\ + \ - Reduce the precision of numerical outputs\n" + - id: AML.T0024.001 + use: "Suggested approaches:\n - Restrict the number of results shown\n - Limit\ + \ specificity of output class ontology\n - Use randomized smoothing techniques\n\ + \ - Reduce the precision of numerical outputs\n" + - id: AML.T0024.002 + use: "Suggested approaches:\n - Restrict the number of results shown\n - Limit\ + \ specificity of output class ontology\n - Use randomized smoothing techniques\n\ + \ - Reduce the precision of numerical outputs\n" + - id: AML.M0003 + name: Model Hardening + object-type: mitigation + tags: + - Model Focused + - Model Enhancement + - Model Development + description: 'Use techniques to make machine learning models robust to adversarial + inputs such as adversarial training or network distillation. + + ' + techniques: + - id: AML.T0015 + use: 'Hardened models are more difficult to evade. + + ' + - id: AML.T0031 + use: 'Hardened models are less susceptible to integrity attacks. + + ' + - id: AML.M0004 + name: Restrict Number of ML Model Queries + object-type: mitigation + tags: + - Passive + - Model Agnostic + - Model Operations + description: 'Limit the total number and rate of queries a user can perform. + + ' + techniques: + - id: AML.T0034 + use: 'Limit the number of queries users can perform in a given interval to hinder + an attacker''s ability to send computationally expensive inputs + + ' + - id: AML.T0013 + use: 'Limit the amount of information an attacker can learn about a model''s + ontology through API queries. + + ' + - id: AML.T0014 + use: 'Limit the amount of information an attacker can learn about a model''s + ontology through API queries. + + ' + - id: AML.T0024 + use: 'Limit the volume of API queries in a given period of time to regulate + the amount and fidelity of potentially sensitive information an attacker can + learn. + + ' + - id: AML.T0024.000 + use: 'Limit the volume of API queries in a given period of time to regulate + the amount and fidelity of potentially sensitive information an attacker can + learn. + + ' + - id: AML.T0024.001 + use: 'Limit the volume of API queries in a given period of time to regulate + the amount and fidelity of potentially sensitive information an attacker can + learn. + + ' + - id: AML.T0024.002 + use: 'Limit the volume of API queries in a given period of time to regulate + the amount and fidelity of potentially sensitive information an attacker can + learn. + + ' + - id: AML.T0043.001 + use: 'Limit the number of queries users can perform in a given interval to shrink + the attack surface for black-box attacks. 
+ + ' + - id: AML.T0029 + use: 'Limit the number of queries users can perform in a given interval to prevent + a denial of service. + + ' + - id: AML.T0046 + use: 'Limit the number of queries users can perform in a given interval to protect + the system from chaff data spam. + + ' + - id: AML.M0005 + name: Control Access to ML Models and Data at Rest + object-type: mitigation + tags: + - Model Agnostic + - Production + description: 'Establish access controls on internal model registries and limit + internal access to production models. Limit access to training data only to + approved users. + + ' + techniques: + - id: AML.T0010.002 + use: 'Access controls can prevent tampering with ML artifacts and prevent unauthorized + copying. + + ' + - id: AML.T0020 + use: 'Access controls can prevent tampering with ML artifacts and prevent unauthorized + copying. + + ' + - id: AML.T0018.000 + use: 'Access controls can prevent tampering with ML artifacts and prevent unauthorized + copying. + + ' + - id: AML.T0018.001 + use: 'Access controls can prevent tampering with ML artifacts and prevent unauthorized + copying. + + ' + - id: AML.T0010.003 + use: 'Access controls can prevent tampering with ML artifacts and prevent unauthorized + copying. + + ' + - id: AML.T0025 + use: 'Access controls can prevent exfiltration. + + ' + - id: AML.T0045 + use: 'Access controls can prevent theft of intellectual property. + + ' + - id: AML.M0006 + name: Use Ensemble Methods + object-type: mitigation + tags: + - Model Focused + - Model Enhancement + - Model Development + description: 'Use an ensemble of models for inference to increase robustness to + adversarial inputs. Some attacks may effectively evade one model or model family + but be ineffective against others. + + ' + techniques: + - id: AML.T0031 + use: 'Using multiple different models increases robustness to attack. + + ' + - id: AML.T0010.001 + use: 'Using multiple different models ensures minimal performance loss if security + flaw is found in tool for one model or family. + + ' + - id: AML.T0010.003 + use: 'Using multiple different models ensures minimal performance loss if security + flaw is found in tool for one model or family. + + ' + - id: AML.T0015 + use: 'Using multiple different models increases robustness to attack. + + ' + - id: AML.T0014 + use: 'Use multiple different models to fool adversaries of which type of model + is used and how the model used. + + ' + - id: AML.M0007 + name: Sanitize Training Data + object-type: mitigation + tags: + - Passive + - Active + - Data Focused + - Model Agnostic + - Model Enhancement + - Model Development + - Model Operations + description: 'Detect and remove or remediate poisoned training data. Training + data should be sanitized prior to model training and recurrently for an active + learning model. + + + Implement a filter to limit ingested training data. Establish a content policy + that would remove unwanted content such as certain explicit or offensive language + from being used. + + ' + techniques: + - id: AML.T0010.002 + use: 'Detect and remove or remediate poisoned data to avoid adversarial model + drift or backdoor attacks. + + ' + - id: AML.T0020 + use: 'Detect modification of data and labels which may cause adversarial model + drift or backdoor attacks. + + ' + - id: AML.T0018.000 + use: 'Prevent attackers from leveraging poisoned datasets to launch backdoor + attacks against a model. 
+ + ' + - id: AML.M0008 + name: Validate ML Model + object-type: mitigation + tags: + - Active + - Model Focused + - Model Agnostic + - Model Development + description: 'Validate that machine learning models perform as intended by testing + for backdoor triggers or adversarial bias. + + ' + techniques: + - id: AML.T0010.003 + use: 'Ensure that acquired models do not respond to potential backdoor triggers + or adversarial bias. + + ' + - id: AML.T0018.000 + use: 'Ensure that trained models do not respond to potential backdoor triggers + or adversarial bias. + + ' + - id: AML.T0018.001 + use: 'Ensure that acquired models do not respond to potential backdoor triggers + or adversarial bias. + + ' + - id: AML.M0009 + name: Use Multi-Modal Sensors + object-type: mitigation + tags: + - Model Enhancement + - Model Focused + - Data Focused + - Model Development + description: 'Incorporate multiple sensors to integrate varying perspectives and + modalities to avoid a single point of failure susceptible to physical attacks. + + ' + techniques: + - id: AML.T0041 + use: 'Using a variety of sensors can make it more difficult for an attacker + with physical access to compromise and produce malicious results. + + ' + - id: AML.T0015 + use: 'Using a variety of sensors can make it more difficult for an attacker + to compromise and produce malicious results. + + ' + - id: AML.M0010 + name: Input Restoration + object-type: mitigation + tags: + - Passive + - Data Focused + - Model Agnostic + - Model Operations + description: 'Preprocess all inference data to nullify or reverse potential adversarial + perturbations. + + ' + techniques: + - id: AML.T0043.001 + use: 'Input restoration adds an extra layer of unknowns and randomness when + an adversary evaluates the input-output relationship. + + ' + - id: AML.T0015 + use: 'Preprocessing model inputs can prevent malicious data from going through + the machine learning pipeline. + + ' + - id: AML.T0031 + use: 'Preprocessing model inputs can prevent malicious data from going through + the machine learning pipeline. + + ' + - id: AML.M0011 + name: Restrict Library Loading + object-type: mitigation + tags: + - Model Agnostic + description: 'Prevent abuse of library loading mechanisms in the operating system + and software to load untrusted code by configuring appropriate library loading + mechanisms and investigating potential vulnerable software. + + + File formats such as pickle files that are commonly used to store machine learning + models can contain exploits that allow for loading of malicious libraries. + + ' + techniques: + - id: AML.T0011.000 + use: 'Restrict library loading by ML artifacts. + + ' + ATT&CK-reference: + id: M1044 + url: https://attack.mitre.org/mitigations/M1044/ + - id: AML.M0012 + name: Encrypt Sensitive Information + object-type: mitigation + tags: + - Model Agnostic + - Model Operations + description: 'Encrypt sensitive data such as ML models to protect against adversaries + attempting to access sensitive data. + + ' + ATT&CK-reference: + id: M1041 + url: https://attack.mitre.org/mitigations/M1041/ + techniques: + - id: AML.T0035 + use: 'Protect machine learning artifacts with encryption. + + ' + - id: AML.T0045 + use: 'Protect machine learning artifacts with encryption. + + ' + - id: AML.T0007 + use: 'Protect machine learning artifacts from adversaries who gather private + information to target and improve attacks. 
+ + ' + - id: AML.M0013 + name: Code Signing + object-type: mitigation + tags: + - Model Agnostic + - Model Development + - Model Operations + description: 'Enforce binary and application integrity with digital signature + verification to prevent untrusted code from executing. Adversaries can embed + malicious code in ML software or models. Enforcement of code signing can prevent + the compromise of the machine learning supply chain and prevent execution of + malicious code. + + ' + techniques: + - id: AML.T0011.000 + use: 'Prevent execution of ML artifacts that are not properly signed. + + ' + - id: AML.T0010.001 + use: 'Enforce properly signed drivers and ML software frameworks. + + ' + - id: AML.T0010.003 + use: 'Enforce properly signed model files. + + ' + ATT&CK-reference: + id: M1045 + url: https://attack.mitre.org/mitigations/M1045/ + - id: AML.M0014 + name: Verify ML Artifacts + object-type: mitigation + tags: + - Model Focused + - Data Focused + - Model Agnostic + - Model Development + description: 'Verify the cryptographic checksum of all machine learning artifacts + to verify that the file was not modified by an attacker. + + ' + techniques: + - id: AML.T0019 + use: 'Determine validity of published data in order to avoid using poisoned + data that introduces vulnerabilities. + + ' + - id: AML.T0011.000 + use: 'Introduce proper checking of signatures to ensure that unsafe ML artifacts + will not be executed in the system. + + ' + - id: AML.T0010 + use: 'Introduce proper checking of signatures to ensure that unsafe ML artifacts + will not be introduced to the system. + + ' + - id: AML.M0015 + name: Adversarial Input Detection + object-type: mitigation + tags: + - Active + - Data Focused + - Model Agnostic + - Model Operations + description: 'Detect and block adversarial inputs or atypical queries that deviate + from known benign behavior, exhibit behavior patterns observed in previous attacks + or that come from potentially malicious IPs. + + Incorporate adversarial detection algorithms into the ML system prior to the + ML model. + + ' + techniques: + - id: AML.T0015 + use: 'Prevent an attacker from introducing adversarial data into the system. + + ' + - id: AML.T0043.001 + use: 'Monitor queries and query patterns to the target model, block access if + suspicious queries are detected. + + ' + - id: AML.T0029 + use: 'Assess queries before inference call or enforce timeout policy for queries + which consume excessive resources. + + ' + - id: AML.T0031 + use: 'Incorporate adversarial input detection into the pipeline before inputs + reach the model. + + ' + - id: AML.M0016 + name: Vulnerability Scanning + object-type: mitigation + tags: + - Active + - Model Agnostic + description: 'Vulnerability scanning is used to find potentially exploitable software + vulnerabilities to remediate them. + + + File formats such as pickle files that are commonly used to store machine learning + models can contain exploits that allow for arbitrary code execution. + + ' + techniques: + - id: AML.T0011.000 + use: 'Scan ML artifacts for vulnerabilities before execution. + + ' + ATT&CK-reference: + id: M1016 + url: https://attack.mitre.org/mitigations/M1016/ + - id: AML.M0017 + name: Model Distribution Methods + object-type: mitigation + tags: + - Model Focused + - Model Agnostic + description: 'Deploying ML models to edge devices can increase the attack surface + of the system. Consider serving models in the cloud to reduce the level of access + the adversary has to the model. 
+ + ' + techniques: + - id: AML.T0044 + use: 'Not distributing the model in software to edge devices, can limit an adversary''s + ability to gain full access to the model. + + ' + - id: AML.T0043.000 + use: 'With full access to the model, an adversary could perform white-box attacks. + + ' + - id: AML.T0010.003 + use: 'An adversary could repackage the application with a malicious version + of the model. + + ' + - id: AML.M0018 + name: User Training + object-type: mitigation + tags: + - Model Agnostic + description: 'Educate ML model developers on secure coding practices and ML vulnerabilities. + + ' + techniques: + - id: AML.T0011 + use: 'Training users to be able to identify attempts at manipulation will make + them less susceptible to performing techniques that cause the execution of + malicious code. + + ' + - id: AML.T0011.000 + use: 'Train users to identify attempts of manipulation to prevent them from + running unsafe code which when executed could develop unsafe artifacts. These + artifacts may have a detrimental effect on the system. + + ' + ATT&CK-reference: + id: M1017 + url: https://attack.mitre.org/mitigations/M1017/ case-studies: - id: AML.CS0000 object-type: case-study @@ -1998,6 +2585,8 @@ case-studies: references: - title: 'AIID - Incident 6: TayBot' url: https://incidentdatabase.ai/cite/6 + - title: 'AVID - Vulnerability: AVID-2022-v013' + url: https://avidml.org/database/avid-2022-v013/ - title: Microsoft BlogPost, "Learning from Tay's introduction" url: https://blogs.microsoft.com/blog/2016/03/25/learning-tays-introduction/ - title: IEEE Article, "In 2016, Microsoft's Racist Chatbot Revealed the Dangers diff --git a/schemas/atlas_obj.py b/schemas/atlas_obj.py index e23003f..b70574e 100644 --- a/schemas/atlas_obj.py +++ b/schemas/atlas_obj.py @@ -101,11 +101,19 @@ "description": str, Optional("techniques"): [ Or( - TECHNIQUE_ID_REGEX_EXACT, # top-level techniquye - SUBTECHNIQUE_ID_REGEX_EXACT # subtechnique - ), + TECHNIQUE_ID_REGEX_EXACT, # top-level techniquye + SUBTECHNIQUE_ID_REGEX_EXACT, # subtechnique + { # Specific mitigation for each technique + "id": Or ( + TECHNIQUE_ID_REGEX_EXACT, + SUBTECHNIQUE_ID_REGEX_EXACT + ), + "use": str + } + ), ] }, name="mitigation", - as_reference=True + as_reference=True, + ignore_extra_keys=True ) \ No newline at end of file