Skip to content

Commit

Permalink
working doc gen script
Browse files Browse the repository at this point in the history
  • Loading branch information
clavedeluna committed Oct 13, 2023
1 parent d908de3 commit 1e10304
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 26 deletions.
4 changes: 4 additions & 0 deletions src/codemodder/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def description(self):
# TODO: temporary workaround
return self.METADATA.DESCRIPTION

@property
def review_guidance(self):
return self.METADATA.REVIEW_GUIDANCE.name.replace("_", " ").title()

@property
def yaml_files(self):
return [
Expand Down
2 changes: 1 addition & 1 deletion src/core_codemods/lxml_safe_parser_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class LxmlSafeParserDefaults(SemgrepCodemod):
NAME = "safe-lxml-parser-defaults"
REVIEW_GUIDANCE = ReviewGuidance.MERGE_WITHOUT_REVIEW
SUMMARY = "Use safe defaults for lxml parsers"
SUMMARY = "Use Safe Defaults for lxml Parsers"
DESCRIPTION = "Replace lxml parser parameters with safe defaults"

@classmethod
Expand Down
176 changes: 151 additions & 25 deletions src/scripts/generate_docs.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,177 @@
import argparse
from dataclasses import dataclass
from codemodder.registry import load_registered_codemods
from pathlib import Path


@dataclass
class DocMetadata:
"""Codemod specific metadata only for documentation"""

importance: str
guidance_explained: str
need_sarif: str = "No"


# codemod-specific metadata that's used only for docs, not for codemod API
METADATA = {
"django-debug-flag-on": DocMetadata(
importance="Medium",
guidance_explained="Django's `DEBUG` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the `DEBUG` flag may intentionally be left on as a development aid.",
),
"django-session-cookie-secure-off": DocMetadata(
importance="Medium",
guidance_explained="Django's `SESSION_COOKIE_SECURE` flag may be overridden somewhere else or the runtime settings file may be set with the `DJANGO_SETTINGS_MODULE` environment variable. This means that the flag may intentionally be left off or missing. Also some applications may still want to support pure http. This is often the case for legacy apps.",
),
"enable-jinja2-autoescape": DocMetadata(
importance="High",
guidance_explained="This codemod protects your applications against XSS attacks. We believe using the default behavior is unsafe.",
),
"fix-mutable-params": DocMetadata(
importance="Medium",
guidance_explained="We believe that this codemod fixes an unsafe practice and that the changes themselves are safe and reliable.",
),
"harden-pyyaml": DocMetadata(
importance="Medium",
guidance_explained="This codemod replaces any unsafe loaders with the `SafeLoader`, which is already the recommended replacement suggested in `yaml` documentation. We believe this replacement is safe and should not result in any issues.",
),
"harden-ruamel": DocMetadata(
importance="Medium",
guidance_explained="This codemod replaces any unsafe `typ` argument with `typ='safe'`, which makes safety explicit and is one of the recommended uses suggested in `ruamel` documentation. We believe this replacement is safe and should not result in any issues.",
),
"https-connection": DocMetadata(
importance="High",
guidance_explained="Support for HTTPS is widespread which, save in some legacy applications, makes this change safe.",
),
"jwt-decode-verify": DocMetadata(
importance="SOMETHING", guidance_explained="SOMETHING"
),
"limit-readline": DocMetadata(
importance="Medium",
guidance_explained="This codemod sets a maximum of 5MB allowed per line read by default. It is unlikely but possible that your code may receive lines that are greater than 5MB _and_ you'd still be interested in reading them, so there is some nominal risk of exceptional cases.",
),
"safe-lxml-parser-defaults": DocMetadata(
importance="High",
guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.",
),
"safe-lxml-parsing": DocMetadata(
importance="High",
guidance_explained="We believe this change is safe, effective, and protects your code against very serious security attacks.",
),
"order-imports": DocMetadata(
importance="Low",
guidance_explained="We believe this codemod is safe and will not cause any issues. It is important to note that importing modules may have side-effects that alter the behavior, even if unused, but we believe those cases are rare enough to be safe.",
),
"sandbox-process-creation": DocMetadata(
importance="High",
guidance_explained="We believe this change is safe and effective. The behavior of sandboxing `subprocess.run` and `subprocess.call` calls will only throw `SecurityException` if they see behavior involved in malicious code execution, which is extremely unlikely to happen in normal operation.",
),
"remove-unnecessary-f-str": DocMetadata(
importance="Low",
guidance_explained="We believe this codemod is safe and will not cause any issues.",
),
"unused-imports": DocMetadata(
importance="Low",
guidance_explained="We believe this codemod is safe and will not cause any issues. It is important to note that importing modules may have side-effects that alter the behavior, even if unused, but we believe those cases are rare enough to be safe.",
),
"requests-verify": DocMetadata(
importance="High",
guidance_explained="There may be times when setting `verify=False` is useful for testing though we discourage it. \nYou may also decide to set `verify=/path/to/ca/bundle`. This codemod will not attempt to modify the `verify` value if you do set it to a path.",
),
"secure-random": DocMetadata(
importance="High",
guidance_explained="While most of the functions in the `random` module aren't cryptographically secure, there are still valid use cases for `random.random()` such as for simulations or games.",
),
"secure-tempfile": DocMetadata(
importance="High",
guidance_explained="We believe this codemod is safe and will cause no unexpected errors.",
),
"upgrade-sslcontext-minimum-version": DocMetadata(
importance="SOMETHING", guidance_explained="SOMETHING"
),
"upgrade-sslcontext-tls": DocMetadata(
importance="High",
guidance_explained="This codemod updates the minimum supported version of TLS. Since this is an important security fix and since all modern servers offer TLSv1.2, we believe this change can be safely merged without review.",
),
"url-sandbox": DocMetadata(
importance="High",
guidance_explained="""By default, the protection only weaves in 2 checks, which we believe will not cause any issues with the vast majority of code:
1. The given URL must be HTTP/HTTPS.
2. The given URL must not point to a "well-known infrastructure target", which includes things like AWS Metadata Service endpoints, and internal routers (e.g., 192.168.1.1) which are common targets of attacks.
However, on rare occasions an application may use a URL protocol like "file://" or "ftp://" in backend or middleware code.
If you want to allow those protocols, change the incoming PR to look more like this and get the best security possible:
```diff
-resp = requests.get(url)
+resp = safe_requests.get.get(url, allowed_protocols=("ftp",))
```""",
),
"use-walrus-if": DocMetadata(
importance="Low",
guidance_explained="We believe that using the walrus operator is an improvement in terms of clarity and readability. However, this change is only compatible with codebases that support Python 3.8 and later, so it requires quick validation before merging.",
),
# "bad-lock-with-statement": DocMetadata(
# importance="Low", guidance_explained="TODO AFTER PR MERGE"
# ),
}


def generate_docs(codemod):
breakpoint()
output = f"""
---
title: Safe lxml Parsing
try:
codemod_data = METADATA[codemod.name]
except KeyError as exc:
raise KeyError(f"Must add {codemod.name} to METADATA") from exc

output = f"""---
title: {codemod.summary}
sidebar_position: 1
---
## {codemod.id}
| Importance | Review Guidance | Requires SARIF Tool |
|------------|----------------------|---------------------|
| High | Merge Without Review | No |
| {codemod_data.importance} | {codemod.review_guidance} | {codemod_data.need_sarif} |
This codemod sets the `parser` parameter in calls to `lxml.etree.parse` and `lxml.etree.fromstring`
if omitted or set to `None` (the default value). Unfortunately, the default `parser=None` means `lxml`
will rely on an unsafe parser, making your code potentially vulnerable to entity expansion
attacks and external entity (XXE) attacks.
{codemod.description}
The changes look as follows:
If you have feedback on this codemod, [please let us know](mailto:[email protected])!
```diff
import lxml.etree
- lxml.etree.parse("path_to_file")
- lxml.etree.fromstring("xml_str")
+ lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False))
+ lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False))
```
## F.A.Q.
If you have feedback on this codemod, [please let us know](mailto:[email protected])!
### Why is this codemod marked as {codemod.review_guidance}?
## F.A.Q.
{codemod_data.guidance_explained}
### Why is this codemod marked as Merge Without Review?
## Codemod Settings
We believe this change is safe, effective, and protects your code against very serious security attacks.
N/A
## References
* [https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser](https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser)
* [https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing](https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing)
* [https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html](https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html)
* [https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html](https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html)
#codemod.references
"""
codemod._get_description()
return output


def main():
# todo: get dir path
parser = argparse.ArgumentParser(
description="Generate public docs for registered codemods."
)

parser.add_argument(
"directory", type=str, help="directory path where to create doc files"
)
argv = parser.parse_args()
parent_dir = Path(argv.directory)

registry = load_registered_codemods()
for codemod in registry.codemods:
generate_docs(codemod)
doc = generate_docs(codemod)
codemod_doc_name = f"{codemod.id.replace(':', '_').replace('/', '_')}.md"
with open(parent_dir / codemod_doc_name, "w", encoding="utf-8") as f:
f.write(doc)

0 comments on commit 1e10304

Please sign in to comment.