diff --git a/tests/data b/tests/data index ad887bbed..20cade75b 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit ad887bbed928097655282d2a53b3450575067f3f +Subproject commit 20cade75bab34788ea8395ec61a7e7b7fa7d7c59 diff --git a/web/rules/scripts/build_root.py b/web/rules/scripts/build_root.py index 0b891c3aa..67f1f6f3b 100644 --- a/web/rules/scripts/build_root.py +++ b/web/rules/scripts/build_root.py @@ -1,343 +1,343 @@ -""" -Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. -You may obtain a copy of the License at: [package root]/LICENSE.txt -Unless required by applicable law or agreed to in writing, software distributed under the License - is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and limitations under the License. -""" - -import sys -import random -import logging -from typing import Dict, List -from pathlib import Path - -import capa.rules - -logger = logging.getLogger(__name__) - -start_dir = Path(sys.argv[1]) -txt_file_path = Path(sys.argv[2]) -out_dir = Path(sys.argv[3]) -output_html_path = out_dir / "index.html" - -assert start_dir.exists(), "input directory must exist" -assert txt_file_path.exists(), "file-modification txt file must exist" -assert out_dir.exists(), "output directory must exist" - -predefined_colors = [ - "#9CAFAA", - "#577590", - "#a98467", - "#D6DAC8", - "#adc178", - "#f4d35e", - "#85182a", - "#d6c399", - "#dde5b6", - "#8da9c4", - "#fcd5ce", - "#706993", - "#FBF3D5", - "#1a659e", - "#c71f37", - "#EFBC9B", - "#7e7f9a", -] - - -def read_file_paths(txt_file_path: Path): - categorized_files: Dict[str, List[Path]] = { - "modified in the last day": [], - "modified in the last week": [], - "modified in the last month": [], - "modified in the last three months": [], - "modified in the last year": [], - "older": [], - } - - lines = txt_file_path.read_text(encoding="utf-8").splitlines() - - current_category = None - for line in lines: - line = line.strip() - if not line: - continue - if "===" in line: - category = line.strip("=").strip() - if category in categorized_files: - current_category = category - else: - logger.warning("Unrecognized category '%s'", category) - current_category = None - elif current_category: - parts = line.split(" ", 1) - if len(parts) == 2: - file_path, last_modified_date_str = parts - categorized_files[current_category].append(Path(file_path)) - else: - logger.warning("Skipping line due to unexpected format: %s", line) - - return categorized_files - - -def parse_rule(file_path: Path): - rule = capa.rules.Rule.from_yaml_file(file_path) - - return { - "name": rule.name, - "namespace": rule.meta.get("namespace", ""), - "authors": rule.meta.get("authors", []), - "path": file_path, - "filename": file_path.name, - } - - -def generate_color(): - return "#{:06x}".format(random.randint(0, 0xFFFFFF)) - - -def get_first_word(namespace): - return namespace.split("/")[0] if "/" in namespace else namespace - - -def generate_html(categories_data, color_map): - html_content = """ - - - - - capa rules - - - - - - - - - - - - - -
- - - - - -
- -
- -""" - - for category, files in categories_data.items(): - if not files: - continue - - html_content += f'

{category}

' - cards_data = [] - for file_path in files: - try: - card_data = parse_rule(file_path) - cards_data.append(card_data) - except Exception as e: - logger.error("error parsing %s: %s", file_path, e) - - for card in cards_data: - first_word = get_first_word(card["namespace"]) - rectangle_color = color_map[first_word] - file_name = card["filename"].rpartition(".yml")[0] - - card_html = f""" -
-
-
-
-
{card['namespace']}
- -
{', '.join(card['authors'])}
-
-
-
""" - - html_content += card_html - - num_cards = len(cards_data) - num_empty_cells = (4 - (num_cards % 4)) % 4 - if num_empty_cells > 0: - for _ in range(num_empty_cells): - html_content += """ -
- -
""" - - html_content += "
" - - html_content += """ -
- - - -""" - - output_html_path.write_text(html_content, encoding="utf-8") - - -categories_data = read_file_paths(txt_file_path) - - -color_map = {} -used_colors = set(predefined_colors) -color_index = 0 - - -all_files = [file for category in categories_data.values() for file in category] -for file_path in all_files: - try: - card_data = parse_rule(file_path) - first_word = get_first_word(card_data["namespace"]) - if first_word not in color_map: - if color_index < len(predefined_colors): - color_map[first_word] = predefined_colors[color_index] - color_index += 1 - else: - new_color = generate_color() - while new_color in used_colors: - new_color = generate_color() - color_map[first_word] = new_color - used_colors.add(new_color) - except Exception as e: - logger.error("error parsing %s: %s", file_path, e) - -generate_html(categories_data, color_map) -logger.info("HTML file has been generated: %s", output_html_path) +""" +Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. +You may obtain a copy of the License at: [package root]/LICENSE.txt +Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. +""" + +import sys +import random +import logging +from typing import Dict, List +from pathlib import Path + +import capa.rules + +logger = logging.getLogger(__name__) + +start_dir = Path(sys.argv[1]) +txt_file_path = Path(sys.argv[2]) +out_dir = Path(sys.argv[3]) +output_html_path = out_dir / "index.html" + +assert start_dir.exists(), "input directory must exist" +assert txt_file_path.exists(), "file-modification txt file must exist" +assert out_dir.exists(), "output directory must exist" + +predefined_colors = [ + "#9CAFAA", + "#577590", + "#a98467", + "#D6DAC8", + "#adc178", + "#f4d35e", + "#85182a", + "#d6c399", + "#dde5b6", + "#8da9c4", + "#fcd5ce", + "#706993", + "#FBF3D5", + "#1a659e", + "#c71f37", + "#EFBC9B", + "#7e7f9a", +] + + +def read_file_paths(txt_file_path: Path): + categorized_files: Dict[str, List[Path]] = { + "modified in the last day": [], + "modified in the last week": [], + "modified in the last month": [], + "modified in the last three months": [], + "modified in the last year": [], + "older": [], + } + + lines = txt_file_path.read_text(encoding="utf-8").splitlines() + + current_category = None + for line in lines: + line = line.strip() + if not line: + continue + if "===" in line: + category = line.strip("=").strip() + if category in categorized_files: + current_category = category + else: + logger.warning("Unrecognized category '%s'", category) + current_category = None + elif current_category: + parts = line.split(" ", 1) + if len(parts) == 2: + file_path, last_modified_date_str = parts + categorized_files[current_category].append(Path(file_path)) + else: + logger.warning("Skipping line due to unexpected format: %s", line) + + return categorized_files + + +def parse_rule(file_path: Path): + rule = capa.rules.Rule.from_yaml_file(file_path) + + return { + "name": rule.name, + "namespace": rule.meta.get("namespace", ""), + "authors": rule.meta.get("authors", []), + "path": file_path, + "filename": file_path.name, + } + + +def generate_color(): + return "#{:06x}".format(random.randint(0, 0xFFFFFF)) + + +def get_first_word(namespace): + return namespace.split("/")[0] if "/" in namespace else namespace + + +def generate_html(categories_data, color_map): + html_content = """ + + + + + capa rules + + + + + + + + + + + + + +
+ + + + + +
+ +
+ +""" + + for category, files in categories_data.items(): + if not files: + continue + + html_content += f'

{category}

' + cards_data = [] + for file_path in files: + try: + card_data = parse_rule(file_path) + cards_data.append(card_data) + except Exception as e: + logger.error("error parsing %s: %s", file_path, e) + + for card in cards_data: + first_word = get_first_word(card["namespace"]) + rectangle_color = color_map[first_word] + file_name = card["filename"].rpartition(".yml")[0] + + card_html = f""" +
+
+
+
+
{card['namespace']}
+ +
{', '.join(card['authors'])}
+
+
+
""" + + html_content += card_html + + num_cards = len(cards_data) + num_empty_cells = (4 - (num_cards % 4)) % 4 + if num_empty_cells > 0: + for _ in range(num_empty_cells): + html_content += """ +
+ +
""" + + html_content += "
" + + html_content += """ +
+ + + +""" + + output_html_path.write_text(html_content, encoding="utf-8") + + +categories_data = read_file_paths(txt_file_path) + + +color_map = {} +used_colors = set(predefined_colors) +color_index = 0 + + +all_files = [file for category in categories_data.values() for file in category] +for file_path in all_files: + try: + card_data = parse_rule(file_path) + first_word = get_first_word(card_data["namespace"]) + if first_word not in color_map: + if color_index < len(predefined_colors): + color_map[first_word] = predefined_colors[color_index] + color_index += 1 + else: + new_color = generate_color() + while new_color in used_colors: + new_color = generate_color() + color_map[first_word] = new_color + used_colors.add(new_color) + except Exception as e: + logger.error("error parsing %s: %s", file_path, e) + +generate_html(categories_data, color_map) +logger.info("HTML file has been generated: %s", output_html_path) diff --git a/web/rules/scripts/build_rules.py b/web/rules/scripts/build_rules.py index 5a3a33c92..4aaff906a 100644 --- a/web/rules/scripts/build_rules.py +++ b/web/rules/scripts/build_rules.py @@ -1,136 +1,136 @@ """ -Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. -You may obtain a copy of the License at: [package root]/LICENSE.txt -Unless required by applicable law or agreed to in writing, software distributed under the License - is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and limitations under the License. -""" - -import os -import sys -import urllib.parse -from glob import glob -from pathlib import Path - -import pygments -from pygments.lexers import YamlLexer -from pygments.formatters import HtmlFormatter - -import capa.rules - -input_directory = Path(sys.argv[1]) -txt_file_path = Path(sys.argv[2]) -output_directory = Path(sys.argv[3]) - -assert input_directory.exists(), "input directory must exist" -assert txt_file_path.exists(), "file-modification txt file must exist" -assert output_directory.exists(), "output directory must exist" - - -def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path): - rule_content = yaml_file.read_text(encoding="utf-8") - rule = capa.rules.Rule.from_yaml(rule_content, use_ruamel=True) - - filename = os.path.basename(yaml_file).rpartition(".yml")[0] - namespace = rule.meta.get("namespace", "") - timestamp = timestamps[yaml_file.as_posix()] - - rendered_rule = pygments.highlight( - rule_content, - YamlLexer(), - HtmlFormatter( - style="xcode", - noclasses=True, - wrapcode=True, - nobackground=True, - ), - ) - - gh_link = f"https://github.com/mandiant/capa-rules/tree/master/{namespace}/{filename}.yml" - vt_query = 'behavior_signature:"' + rule.name + '"' - vt_fragment = urllib.parse.quote(urllib.parse.quote(vt_query)) - vt_link = f"https://www.virustotal.com/gui/search/{vt_fragment}/files" - ns_query = f'"namespace: {namespace} "' - ns_link = f"./?{urllib.parse.urlencode({'q': ns_query})}" - - html_content = f""" - - - - - - {rule.name} - - - - - +Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. +You may obtain a copy of the License at: [package root]/LICENSE.txt +Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. +""" + +import os +import sys +import urllib.parse +from glob import glob +from pathlib import Path + +import pygments +from pygments.lexers import YamlLexer +from pygments.formatters import HtmlFormatter + +import capa.rules + +input_directory = Path(sys.argv[1]) +txt_file_path = Path(sys.argv[2]) +output_directory = Path(sys.argv[3]) + +assert input_directory.exists(), "input directory must exist" +assert txt_file_path.exists(), "file-modification txt file must exist" +assert output_directory.exists(), "output directory must exist" + + +def convert_yaml_to_html(timestamps, yaml_file: Path, output_dir: Path): + rule_content = yaml_file.read_text(encoding="utf-8") + rule = capa.rules.Rule.from_yaml(rule_content, use_ruamel=True) + + filename = os.path.basename(yaml_file).rpartition(".yml")[0] + namespace = rule.meta.get("namespace", "") + timestamp = timestamps[yaml_file.as_posix()] + + rendered_rule = pygments.highlight( + rule_content, + YamlLexer(), + HtmlFormatter( + style="xcode", + noclasses=True, + wrapcode=True, + nobackground=True, + ), + ) + + gh_link = f"https://github.com/mandiant/capa-rules/tree/master/{namespace}/{filename}.yml" + vt_query = 'behavior_signature:"' + rule.name + '"' + vt_fragment = urllib.parse.quote(urllib.parse.quote(vt_query)) + vt_link = f"https://www.virustotal.com/gui/search/{vt_fragment}/files" + ns_query = f'"namespace: {namespace} "' + ns_link = f"./?{urllib.parse.urlencode({'q': ns_query})}" + + html_content = f""" + + + + + + {rule.name} + + + + + - - - -
- - - - - -
- -
-
-

- - {namespace} - -

-

{rule.name}

- - - -
- {rendered_rule} -
-

last edited: {timestamp}

-
-
- - - """ - - output_dir.mkdir(parents=True, exist_ok=True) - output_file_path = output_dir / (filename + ".html") - output_file_path.write_text(html_content, encoding="utf-8") - - -yaml_files = glob(os.path.join(input_directory, "**/*.yml"), recursive=True) - -timestamps = {} -for line in txt_file_path.read_text(encoding="utf-8").splitlines(): - if not line: - continue - if line.startswith("==="): + + + +
+ + + + + +
+ +
+
+

+ + {namespace} + +

+

{rule.name}

+ + + +
+ {rendered_rule} +
+

last edited: {timestamp}

+
+
+ + + """ + + output_dir.mkdir(parents=True, exist_ok=True) + output_file_path = output_dir / (filename + ".html") + output_file_path.write_text(html_content, encoding="utf-8") + + +yaml_files = glob(os.path.join(input_directory, "**/*.yml"), recursive=True) + +timestamps = {} +for line in txt_file_path.read_text(encoding="utf-8").splitlines(): + if not line: + continue + if line.startswith("==="): continue - - path, _, timestamp = line.partition(" ") - timestamps[path] = timestamp - -for yaml_file in yaml_files: - convert_yaml_to_html(timestamps, Path(yaml_file), output_directory) + + path, _, timestamp = line.partition(" ") + timestamps[path] = timestamp + +for yaml_file in yaml_files: + convert_yaml_to_html(timestamps, Path(yaml_file), output_directory) diff --git a/web/rules/scripts/modified-dates.py b/web/rules/scripts/modified-dates.py index 38ca38ded..66a2af9dc 100644 --- a/web/rules/scripts/modified-dates.py +++ b/web/rules/scripts/modified-dates.py @@ -1,92 +1,92 @@ -""" -Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. -You may obtain a copy of the License at: [package root]/LICENSE.txt -Unless required by applicable law or agreed to in writing, software distributed under the License - is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and limitations under the License. -""" - -import os -import sys -import logging -import subprocess -from pathlib import Path -from datetime import datetime, timedelta - -logger = logging.getLogger(__name__) - -start_dir = Path(sys.argv[1]) -output_file = Path(sys.argv[2]) - -assert start_dir.exists(), "start directory must exist" - - -def get_yml_files_and_dates(start_dir: Path): - yml_files = [] - for root, _, files in os.walk(start_dir): - for file in files: - if file.endswith(".yml") or file.endswith(".yaml"): - file_path = Path(root) / file - - proc = subprocess.run( - [ - "git", - "log", - "-1", # only show most recent commit - '--pretty="%ct"', # unix timestmp, https://git-scm.com/docs/pretty-formats#Documentation/pretty-formats.txt-emctem - file, # just the filename, will run from the containing directory - ], - cwd=root, # the directory with the file we're inspecting - check=True, - capture_output=True, - ) - - last_modified_date = int(proc.stdout.decode("utf-8").partition("\n")[0].strip('"')) - - yml_files.append((file_path, last_modified_date)) - return yml_files - - -yml_files_and_dates = get_yml_files_and_dates(start_dir) - -yml_files_and_dates.sort(key=lambda x: x[1], reverse=True) - - -current_date = datetime.now() - -categories = [ - ("modified in the last day", current_date - timedelta(days=1)), - ("modified in the last week", current_date - timedelta(days=7)), - ("modified in the last month", current_date - timedelta(days=30)), - ("modified in the last three months", current_date - timedelta(days=90)), - ("modified in the last year", current_date - timedelta(days=365)), -] - - -def write_category(f, category_name, files): - f.write(f"=== {category_name} ===\n") - for file_path, last_modified_date in files: - last_modified_date_str = datetime.fromtimestamp(last_modified_date).strftime("%Y-%m-%d %H:%M:%S") - f.write(f"{file_path} {last_modified_date_str}\n") - f.write("\n") - - -with output_file.open("wt", encoding="utf-8") as f: - for title, delta in categories: - current_files = [] - for file_path, last_modified_date in yml_files_and_dates: - last_modified_date_dt = datetime.fromtimestamp(last_modified_date) - if last_modified_date_dt > delta: - current_files.append((file_path, last_modified_date)) - - write_category(f, title, current_files) - - for item in current_files: - yml_files_and_dates.remove(item) - - write_category(f, "older", yml_files_and_dates) - - -logger.info("File names and modification dates have been written to %s", output_file) +""" +Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. +You may obtain a copy of the License at: [package root]/LICENSE.txt +Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and limitations under the License. +""" + +import os +import sys +import logging +import subprocess +from pathlib import Path +from datetime import datetime, timedelta + +logger = logging.getLogger(__name__) + +start_dir = Path(sys.argv[1]) +output_file = Path(sys.argv[2]) + +assert start_dir.exists(), "start directory must exist" + + +def get_yml_files_and_dates(start_dir: Path): + yml_files = [] + for root, _, files in os.walk(start_dir): + for file in files: + if file.endswith(".yml") or file.endswith(".yaml"): + file_path = Path(root) / file + + proc = subprocess.run( + [ + "git", + "log", + "-1", # only show most recent commit + '--pretty="%ct"', # unix timestmp, https://git-scm.com/docs/pretty-formats#Documentation/pretty-formats.txt-emctem + file, # just the filename, will run from the containing directory + ], + cwd=root, # the directory with the file we're inspecting + check=True, + capture_output=True, + ) + + last_modified_date = int(proc.stdout.decode("utf-8").partition("\n")[0].strip('"')) + + yml_files.append((file_path, last_modified_date)) + return yml_files + + +yml_files_and_dates = get_yml_files_and_dates(start_dir) + +yml_files_and_dates.sort(key=lambda x: x[1], reverse=True) + + +current_date = datetime.now() + +categories = [ + ("modified in the last day", current_date - timedelta(days=1)), + ("modified in the last week", current_date - timedelta(days=7)), + ("modified in the last month", current_date - timedelta(days=30)), + ("modified in the last three months", current_date - timedelta(days=90)), + ("modified in the last year", current_date - timedelta(days=365)), +] + + +def write_category(f, category_name, files): + f.write(f"=== {category_name} ===\n") + for file_path, last_modified_date in files: + last_modified_date_str = datetime.fromtimestamp(last_modified_date).strftime("%Y-%m-%d %H:%M:%S") + f.write(f"{file_path} {last_modified_date_str}\n") + f.write("\n") + + +with output_file.open("wt", encoding="utf-8") as f: + for title, delta in categories: + current_files = [] + for file_path, last_modified_date in yml_files_and_dates: + last_modified_date_dt = datetime.fromtimestamp(last_modified_date) + if last_modified_date_dt > delta: + current_files.append((file_path, last_modified_date)) + + write_category(f, title, current_files) + + for item in current_files: + yml_files_and_dates.remove(item) + + write_category(f, "older", yml_files_and_dates) + + +logger.info("File names and modification dates have been written to %s", output_file)