From 5b737c8229fb8023bb9a64bd4108a0a123d292e8 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Mon, 2 Dec 2024 16:24:26 +0000 Subject: [PATCH 1/2] vmray: record command line info --- CHANGELOG.md | 1 + capa/features/extractors/vmray/__init__.py | 21 +++++++++++++++++++-- capa/features/extractors/vmray/extractor.py | 2 +- capa/features/extractors/vmray/models.py | 14 ++++++++++++-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4aa64f14..dd3381061 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - allow call as valid subscope for call scoped rules @mr-tz - support loading and analyzing a Binary Ninja database #2496 @xusheng6 +- vmray: record process command line details @mr-tz ### Breaking Changes diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index a8976cd8c..93f0330ee 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -13,7 +13,15 @@ from dataclasses import dataclass from capa.exceptions import UnsupportedFormatError -from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall, xml_to_dict +from capa.features.extractors.vmray.models import ( + File, + Flog, + SummaryV2, + StaticData, + FunctionCall, + xml_to_dict, + sanitize_string, +) logger = logging.getLogger(__name__) @@ -35,6 +43,8 @@ class VMRayMonitorProcess: ppid: int # parent process ID assigned by OS monitor_id: int # unique ID assigned to process by VMRay image_name: str + filename: str + cmd_line: str class VMRayAnalysis: @@ -160,7 +170,12 @@ def _compute_monitor_processes(self): self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0 ) self.monitor_processes[process.monitor_id] = VMRayMonitorProcess( - process.os_pid, ppid, process.monitor_id, process.image_name + process.os_pid, + ppid, + process.monitor_id, + process.image_name, + sanitize_string(process.filename), + sanitize_string(process.cmd_line), ) # not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394 @@ -170,6 +185,8 @@ def _compute_monitor_processes(self): monitor_process.os_parent_pid, monitor_process.process_id, monitor_process.image_name, + monitor_process.filename, + monitor_process.cmd_line, ) if monitor_process.process_id not in self.monitor_processes: diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py index a9f0491c9..7f40f25da 100644 --- a/capa/features/extractors/vmray/extractor.py +++ b/capa/features/extractors/vmray/extractor.py @@ -86,7 +86,7 @@ def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, def get_process_name(self, ph) -> str: monitor_process: VMRayMonitorProcess = ph.inner - return monitor_process.image_name + return f"{monitor_process.image_name} ({monitor_process.cmd_line})" def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]: diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index c2d6551aa..728361c69 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -136,11 +136,20 @@ class FunctionReturn(BaseModel): from_addr: HexInt = Field(alias="from") +def sanitize_string(value: str) -> str: + # e.g. "cmd_line": "\"C:\\Users\\38lTTV5Kii\\Desktop\\filename.exe\" ", + return value.replace("\\\\", "\\").strip(' "') + + +# unify representation +SanitizedString = Annotated[str, BeforeValidator(sanitize_string)] + + class MonitorProcess(BaseModel): ts: HexInt process_id: int image_name: str - filename: str + filename: SanitizedString # page_root: HexInt os_pid: HexInt # os_integrity_level: HexInt @@ -148,7 +157,7 @@ class MonitorProcess(BaseModel): monitor_reason: str parent_id: int os_parent_pid: HexInt - # cmd_line: str + cmd_line: SanitizedString # cur_dir: str # os_username: str # bitness: int @@ -308,6 +317,7 @@ class Process(BaseModel): os_pid: int filename: str image_name: str + cmd_line: str ref_parent_process: Optional[GenericReference] = None From bbe2223b8d75541e7cfa1cff68f8993d9bdab973 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Mon, 2 Dec 2024 16:57:49 +0000 Subject: [PATCH 2/2] sanitize strings in model --- capa/features/extractors/vmray/__init__.py | 14 +++----------- capa/features/extractors/vmray/models.py | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index 93f0330ee..dc719211a 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -13,15 +13,7 @@ from dataclasses import dataclass from capa.exceptions import UnsupportedFormatError -from capa.features.extractors.vmray.models import ( - File, - Flog, - SummaryV2, - StaticData, - FunctionCall, - xml_to_dict, - sanitize_string, -) +from capa.features.extractors.vmray.models import File, Flog, SummaryV2, StaticData, FunctionCall, xml_to_dict logger = logging.getLogger(__name__) @@ -174,8 +166,8 @@ def _compute_monitor_processes(self): ppid, process.monitor_id, process.image_name, - sanitize_string(process.filename), - sanitize_string(process.cmd_line), + process.filename, + process.cmd_line, ) # not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394 diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index 728361c69..755f494fe 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -315,9 +315,9 @@ class Process(BaseModel): monitor_id: int # monitor_reason: str os_pid: int - filename: str + filename: SanitizedString image_name: str - cmd_line: str + cmd_line: SanitizedString ref_parent_process: Optional[GenericReference] = None