From d1f3e43325dcc6562612cb36c5386f580c098437 Mon Sep 17 00:00:00 2001
From: Moritz
Date: Tue, 3 Dec 2024 19:56:30 +0100
Subject: [PATCH] vmray: record command line info (#2515)

* vmray: record command line info
---
Reviewer notes (free-form text in this section is not part of the commit):

- VMRayMonitorProcess gains two fields, `filename` and `cmd_line`. Both
  construction sites in _compute_monitor_processes() now pass them: once
  from `process` (read from self.sv2.processes) and once from
  `monitor_process` (the flog.xml fallback named in the existing comment).
- get_process_name() now returns "<image_name> (<cmd_line>)" instead of the
  bare image name.
- models.py adds sanitize_string(), which replaces each doubled backslash
  with a single one and strips leading/trailing spaces and double quotes.
  It is attached as `SanitizedString = Annotated[str, BeforeValidator(...)]`
  and used for `filename` and `cmd_line` on both MonitorProcess and Process.
- NOTE(review): `cmd_line` is declared without a default on both models, so
  it looks mandatory in the parsed input -- confirm that every report
  provides it.
- NOTE(review): sanitize_string() calls str methods on the value handed to
  BeforeValidator; presumably that value is always a str -- confirm.
- NOTE(review): this copy of the patch was re-flowed from a whitespace-
  collapsed original. Blank context lines and indentation were restored from
  the hunk line counts -- verify against the upstream commit before applying.

 CHANGELOG.md                                |  1 +
 capa/features/extractors/vmray/__init__.py  | 11 ++++++++++-
 capa/features/extractors/vmray/extractor.py |  2 +-
 capa/features/extractors/vmray/models.py    | 16 +++++++++++++---
 4 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a36269f3e..5df9d3b7f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 
 - allow call as valid subscope for call scoped rules @mr-tz
 - support loading and analyzing a Binary Ninja database #2496 @xusheng6
+- vmray: record process command line details @mr-tz
 
 ### Breaking Changes
 
diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py
index a8976cd8c..dc719211a 100644
--- a/capa/features/extractors/vmray/__init__.py
+++ b/capa/features/extractors/vmray/__init__.py
@@ -35,6 +35,8 @@ class VMRayMonitorProcess:
     ppid: int  # parent process ID assigned by OS
     monitor_id: int  # unique ID assigned to process by VMRay
     image_name: str
+    filename: str
+    cmd_line: str
 
 
 class VMRayAnalysis:
@@ -160,7 +162,12 @@ def _compute_monitor_processes(self):
                 self.sv2.processes[process.ref_parent_process.path[1]].os_pid if process.ref_parent_process else 0
             )
             self.monitor_processes[process.monitor_id] = VMRayMonitorProcess(
-                process.os_pid, ppid, process.monitor_id, process.image_name
+                process.os_pid,
+                ppid,
+                process.monitor_id,
+                process.image_name,
+                process.filename,
+                process.cmd_line,
             )
 
         # not all processes are recorded in SummaryV2.json, get missing data from flog.xml, see #2394
@@ -170,6 +177,8 @@ def _compute_monitor_processes(self):
                 monitor_process.os_parent_pid,
                 monitor_process.process_id,
                 monitor_process.image_name,
+                monitor_process.filename,
+                monitor_process.cmd_line,
             )
 
             if monitor_process.process_id not in self.monitor_processes:
diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py
index a9f0491c9..7f40f25da 100644
--- a/capa/features/extractors/vmray/extractor.py
+++ b/capa/features/extractors/vmray/extractor.py
@@ -86,7 +86,7 @@ def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature,
 
     def get_process_name(self, ph) -> str:
         monitor_process: VMRayMonitorProcess = ph.inner
-        return monitor_process.image_name
+        return f"{monitor_process.image_name} ({monitor_process.cmd_line})"
 
     def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]:
         for monitor_thread_id in self.analysis.monitor_threads_by_monitor_process[ph.inner.monitor_id]:
diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py
index c2d6551aa..755f494fe 100644
--- a/capa/features/extractors/vmray/models.py
+++ b/capa/features/extractors/vmray/models.py
@@ -136,11 +136,20 @@ class FunctionReturn(BaseModel):
     from_addr: HexInt = Field(alias="from")
 
 
+def sanitize_string(value: str) -> str:
+    # e.g. "cmd_line": "\"C:\\Users\\38lTTV5Kii\\Desktop\\filename.exe\" ",
+    return value.replace("\\\\", "\\").strip(' "')
+
+
+# unify representation
+SanitizedString = Annotated[str, BeforeValidator(sanitize_string)]
+
+
 class MonitorProcess(BaseModel):
     ts: HexInt
     process_id: int
     image_name: str
-    filename: str
+    filename: SanitizedString
     # page_root: HexInt
     os_pid: HexInt
     # os_integrity_level: HexInt
@@ -148,7 +157,7 @@ class MonitorProcess(BaseModel):
     monitor_reason: str
     parent_id: int
     os_parent_pid: HexInt
-    # cmd_line: str
+    cmd_line: SanitizedString
     # cur_dir: str
     # os_username: str
     # bitness: int
@@ -306,8 +315,9 @@ class Process(BaseModel):
     monitor_id: int
     # monitor_reason: str
     os_pid: int
-    filename: str
+    filename: SanitizedString
     image_name: str
+    cmd_line: SanitizedString
     ref_parent_process: Optional[GenericReference] = None
 
 