From b37b1abfc85c2aec72b8c5b10d3c1ae6054afbd6 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Wed, 18 Oct 2023 15:05:55 +0200 Subject: [PATCH 1/6] identify potential JSON object data start --- capa/features/extractors/common.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index 2d4f0266b..cad0912ef 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -41,6 +41,7 @@ MATCH_PE = b"MZ" MATCH_ELF = b"\x7fELF" MATCH_RESULT = b'{"meta":' +MATCH_JSON_OBJECT = b'{"' def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[String, Address]]: @@ -63,6 +64,13 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]: yield Format(FORMAT_FREEZE), NO_ADDRESS elif buf.startswith(MATCH_RESULT): yield Format(FORMAT_RESULT), NO_ADDRESS + elif ( + buf.replace(b" ", b"").replace(b"\r", b"").replace(b"\n", b"").replace(b"\t", b"").startswith(MATCH_JSON_OBJECT) + ): + # potential start of JSON object data - `{"` without whitespace (\r\n\t) + # we don't know what it is exactly, but may support it (e.g. a dynamic CAPE sandbox report) + # skip verdict here and let subsequent code analyze this further + return else: # we likely end up here: # 1. handling a file format (e.g. 
macho) From 1b9c897f2372d156e063d9c63a3e7035ea61085d Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 19 Oct 2023 09:49:41 +0200 Subject: [PATCH 2/6] adjust error messages --- capa/helpers.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/capa/helpers.py b/capa/helpers.py index a093ef662..45fac5bfe 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -147,11 +147,9 @@ def new_print(*args, **kwargs): def log_unsupported_format_error(): logger.error("-" * 80) - logger.error(" Input file does not appear to be a PE or ELF file.") + logger.error(" Input file does not appear to be a supported file.") logger.error(" ") - logger.error( - " capa currently only supports analyzing PE and ELF files (or shellcode, when using --format sc32|sc64)." - ) + logger.error(" See all supported file formats via capa's help output (-h).") logger.error(" If you don't know the input file type, you can try using the `file` utility to guess it.") logger.error("-" * 80) @@ -160,7 +158,7 @@ def log_unsupported_cape_report_error(error: str): logger.error("-" * 80) logger.error("Input file is not a valid CAPE report: %s", error) logger.error(" ") - logger.error(" capa currently only supports analyzing standard CAPE json reports.") + logger.error(" capa currently only supports analyzing standard CAPE reports in JSON format.") logger.error( " Please make sure your report file is in the standard format and contains both the static and dynamic sections." 
) From 58302fbff6a23ad81b4ee34c1216b6aae8756a04 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 19 Oct 2023 09:50:00 +0200 Subject: [PATCH 3/6] fix typo --- doc/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/installation.md b/doc/installation.md index c178edf52..57c939c2b 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -105,7 +105,7 @@ To install these development dependencies, run: We use [pre-commit](https://pre-commit.com/) so that its trivial to run the same linters & configuration locally as in CI. -Run all linters liks: +Run all linters like: ❯ pre-commit run --hook-stage=manual --all-files isort....................................................................Passed From dfe6e2898b5ba264af369e232ff4139305c421d9 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 19 Oct 2023 11:59:33 +0200 Subject: [PATCH 4/6] improve JSON start check --- capa/features/extractors/common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index cad0912ef..df28add30 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -6,6 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import io +import re import logging import binascii import contextlib @@ -64,10 +65,8 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]: yield Format(FORMAT_FREEZE), NO_ADDRESS elif buf.startswith(MATCH_RESULT): yield Format(FORMAT_RESULT), NO_ADDRESS - elif ( - buf.replace(b" ", b"").replace(b"\r", b"").replace(b"\n", b"").replace(b"\t", b"").startswith(MATCH_JSON_OBJECT) - ): - # potential start of JSON object data - `{"` without whitespace (\r\n\t) + elif re.sub(b"\w", b"", buf[:20]).startswith(MATCH_JSON_OBJECT): + # potential start of JSON object data without whitespace # we don't know what it is exactly, but may support it (e.g. a dynamic CAPE sandbox report) # skip verdict here and let subsequent code analyze this further return From 7a49a6c564e15ffb5b8ec3fb1f8e330ac48822fa Mon Sep 17 00:00:00 2001 From: mr-tz Date: Thu, 19 Oct 2023 13:55:17 +0200 Subject: [PATCH 5/6] fix rule syntax --- tests/test_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index 0d2dc6f04..730ac77cf 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -411,8 +411,8 @@ def test_com_feature_matching(z395eb_extractor): meta: name: initialize IWebBrowser2 scopes: - - static: basic block - - dynamic: unsupported + static: basic block + dynamic: unsupported features: - and: - api: ole32.CoCreateInstance From d02d92ede8c6b67e85282b2ebd729bf8d374c218 Mon Sep 17 00:00:00 2001 From: Moritz Date: Thu, 19 Oct 2023 13:57:25 +0200 Subject: [PATCH 6/6] Update capa/features/extractors/common.py --- capa/features/extractors/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index df28add30..e318a141b 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -65,7 +65,7 @@ def extract_format(buf) -> Iterator[Tuple[Feature, Address]]: yield Format(FORMAT_FREEZE), NO_ADDRESS elif 
buf.startswith(MATCH_RESULT): yield Format(FORMAT_RESULT), NO_ADDRESS - elif re.sub(b"\w", b"", buf[:20]).startswith(MATCH_JSON_OBJECT): + elif re.sub(rb"\s", b"", buf[:20]).startswith(MATCH_JSON_OBJECT): # potential start of JSON object data without whitespace # we don't know what it is exactly, but may support it (e.g. a dynamic CAPE sandbox report) # skip verdict here and let subsequent code analyze this further