diff --git a/.github/pyinstaller/pyinstaller.spec b/.github/pyinstaller/pyinstaller.spec index f103ba16e..a7c379d25 100644 --- a/.github/pyinstaller/pyinstaller.spec +++ b/.github/pyinstaller/pyinstaller.spec @@ -17,6 +17,7 @@ a = Analysis( # when invoking pyinstaller from the project root, # this gets invoked from the directory of the spec file, # i.e. ./.github/pyinstaller + ("../../assets", "assets"), ("../../rules", "rules"), ("../../sigs", "sigs"), ("../../cache", "cache"), diff --git a/CHANGELOG.md b/CHANGELOG.md index 33d141f5d..634dc4418 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff - binja: add support for forwarded exports #1646 @xusheng6 - binja: add support for symtab names #1504 @xusheng6 +- add com class/interface features #322 @Aayush-goel-04 ### Breaking Changes @@ -1606,4 +1607,4 @@ Download a standalone binary below and checkout the readme [here on GitHub](http ### Raw diffs - [capa v1.0.0...v1.1.0](https://github.com/mandiant/capa/compare/v1.0.0...v1.1.0) - - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0) + - [capa-rules v1.0.0...v1.1.0](https://github.com/mandiant/capa-rules/compare/v1.0.0...v1.1.0) \ No newline at end of file diff --git a/assets/classes.json.gz b/assets/classes.json.gz new file mode 100644 index 000000000..dbebcb22c Binary files /dev/null and b/assets/classes.json.gz differ diff --git a/assets/interfaces.json.gz b/assets/interfaces.json.gz new file mode 100644 index 000000000..ae68a33da Binary files /dev/null and b/assets/interfaces.json.gz differ diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 9b8af10b8..e715ae863 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -8,6 +8,8 @@ import io import re +import gzip +import json import uuid import codecs import logging @@ -326,6 +328,68 @@ def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Stat raise InvalidRule(f"feature {feature} not supported for scopes {scopes}") +class ComType(Enum): + CLASS = "class" + INTERFACE = "interface" + + +# COM data source https://github.com/stevemk14ebr/COM-Code-Helper/tree/master +VALID_COM_TYPES = { + ComType.CLASS: {"db_path": "assets/classes.json.gz", "prefix": "CLSID_"}, + ComType.INTERFACE: {"db_path": "assets/interfaces.json.gz", "prefix": "IID_"}, +} + + +@lru_cache(maxsize=None) +def load_com_database(com_type: ComType) -> Dict[str, List[str]]: + com_db_path: Path = capa.main.get_default_root() / VALID_COM_TYPES[com_type]["db_path"] + + if not com_db_path.exists(): + raise IOError(f"COM database path '{com_db_path}' does not exist or cannot be accessed") + + try: + with gzip.open(com_db_path, "rb") as gzfile: + return json.loads(gzfile.read().decode("utf-8")) + except Exception as e: + raise IOError(f"Error loading COM database from '{com_db_path}'") from e + + +def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Or: + com_db = load_com_database(com_type) + guid_strings: Optional[List[str]] = com_db.get(com_name) + if guid_strings is None or len(guid_strings) == 0: + logger.error(" %s doesn't exist in COM %s database", com_name, com_type) + raise InvalidRule(f"'{com_name}' doesn't exist in COM {com_type} database") + + com_features: List = [] + for guid_string in guid_strings: + hex_chars = guid_string.replace("-", "") + h = [hex_chars[i : i + 2] for i in range(0, len(hex_chars), 2)] + reordered_hex_pairs = [ + h[3], + h[2], + h[1], + h[0], + h[5], + h[4], + h[7], + h[6], + h[8], + h[9], + h[10], + h[11], + h[12], + h[13], + h[14], + h[15], + ] + guid_bytes = bytes.fromhex("".join(reordered_hex_pairs)) + prefix = VALID_COM_TYPES[com_type]["prefix"] + com_features.append(capa.features.common.StringFactory(guid_string, f"{prefix+com_name} as GUID string")) + com_features.append(capa.features.common.Bytes(guid_bytes, f"{prefix+com_name} as bytes")) + return ceng.Or(com_features) + + def parse_int(s: str) -> int: if s.startswith("0x"): return int(s, 0x10) @@ -742,6 +806,13 @@ def build_statements(d, scopes: Scopes): ensure_feature_valid_for_scopes(scopes, feature) return feature + elif key.startswith("com/"): + com_type = str(key[len("com/") :]).upper() + if com_type not in [item.name for item in ComType]: + raise InvalidRule(f"unexpected COM type: {com_type}") + value, description = parse_description(d[key], key, d.get("description")) + return translate_com_feature(value, ComType[com_type]) + else: Feature = parse_feature(key) value, description = parse_description(d[key], key, d.get("description")) diff --git a/rules b/rules index c88979a1b..94de0355c 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit c88979a1bcc9bc325810c022c9044fca64960d6c +Subproject commit 94de0355cde729b13b4313377d27f17a3ddf2567 diff --git a/tests/fixtures.py b/tests/fixtures.py index c32edfabc..3c7b007d1 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -318,6 +318,8 @@ def get_data_path_by_name(name) -> Path: return CD / "data" / "499c2a85f6e8142c3f48d4251c9c7cd6.raw32" elif name.startswith("9324d"): return CD / "data" / "9324d1a8ae37a36ae560c37448c9705a.exe_" + elif name.startswith("395eb"): + return CD / "data" / "395eb0ddd99d2c9e37b6d0b73485ee9c.exe_" elif name.startswith("a1982"): return CD / "data" / "a198216798ca38f280dc413f8c57f2c2.exe_" elif name.startswith("a933a"): @@ -1346,6 +1348,11 @@ def z9324d_extractor(): return get_extractor(get_data_path_by_name("9324d...")) +@pytest.fixture +def z395eb_extractor(): + return get_extractor(get_data_path_by_name("395eb...")) + + @pytest.fixture def pma12_04_extractor(): return get_extractor(get_data_path_by_name("pma12-04")) diff --git a/tests/test_main.py b/tests/test_main.py index 8caae9322..16f61ce53 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -401,6 +401,30 @@ def test_byte_matching(z9324d_extractor): assert "byte match test" in capabilities +def test_com_feature_matching(z395eb_extractor): + rules = capa.rules.RuleSet( + [ + capa.rules.Rule.from_yaml( + textwrap.dedent( + """ + rule: + meta: + name: initialize IWebBrowser2 + scope: basic block + features: + - and: + - api: ole32.CoCreateInstance + - com/class: InternetExplorer #bytes: 01 DF 02 00 00 00 00 00 C0 00 00 00 00 00 00 46 = CLSID_InternetExplorer + - com/interface: IWebBrowser2 #bytes: 61 16 0C D3 AF CD D0 11 8A 3E 00 C0 4F C9 E2 6E = IID_IWebBrowser2 + """ + ) + ) + ] + ) + capabilities, meta = capa.main.find_capabilities(rules, z395eb_extractor) + assert "initialize IWebBrowser2" in capabilities + + def test_count_bb(z9324d_extractor): rules = capa.rules.RuleSet( [ diff --git a/tests/test_rules.py b/tests/test_rules.py index dffaf577d..b6c9a9c17 100644 --- a/tests/test_rules.py +++ b/tests/test_rules.py @@ -1531,3 +1531,72 @@ def test_property_access_symbol(): ) is True ) + + +def test_translate_com_features(): + r = capa.rules.Rule.from_yaml( + textwrap.dedent( + """ + rule: + meta: + name: test rule + features: + - com/class: WICPngDecoder + # 389ea17b-5078-4cde-b6ef-25c15175c751 WICPngDecoder + # e018945b-aa86-4008-9bd4-6777a1e40c11 WICPngDecoder + """ + ) + ) + com_name = "WICPngDecoder" + com_features = [ + capa.features.common.Bytes(b"{\xa1\x9e8xP\xdeL\xb6\xef%\xc1Qu\xc7Q", f"CLSID_{com_name} as bytes"), + capa.features.common.StringFactory("389ea17b-5078-4cde-b6ef-25c15175c751", f"CLSID_{com_name} as GUID string"), + capa.features.common.Bytes(b"[\x94\x18\xe0\x86\xaa\x08@\x9b\xd4gw\xa1\xe4\x0c\x11", f"IID_{com_name} as bytes"), + capa.features.common.StringFactory("e018945b-aa86-4008-9bd4-6777a1e40c11", f"IID_{com_name} as GUID string"), + ] + assert set(com_features) == set(r.statement.get_children()) + + +def test_invalid_com_features(): + # test for unknown COM class + with pytest.raises(capa.rules.InvalidRule): + _ = capa.rules.Rule.from_yaml( + textwrap.dedent( + """ + rule: + meta: + name: test rule + features: + - com/class: invalid_com + """ + ) + ) + + # test for unknown COM interface + with pytest.raises(capa.rules.InvalidRule): + _ = capa.rules.Rule.from_yaml( + textwrap.dedent( + """ + rule: + meta: + name: test rule + features: + - com/interface: invalid_com + """ + ) + ) + + # test for invalid COM type + # valid_com_types = "class", "interface" + with pytest.raises(capa.rules.InvalidRule): + _ = capa.rules.Rule.from_yaml( + textwrap.dedent( + """ + rule: + meta: + name: test rule + features: + - com/invalid_COM_type: WICPngDecoder + """ + ) + )