diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 969443e8c3..7d706d6e53 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -57,7 +57,7 @@ When we make a significant decision in how we maintain the project and what we c we will document it in the [capa issues tracker](https://github.com/mandiant/capa/issues). This is the best place review our discussions about what/how/why we do things in the project. If you have a question, check to see if it is documented there. -If it is *not* documented there, or you can't find an answer, please open a issue. +If it is *not* documented there, or you can't find an answer, please open an issue. We'll link to existing issues when appropriate to keep discussions in one place. ## How Can I Contribute? diff --git a/.github/pyinstaller/hooks/hook-vivisect.py b/.github/pyinstaller/hooks/hook-vivisect.py index 8038b71461..680ba380e8 100644 --- a/.github/pyinstaller/hooks/hook-vivisect.py +++ b/.github/pyinstaller/hooks/hook-vivisect.py @@ -24,7 +24,7 @@ "pyqtwebengine", # the above are imported by these viv modules. # so really, we'd want to exclude these submodules of viv. - # but i dont think this works. + # but i don't think this works. "vqt", "vdb.qt", "envi.qt", diff --git a/README.md b/README.md index 9c387cdcb5..a50c90a250 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ function @ 0x4011C0 ... ``` -Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capabilty extraction. +Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capability extraction. In order to use this, you first submit your sample to CAPE for analysis, and then run capa against the generated report (JSON). Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary: diff --git a/capa/capabilities/dynamic.py b/capa/capabilities/dynamic.py index 23bfde4acb..9cc8e2f455 100644 --- a/capa/capabilities/dynamic.py +++ b/capa/capabilities/dynamic.py @@ -65,7 +65,7 @@ def find_thread_capabilities( features: FeatureSet = collections.defaultdict(set) # matches found at the call scope. - # might be found at different calls, thats ok. + # might be found at different calls, that's ok. call_matches: MatchResults = collections.defaultdict(list) for ch in extractor.get_calls(ph, th): @@ -103,11 +103,11 @@ def find_process_capabilities( process_features: FeatureSet = collections.defaultdict(set) # matches found at the basic threads. - # might be found at different threads, thats ok. + # might be found at different threads, that's ok. thread_matches: MatchResults = collections.defaultdict(list) # matches found at the call scope. - # might be found at different calls, thats ok. + # might be found at different calls, that's ok. call_matches: MatchResults = collections.defaultdict(list) for th in extractor.get_threads(ph): diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py index a522a29da8..8b213fdb67 100644 --- a/capa/capabilities/static.py +++ b/capa/capabilities/static.py @@ -66,7 +66,7 @@ def find_basic_block_capabilities( features: FeatureSet = collections.defaultdict(set) # matches found at the instruction scope. - # might be found at different instructions, thats ok. + # might be found at different instructions, that's ok. insn_matches: MatchResults = collections.defaultdict(list) for insn in extractor.get_instructions(f, bb): @@ -106,11 +106,11 @@ def find_code_capabilities( function_features: FeatureSet = collections.defaultdict(set) # matches found at the basic block scope. - # might be found at different basic blocks, thats ok. + # might be found at different basic blocks, that's ok. bb_matches: MatchResults = collections.defaultdict(list) # matches found at the instruction scope. - # might be found at different instructions, thats ok. + # might be found at different instructions, that's ok. insn_matches: MatchResults = collections.defaultdict(list) for bb in extractor.get_basic_blocks(fh): diff --git a/capa/features/address.py b/capa/features/address.py index 0edf4cec24..04906ac318 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -93,7 +93,7 @@ def __lt__(self, other): class DynamicCallAddress(Address): - """addesses a call in a dynamic execution trace""" + """addresses a call in a dynamic execution trace""" def __init__(self, thread: ThreadAddress, id: int): assert id >= 0 diff --git a/capa/features/common.py b/capa/features/common.py index b6527625f2..a046282535 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -227,7 +227,7 @@ def evaluate(self, ctx, short_circuit=True): if self.value in feature.value: matches[feature.value].update(locations) if short_circuit: - # we found one matching string, thats sufficient to match. + # we found one matching string, that's sufficient to match. # don't collect other matching strings in this mode. break @@ -322,7 +322,7 @@ def evaluate(self, ctx, short_circuit=True): if self.re.search(feature.value): matches[feature.value].update(locations) if short_circuit: - # we found one matching string, thats sufficient to match. + # we found one matching string, that's sufficient to match. # don't collect other matching strings in this mode. break diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index 6252d74700..57490a1b16 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -75,7 +75,7 @@ class BBHandle: @dataclass class InsnHandle: - """reference to a instruction recognized by a feature extractor. + """reference to an instruction recognized by a feature extractor. Attributes: address: the address of the instruction address. diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 79db9272d4..c90a31b5cd 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -46,7 +46,7 @@ class FlexibleModel(BaseModel): # use this type to indicate that we won't model this data. -# because its not relevant to our use in capa. +# because it's not relevant to our use in capa. # # while its nice to have full coverage of the data shape, # it can easily change and break our parsing. @@ -356,8 +356,8 @@ class Behavior(ExactModel): anomaly: List[str] encryptedbuffers: List[EncryptedBuffer] # these are small objects that describe atomic events, - # like file move, registery access. - # we'll detect the same with our API call analyis. + # like file move, registry access. + # we'll detect the same with our API call analysis. enhanced: Skip = None diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 1e50ca2f72..2b38392790 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -206,7 +206,7 @@ def _parse(self): 15: OS.AROS, 16: OS.FENIXOS, 17: OS.CLOUD, - # 53: "SORTFIX", # i can't find any reference to this OS, i dont think it exists + # 53: "SORTFIX", # i can't find any reference to this OS, i don't think it exists # 64: "ARM_AEABI", # not an OS # 97: "ARM", # not an OS # 255: "STANDALONE", # not an OS diff --git a/capa/features/extractors/ghidra/helpers.py b/capa/features/extractors/ghidra/helpers.py index e6bee6643c..22e0ed6d42 100644 --- a/capa/features/extractors/ghidra/helpers.py +++ b/capa/features/extractors/ghidra/helpers.py @@ -260,7 +260,7 @@ def dereference_ptr(insn: ghidra.program.database.code.InstructionDB): if thfunc and thfunc.isThunk(): return handle_thunk(to_deref) else: - # if it doesn't poin to a thunk, it's usually a jmp to a label + # if it doesn't point to a thunk, it's usually a jmp to a label return to_deref if not dat: return to_deref diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7b88dd2de0..e497c82831 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -113,7 +113,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if f.vw.metadata["Format"] == "elf": if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running - # this code everytime the call is made, thus preventing the computational overhead. + # this code every time the call is made, thus preventing the computational overhead. try: fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin) except Exception: @@ -598,7 +598,7 @@ def extract_op_number_features( if f.vw.probeMemory(v, 1, envi.memory.MM_READ): # this is a valid address - # assume its not also a constant. + # assume it's not also a constant. return if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP: diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index 44d40cd192..3b826c124f 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -382,7 +382,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: address=Address.from_capa(addr), feature=feature_from_capa(feature), ) # type: ignore - # Mypy is unable to recognise `basic_block` as a argument due to alias + # Mypy is unable to recognise `basic_block` as an argument due to alias for feature, addr in extractor.extract_basic_block_features(f, bb) ] @@ -419,7 +419,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: features=tuple(ffeatures), basic_blocks=basic_blocks, ) # type: ignore - # Mypy is unable to recognise `basic_blocks` as a argument due to alias + # Mypy is unable to recognise `basic_blocks` as an argument due to alias ) features = StaticFeatures( @@ -427,7 +427,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: file=tuple(file_features), functions=tuple(function_features), ) # type: ignore - # Mypy is unable to recognise `global_` as a argument due to alias + # Mypy is unable to recognise `global_` as an argument due to alias freeze = Freeze( version=CURRENT_VERSION, @@ -437,7 +437,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: extractor=Extractor(name=extractor.__class__.__name__), features=features, ) # type: ignore - # Mypy is unable to recognise `base_address` as a argument due to alias + # Mypy is unable to recognise `base_address` as an argument due to alias return freeze.model_dump_json() diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index dd0b1f2ff8..70cf026fc6 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -132,7 +132,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.file.Import): assert isinstance(f.value, str) return ImportFeature(import_=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `import_` as a argument due to alias + # Mypy is unable to recognise `import_` as an argument due to alias elif isinstance(f, capa.features.file.Section): assert isinstance(f.value, str) @@ -141,7 +141,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.file.FunctionName): assert isinstance(f.value, str) return FunctionNameFeature(function_name=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `function_name` as a argument due to alias + # Mypy is unable to recognise `function_name` as an argument due to alias # must come before check for String due to inheritance elif isinstance(f, capa.features.common.Substring): @@ -160,7 +160,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.common.Class): assert isinstance(f.value, str) return ClassFeature(class_=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `class_` as a argument due to alias + # Mypy is unable to recognise `class_` as an argument due to alias elif isinstance(f, capa.features.common.Namespace): assert isinstance(f.value, str) @@ -197,12 +197,12 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.insn.OperandNumber): assert isinstance(f.value, int) return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `operand_number` as a argument due to alias + # Mypy is unable to recognise `operand_number` as an argument due to alias elif isinstance(f, capa.features.insn.OperandOffset): assert isinstance(f.value, int) return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `operand_offset` as a argument due to alias + # Mypy is unable to recognise `operand_offset` as an argument due to alias else: raise NotImplementedError(f"feature_from_capa({type(f)}) not implemented") diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md index f4b80edce3..050d3a38e7 100644 --- a/capa/ghidra/README.md +++ b/capa/ghidra/README.md @@ -24,7 +24,7 @@ Comments are added at the beginning of matched functions indicating matched capa ### Bookmarks -Bookmarks are added to functions that matched a capabilitiy that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window. +Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.
diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index bbb8287a25..6b37d6ac22 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -764,7 +764,7 @@ def load_features_from_yaml(self, rule_text, update_preview=False): node = self.make_child_node_from_feature(parent, parse_yaml_line(line.strip())) - # append our new node in case its a parent for another node + # append our new node in case it's a parent for another node if node: stack.append(node) diff --git a/capa/main.py b/capa/main.py index 16d6d3cbab..132170f436 100644 --- a/capa/main.py +++ b/capa/main.py @@ -246,7 +246,7 @@ def install_common_args(parser, wanted=None): if "backend" in wanted: backends = [ - (BACKEND_AUTO, "(default) detect apppropriate backend automatically"), + (BACKEND_AUTO, "(default) detect appropriate backend automatically"), (BACKEND_VIV, "vivisect"), (BACKEND_PEFILE, "pefile (file features only)"), (BACKEND_BINJA, "Binary Ninja"), @@ -317,7 +317,7 @@ def install_common_args(parser, wanted=None): # Library code should *not* call these functions. # # These main routines may raise `ShouldExitError` to indicate the program -# ...should exit. Its a tiny step away from doing `sys.exit()` directly. +# ...should exit. It's a tiny step away from doing `sys.exit()` directly. # I'm not sure if we should just do that. In the meantime, programs should # handle `ShouldExitError` and pass the status code to `sys.exit()`. # diff --git a/capa/optimizer.py b/capa/optimizer.py index 0ebee84445..9514a32dae 100644 --- a/capa/optimizer.py +++ b/capa/optimizer.py @@ -44,7 +44,7 @@ def get_node_cost(node): else: # this should be all hash-lookup features. - # we give this a arbitrary weight of 1. + # we give this an arbitrary weight of 1. # the only thing more "important" than this is checking OS/Arch/Format. return 1 diff --git a/capa/render/result_document.py b/capa/render/result_document.py index ce95245e02..c637cd256e 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -306,7 +306,7 @@ class Match(FrozenModel): args: success: did the node match? node: the logic node or feature node. - children: any children of the logic node. not relevent for features, can be empty. + children: any children of the logic node. not relevant for features, can be empty. locations: where the feature matched. not relevant for logic nodes (except range), can be empty. captures: captured values from the string/regex feature, and the locations of those values. """ @@ -418,7 +418,7 @@ def from_capa( # doc[locations] contains all matches for the given namespace. # for example, the feature might be `match: anti-analysis/packer` # which matches against "generic unpacker" and "UPX". - # in this case, doc[locations] contains locations for *both* of thse. + # in this case, doc[locations] contains locations for *both* of those. # # rule_matches contains the matches for the specific rule. # this is a subset of doc[locations]. @@ -459,7 +459,7 @@ def to_capa(self, rules_by_name: Dict[str, capa.rules.Rule]) -> capa.engine.Resu # # children contains a single tree of results, corresponding to the logic of the matched rule. # self.node.feature.match contains the name of the rule that was matched. - # so its all available to reconstruct, if necessary. + # so it's all available to reconstruct, if necessary. return capa.features.common.Result( success=self.success, diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 3498d24b84..19cd3d94cb 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -48,7 +48,7 @@ def hanging_indent(s: str, indent: int) -> str: def render_locations(ostream, layout: rd.Layout, locations: Iterable[frz.Address], indent: int): import capa.render.verbose as v - # its possible to have an empty locations array here, + # it's possible to have an empty locations array here, # such as when we're in MODE_FAILURE and showing the logic # under a `not` statement (which will have no matched locations). locations = sorted(locations) diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 530c8424c6..68f3b0580e 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -867,14 +867,14 @@ def rec(statement): # we'll give precedence to namespaces, and then assume if that does work, # that it must be a rule name. # - # we don't expect any collisions between namespaces and rule names, but its possible. + # we don't expect any collisions between namespaces and rule names, but it's possible. # most likely would be collision between top level namespace (e.g. `host-interaction`) and rule name. # but, namespaces tend to use `-` while rule names use ` `. so, unlikely, but possible. if statement.value in namespaces: # matches a namespace, so take precedence and don't even check rule names. deps.update(r.name for r in namespaces[statement.value]) else: - # not a namespace, assume its a rule name. + # not a namespace, assume it's a rule name. assert isinstance(statement.value, str) deps.add(statement.value) @@ -934,7 +934,7 @@ def _extract_subscope_rules_rec(self, statement): # now recurse to other nodes in the logic tree. # note: we cannot recurse into the subscope sub-tree, - # because its been replaced by a `match` statement. + # because it's been replaced by a `match` statement. for child in statement.get_children(): yield from self._extract_subscope_rules_rec(child) @@ -1224,7 +1224,7 @@ def get_rules_and_dependencies(rules: List[Rule], rule_name: str) -> Iterator[Ru """ from the given collection of rules, select a rule and its dependencies (transitively). """ - # we evaluate `rules` multiple times, so if its a generator, realize it into a list. + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. rules = list(rules) namespaces = index_rules_by_namespace(rules) rules_by_name = {rule.name: rule for rule in rules} @@ -1257,7 +1257,7 @@ def ensure_rule_dependencies_are_met(rules: List[Rule]) -> None: raises: InvalidRule: if a dependency is not met. """ - # we evaluate `rules` multiple times, so if its a generator, realize it into a list. + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. rules = list(rules) namespaces = index_rules_by_namespace(rules) rules_by_name = {rule.name: rule for rule in rules} @@ -1304,7 +1304,7 @@ def topologically_order_rules(rules: List[Rule]) -> List[Rule]: assumes that the rule dependency graph is a DAG. """ - # we evaluate `rules` multiple times, so if its a generator, realize it into a list. + # we evaluate `rules` multiple times, so if it's a generator, realize it into a list. rules = list(rules) namespaces = index_rules_by_namespace(rules) rules_by_name = {rule.name: rule for rule in rules} @@ -1465,7 +1465,7 @@ def rec(rule_name: str, node: Union[Feature, Statement]): # # they're global, so if they match at one location in a file, # they'll match at every location in a file. - # so thats not helpful to decide how to downselect. + # so that's not helpful to decide how to downselect. # # and, a global rule will never be the sole selector in a rule. pass @@ -1535,10 +1535,10 @@ def rec(rule_name: str, node: Union[Feature, Statement]): rec(rule_name, root) # if a rule has a hard feature, - # dont consider it easy, and therefore, + # don't consider it easy, and therefore, # don't index any of its features. # - # otherwise, its an easy rule, and index its features + # otherwise, it's an easy rule, and index its features for rules_with_feature in rules_by_feature.values(): rules_with_feature.difference_update(rules_with_hard_features) easy_rules_by_feature = rules_by_feature diff --git a/rules b/rules index ce3e6d74b1..e0d5e95a82 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit ce3e6d74b1526bacd370d1c4001ff844876e3edc +Subproject commit e0d5e95a82375f887e1d4682aefdcf39f963d2c2 diff --git a/scripts/capa2yara.py b/scripts/capa2yara.py index b1adb3625d..9badf2cc93 100644 --- a/scripts/capa2yara.py +++ b/scripts/capa2yara.py @@ -194,7 +194,7 @@ def do_statement(s_type, kid): yara_strings += "\t$" + var_name + ' = "' + string + '" ascii wide' + convert_description(kid) + "\n" yara_condition += "\t$" + var_name + " " elif s_type == "api" or s_type == "import": - # research needed to decide if its possible in YARA to make a difference between api & import? + # research needed to decide if it's possible in YARA to make a difference between api & import? # https://github.com/mandiant/capa-rules/blob/master/doc/format.md#api api = kid.value @@ -289,7 +289,7 @@ def do_statement(s_type, kid): logger.info("too short for byte search (until I figure out how to do it properly): %r", number) return "BREAK", "Number too short" - # there's just one rule which contains 0xFFFFFFF but yara gives a warning if if used + # there's just one rule which contains 0xFFFFFFF but yara gives a warning if used if number == "0xFFFFFFFF": return "BREAK", "slow byte pattern for YARA search" diff --git a/tests/fixtures.py b/tests/fixtures.py index ce21d7db1e..3b36b77603 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -775,7 +775,7 @@ def parametrize(params, values, **kwargs): ("kernel32", "file", capa.features.file.Export("lstrlenW"), True), ("kernel32", "file", capa.features.file.Export("nope"), False), # forwarded export - ("ea2876", "file", capa.features.file.Export("vresion.GetFileVersionInfoA"), True), + ("ea2876", "file", capa.features.file.Export("version.GetFileVersionInfoA"), True), # file/imports ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True), ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True), diff --git a/tests/test_ghidra_features.py b/tests/test_ghidra_features.py index dc51aef452..595cf4dc01 100644 --- a/tests/test_ghidra_features.py +++ b/tests/test_ghidra_features.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. """ -Must invoke this script from within the Ghidra Runtime Enviornment +Must invoke this script from within the Ghidra Runtime Environment """ import sys import logging