diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 969443e8c3..7d706d6e53 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -57,7 +57,7 @@ When we make a significant decision in how we maintain the project and what we c we will document it in the [capa issues tracker](https://github.com/mandiant/capa/issues). This is the best place review our discussions about what/how/why we do things in the project. If you have a question, check to see if it is documented there. -If it is *not* documented there, or you can't find an answer, please open a issue. +If it is *not* documented there, or you can't find an answer, please open an issue. We'll link to existing issues when appropriate to keep discussions in one place. ## How Can I Contribute? diff --git a/.github/pyinstaller/hooks/hook-vivisect.py b/.github/pyinstaller/hooks/hook-vivisect.py index 8038b71461..680ba380e8 100644 --- a/.github/pyinstaller/hooks/hook-vivisect.py +++ b/.github/pyinstaller/hooks/hook-vivisect.py @@ -24,7 +24,7 @@ "pyqtwebengine", # the above are imported by these viv modules. # so really, we'd want to exclude these submodules of viv. - # but i dont think this works. + # but i don't think this works. "vqt", "vdb.qt", "envi.qt", diff --git a/README.md b/README.md index 9c387cdcb5..a50c90a250 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ function @ 0x4011C0 ... ``` -Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capabilty extraction. +Additionally, capa also supports analyzing [CAPE](https://github.com/kevoreilly/CAPEv2) sandbox reports for dynamic capability extraction. In order to use this, you first submit your sample to CAPE for analysis, and then run capa against the generated report (JSON). Here's an example of running capa against a packed binary, and then running capa against the CAPE report of that binary: diff --git a/capa/capabilities/dynamic.py b/capa/capabilities/dynamic.py index 23bfde4acb..9cc8e2f455 100644 --- a/capa/capabilities/dynamic.py +++ b/capa/capabilities/dynamic.py @@ -65,7 +65,7 @@ def find_thread_capabilities( features: FeatureSet = collections.defaultdict(set) # matches found at the call scope. - # might be found at different calls, thats ok. + # might be found at different calls, that's ok. call_matches: MatchResults = collections.defaultdict(list) for ch in extractor.get_calls(ph, th): @@ -103,11 +103,11 @@ def find_process_capabilities( process_features: FeatureSet = collections.defaultdict(set) # matches found at the basic threads. - # might be found at different threads, thats ok. + # might be found at different threads, that's ok. thread_matches: MatchResults = collections.defaultdict(list) # matches found at the call scope. - # might be found at different calls, thats ok. + # might be found at different calls, that's ok. call_matches: MatchResults = collections.defaultdict(list) for th in extractor.get_threads(ph): diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py index a522a29da8..8b213fdb67 100644 --- a/capa/capabilities/static.py +++ b/capa/capabilities/static.py @@ -66,7 +66,7 @@ def find_basic_block_capabilities( features: FeatureSet = collections.defaultdict(set) # matches found at the instruction scope. - # might be found at different instructions, thats ok. + # might be found at different instructions, that's ok. insn_matches: MatchResults = collections.defaultdict(list) for insn in extractor.get_instructions(f, bb): @@ -106,11 +106,11 @@ def find_code_capabilities( function_features: FeatureSet = collections.defaultdict(set) # matches found at the basic block scope. - # might be found at different basic blocks, thats ok. + # might be found at different basic blocks, that's ok. bb_matches: MatchResults = collections.defaultdict(list) # matches found at the instruction scope. - # might be found at different instructions, thats ok. + # might be found at different instructions, that's ok. insn_matches: MatchResults = collections.defaultdict(list) for bb in extractor.get_basic_blocks(fh): diff --git a/capa/features/address.py b/capa/features/address.py index 0edf4cec24..04906ac318 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -93,7 +93,7 @@ def __lt__(self, other): class DynamicCallAddress(Address): - """addesses a call in a dynamic execution trace""" + """addresses a call in a dynamic execution trace""" def __init__(self, thread: ThreadAddress, id: int): assert id >= 0 diff --git a/capa/features/common.py b/capa/features/common.py index b6527625f2..a046282535 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -227,7 +227,7 @@ def evaluate(self, ctx, short_circuit=True): if self.value in feature.value: matches[feature.value].update(locations) if short_circuit: - # we found one matching string, thats sufficient to match. + # we found one matching string, that's sufficient to match. # don't collect other matching strings in this mode. break @@ -322,7 +322,7 @@ def evaluate(self, ctx, short_circuit=True): if self.re.search(feature.value): matches[feature.value].update(locations) if short_circuit: - # we found one matching string, thats sufficient to match. + # we found one matching string, that's sufficient to match. # don't collect other matching strings in this mode. break diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index 6252d74700..57490a1b16 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -75,7 +75,7 @@ class BBHandle: @dataclass class InsnHandle: - """reference to a instruction recognized by a feature extractor. + """reference to an instruction recognized by a feature extractor. Attributes: address: the address of the instruction address. diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 79db9272d4..c90a31b5cd 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -46,7 +46,7 @@ class FlexibleModel(BaseModel): # use this type to indicate that we won't model this data. -# because its not relevant to our use in capa. +# because it's not relevant to our use in capa. # # while its nice to have full coverage of the data shape, # it can easily change and break our parsing. @@ -356,8 +356,8 @@ class Behavior(ExactModel): anomaly: List[str] encryptedbuffers: List[EncryptedBuffer] # these are small objects that describe atomic events, - # like file move, registery access. - # we'll detect the same with our API call analyis. + # like file move, registry access. + # we'll detect the same with our API call analysis. enhanced: Skip = None diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 1e50ca2f72..2b38392790 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -206,7 +206,7 @@ def _parse(self): 15: OS.AROS, 16: OS.FENIXOS, 17: OS.CLOUD, - # 53: "SORTFIX", # i can't find any reference to this OS, i dont think it exists + # 53: "SORTFIX", # i can't find any reference to this OS, i don't think it exists # 64: "ARM_AEABI", # not an OS # 97: "ARM", # not an OS # 255: "STANDALONE", # not an OS diff --git a/capa/features/extractors/ghidra/helpers.py b/capa/features/extractors/ghidra/helpers.py index e6bee6643c..22e0ed6d42 100644 --- a/capa/features/extractors/ghidra/helpers.py +++ b/capa/features/extractors/ghidra/helpers.py @@ -260,7 +260,7 @@ def dereference_ptr(insn: ghidra.program.database.code.InstructionDB): if thfunc and thfunc.isThunk(): return handle_thunk(to_deref) else: - # if it doesn't poin to a thunk, it's usually a jmp to a label + # if it doesn't point to a thunk, it's usually a jmp to a label return to_deref if not dat: return to_deref diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 7b88dd2de0..e497c82831 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -113,7 +113,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterato if f.vw.metadata["Format"] == "elf": if "symtab" not in fh.ctx["cache"]: # the symbol table gets stored as a function's attribute in order to avoid running - # this code everytime the call is made, thus preventing the computational overhead. + # this code every time the call is made, thus preventing the computational overhead. try: fh.ctx["cache"]["symtab"] = SymTab.from_viv(f.vw.parsedbin) except Exception: @@ -598,7 +598,7 @@ def extract_op_number_features( if f.vw.probeMemory(v, 1, envi.memory.MM_READ): # this is a valid address - # assume its not also a constant. + # assume it's not also a constant. return if insn.mnem == "add" and insn.opers[0].isReg() and insn.opers[0].reg == envi.archs.i386.regs.REG_ESP: diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index 44d40cd192..3b826c124f 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -382,7 +382,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: address=Address.from_capa(addr), feature=feature_from_capa(feature), ) # type: ignore - # Mypy is unable to recognise `basic_block` as a argument due to alias + # Mypy is unable to recognise `basic_block` as an argument due to alias for feature, addr in extractor.extract_basic_block_features(f, bb) ] @@ -419,7 +419,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: features=tuple(ffeatures), basic_blocks=basic_blocks, ) # type: ignore - # Mypy is unable to recognise `basic_blocks` as a argument due to alias + # Mypy is unable to recognise `basic_blocks` as an argument due to alias ) features = StaticFeatures( @@ -427,7 +427,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: file=tuple(file_features), functions=tuple(function_features), ) # type: ignore - # Mypy is unable to recognise `global_` as a argument due to alias + # Mypy is unable to recognise `global_` as an argument due to alias freeze = Freeze( version=CURRENT_VERSION, @@ -437,7 +437,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: extractor=Extractor(name=extractor.__class__.__name__), features=features, ) # type: ignore - # Mypy is unable to recognise `base_address` as a argument due to alias + # Mypy is unable to recognise `base_address` as an argument due to alias return freeze.model_dump_json() diff --git a/capa/features/freeze/features.py b/capa/features/freeze/features.py index dd0b1f2ff8..70cf026fc6 100644 --- a/capa/features/freeze/features.py +++ b/capa/features/freeze/features.py @@ -132,7 +132,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.file.Import): assert isinstance(f.value, str) return ImportFeature(import_=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `import_` as a argument due to alias + # Mypy is unable to recognise `import_` as an argument due to alias elif isinstance(f, capa.features.file.Section): assert isinstance(f.value, str) @@ -141,7 +141,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.file.FunctionName): assert isinstance(f.value, str) return FunctionNameFeature(function_name=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `function_name` as a argument due to alias + # Mypy is unable to recognise `function_name` as an argument due to alias # must come before check for String due to inheritance elif isinstance(f, capa.features.common.Substring): @@ -160,7 +160,7 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.common.Class): assert isinstance(f.value, str) return ClassFeature(class_=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `class_` as a argument due to alias + # Mypy is unable to recognise `class_` as an argument due to alias elif isinstance(f, capa.features.common.Namespace): assert isinstance(f.value, str) @@ -197,12 +197,12 @@ def feature_from_capa(f: capa.features.common.Feature) -> "Feature": elif isinstance(f, capa.features.insn.OperandNumber): assert isinstance(f.value, int) return OperandNumberFeature(index=f.index, operand_number=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `operand_number` as a argument due to alias + # Mypy is unable to recognise `operand_number` as an argument due to alias elif isinstance(f, capa.features.insn.OperandOffset): assert isinstance(f.value, int) return OperandOffsetFeature(index=f.index, operand_offset=f.value, description=f.description) # type: ignore - # Mypy is unable to recognise `operand_offset` as a argument due to alias + # Mypy is unable to recognise `operand_offset` as an argument due to alias else: raise NotImplementedError(f"feature_from_capa({type(f)}) not implemented") diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md index f4b80edce3..050d3a38e7 100644 --- a/capa/ghidra/README.md +++ b/capa/ghidra/README.md @@ -24,7 +24,7 @@ Comments are added at the beginning of matched functions indicating matched capa ### Bookmarks -Bookmarks are added to functions that matched a capabilitiy that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window. +Bookmarks are added to functions that matched a capability that is mapped to a MITRE ATT&CK and/or Malware Behavior Catalog (MBC) technique. You can view these bookmarks in Ghidra's Bookmarks window.