Skip to content

Commit

Permalink
Merge pull request #142 from intezer/fix/linux-offline-scan-TKT-4432
Browse files Browse the repository at this point in the history
fix(linux-offline-scan) TKT-4432 handle offline scans with large json files
  • Loading branch information
itamarga authored Jul 16, 2024
2 parents 11435be + 1c75692 commit 246893c
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
1.21.1
_______
- Fix handling of large offline endpoint scan info files

1.21
_______
- Add download endpoint scan
Expand Down
2 changes: 1 addition & 1 deletion intezer_sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.21'
__version__ = '1.21.1'
31 changes: 22 additions & 9 deletions intezer_sdk/_endpoint_analysis_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,15 @@ def send_processes_info(self, processes_info: dict):
raise_for_status(response)

def send_all_loaded_modules_info(self, all_loaded_modules_info: dict):
response = self.request_with_refresh_expired_access_token(path=f'/processes/loaded-modules-info',
data=all_loaded_modules_info,
method='POST')
raise_for_status(response)
processes = all_loaded_modules_info.get('processes')
# Analyze schema limits to 2000 items per request
if processes:
for i in range(0, len(processes), 2000):
data = {'processes': processes[i:i+2000]}
response = self.request_with_refresh_expired_access_token(path=f'/processes/loaded-modules-info',
data=data,
method='POST')
raise_for_status(response)

def send_loaded_modules_info(self, pid, loaded_modules_info: dict):
response = self.request_with_refresh_expired_access_token(path=f'/processes/{pid}/loaded-modules-info',
Expand Down Expand Up @@ -68,11 +73,19 @@ def send_files_info(self, files_info: dict) -> List[str]:
:param files_info: endpoint scan files info
:return: list of file hashes to upload
"""
response = self.request_with_refresh_expired_access_token(path='/files-info',
data=files_info,
method='POST')
raise_for_status(response)
return response.json()['result']
# Analyze schema limits to 2000 items per request
files_to_upload = []
files_info = files_info.get('files_info')
if files_info:
for i in range(0, len(files_info), 2000):
data = {'files_info': files_info[i:i+2000]}
response = self.request_with_refresh_expired_access_token(path='/files-info',
data=data,
method='POST')
raise_for_status(response)
files_to_upload.extend(response.json()['result'])

return files_to_upload

def send_memory_module_dump_info(self, memory_modules_info: dict) -> List[str]:
"""
Expand Down
10 changes: 5 additions & 5 deletions intezer_sdk/endpoint_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from intezer_sdk.consts import EndpointAnalysisEndReason
from intezer_sdk.consts import SCAN_DEFAULT_MAX_WORKERS
from intezer_sdk.sub_analysis import SubAnalysis

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -218,24 +217,25 @@ def _send_loaded_modules_info(self):

def _send_files_info_and_upload_required(self):
logger.info(f'Endpoint analysis: {self.analysis_id}, uploading files info and uploading required files')
tasks = []
for files_info_file in glob.glob(os.path.join(self._offline_scan_directory, 'files_info_*.json')):
logger.debug(f'Endpoint analysis: {self.analysis_id}, uploading {files_info_file}')
with open(files_info_file, encoding='utf-8') as f:
files_info = json.load(f)
files_to_upload = self._scan_api.send_files_info(files_info)

futures = []
with concurrent.futures.ThreadPoolExecutor(self.max_workers) as executor:
for file_to_upload in files_to_upload:
file_path = os.path.join(self._files_dir, f'{file_to_upload}.sample')
if os.path.isfile(file_path):
futures.append(executor.submit(self._scan_api.upload_collected_binary,
tasks.append(executor.submit(self._scan_api.upload_collected_binary,
file_path,
'file-system'))
else:
logger.warning(f'Endpoint analysis: {self.analysis_id}, file {file_path} does not exist')
for future in concurrent.futures.as_completed(futures):
future.result()

for future in concurrent.futures.as_completed(tasks):
future.result()

def _send_module_differences(self):
file_module_differences_file_path = os.path.join(self._offline_scan_directory, 'file_module_differences.json')
Expand Down

0 comments on commit 246893c

Please sign in to comment.