diff --git a/extract_embedded_service/DEPENDENCIES b/extract_embedded_service/DEPENDENCIES new file mode 100644 index 00000000..8ba69b3f --- /dev/null +++ b/extract_embedded_service/DEPENDENCIES @@ -0,0 +1,3 @@ +Dependencies for extract_embedded (is/are): +clamscan - you need clamscan binary in /usr/bin/ + diff --git a/extract_embedded_service/LICENSE b/extract_embedded_service/LICENSE new file mode 100644 index 00000000..8866226d --- /dev/null +++ b/extract_embedded_service/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017, Lionel PRAT. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extract_embedded_service/README b/extract_embedded_service/README new file mode 100644 index 00000000..03b53043 --- /dev/null +++ b/extract_embedded_service/README @@ -0,0 +1,11 @@ +Version 0.0.2 +------------- + +Correction of errors (rawdata, remove return in run). + +Version 0.0.1 +------------- + +This is an attempt at integrating a crits service with clamav for extract embedded file(s) (options --leave-temps --tempdir=tmp/). + + diff --git a/extract_embedded_service/__init__.py b/extract_embedded_service/__init__.py new file mode 100644 index 00000000..822b1326 --- /dev/null +++ b/extract_embedded_service/__init__.py @@ -0,0 +1,345 @@ +# (c) 2017, Lionel PRAT +# based on service pdf2txt of Adam Polkosnik && meta_office => Thank! +# use tool : https://github.com/lprat/static_analysis +# All rights reserved. +import logging +import hashlib +import shutil +import os +import tempfile +import re +from datetime import datetime +import subprocess +import json + +from django.template.loader import render_to_string + +from crits.core.user_tools import get_user_info +from crits.services.core import Service, ServiceConfigError +from crits.samples.handlers import handle_file +from crits.vocabulary.relationships import RelationshipTypes +from crits.vocabulary.acls import SampleACL +from crits.core.class_mapper import class_from_id +from django.conf import settings +from django.template.loader import render_to_string + +# for adding the extracted files +from crits.screenshots.handlers import add_screenshot + +#get info Sample object +from crits.samples.sample import Sample + +from crits.indicators.indicator import Indicator +from crits.indicators.handlers import handle_indicator_ind +from crits.vocabulary.acls import IndicatorACL +from crits.vocabulary.indicators import ( + IndicatorCI, + IndicatorAttackTypes, + IndicatorThreatTypes, + IndicatorTypes +) + +from . import forms + +logger = logging.getLogger(__name__) + + +class ExtractEmbeddedService(Service): + """ + Extract embedded files with clamscan. + """ + + name = "ExtractEmbedded" + version = '0.0.2' + #template = "extract_embedded_service_template.html" + supported_types = ['Sample'] + description = "Extract embedded files with clamscan." + + @staticmethod + def parse_config(config): + clamscan_path = config.get("clamscan_path", "") + if not clamscan_path: + raise ServiceConfigError("You must specify a valid path for clamscan.") + if not os.path.isfile(clamscan_path): + raise ServiceConfigError("clamscan path does not exist.") + if not os.access(clamscan_path, os.X_OK): + raise ServiceConfigError("clamscan is not executable.") + if not 'clamscan' in clamscan_path.lower(): + raise ServiceConfigError("Executable does not appear to be clamscan.") + analysis_path = config.get("analysis_path", "") + if not analysis_path: + raise ServiceConfigError("You must specify a valid path for analysis static tool.") + if not os.path.isfile(analysis_path): + raise ServiceConfigError("Analysis static tool path does not exist.") + pattern_path = config.get("pattern_path", "") + if not pattern_path: + raise ServiceConfigError("You must specify a valid path for pattern DB file.") + if not os.path.isfile(pattern_path): + raise ServiceConfigError("Pattern DB file path does not exist.") + yararules_path = config.get("yararules_path", "") + if not yararules_path: + raise ServiceConfigError("You must specify a valid path for yara rules.") + coef_path = config.get("coef_path", "") + if not coef_path: + raise ServiceConfigError("You must specify a valid path for coef configuration path.") + if not os.path.isfile(coef_path): + raise ServiceConfigError("Coef Configuration path does not exist.") + tlp_value = config.get("tlp_value", "") + + @staticmethod + def get_config(existing_config): + # Generate default config from form and initial values. + config = {} + fields = forms.ExtractEmbeddedConfigForm().fields + for name, field in fields.iteritems(): + config[name] = field.initial + + # If there is a config in the database, use values from that. + if existing_config: + for key, value in existing_config.iteritems(): + config[key] = value + return config + + @staticmethod + def get_config_details(config): + return {'clamscan_path': config['clamscan_path'], + 'analysis_path': config['analysis_path'], + 'pattern_path': config['pattern_path'], + 'yararules_path': config['yararules_path'], + 'coef_path': config['coef_path'], + 'tlp_value': config['tlp_value']} + + @classmethod + def generate_config_form(self, config): + html = render_to_string('services_config_form.html', + {'name': self.name, + 'form': forms.ExtractEmbeddedConfigForm(initial=config), + 'config_error': None}) + form = forms.ExtractEmbeddedConfigForm + return form, html + + @staticmethod + def valid_for(obj): + if not obj.filedata: + return False + #work for all types + return True + + @staticmethod + def save_runtime_config(config): + if config['debug_log']: + del config['debug_log'] + if config['import_file']: + del config['import_file'] + if config['import_file_ioc']: + del config['import_file_ioc'] + if config['import_file_yara']: + del config['import_file_yara'] + + @staticmethod + def bind_runtime_form(analyst, config): + if 'debug_log' not in config: + config['debug_log'] = False + if 'import_file' not in config: + config['import_file'] = False + if 'import_file_ioc' not in config: + config['import_file_ioc'] = False + if 'import_file_yara' not in config: + config['import_file_yara'] = False + form = forms.ExtractEmbeddedRunForm(data=config) + return form + + @classmethod + def generate_runtime_form(self, analyst, config, crits_type, identifier): + html = render_to_string("services_run_form.html", + {'name': self.name, + 'form': forms.ExtractEmbeddedRunForm(), + 'crits_type': crits_type, + 'identifier': identifier}) + return html + + def run(self, obj, config): + obj.filedata.seek(0) + data8 = obj.filedata.read(8) + obj.filedata.seek(0) + user = self.current_task.user + self.config = config + self.obj = obj + self._debug("ExtractEmbedded started") + tlp_value = self.config.get("tlp_value", "tlp_value") + clamscan_path = self.config.get("clamscan_path", os.path.dirname(os.path.realpath(__file__))+'/static_analysis/clamav-devel/clamscan/clamscan') + analysis_path = self.config.get("analysis_path", os.path.dirname(os.path.realpath(__file__))+'/static_analysis/analysis.py') + yararules_path = self.config.get("yararules_path", os.path.dirname(os.path.realpath(__file__))+'/static_analysis/yara_rules/') + pattern_path = self.config.get("pattern_path", os.path.dirname(os.path.realpath(__file__))+'/static_analysis/pattern.db') + coef_path = self.config.get("coef_path", os.path.dirname(os.path.realpath(__file__))+'/static_analysis/coef.conf') + #write out the sample stored in the db to a tmp file + with self._write_to_file() as tmp_file: + (working_dir, filename) = os.path.split(tmp_file) + new_env = dict(os.environ) # Copy current environment + args = [] + obj.filedata.seek(0) + #make temp file for get json result and graph + dirtmp = tempfile._get_default_tempdir() + file_png=dirtmp+'/'+next(tempfile._get_candidate_names())+'.png' + file_json=dirtmp+'/'+next(tempfile._get_candidate_names())+'.json' + if os.path.isfile(file_png) or os.path.isfile(file_json): + self._warning('Error: File temp exist.') + else: + #TODO add choice: verbose mode and extracted file emmbed + args = ['python', analysis_path, '-c', clamscan_path, '-g', '-y', yararules_path, '-p', pattern_path, '-f', tmp_file, '-m', coef_path, '-s', file_png, '-j', file_json] + if config['debug_log']: + args = ['python', analysis_path, '-c', clamscan_path, '-g', '-y', yararules_path, '-p', pattern_path, '-f', tmp_file, '-m', coef_path, '-s', file_png, '-j', file_json, '-v'] + #verify user can write sample + acl_write = user.has_access_to(SampleACL.WRITE) + if not acl_write: + self._info("User does not have permission to add Sample Data to CRITs") + # pdftotext does not generate a lot of output, so we should not have to + # worry about this hanging because the buffer is full + proc = subprocess.Popen(args, env=new_env, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, cwd=working_dir) + # Note that we are redirecting STDERR to STDOUT, so we can ignore + # the second element of the tuple returned by communicate(). + output, serr = proc.communicate() + #print stderr without message 'empty database file' + self._info(output) + if serr: + self._warning(serr) + #run command problem + if proc.returncode: + msg = ("analysis could not process the file.") + self._warning(msg) + if os.path.isfile(file_json): + os.remove(file_json) + if os.path.isfile(file_png): + os.remove(file_png) + #run command OK + else: + #add json information and png file + result_extract = None + if os.path.isfile(file_json): + with open(file_json) as data_file: + try: + result_extract = json.load(data_file) + except Exception as e: + self._warning("Error to parse json result: "+str(e)) + os.remove(file_json) + if os.path.isfile(file_png): + #add screeshot + fileh = open(file_png, "rb") + fileh.seek(0) + res = add_screenshot(description='Render of analysis embedded files', + tags=None, + method=self.name, + source=obj.source, + reference=None, + analyst=self.current_task.user.username, + screenshot=fileh, + screenshot_ids=None, + oid=obj.id, + tlp=tlp_value, + otype="Sample") + if res.get('message') and res.get('success') == True: + self._warning("res-message: %s id:%s" % (res.get('message'), res.get('id') ) ) + self._add_result('Graph analysis', res.get('id'), {'Message': res.get('message')}) + self._info("id:%s, file: %s" % (res.get('id'), file_png)) + fileh.close() + os.remove(file_png) + if type(result_extract) is dict: + parse_result(self, result_extract, acl_write, None) + if result_extract['TempDirExtract'] and dirtmp in str(result_extract['TempDirExtract']) and os.path.isdir(str(result_extract['TempDirExtract'])): + #remove dir tmp + shutil.rmtree(result_extract['TempDirExtract']) + +def parse_result(self, result_extract, acl_write, md5_parent): + stream_md5 = None + if type(result_extract) is dict: + #make reccursion extract each file embbed + if 'FileMD5' in result_extract and result_extract['FileMD5']: + tmp_dict={} + b_yara=False + b_ioc=False + #extract info + no_info=['ExtractInfo','ContainedObjects', 'Yara', 'PathFile', 'FileMD5', 'RootFileType', 'TempDirExtract'] + for key, value in result_extract.iteritems(): + if not key in no_info: + self._add_result('File: '+result_extract['FileMD5'] + ' - Info', key, {'value': str(value)}) + #extract yara match + if result_extract['Yara']: + for item_v in result_extract['Yara']: + for key, value in item_v.iteritems(): + self._add_result('File: '+result_extract['FileMD5'] + ' - Signatures yara matched', key, value) + b_yara = True + #extract IOC + if result_extract['ExtractInfo']: + for item_v in result_extract['ExtractInfo']: + for key, value in item_v.iteritems(): + self._add_result('File: '+result_extract['FileMD5'] + ' - Extract potential IOC', key, {'value': str(value)}) + b_ioc = True + #add_sample + if 'PathFile' in result_extract and type(result_extract['PathFile']) is list and len(result_extract['PathFile']) > 0: + if os.path.isfile(str(result_extract['PathFile'][0])): + with open(str(result_extract['PathFile'][0]), 'r') as content_file_tmp: + content_tmp = content_file_tmp.read() + stream_md5 = hashlib.md5(content_tmp).hexdigest() + name = str(stream_md5).decode('ascii', errors='ignore') + id_ = Sample.objects(md5=stream_md5).only('id').first() + if id_: + self._info('Add relationship with sample existed:'+str(stream_md5)) + #make relationship + id_.add_relationship(rel_item=self.obj, + rel_type=RelationshipTypes.CONTAINED_WITHIN, + rel_date=datetime.now(), + analyst=self.current_task.user.username) + else: + if acl_write and (self.config['import_file'] or (self.config['import_file_yara'] and b_yara) or (self.config['import_file_ioc'] and b_ioc)): + obj_parent = None + if md5_parent: + obj_parent = Sample.objects(md5=md5_parent).only('id').first() + if not obj_parent: + sample = handle_file(name, content_tmp, self.obj.source, + related_id=str(self.obj.id), + related_type=str(self.obj._meta['crits_type']), + campaign=self.obj.campaign, + source_method=self.name, + relationship=RelationshipTypes.CONTAINED_WITHIN, + user=self.current_task.user) + else: + sample = handle_file(name, content_tmp, obj_parent.source, + related_id=str(obj_parent.id), + related_type=str(obj_parent._meta['crits_type']), + campaign=obj_parent.campaign, + source_method=self.name, + relationship=RelationshipTypes.CONTAINED_WITHIN, + user=self.current_task.user) + self._info('Add sample '+str(stream_md5)) + else: + #add IOC if not add sample + if self.current_task.user.has_access_to(IndicatorACL.WRITE) and b_yara: + res = handle_indicator_ind(stream_md5, + self.obj.source, + IndicatorTypes.MD5, + IndicatorThreatTypes.UNKNOWN, + IndicatorAttackTypes.UNKNOWN, + self.current_task.user, + add_relationship=True, + source_method=self.name, + campaign=self.obj.campaign, + description='Extracted by service '+self.name) + self._info('Add indicator md5:'+str(stream_md5)+' -- id: '+str(res)) + #contains file + if 'ContainedObjects' in result_extract and type(result_extract['ContainedObjects']) is list and result_extract['ContainedObjects']: + for item_v in result_extract['ContainedObjects']: + if item_v['FileMD5'] and item_v['FileType'] and item_v['FileSize']: + #search if file exist + id_ = Sample.objects(md5=str(item_v['FileMD5'])).only('id').first() + sample_exist = False + ioc_exist = False + if id_: + sample_exist = True + id_ = Indicator.objects(value=str(item_v['FileMD5'])).only('id').first() + if id_: + ioc_exist = True + self._add_result('File: '+result_extract['FileMD5'] + ' - Contains md5 files', item_v['FileMD5'], {'type': str(item_v['FileType']), 'size': str(item_v['FileSize']), 'Exists Sample': str(sample_exist), 'Exists IOC md5': str(ioc_exist)}) + for item_v in result_extract['ContainedObjects']: + #re do loop for best display result + parse_result(self, item_v, acl_write, stream_md5) diff --git a/extract_embedded_service/bootstrap b/extract_embedded_service/bootstrap new file mode 100755 index 00000000..45cb1eb9 --- /dev/null +++ b/extract_embedded_service/bootstrap @@ -0,0 +1,147 @@ +#!/bin/sh +# (c) 2017, The MITRE Corporation. All rights reserved. +# Source code distributed pursuant to license agreement. +# +# Usage: bootstrap +# This script is designed to install all of the necessary dependencies for the +# service. + +. ../funcs.sh + +ubuntu_install() +{ + printf "${HEAD}Installing dependencies with apt-get${END}\n" + #sudo apt-add-repository universe + #sudo apt-get update + sudo apt-get install -y --fix-missing clamav + if [ $? -eq 0 ] + then + printf "${PASS}Ubuntu Install Complete${END}\n" + else + printf "${FAIL}Ubuntu Install Failed${END}\n" + fi + sudo ldconfig +} + +debian_install() +{ + printf "${HEAD}Installing dependencies with apt-get${END}\n" + #sudo apt-add-repository universe + #sudo apt-get update + sudo apt-get install -y --fix-missing clamav + if [ $? -eq 0 ] + then + printf "${PASS}Debian Install Complete${END}\n" + else + printf "${FAIL}Debian Install Failed${END}\n" + fi + sudo ldconfig +} + +darwin_install() +{ + command -v brew >/dev/null 2>&1 || { + printf "${HEAD}Installation for OSX requires Homebrew. Please visit http://brew.sh/.${END}\n" + exit + } + brew install clamav + if [ $? -eq 0 ] + then + printf "${PASS}Homebrew Install Complete${END}\n" + else + printf "${FAIL}Homebrew Install Failed${END}\n" + fi +} + +freebsd_install() +{ + #printf "${HEAD}Installing Ports${END}\n" + sudo pkg install clamav + if [ $? -eq 0 ] + then + printf "${PASS}Ports Install Complete${END}\n" + else + printf "${FAIL}Ports Install Failed${END}\n" + fi +} + +red_hat_install() +{ + #printf "${HEAD}Installing Yum Packages${END}\n" + sudo yum install clamav + if [ $? -eq 0 ] + then + printf "${PASS}Yum Install Complete${END}\n" + else + printf "${FAIL}Yum Install Failed${END}\n" + fi +} + +centos_install() +{ + #printf "${HEAD}Installing Yum Packages${END}\n" + sudo yum install clamav + if [ $? -eq 0 ] + then + printf "${PASS}Yum Install Complete${END}\n" + else + printf "${FAIL}Yum Install Failed${END}\n" + fi +} +#=============================================================== +# This is the Beginning of the Script +#=============================================================== +# Sees if there is an argument +if [ -z $1 ]; +then + STEP=1 +else + STEP=$1 +fi + +while [ $STEP -lt 2 ] +do + case $STEP in + 1) + verify + if [ "$OS" = 'ubuntu' ] + then + #printf "${PASS}ubuntu is Supported!${END}\n" + ubuntu_install || exit_restart $STEP + depend_crits ||exit_restart $STEP + elif [ "$OS" = 'debian' ] + then + #printf "${PASS}Debian is Supported!${END}\n" + debian_install || exit_restart $STEP + depend_crits ||exit_restart $STEP + elif [ "$OS" = 'darwin' ] + then + #printf "${PASS}OS X is Supported!${END}\n" + darwin_install || exit_restart $STEP + depend_crits ||exit_restart $STEP + elif [ "$OS" = "centos" ] + then + #printf "${PASS}CentOS is Supported!${END}\n" + centos_install || exit_restart $STEP + depend_crits ||exit_restart $STEP + elif [ "$OS" = "red hat" ] + then + #printf "${PASS}Red Hat is Supported!${END}\n" + red_hat_install || exit_restart $STEP + depend_crits ||exit_restart $STEP + elif [ "$OS" = 'freebsd' ] + then + #printf "${PASS}FreeBSD is Supported${END}\n" + freebsd_install || exit_restart $STEP + depend_crits ||exit_restart $STEP + else + printf "${FAIL}OS: $OS, need Ubuntu, Debian, Darwin (OS X), CentOS, Red Hat, or FreeBSD${END}\n" + exit + fi + ;; + *) + exit + ;; + esac + STEP=$((STEP+1)) +done diff --git a/extract_embedded_service/forms.py b/extract_embedded_service/forms.py new file mode 100644 index 00000000..5871d4cb --- /dev/null +++ b/extract_embedded_service/forms.py @@ -0,0 +1,62 @@ +import os +from django import forms + +class ExtractEmbeddedConfigForm(forms.Form): + error_css_class = 'error' + required_css_class = 'required' + clamscan_path = forms.CharField(required=True, + label="clamscan binary", + initial=os.path.dirname(os.path.realpath(__file__))+'/static_analysis/clamav-devel/clamscan/clamscan', + widget=forms.TextInput(), + help_text="Full path to clamscan binary.") + analysis_path = forms.CharField(required=True, + label="Analysis static tool", + initial=os.path.dirname(os.path.realpath(__file__))+'/static_analysis/analysis.py', + widget=forms.TextInput(), + help_text="Full path to analysis static tool. (https://github.com/lprat/static_analysis)") + yararules_path = forms.CharField(required=True, + label="Yara rules", + initial=os.path.dirname(os.path.realpath(__file__))+'/static_analysis/yara_rules/', + widget=forms.TextInput(), + help_text="Yara rules path for analysis static tool.") + pattern_path = forms.CharField(required=True, + label="Pattern DB", + initial=os.path.dirname(os.path.realpath(__file__))+'/static_analysis/pattern.db', + widget=forms.TextInput(), + help_text="Pattern DB path for extract data") + coef_path = forms.CharField(required=True, + label="Coef configuration", + initial=os.path.dirname(os.path.realpath(__file__))+'/static_analysis/coef.conf', + widget=forms.TextInput(), + help_text="Coef configuration path for globale score") + tlp_value = forms.CharField(required=True, + label="Tlp value", + initial='red', + widget=forms.TextInput(), + help_text="Indicate TLP value.") + def __init__(self, *args, **kwargs): + kwargs.setdefault('label_suffix', ':') + super(ExtractEmbeddedConfigForm, self).__init__(*args, **kwargs) + +class ExtractEmbeddedRunForm(forms.Form): + error_css_class = 'error' + required_css_class = 'required' + import_file_ioc = forms.BooleanField(required=False, + initial=False, + label="Import", + help_text="Import extracted file contains IOC informations in CRITS as sample.") + import_file_yara = forms.BooleanField(required=False, + initial=True, + label="Import", + help_text="Import extracted file matched by yara rules in CRITS as sample.") + import_file = forms.BooleanField(required=False, + initial=False, + label="Import", + help_text="Import ALL extracted file in CRITS as sample.") + debug_log = forms.BooleanField(required=False, + initial=False, + label="Debug", + help_text="Insert debug info in log.") + def __init__(self, *args, **kwargs): + kwargs.setdefault('label_suffix', ':') + super(ExtractEmbeddedRunForm, self).__init__(*args, **kwargs)