diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..12c33d2 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +patreon: dfirscience diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad41b38 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +ENSRL diff --git a/CHANGELOG b/CHANGELOG new file mode 100644 index 0000000..2ec3478 --- /dev/null +++ b/CHANGELOG @@ -0,0 +1,15 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.0] - 2022-02-06 +### Added +- This CHANGELOG based on the standard from https://keepachangelog.com/en/1.0.0/ +- README about the project description and goals +- LICENSE - MIT +- ensrl.py initial structure and plan +- New ENSRL set, compressed \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..75b6832 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 DFIRScience + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e2ad4ae --- /dev/null +++ b/README.md @@ -0,0 +1,45 @@ +## ENSRL + +A script to filter and split the National Software Reference Library (NSRL) RDS. + +## Why?? + +Known-good hash sets are extremely useful for digital investigators to filter known files in an investigation. However, more hashes mean more CPU cycles to process case data. As such, we have to be as efficient as possible with the hash sets we use. + +### Filters + +All NSRL entries are categorized as 362 - TBD. Which means we can't meaningfully categorize by OS. Product may be possible, but filtering NSRLProd.txt won't be very accurate. + +ENSRL also filters out file names starting with "__" and "." as these seem to mostly be executable segments and not whole files. + +Split filters are built based on the NSRLOS.txt, and searching for the OS. For example "Windows." This mostly gives what we want with few false positives. + +The UNIX/Linux entries get a bit more tricky, as expected. Our current filter looks like: + +```shell +cat NSRLOS.txt | egrep -vi "(windows|android|ios|mac|msdos|ms dos|amstrad|netware|nextstep|aix|compaq|dos|dr dos|amiga|os x|at&t|apple)" +``` + +The first keywords are used for the OS filters, while things like msdos are simply removed. If you're analyzing a system running DOS... I'm so, so sorry. + + +## Install and Run + +Make sure you have [Python 3](https://www.python.org/) installed. Download the repository. Download the [NSRL](https://www.nist.gov/itl/ssd/software-quality-group/national-software-reference-library-nsrl/nsrl-download/current-rds). Tested on Modern RDS (minimal) v2.75. Make sure NSRLOS.txt is in the same directory as NSRLFile.txt. 
+From a command prompt run: + +```bash +pip install -r requirements.txt +python ensrl.py NSRLFile.txt +``` + +This will filter entries that are likely partial executable matches, java classes, etc. And will split the NSRL set by Operating System: Windows, Linux, MacOS, Android, iOS. The idea is that you choose +the database that is specific to your investigation at the time. + +## Bug reports and suggestions + +Pull requests considered! Otherwise create an issue or message me on [Twitter](https://twitter.com/dfirscience) if you find any bugs or have some recommendations. + +## Thank you + +Thanks to [Hexacorn](https://www.hexacorn.com/blog/2022/02/04/analysing-nsrl-data-set-for-fun-and-because-curious/) for the excellent blog post that prompted this research. diff --git a/ensrl.py b/ensrl.py new file mode 100755 index 0000000..78c7d79 --- /dev/null +++ b/ensrl.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +""" +Filter and split the NSRL RDS into a more efficient collection for DFIR. +MIT License. 
+""" + +# Imports +import argparse +import logging +import re, os +import signal +import platform + +from alive_progress import alive_it +from pathlib import Path +from zipfile import ZipFile # TODO accept the NSRL zip file + +__software__ = 'ENSRL' +__author__ = 'Joshua James' +__copyright__ = 'Copyright 2022, ENSRL' +__credits__ = [] +__license__ = 'MIT' +__version__ = '0.1.0' +__maintainer__ = 'Joshua James' +__email__ = 'joshua+github@dfirscience.org' +__status__ = 'active' + +# Last updated 2022-02-06 +WINDOWSCODES = ["1000", "1001", "1010", "1011", "1016", "1018", "1019", "1027", "1028", "1029", "1030", "1031", "1049", "1050", "1051", "1052", "1053", "1054", "1055", "1056", "1057", "1058", "1059", "1060", "1072", "1073", "1074", "1075", "1076", "1077", "1078", "1079", "1112", "1138", "1139", "1140", "1141", "1142", "1143", "1144", "1147", "1148", "1154", "1155", "1156", "1157", "1158", "1159", "1176", "1177", "1178", "1192", "1194", "1195", "1196", "1213", "1240", "1252", "1280", "1293", "1295", "1317", "1318", "1319", "1320", "1357", "1358", "1359", "1365", "1366", "186", "189", "190", "191", "192", "193", "194", "195", "196", "197", "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208", "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219", "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230", "231", "232", "233", "237", "238", "239", "240", "241", "242", "243", "249", "250", "251", "252", "253", "254", "255", "258", "263", "264", "265", "267", "268", "269", "270", "271", "272", "273", "274", "275", "280", "281", "282", "283", "284", "285", "286", "298", "300", "302", "303", "304", "305", "306", "307", "308", "310", "311", "325", "326", "327", "328", "330", "331", "333", "338", "339", "341", "342", "343", "344", "345", "346", "347", "348", "349", "350", "351", "354", "355", "356", "357", "358", "359", "363", "364", "365", "366", "371", "372", "373", "374", "375", "376", "377", "378", "379", 
"380", "385", "386", "387", "389", "390", "391", "392", "393", "394", "395", "400", "401", "402", "403", "404", "405", "406", "407", "408", "409", "411", "412", "413", "414", "415", "416", "417", "418", "419", "420", "421", "422", "423", "424", "425", "426", "427", "448", "453", "454", "455", "456", "457", "461", "462", "464", "465", "466", "467", "469", "480", "481", "482", "483", "485", "486", "487", "488", "489", "490", "491", "492", "493", "494", "496", "497", "498", "500", "510", "513", "514", "518", "522", "523", "530", "531", "535", "539", "540", "543", "544", "549", "550", "551", "552", "553", "558", "559", "560", "561", "564", "569", "570", "575", "576", "577", "578", "579", "580", "581", "582", "583", "584", "585", "590", "591", "592", "594", "595", "596", "597", "602", "608", "609", "610", "612", "613", "614", "615", "616", "617", "618", "619", "620", "621", "629", "630", "638", "639", "640", "641", "642", "643", "644", "645", "648", "649", "650", "656", "658", "660", "661", "663", "664", "666", "668", "669", "670", "672", "683", "684", "688", "694", "695", "696", "697", "698", "699", "700", "702", "707", "714", "716", "718", "720", "724", "725", "726", "727", "728", "729", "731", "732", "737", "738", "739", "748", "758", "759", "760", "761", "762", "763", "764", "765", "768", "772", "783", "787", "788", "789", "790", "795", "796", "798", "801", "802", "803", "805", "810", "811", "812", "824", "857", "860", "861", "862", "867", "868", "869", "870", "871", "872", "873", "874", "875", "876", "877", "878", "879", "880", "884", "885", "892", "893", "894", "895", "896", "897", "898", "900", "901", "902", "903", "904", "905", "906", "919", "921", "922", "983", "986", "987", "988", "989", "991", "992", "993", "994", "995"] +MACCODES = ["100", "450", "1002", "101", "102", "1020", "1021", "1022", "1026", "103", "1032", "1033", "1038", "1039", "104", "105", "106", "107", "1091", "1132", "1161", "1162", "1163", "1185", "1187", "1188", "1219", "127", "1312", "1321", 
"1356", "236", "244", "245", "246", "247", "256", "257", "259", "260", "261", "262", "277", "278", "279", "292", "296", "309", "322", "323", "324", "332", "335", "337", "340", "352", "353", "360", "368", "369", "384", "399", "410", "428", "429", "431", "432", "434", "435", "436", "437", "438", "439", "441", "443", "447", "471", "474", "475", "476", "484", "499", "502", "503", "504", "528", "529", "532", "534", "536", "537", "538", "546", "547", "548", "554", "562", "563", "565", "571", "572", "573", "574", "589", "593", "600", "601", "631", "632", "647", "665", "667", "671", "685", "686", "687", "689", "690", "691", "692", "693", "701", "706", "73", "74", "75", "750", "757", "76", "766", "77", "771", "774", "775", "779", "78", "785", "79", "80", "804", "808", "809", "81", "813", "82", "822", "823", "825", "827", "83", "84", "85", "86", "87", "88", "882", "886", "89", "90", "907", "91", "92", "920", "93", "94", "95", "96", "97", "974", "975", "976", "98", "984", "99", "996"] +LINUXCODES = ["1003", "1004", "1005", "1006", "1007", "1008", "1009", "1012", "1013", "1015", "1024", "1025", "1034", "1035", "1036", "1037", "1040", "1041", "1042", "1043", "1044", "1045", "1046", "1047", "1048", "1063", "1064", "1065", "1066", "1067", "1068", "1069", "1070", "1071", "1080", "1081", "1082", "1083", "1084", "1085", "1086", "1087", "1088", "1089", "1090", "1092", "1093", "1094", "1095", "1096", "1097", "1098", "1099", "110", "1100", "1101", "1102", "1103", "1104", "1105", "1106", "1107", "1108", "1109", "111", "1110", "1111", "1113", "1114", "1115", "1116", "1117", "1118", "1119", "112", "1120", "1121", "1122", "1123", "1124", "1125", "1126", "1127", "1128", "1129", "1130", "1131", "1133", "1134", "1135", "1136", "1137", "1146", "1149", "1150", "1151", "1152", "1153", "1164", "1165", "1166", "1167", "1168", "1169", "117", "1170", "1171", "1172", "1173", "1174", "1175", "118", "1181", "1182", "1183", "1184", "1186", "1189", "119", "1190", "120", "1207", "1208", "1209", "121", 
"1210", "1212", "1214", "1215", "1216", "1217", "1218", "122", "1220", "1221", "1222", "1223", "1224", "1225", "1226", "1227", "1228", "1229", "123", "1230", "1231", "1232", "1233", "1234", "1235", "1236", "1237", "1238", "1239", "124", "1241", "1242", "1243", "1244", "1245", "1246", "1247", "1248", "1249", "125", "1250", "1251", "1253", "1254", "1255", "1256", "1257", "1258", "1259", "126", "1260", "1261", "1262", "1263", "1264", "1265", "1266", "1267", "1268", "1269", "1270", "1271", "1272", "1273", "1274", "1275", "1276", "1277", "1278", "1279", "128", "1281", "1283", "1284", "1285", "1286", "1287", "1288", "1289", "129", "1290", "1291", "1292", "1294", "1296", "1297", "1298", "1299", "130", "1300", "1301", "1302", "1303", "1304", "1305", "1306", "1307", "1308", "1309", "131", "1310", "1313", "1314", "1315", "1316", "132", "1323", "1324", "133", "1330", "1335", "1336", "1337", "1338", "1339", "134", "1340", "135", "1353", "1354", "1355", "136", "1360", "1361", "1362", "1363", "1364", "1367", "1368", "137", "138", "139", "140", "141", "142", "143", "144", "145", "146", "147", "148", "149", "15", "150", "151", "152", "153", "154", "155", "156", "157", "158", "159", "16", "160", "161", "162", "163", "164", "165", "166", "167", "168", "169", "17", "170", "171", "172", "173", "174", "175", "176", "177", "178", "18", "180", "181", "182", "183", "184", "185", "187", "188", "235", "26", "266", "27", "276", "28", "288", "289", "29", "290", "291", "294", "295", "297", "299", "30", "301", "31", "313", "314", "315", "316", "317", "318", "319", "32", "329", "33", "334", "336", "34", "35", "36", "367", "370", "38", "381", "382", "383", "388", "39", "396", "397", "398", "40", "41", "42", "43", "430", "433", "44", "440", "442", "444", "445", "446", "449", "45", "459", "46", "460", "463", "47", "470", "472", "473", "477", "478", "479", "48", "49", "495", "50", "501", "505", "506", "507", "508", "509", "51", "511", "515", "516", "517", "519", "52", "520", "521", "525", "526", 
"527", "53", "533", "54", "541", "545", "555", "556", "557", "566", "567", "568", "586", "587", "588", "598", "599", "6", "603", "604", "605", "606", "607", "611", "633", "634", "635", "636", "637", "651", "652", "657", "659", "662", "673", "674", "675", "676", "677", "678", "679", "680", "681", "682", "7", "703", "704", "705", "708", "709", "711", "712", "713", "715", "717", "719", "721", "722", "723", "730", "733", "734", "735", "736", "740", "741", "742", "743", "744", "745", "746", "747", "749", "751", "752", "753", "754", "755", "756", "767", "770", "773", "776", "777", "778", "780", "781", "782", "786", "791", "792", "793", "797", "800", "806", "807", "814", "815", "816", "818", "819", "820", "826", "828", "829", "830", "831", "832", "833", "834", "835", "836", "837", "838", "839", "840", "841", "842", "843", "844", "845", "846", "847", "848", "849", "850", "851", "854", "855", "858", "859", "863", "864", "865", "866", "889", "890", "891", "899", "908", "909", "910", "911", "912", "913", "914", "915", "916", "917", "918", "923", "924", "925", "926", "927", "928", "929", "930", "931", "932", "933", "934", "935", "936", "937", "938", "939", "940", "941", "942", "943", "944", "945", "946", "947", "948", "949", "950", "951", "952", "953", "954", "955", "956", "957", "958", "959", "960", "961", "962", "963", "964", "965", "966", "967", "968", "969", "970", "971", "972", "973", "977", "978", "979", "980", "981", "982", "985", "990", "997", "998", "999"] +ANDROIDCODES = ["1160", "1179", "1180", "1191", "1193", "1197", "1198", "1199", "1200", "1201", "1202", "1203", "1204", "1205", "1206", "1211", "1311", "1322", "1325", "1326", "1327", "1328", "1329", "1331", "1332", "1333", "1334", "1341", "1342", "1343", "1344", "1345", "1346", "1347", "1348", "1349", "1350", "799", "817"] +IOSCODES = ["1145", "622", "623", "624", "625", "626", "627", "628", "646", "653", "654", "655", "881", "883"] +OTHERCODES = ["362"] +NSRLHEADER = 
'"SHA-1","MD5","CRC32","FileName","FileSize","ProductCode","OpSystemCode","SpecialCode"\n' +FNs = ["ENSRL-Windows.txt", "ENSRL-Mac.txt", "ENSRL-Linux.txt", "ENSRL-Android.txt", "ENSRL-iOS.txt", "ENSRL-Other.txt"] + +# Set logging level and format +def setLogging(debug): + fmt = "[%(levelname)s] %(asctime)s %(message)s" + LOGLEVEL = logging.INFO if debug is False else logging.DEBUG + logging.basicConfig(level=LOGLEVEL, format=fmt, datefmt='%Y-%M-%dT%H:%M:%S') + +# Argparser config and argument setup +def setArgs(): + parser = argparse.ArgumentParser(description=__copyright__) + parser.add_argument('nsrl', help="The location of NSRLFile.txt") + parser.add_argument('-o', '--out', required=False, action='store', dest="out", help='Output directory path') + parser.add_argument('--debug', required=False, action='store_true', help='Set the log level to DEBUG') + return(parser.parse_args()) + +# Run on ctrl+c +def exitHandler(sig, frame): + logging.info('Process terminated by user.') + if platform.system == "Windows": os._exit() + else: os.kill(os.getpid(), signal.SIGINT) + +def createENSRL(OUTD): + Path(OUTD).mkdir(parents=True, exist_ok=True) + for f in FNs: + logging.debug(f'Creating file {f} on {OUTD}') + FP = Path(OUTD).joinpath(f) + FP.write_text(NSRLHEADER) + # TODO Try/catch + +def cleanEmpty(OUTD): + for fn in FNs: + FP = Path(OUTD).joinpath(fn) + with FP.open() as f: + i = 0 + for l in f: + i += 1 + if i > 1: break + if i == 1: + logging.debug(f'File {f} is empty. 
Removing...') + FP.unlink() + +def parseNSRL(NSRL, OUTD): + try: + with open(NSRL) as f: + for l in alive_it(f): + FN = re.sub('\"', '', l.split(',')[3]) + if FN.startswith("__") or FN.startswith("."): continue # File name filter + OSCODE = re.sub('\"', '', l.split(',')[-2]) + FP = "" + if OSCODE in WINDOWSCODES: + FP = Path(OUTD).joinpath("ENSRL-Windows.txt") + elif OSCODE in MACCODES: + FP = Path(OUTD).joinpath("ENSRL-Mac.txt") + elif OSCODE in LINUXCODES: + FP = Path(OUTD).joinpath("ENSRL-Linux.txt") + if OSCODE in ANDROIDCODES: + FP = Path(OUTD).joinpath("ENSRL-Android.txt") + elif OSCODE in IOSCODES: + FP = Path(OUTD).joinpath("ENSRL-iOS.txt") + elif OSCODE in OTHERCODES: + FP = Path(OUTD).joinpath("ENSRL-Other.txt") + if FP != "": # Check if we have a valid category + with FP.open("a") as f: + f.write(l) + except RecursionError as e: + logging.debug(f'Recursion error: {e}') + +def main(): + signal.signal(signal.SIGINT, exitHandler) + args = setArgs() + NSRL = Path(args.nsrl) + OUTD = Path.cwd().joinpath("ENSRL") + setLogging(args.debug) + print(f"{__software__} v{__version__} - Use ctrl+c to exit") + if Path.is_file(NSRL): + logging.debug(f'NSRL set to {args.nsrl}') + if args.out and Path.is_dir(Path(args.out)): + logging.debug(f'Output directory set to {args.out}') + OUTD = args.out + "ENSRL" + logging.info("Creating ENSRL output directory.") + createENSRL(OUTD) + logging.info("Sorting NSRL... this will take a while.") + parseNSRL(NSRL,OUTD) + cleanEmpty(OUTD) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..473fdb3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +alive_progress>=2.2.0 \ No newline at end of file