Skip to content

Commit

Permalink
feat: add JSON testing data
Browse files Browse the repository at this point in the history
- Add README about test data.
- Add script to redact sensitive fields.
- Add JSON testing data collected from many systems, with redaction of
  sensitive fields.

The initial corpus includes:
- NVME drives
- SAS drives - HDD only, no SSD
- SCSI drives - HDD only, no SSD
- SATA drives - SSD & HDD

Signed-off-by: Robin H. Johnson <[email protected]>
  • Loading branch information
robbat2 committed Nov 8, 2023
1 parent 536ca0c commit a1c3448
Show file tree
Hide file tree
Showing 32 changed files with 8,888 additions and 0 deletions.
65 changes: 65 additions & 0 deletions redact_fake_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#! /usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# Redact potentially sensitive information in smartctl JSON files
# This script does an in-place modification.
import json
import sys
import copy
import os

def main():
    """Redact, in place, every file named on the command line.

    Each path is echoed to stdout before it is processed.
    """
    paths = sys.argv[1:]
    for path in paths:
        print(path)
        redact_one_file(path)

def redact_one_file(filename):
    """Redact one JSON file in place.

    The file is parsed, passed through ``redact_data()``, and the result is
    written to ``<filename>.new``, which is then moved over the original.
    The original is therefore only touched once the new content has been
    written out completely.

    Args:
        filename: Path of the JSON file to rewrite.

    Raises:
        OSError: If the file cannot be read, written, or replaced.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    tmpname = filename + ".new"

    # JSON is exchanged as UTF-8; do not depend on the locale's default.
    with open(filename, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)

    newdata = redact_data(data)

    try:
        with open(tmpname, "w", encoding="utf-8") as json_file:
            json.dump(newdata, json_file, indent="\t", sort_keys=True)
        # os.replace() overwrites an existing destination on every platform,
        # whereas os.rename() fails on Windows when the target exists.
        os.replace(tmpname, filename)
    except BaseException:
        # Do not leave a stray or half-written temporary file behind.
        try:
            os.remove(tmpname)
        except OSError:
            pass
        raise

def mutate_nested_dict(d, keys, newvalue, if_present=False):
    """Set the value at a nested key path inside ``d``, modifying it in place.

    Args:
        d: The outermost dictionary.
        keys: Sequence of keys describing the path, outermost first.
        newvalue: Value to store under the last key of the path.
        if_present: If True, only overwrite when the last key already
            exists. If False, the last key is created when missing.

    Intermediate keys are never created: if any key before the last one is
    missing, nothing is changed regardless of ``if_present``. The call is
    also a no-op when ``keys`` is empty or when a node along the path is not
    a dict (e.g. ``null`` or a string where an object was expected).

    Returns:
        None.
    """
    if not keys:
        return

    *parents, leaf = keys

    node = d
    for key in parents:
        # A non-dict node cannot be descended into; `in` on a str would do a
        # substring test and on None would raise, so check the type first.
        if not isinstance(node, dict) or key not in node:
            return
        node = node[key]

    if not isinstance(node, dict):
        return

    if not if_present or leaf in node:
        node[leaf] = newvalue

# Fixed placeholder values substituted for sensitive fields.

# Replacement for free-form text fields.
REDACTED_STRING = "REDACTED"

# Replacement timestamp, in seconds since the Unix epoch.
REDACTED_TIME_T = 1234567890

# REDACTED_TIME_T rendered as text in UTC.
# TODO: generate from TIME_T, with UTC
REDACTED_ASCTIME = "Fri Feb 13 23:31:30 2009 UTC"

# Replacement for 16-hex-digit identifiers stored as "0x..." strings.
REDACTED_HEX16_STR = "0x1234567890abcdef"

# Replacement for numeric identifiers.
REDACTED_UINT32 = 1234567890

# Fields to redact. Each entry is a dict with:
#   'k': the key path (outermost key first) of the field in the JSON document
#   'v': the placeholder value written over that field
REDACT_FIELDS = [
    {"k": list(path), "v": placeholder}
    for path, placeholder in (
        (("smartctl", "platform_info"), REDACTED_STRING),
        (("smartctl", "build_info"), REDACTED_STRING),
        (("serial_number",), REDACTED_STRING),
        (("firmware_version",), REDACTED_STRING),
        (("local_time", "time_t"), REDACTED_TIME_T),
        (("local_time", "asctime"), REDACTED_ASCTIME),
        (("logical_unit_id",), REDACTED_HEX16_STR),
        (("wwn", "id"), REDACTED_UINT32),
        # TODO: how to redact /dev/sdX /dev/nvmeN ??
    )
]

def redact_data(data):
    """Return a redacted deep copy of ``data``; the input is not modified.

    Every key path listed in ``REDACT_FIELDS`` that is present in the copy
    is overwritten with its placeholder value. Paths that are absent are
    left absent.
    """
    redacted = copy.deepcopy(data)
    for field in REDACT_FIELDS:
        key_path, placeholder = field['k'], field['v']
        mutate_nested_dict(redacted, key_path, placeholder, if_present=True)
    return redacted

# Run the redaction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions testdata/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!*.json
Loading

0 comments on commit a1c3448

Please sign in to comment.