Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UmbrellaDish patch importable fido 1 #149

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 83 additions & 72 deletions fido/fido.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,37 +743,16 @@ def list_files(roots, recurse=False):
break


def main(args=None):
if not args:
args = sys.argv[1:]

parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@', formatter_class=RawTextHelpFormatter)
parser.add_argument('-v', default=False, action='store_true', help='show version information')
parser.add_argument('-q', default=False, action='store_true', help='run (more) quietly')
parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
parser.add_argument('-zip', default=False, action='store_true', help='recurse into zip and tar files')
parser.add_argument('-noextension', default=False, action='store_true', help='disable extension matching, reduces number of matches but may reduce false positives')
parser.add_argument('-nocontainer', default=False, action='store_true', help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
parser.add_argument('-pronom_only', default=False, action='store_true', help='disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results')

group = parser.add_mutually_exclusive_group()
group.add_argument('-input', default=False, help='file containing a list of files to check, one per line. - means stdin')
group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')

parser.add_argument('-filename', default=None, help='filename if file contents passed through STDIN')
parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None, help='comma separated string of formats to use in identification')
parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None, help='comma separated string of formats not to use in identification')
parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
parser.add_argument('-bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['bufsize']) + ' bytes)')
parser.add_argument('-container_bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['container_bufsize']) + ' bytes)')
parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
parser.add_argument('-confdir', default=CONFIG_DIR, help='configuration directory to load_fido_xml, for example, the format specifications from.')

if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
args = parser.parse_args(args)
def main(
version=False, quiet=False,
recurse=False, recurse_compressed_archives=False,
noextension=False, nocontainer=False, pronom_only=False,
check_list=None, files=None, filename=None,
useformats=None, nouseformats=None,
matchprintf=None, nomatchprintf=None,
bufsize=None, container_bufsize=None,
loadformats=None, confdir=None, handle_matches=None
):

t0 = time.clock()

Expand All @@ -784,83 +763,115 @@ def main(args=None):
defaults['xml_fidoExtensionSignature'] = versions.fido_extension_signature
defaults['format_files'] = [defaults['xml_pronomSignature']]

if args.pronom_only:
if pronom_only:
versionHeader = "FIDO v{0} ({1}, {2})\n".format(__version__, defaults['xml_pronomSignature'], defaults['containersignature_file'])
else:
versionHeader = "FIDO v{0} ({1}, {2}, {3})\n".format(__version__, defaults['xml_pronomSignature'], defaults['containersignature_file'], defaults['xml_fidoExtensionSignature'])
defaults['format_files'].append(defaults['xml_fidoExtensionSignature'])

if args.v:
sys.stdout.write(versionHeader)
sys.exit(0)

if args.matchprintf:
if matchprintf:
try:
args.matchprintf = args.matchprintf.decode('string_escape')
matchprintf = matchprintf.decode('string_escape')
except AttributeError:
args.matchprintf = args.matchprintf.replace(r"\n", "\n")
args.matchprintf = args.matchprintf.replace(r"\t", "\t")
if args.nomatchprintf:
matchprintf = matchprintf.replace(r"\n", "\n")
matchprintf = matchprintf.replace(r"\t", "\t")

if nomatchprintf:
    # Convert the literal "\n" / "\t" sequences typed on the command line
    # into real control characters in the no-match format string.
    try:
        # Works for string types that expose .decode() with the
        # 'string_escape' codec (Python 2 byte strings).
        nomatchprintf = nomatchprintf.decode('string_escape')
    except AttributeError:
        # This string type has no .decode(): substitute by hand.
        # The substitution must target nomatchprintf itself, not
        # matchprintf, which is a separate option and may be None.
        nomatchprintf = nomatchprintf.replace(r"\n", "\n")
        nomatchprintf = nomatchprintf.replace(r"\t", "\t")

fido = Fido(
quiet=args.q,
bufsize=args.bufsize,
container_bufsize=args.container_bufsize,
printmatch=args.matchprintf,
printnomatch=args.nomatchprintf,
zip=args.zip,
nocontainer=args.nocontainer,
conf_dir=args.confdir)
quiet=quiet,
bufsize=bufsize,
container_bufsize=container_bufsize,
printmatch=matchprintf,
printnomatch=nomatchprintf,
zip=recurse_compressed_archives,
nocontainer=nocontainer,
conf_dir=confdir,
handle_matches=handle_matches
)

# TODO: Allow conf options to be dis-included
if args.loadformats:
for file in args.loadformats.split(','):
if loadformats:
for file in loadformats.split(','):
fido.load_fido_xml(file)

# TODO: remove from maps
if args.useformats:
args.useformats = args.useformats.split(',')
fido.formats = [f for f in fido.formats if f.find('puid').text in args.useformats]
elif args.nouseformats:
args.nouseformats = args.nouseformats.split(',')
fido.formats = [f for f in fido.formats if f.find('puid').text not in args.nouseformats]
if useformats:
useformats = useformats.split(',')
fido.formats = [f for f in fido.formats if f.find('puid').text in useformats]
elif nouseformats:
nouseformats = nouseformats.split(',')
fido.formats = [f for f in fido.formats if f.find('puid').text not in nouseformats]

# Set up to use stdin, or open input files:
if args.input == '-':
args.files = sys.stdin
elif args.input:
args.files = open(args.input, 'r')
if check_list == '-':
files = sys.stdin
elif check_list:
files = open(check_list, 'r')

# RUN
try:
if not args.q:
if not quiet:
sys.stderr.write(versionHeader)
sys.stderr.flush()
if (not args.input) and len(args.files) == 1 and args.files[0] == '-':
if (not check_list) and len(files) == 1 and files[0] == '-':
if fido.zip:
raise RuntimeError("Multiple content read from stdin not yet supported.")
sys.exit(1)
fido.identify_multi_object_stream(sys.stdin, extension=not args.noextension)
fido.identify_multi_object_stream(sys.stdin, extension=not noextension)
else:
fido.identify_stream(sys.stdin, args.filename, extension=not args.noextension)
fido.identify_stream(sys.stdin, filename, extension=not noextension)
else:
for file in list_files(args.files, args.recurse):
fido.identify_file(file, extension=not args.noextension)
for file in list_files(files, recurse):
fido.identify_file(file, extension=not noextension)
except KeyboardInterrupt:
msg = "FIDO: Interrupt while identifying file {0}"
sys.stderr.write(msg.format(fido.current_file))
sys.exit(1)

if not args.q:
if not quiet:
sys.stdout.flush()
fido.print_summary(time.clock() - t0)
sys.stderr.flush()


if __name__ == '__main__':
    # Run as a command line tool instead of as a module: build the argument
    # parser, then hand the parsed options to main() as keyword arguments.
    # Every option's dest matches the name of a main() parameter, which is
    # what lets vars(args) be splatted straight into the call below.
    parser = ArgumentParser(description=defaults['description'], epilog=defaults['epilog'], fromfile_prefix_chars='@', formatter_class=RawTextHelpFormatter)
    parser.add_argument('-v', default=False, action='store_true', dest='version', help='show version information')
    parser.add_argument('-q', default=False, action='store_true', dest='quiet', help='run (more) quietly')
    parser.add_argument('-recurse', default=False, action='store_true', help='recurse into subdirectories')
    parser.add_argument('-zip', default=False, action='store_true', dest='recurse_compressed_archives', help='recurse into zip and tar files')
    parser.add_argument('-noextension', default=False, action='store_true', help='disable extension matching, reduces number of matches but may reduce false positives')
    parser.add_argument('-nocontainer', default=False, action='store_true', help='disable deep scan of container documents, increases speed but may reduce accuracy with big files')
    parser.add_argument('-pronom_only', default=False, action='store_true', help='disables loading of format extensions file, only PRONOM signatures are loaded, may reduce accuracy of results')

    # A list file (-input) and positional FILE arguments are alternatives.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-input', default=False, dest='check_list', help='file containing a list of files to check, one per line. - means stdin')
    group.add_argument('files', nargs='*', default=[], metavar='FILE', help='files to check. If the file is -, then read content from stdin. In this case, python must be invoked with -u or it may convert the line terminators.')

    parser.add_argument('-filename', default=None, help='filename if file contents passed through STDIN')
    parser.add_argument('-useformats', metavar='INCLUDEPUIDS', default=None, help='comma separated string of formats to use in identification')
    parser.add_argument('-nouseformats', metavar='EXCLUDEPUIDS', default=None, help='comma separated string of formats not to use in identification')
    parser.add_argument('-matchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use on match. See nomatchprintf, README.txt.')
    parser.add_argument('-nomatchprintf', metavar='FORMATSTRING', default=None, help='format string (Python style) to use if no match. See README.txt')
    parser.add_argument('-bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['bufsize']) + ' bytes)')
    parser.add_argument('-container_bufsize', type=int, default=None, help='size (in bytes) of the buffer to match against (default=' + str(defaults['container_bufsize']) + ' bytes)')
    parser.add_argument('-loadformats', default=None, metavar='XML1,...,XMLn', help='comma separated string of XML format files to add.')
    parser.add_argument('-confdir', default=CONFIG_DIR, help='configuration directory to load_fido_xml, for example, the format specifications from.')

    # Invoked with no arguments at all: show usage and exit non-zero.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    if args.version:
        # In the code visible here, the full header (which also names the
        # signature files) is only assembled as a local inside main(), so it
        # cannot be referenced from module scope. Report the module-level
        # __version__ rather than failing with a NameError.
        # NOTE(review): if the signature file names must appear in the -v
        # output, main() needs to print the header itself when version=True.
        sys.stdout.write("FIDO v{0}\n".format(__version__))
    else:
        main(**vars(args))