From 8d1260f40363b37254d77db9dae0b8d41f8a001c Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 12:25:15 +0200 Subject: [PATCH 01/10] fileformats - use presentation tool for formats with odp convert output --- fileformats.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fileformats.yml b/fileformats.yml index 80518e1..c7b2d0d 100644 --- a/fileformats.yml +++ b/fileformats.yml @@ -911,7 +911,7 @@ fmt/125: name: Microsoft Powerpoint Presentation 95 action: convert convert: - tool: document + tool: presentation outputs: - odp - pdf @@ -919,7 +919,7 @@ fmt/126: name: Microsoft Powerpoint Presentation 97-2003 action: convert convert: - tool: document + tool: presentation outputs: - odp - pdf @@ -1057,7 +1057,7 @@ fmt/215: name: Microsoft Powerpoint for Windows 2007 onwards action: convert convert: - tool: document + tool: presentation outputs: - odp - pdf @@ -1444,7 +1444,7 @@ fmt/487: name: Macro Enabled Microsoft Powerpoint 2007 Onwards action: convert convert: - tool: document + tool: presentation outputs: - odp - pdf @@ -1722,7 +1722,7 @@ fmt/629: name: Microsoft PowerPoint Show 2007 action: convert convert: - tool: document + tool: presentation outputs: - odp - pdf @@ -1730,7 +1730,7 @@ fmt/631: name: Microsoft PowerPoint Template 2007 action: convert convert: - tool: document + tool: presentation outputs: - odp - pdf From e452413e46fa7a361be4ca95012004e46e4080c4 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 12:25:34 +0200 Subject: [PATCH 02/10] fileformats - use pdfa-3 output for formats with pdf convert tool --- fileformats.yml | 62 ++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/fileformats.yml b/fileformats.yml index c7b2d0d..228afa9 100644 --- a/fileformats.yml +++ b/fileformats.yml @@ -274,49 +274,49 @@ fmt/14: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/15: name: Acrobat PDF 1.1 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/16: name: Acrobat PDF 1.2 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/17: name: Acrobat PDF 1.3 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/18: name: Acrobat PDF 1.4 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/19: name: Acrobat PDF 1.5 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/20: name: Acrobat PDF 1.6 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/21: name: AutoCAD Drawing 1.0 action: convert @@ -798,7 +798,7 @@ fmt/95: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/96: name: Hypertext Markup Language action: convert @@ -979,7 +979,7 @@ fmt/146: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/155: name: Geographic Tagged Image File Format (GeoTIFF) action: convert @@ -993,14 +993,14 @@ fmt/157: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/158: name: Acrobat PDF/X - Portable Document Format - Exchange 3:2002 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/163: name: Microsoft Works Word Processor 1-3 for DOS and 2 for Windows action: convert @@ -1096,7 +1096,7 @@ fmt/276: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/277: name: ESRI Arc/View Shapefile Index action: ignore @@ -1222,7 +1222,7 @@ fmt/354: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/355: name: Rich Text Format 1.9 action: convert @@ -1295,7 +1295,7 @@ fmt/422: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/428: name: CorelDraw Drawing action: convert @@ -1405,35 +1405,35 @@ fmt/476: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/477: name: Acrobat PDF/A - Portable Document Format action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/478: name: Acrobat PDF/A - Portable Document Format (2u) action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/479: name: Acrobat PDF/A - Portable Document Format (3a) action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/480: name: Acrobat PDF/A - Portable Document Format (3b) action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/484: name: 7Zip format action: extract @@ -1454,14 +1454,14 @@ fmt/488: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/493: name: Acrobat PDF/E - Portable Document Format for Engineering PDF/E-1 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/494: name: Microsoft Office Encrypted Document (2007 Onwards) action: ignore @@ -1586,56 +1586,56 @@ fmt/557: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/558: name: Adobe Illustrator 9.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/559: name: Adobe Illustrator 10.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/560: name: Adobe Illustrator 11.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/561: name: Adobe Illustrator 12.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/562: name: Adobe Illustrator 13.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/563: name: Adobe Illustrator 14.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/564: name: Adobe Illustrator 15.0 action: convert convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/573: name: WebM video action: convert @@ -1956,7 +1956,7 @@ fmt/1129: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/1132: name: Netscape Bookmark File Format action: ignore @@ -2059,7 +2059,7 @@ fmt/1451: convert: tool: pdf outputs: - - pdf + - pdfa-3 fmt/1452: name: Lotus 1-2-3 Worksheet (97) action: convert From cfc93b6f32e7961c4f06fd4baffc8fa04f99c2c2 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 12:31:33 +0200 Subject: [PATCH 03/10] fileformats - use spreadsheet tool for formats with ods convert output, use convert copy tool for ODS and ODT --- fileformats.yml | 63 ++++++++++++++++++++----------------------------- 1 file changed, 26 insertions(+), 37 deletions(-) diff --git a/fileformats.yml b/fileformats.yml index 228afa9..fc0f1cb 100644 --- a/fileformats.yml +++ b/fileformats.yml @@ -541,7 +541,7 @@ fmt/55: name: Microsoft Excel 2.x Worksheet (xls) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -549,7 +549,7 @@ fmt/56: name: Microsoft Excel 3.0 Worksheet (xls) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -557,7 +557,7 @@ fmt/57: name: Microsoft Excel 4.0 Worksheet (xls) 4S action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -565,7 +565,7 @@ fmt/58: name: Microsoft Excel 4.0 Workbook (xls) 4W action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -573,7 +573,7 @@ fmt/59: name: Microsoft Excel 5.0/95 Workbook (xls) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -581,7 +581,7 @@ fmt/61: name: Microsoft Excel 97 Workbook (xls) 8 action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -948,17 +948,12 @@ fmt/136: name: OpenDocument Text 1.0 action: convert convert: - tool: document - outputs: - - pdf + tool: copy fmt/137: name: OpenDocument Spreadsheet 1.0 action: convert convert: - tool: document - outputs: - - ods - - pdf + tool: copy fmt/141: name: Waveform Audio (PCMWAVEFORMAT) action: convert @@ -1013,7 +1008,7 @@ fmt/189: action: convert description: Microsoft developed office Open XML (OOXML) in 2007. It is an XML based format representing a departure from previous Microsoft Office suites, which were binary file based. Starting with Microsoft Office 2007, the Office Open XML file formats have become the default file format of Microsoft Office. convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -1049,7 +1044,7 @@ fmt/214: name: Microsoft Excel for Windows 2007 onwards action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -1137,18 +1132,12 @@ fmt/294: name: OpenDocument Spreadsheet 1.1 action: convert convert: - tool: document - outputs: - - ods - - pdf + tool: copy fmt/295: name: OpenDocument Spreadsheet 1.2 action: convert convert: - tool: document - outputs: - - ods - - pdf + tool: copy fmt/296: name: OpenDocument Graphics action: convert @@ -1362,7 +1351,7 @@ fmt/445: name: Microsoft Excel Macro-Enabled 2007 action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -1666,7 +1655,7 @@ fmt/595: name: Microsoft Excel Non-XML Binary Workbook 2007 onwards action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -1681,7 +1670,7 @@ fmt/598: name: Microsoft Excel Template 2007 onwards action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -1866,7 +1855,7 @@ fmt/901: name: Microsoft Works Spreadsheet 6-9 action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2064,7 +2053,7 @@ fmt/1452: name: Lotus 1-2-3 Worksheet (97) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2072,7 +2061,7 @@ fmt/1453: name: Lotus 1-2-3 Worksheet (9.8 Millennium) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2217,7 +2206,7 @@ x-fmt/9: name: dBASE Database III action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2225,7 +2214,7 @@ x-fmt/10: name: dBASE Database IV action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2241,7 +2230,7 @@ x-fmt/17: name: Microsoft Excel Template 97-2003 action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2350,15 +2339,15 @@ x-fmt/114: name: Lotus 1-2-3 Worksheet (2.0) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf x-fmt/115: - name: ' Lotus 1-2-3 Worksheet (3.0)' + name: Lotus 1-2-3 Worksheet (3.0) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2366,7 +2355,7 @@ x-fmt/116: name: Lotus 1-2-3 Worksheet (4-5) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf @@ -2374,7 +2363,7 @@ x-fmt/117: name: Lotus 1-2-3 Worksheet (1.0) action: convert convert: - tool: document + tool: spreadsheet outputs: - ods - pdf From 34d00eb25d0efbeeaf6295d8c73c8127622f3e8b Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:01:34 +0200 Subject: [PATCH 04/10] fileformats - format --- fileformats.yml | 769 +++++++++++++++++++++++++----------------------- 1 file changed, 398 insertions(+), 371 deletions(-) diff --git a/fileformats.yml b/fileformats.yml index fc0f1cb..9ea256d 100644 --- a/fileformats.yml +++ b/fileformats.yml @@ -31,7 +31,7 @@ aca-fmt/1: convert: tool: symphovert outputs: - - ods + - ods aca-fmt/2: name: Microsoft Word Markup action: manual @@ -45,8 +45,8 @@ aca-fmt/3: convert: tool: document outputs: - - odt - - pdf + - odt + - pdf aca-fmt/4: name: MindManager Mind Map action: ignore @@ -88,77 +88,84 @@ aca-fmt/9: convert: tool: tnef outputs: - - html + - html aca-fmt/10: name: MS Access 95 action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/11: name: MS Access 97 action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/12: name: MS Access 2000 action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/13: name: MS Access 2002/3 action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/14: name: MS Access database unspecified version (Jet 3 DB) action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/15: name: MS Access database unspecified version (Jet 4 DB) action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/16: name: MS Access database unspecified version (ACE DB) action: manual manual: reason: Unable to convert automatically. - process: Open with Microsoft Access. For each table in the database, save the table as .xlsx. Reidentify with digiarch. + process: Open with Microsoft Access. For each table in the database, save the + table as .xlsx. Reidentify with digiarch. aca-fmt/18: name: OpenDocument Text (unspecified version) action: convert convert: tool: document outputs: - - pdf + - pdf aca-fmt/20: name: MapInfo TAB file action: convert convert: tool: gis outputs: - - tab + - tab aca-fmt/21: name: OpenDocument Spreadsheet (unspecified version) action: convert convert: tool: document outputs: - - pdf + - pdf aca-fmt/22: name: Windows Compressed Enhanced Metafile action: convert convert: tool: document outputs: - - jpg + - jpg aca-fmt/23: name: Microsoft Word XML Document description: A Microsoft Word document saved as a standalone XML file @@ -166,7 +173,7 @@ aca-fmt/23: convert: tool: document outputs: - - odt + - odt aca-fmt/24: name: vCard action: ignore @@ -190,8 +197,12 @@ aca-fmt/27: action: ignore ignore: template: not-preservable - reason: Pre-installed or custom skin that changes the appearance of the Windows Media Player interface. Not preservation-worthy - description: A WMZ file is a pre-installed or custom skin that changes the appearance of the Windows Media Player interface, often to match a certain theme. It contains a combination of graphics and JScript code that defines the look and behavior of each skin element. WMZ files are compressed using GZIP compression. + reason: Pre-installed or custom skin that changes the appearance of the Windows + Media Player interface. Not preservation-worthy + description: A WMZ file is a pre-installed or custom skin that changes the appearance + of the Windows Media Player interface, often to match a certain theme. It contains + a combination of graphics and JScript code that defines the look and behavior + of each skin element. WMZ files are compressed using GZIP compression. aca-fmt/28: name: Bentley Microstation V7 File description: Bentley CAD file. https://www.loc.gov/preservation/digital/formats/fdd/fdd000603.shtml @@ -217,7 +228,8 @@ aca-fmt/31: manual: reason: No cli for this format. process: Open in eDrawings Viewer and save as pdf. www.edrawingsviewer.com/download-edrawings - description: An EASM file is a CAD drawing created by Dassault Systemes eDrawings Publisher, a plug-in used to export designs from CAD applications. + description: An EASM file is a CAD drawing created by Dassault Systemes eDrawings + Publisher, a plug-in used to export designs from CAD applications. fmt/3: name: Graphics Interchange Format 87a ignore_if: @@ -226,7 +238,7 @@ fmt/3: convert: tool: image outputs: - - png + - png fmt/4: name: Graphics Interchange Format 89a ignore_if: @@ -235,14 +247,14 @@ fmt/4: convert: tool: image outputs: - - png + - png fmt/5: name: Audio/Video Interleaved Format action: convert convert: tool: video outputs: - - mp4 + - mp4 fmt/11: name: Portable Network Graphics 1.0 ignore_if: @@ -251,7 +263,7 @@ fmt/11: convert: tool: image outputs: - - png + - png fmt/12: name: Portable Network Graphics 1.1 ignore_if: @@ -260,7 +272,7 @@ fmt/12: convert: tool: image outputs: - - png + - png fmt/13: name: Portable Network Graphics 1.2 ignore_if: @@ -274,205 +286,205 @@ fmt/14: convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/15: name: Acrobat PDF 1.1 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/16: name: Acrobat PDF 1.2 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/17: name: Acrobat PDF 1.3 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/18: name: Acrobat PDF 1.4 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/19: name: Acrobat PDF 1.5 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/20: name: Acrobat PDF 1.6 - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/21: name: AutoCAD Drawing 1.0 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/22: name: AutoCAD Drawing 1.2 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/23: name: AutoCAD Drawing 1.3 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/24: name: AutoCAD Drawing 1.4 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/25: name: AutoCAD Drawing 2.0 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/26: name: AutoCAD Drawing 2.1 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/27: name: AutoCAD Drawing 2.2 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/28: name: AutoCAD Drawing 2.5 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/29: name: AutoCAD Drawing 2.6 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/30: name: AutoCAD Drawing R9 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/31: name: AutoCAD Drawing R10 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/32: name: AutoCAD Drawing R11/12 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/33: name: AutoCAD Drawing R13 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/34: name: AutoCAD Drawing R14 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/35: name: AutoCAD Drawing 2000-2002 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/36: name: AutoCAD Drawing 2004-2005 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/37: name: Microsoft Word for Windows Document 1.0 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/38: name: Microsoft Word for Windows Document 2.0 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/39: name: Microsoft Word Document 95 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/40: name: Microsoft Word Document 97-2003 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/41: name: Raw JPEG Stream ignore_if: @@ -481,7 +493,7 @@ fmt/41: convert: tool: image outputs: - - png + - png fmt/42: name: JPEG File Interchange Format 1.00 ignore_if: @@ -490,7 +502,7 @@ fmt/42: convert: tool: image outputs: - - png + - png fmt/43: name: JPEG File Interchange Format 1.01 ignore_if: @@ -499,7 +511,7 @@ fmt/43: convert: tool: image outputs: - - png + - png fmt/44: name: JPEG File Interchange Format 1.02 ignore_if: @@ -508,267 +520,267 @@ fmt/44: convert: tool: image outputs: - - png + - png fmt/45: name: Rich Text Format 1.0-1.4 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/50: name: Rich Text Format 1.5-1.6 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/52: name: Rich Text Format 1.7 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/53: name: Rich Text Format 1.8 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/55: name: Microsoft Excel 2.x Worksheet (xls) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/56: name: Microsoft Excel 3.0 Worksheet (xls) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/57: name: Microsoft Excel 4.0 Worksheet (xls) 4S action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/58: name: Microsoft Excel 4.0 Workbook (xls) 4W action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/59: name: Microsoft Excel 5.0/95 Workbook (xls) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/61: name: Microsoft Excel 97 Workbook (xls) 8 action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/63: name: Drawing Interchange File Format (ASCII) action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/64: name: Drawing Interchange File Format (ASCII) 1.0 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/65: name: Drawing Interchange File Format (ASCII) 1.2 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/66: name: Drawing Interchange File Format (ASCII) 1.3 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/67: name: Drawing Interchange File Format (ASCII) 1.4 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/68: name: Drawing Interchange File Format (ASCII) 2.0 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/69: name: Drawing Interchange File Format (ASCII) 2.1 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/70: name: Drawing Interchange File Format (ASCII) 2.2 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/71: name: Drawing Interchange File Format (ASCII) 2.5 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/72: name: Drawing Interchange File Format (ASCII) 2.6 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/73: name: Drawing Interchange File Format (ASCII) R9 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/74: name: Drawing Interchange File Format (ASCII) R10 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/75: name: Drawing Interchange File Format (ASCII) R11/12 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/76: name: Drawing Interchange File Format (ASCII) R13 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/77: name: Drawing Interchange File Format (ASCII) R14 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/78: name: Drawing Interchange File Format (ASCII) 2000-2002 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/79: name: Drawing Interchange File Format (ASCII) 2004/2005/2006 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/80: name: Drawing Interchange File Format (Binary) R10 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/81: name: Drawing Interchange File Format (Binary) R11/12 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/82: name: Drawing Interchange File Format (Binary) R13 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/83: name: Drawing Interchange File Format (Binary) R14 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/84: name: Drawing Interchange File Format (Binary) 2000-2002 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/85: name: Drawing Interchange File Format (Binary) 2004-2006 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/90: name: PCX 5 ignore_if: @@ -777,28 +789,28 @@ fmt/90: convert: tool: image outputs: - - png + - png fmt/91: name: Scalable Vector Graphics 1.0 action: convert convert: tool: browser outputs: - - pdf + - pdf fmt/92: name: Scalable Vector Graphics 1.1 action: convert convert: tool: browser outputs: - - pdf + - pdf fmt/95: name: Acrobat PDF/A - Portable Document Format 1a action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/96: name: Hypertext Markup Language action: convert @@ -823,7 +835,7 @@ fmt/101: name: Extensible Markup Language 1.0 reidentify: reason: Some applications allow saving documents as XML and can re-open them - on_fail: "action" + on_fail: action action: convert convert: tool: copy @@ -852,7 +864,7 @@ fmt/111: name: OLE2 Compound Document Format reidentify: reason: why do we re-run these? - on_fail: "action" + on_fail: action action: manual manual: reason: We don't know what to do with this format. @@ -865,7 +877,7 @@ fmt/115: convert: tool: image outputs: - - png + - png fmt/116: name: Windows Bitmap 3.0 ignore_if: @@ -874,7 +886,7 @@ fmt/116: convert: tool: image outputs: - - png + - png fmt/117: name: Windows Bitmap 3.0 NT ignore_if: @@ -883,21 +895,21 @@ fmt/117: convert: tool: image outputs: - - png + - png fmt/122: name: Encapsulated PostScript File Format 1.2 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/123: name: Encapsulated PostScript File Format 2.0 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/124: name: Encapsulated PostScript File Format 3 ignore_if: @@ -906,44 +918,44 @@ fmt/124: convert: tool: document outputs: - - pdf + - pdf fmt/125: name: Microsoft Powerpoint Presentation 95 action: convert convert: tool: presentation outputs: - - odp - - pdf + - odp + - pdf fmt/126: name: Microsoft Powerpoint Presentation 97-2003 action: convert convert: tool: presentation outputs: - - odp - - pdf + - odp + - pdf fmt/132: name: Windows Media Audio action: convert convert: tool: audio outputs: - - flac + - flac fmt/133: name: Windows Media Video action: convert convert: tool: video outputs: - - mp4 + - mp4 fmt/134: name: MPEG 1/2 Audio Layer 3 action: convert convert: tool: audio outputs: - - flac + - flac fmt/136: name: OpenDocument Text 1.0 action: convert @@ -960,58 +972,61 @@ fmt/141: convert: tool: audio outputs: - - flac + - flac fmt/142: name: Waveform Audio (WAVEFORMATEX) action: convert convert: tool: audio outputs: - - flac + - flac fmt/146: name: Acrobat PDF/X - Portable Document Format - Exchange 1a:2003 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/155: name: Geographic Tagged Image File Format (GeoTIFF) action: convert convert: tool: image outputs: - - tif + - tif fmt/157: name: Acrobat PDF/X - Portable Document Format - Exchange 1a:2001 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/158: name: Acrobat PDF/X - Portable Document Format - Exchange 3:2002 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/163: name: Microsoft Works Word Processor 1-3 for DOS and 2 for Windows action: convert convert: tool: document outputs: - - pdf + - pdf fmt/189: name: Microsoft Office Open XML action: convert - description: Microsoft developed office Open XML (OOXML) in 2007. It is an XML based format representing a departure from previous Microsoft Office suites, which were binary file based. Starting with Microsoft Office 2007, the Office Open XML file formats have become the default file format of Microsoft Office. + description: Microsoft developed office Open XML (OOXML) in 2007. It is an XML based + format representing a departure from previous Microsoft Office suites, which were + binary file based. Starting with Microsoft Office 2007, the Office Open XML file + formats have become the default file format of Microsoft Office. convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/199: name: MPEG-4 Media File rename: @@ -1022,7 +1037,7 @@ fmt/199: convert: tool: video outputs: - - mp4 + - mp4 fmt/206: name: Structured Query Language Data action: convert @@ -1046,16 +1061,16 @@ fmt/214: convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/215: name: Microsoft Powerpoint for Windows 2007 onwards action: convert convert: tool: presentation outputs: - - odp - - pdf + - odp + - pdf fmt/217: name: PaintShop Pro Browser Cache File action: ignore @@ -1069,29 +1084,30 @@ fmt/233: convert: tool: document outputs: - - pdf + - pdf fmt/244: name: Keyhole Markup Language (XML) - description: KML is an open standard officially named the OpenGISĀ® KML Encoding Standard (OGC KML). https://developers.google.com/kml/documentation/kmlreference + description: "KML is an open standard officially named the OpenGIS\xAE KML Encoding\ + \ Standard (OGC KML). https://developers.google.com/kml/documentation/kmlreference" action: convert convert: tool: gis outputs: - - gml3 + - gml3 fmt/258: name: Microsoft Works Word Processor 5-6 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/276: name: Acrobat PDF 1.7 - Portable Document Format 1.7 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/277: name: ESRI Arc/View Shapefile Index action: ignore @@ -1104,30 +1120,30 @@ fmt/279: convert: tool: audio outputs: - - flac + - flac fmt/290: name: OpenDocument Text 1.1 action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf fmt/291: name: OpenDocument Text 1.2 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/292: name: OpenDocument Presentation 1.1 action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf fmt/294: name: OpenDocument Spreadsheet 1.1 action: convert @@ -1144,16 +1160,16 @@ fmt/296: convert: tool: document outputs: - - svg - - pdf + - svg + - pdf fmt/297: name: OpenDocument Graphics action: convert convert: tool: document outputs: - - svg - - pdf + - svg + - pdf fmt/319: name: ESRI Spatial Index File action: ignore @@ -1186,7 +1202,7 @@ fmt/340: convert: tool: symphovert outputs: - - odt + - odt fmt/345: name: Microsoft Windows Enhanced Metafile 3.0 ignore_if: @@ -1195,7 +1211,7 @@ fmt/345: convert: tool: image outputs: - - png + - png fmt/353: name: Tagged Image File Format ignore_if: @@ -1204,35 +1220,35 @@ fmt/353: convert: tool: image outputs: - - tif + - tif fmt/354: name: Acrobat PDF/A - Portable Document Format 1b action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/355: name: Rich Text Format 1.9 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/356: name: Adaptive Multi-Rate Audio action: convert convert: tool: audio outputs: - - flac + - flac fmt/357: name: 3GPP Audio/Video File action: convert convert: tool: video outputs: - - mp4 + - mp4 fmt/388: name: Internet Calendar and Scheduling format action: convert @@ -1263,60 +1279,60 @@ fmt/412: convert: tool: document outputs: - - pdf + - pdf fmt/413: name: Scalable Vector Graphics Tiny 1.2 action: convert convert: tool: browser outputs: - - pdf + - pdf fmt/414: name: Audio Interchange File Format action: convert convert: tool: audio outputs: - - flac + - flac fmt/422: name: Adobe Illustrator 6.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/428: name: CorelDraw Drawing action: convert convert: tool: document outputs: - - svg - - pdf + - svg + - pdf fmt/433: name: Drawing Interchange File Format (ASCII) 2007/2008/2009 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/434: name: AutoCAD Drawing 2010/2011/2012 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/435: name: Drawing Interchange File Format (ASCII) 2010/2012/2014 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/440: name: Microsoft Project 2007 action: ignore @@ -1325,36 +1341,37 @@ fmt/440: reason: Project files. Not preservation-worthy fmt/441: name: Windows Media Video - description: Windows Media Video 9 Advanced Profile or WVC1 is a fuly compliant Advanced Profile of the VC-1 codec standard. + description: Windows Media Video 9 Advanced Profile or WVC1 is a fuly compliant + Advanced Profile of the VC-1 codec standard. action: convert convert: tool: video outputs: - - mp4 + - mp4 fmt/442: name: Microsoft Visio (generic) action: convert convert: tool: document outputs: - - svg - - pdf + - svg + - pdf fmt/443: name: Microsoft Visio Drawing 2003-2010 action: convert convert: tool: document outputs: - - svg - - pdf + - svg + - pdf fmt/445: name: Microsoft Excel Macro-Enabled 2007 action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/468: name: ISO 9660 Disk Image File action: extract @@ -1367,7 +1384,7 @@ fmt/471: convert: tool: browser outputs: - - pdf + - pdf fmt/473: name: Microsoft Office Owner File action: ignore @@ -1394,35 +1411,35 @@ fmt/476: convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/477: name: Acrobat PDF/A - Portable Document Format action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/478: name: Acrobat PDF/A - Portable Document Format (2u) action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/479: name: Acrobat PDF/A - Portable Document Format (3a) action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/480: name: Acrobat PDF/A - Portable Document Format (3b) action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/484: name: 7Zip format action: extract @@ -1435,22 +1452,22 @@ fmt/487: convert: tool: presentation outputs: - - odp - - pdf + - odp + - pdf fmt/488: name: Acrobat PDF/X - Portable Document Format - Exchange PDF/X-4 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/493: name: Acrobat PDF/E - Portable Document Format for Engineering PDF/E-1 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/494: name: Microsoft Office Encrypted Document (2007 Onwards) action: ignore @@ -1512,7 +1529,7 @@ fmt/523: convert: tool: document outputs: - - pdf + - pdf fmt/524: name: Microsoft Office Theme action: ignore @@ -1543,16 +1560,16 @@ fmt/531: convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/532: name: Drawing Interchange File Format (ASCII) 2013/2014/2015/2016/2017 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/550: name: Adobe InDesign Document CS4 action: ignore @@ -1575,70 +1592,70 @@ fmt/557: convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/558: name: Adobe Illustrator 9.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/559: name: Adobe Illustrator 10.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/560: name: Adobe Illustrator 11.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/561: name: Adobe Illustrator 12.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/562: name: Adobe Illustrator 13.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/563: name: Adobe Illustrator 14.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/564: name: Adobe Illustrator 15.0 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/573: name: WebM video action: convert convert: tool: video outputs: - - mp4 + - mp4 fmt/583: name: Vector Markup Language action: convert convert: tool: browser outputs: - - pdf + - pdf fmt/584: name: Windows Media Metafile action: convert @@ -1657,23 +1674,23 @@ fmt/595: convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/597: name: Microsoft Word Template 2007 onwards action: convert convert: tool: document outputs: - - pdf + - pdf fmt/598: name: Microsoft Excel Template 2007 onwards action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/599: name: Microsoft Word Macro-Enabled Document Template 2007 onwards action: ignore @@ -1686,15 +1703,15 @@ fmt/607: convert: tool: sas outputs: - - ods - - pdf + - ods + - pdf fmt/609: name: Microsoft Word (Generic) 6.0-2003 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/613: name: RAR Archive version 5 action: extract @@ -1713,16 +1730,16 @@ fmt/629: convert: tool: presentation outputs: - - odp - - pdf + - odp + - pdf fmt/631: name: Microsoft PowerPoint Template 2007 action: convert convert: tool: presentation outputs: - - odp - - pdf + - odp + - pdf fmt/634: name: Microsoft Compiled HTML Help action: ignore @@ -1743,14 +1760,14 @@ fmt/645: convert: tool: image outputs: - - png + - png fmt/657: name: Open XML Paper Specification action: convert convert: tool: xps outputs: - - pdf + - pdf fmt/670: name: 'PKCS #7 Cryptographic Message File' action: ignore @@ -1770,7 +1787,7 @@ fmt/724: convert: tool: gis outputs: - - gml3 + - gml3 fmt/725: name: Microsoft Project 2010 action: ignore @@ -1783,15 +1800,15 @@ fmt/744: convert: tool: document outputs: - - pdf + - pdf fmt/749: name: AppleWorks Word Processor 6 action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf fmt/754: name: Microsoft Word Document (Password Protected) 97-2003 action: ignore @@ -1831,7 +1848,7 @@ fmt/881: convert: tool: image outputs: - - png + - png fmt/886: name: HTML Components action: ignore @@ -1857,8 +1874,8 @@ fmt/901: convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/938: name: Python Script File action: ignore @@ -1884,8 +1901,8 @@ fmt/979: action: ignore ignore: template: not-preservable - reason: A property list file is a a serialised storage file, usually used by programs to store configurations in a - more compact form. Not preservation-worthy + reason: A property list file is a a serialised storage file, usually used by programs + to store configurations in a more compact form. Not preservation-worthy fmt/997: name: SPSS Portable File action: ignore @@ -1910,7 +1927,7 @@ fmt/1047: convert: tool: gis outputs: - - gml3 + - gml3 fmt/1079: name: Microsoft Program Database 7.00 action: ignore @@ -1945,7 +1962,7 @@ fmt/1129: convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/1132: name: Netscape Bookmark File Format action: ignore @@ -1958,7 +1975,7 @@ fmt/1216: convert: tool: symphovert outputs: - - odp + - odp fmt/1256: name: MapInfo Workspace File action: ignore @@ -2000,71 +2017,71 @@ fmt/1389: convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/1390: name: Drawing Interchange Format (Binary) 2007-2009 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/1391: name: Drawing Interchange Format (Binary) 2010-2012 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/1392: name: Drawing Interchange Format (Binary) 2013-2017 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/1393: name: Drawing Interchange Format (Binary) 2018-2021 action: convert convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg fmt/1439: name: Apple iWork Pages 09 action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf fmt/1451: name: PDF Portfolio 1.7 action: convert convert: tool: pdf outputs: - - pdfa-3 + - pdfa-3 fmt/1452: name: Lotus 1-2-3 Worksheet (97) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/1453: name: Lotus 1-2-3 Worksheet (9.8 Millennium) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf fmt/1482: name: Access Report Snapshot action: ignore @@ -2080,98 +2097,106 @@ fmt/1507: convert: tool: image outputs: - - png + - png fmt/1511: name: Microsoft Publisher 1 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/1512: name: Microsoft Publisher 2003 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/1513: name: Microsoft Publisher 2007 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/1514: name: Microsoft Publisher 2010 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/1515: name: Microsoft Publisher 2013 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/1516: name: Microsoft Publisher 2016-2019 action: convert convert: tool: document outputs: - - pdf + - pdf fmt/1600: - name: 'ESRI ArcInfo DAT File (Internal)' + name: ESRI ArcInfo DAT File (Internal) reidentify: reason: Identifies .dat-files as mapinfo-files by extension only. They are sometimes winmail.dat - on_fail: "action" - action: "manual" + on_fail: action + action: manual manual: reason: There was no bit-header which matches the file. - process: Find out what we can do with this format. You can try and see, if there is a mismatch between our bit-header and the bit-header for the fil with the repository "byteheader". + process: Find out what we can do with this format. You can try and see, if there + is a mismatch between our bit-header and the bit-header for the fil with the + repository "byteheader". fmt/1711: name: Software602 Printer Configuration File action: ignore ignore: template: not-preservable - reason: Binary format for configuration, containing paths to printers and/ or dictionaries, - used by Software602 programmes. + reason: Binary format for configuration, containing paths to printers and/ or + dictionaries, used by Software602 programmes. fmt/1729: name: Esri Shapefile Geospatial Metadata File action: convert convert: tool: gis outputs: - - gml3 + - gml3 fmt/1730: name: '' reidentify: reason: Identifies .dat-files as mapinfo-files by extension only. They are sometimes winmail.dat - on_fail: "action" - action: "manual" + on_fail: action + action: manual manual: reason: There was no bit-header which matches the file. - process: Find out what we can do with this format. You can try and see, if there is a mismatch between our bit-header and the bit-header for the fil with the repository "byteheader". + process: Find out what we can do with this format. You can try and see, if there + is a mismatch between our bit-header and the bit-header for the fil with the + repository "byteheader". fmt/1756: name: Opendocument Text 1.3 reidentify: reason: Identified by ext only. Prone to error - on_fail: "action" - action: "manual" + on_fail: action + action: manual manual: reason: There was no bit-header which matches the file. - process: Find out what we can do with this format. You can try and see, if there is a mismatch between our bit-header and the bit-header for the fil with the repository "byteheader". + process: Find out what we can do with this format. You can try and see, if there + is a mismatch between our bit-header and the bit-header for the fil with the + repository "byteheader". fmt/1763: name: MacBinary action: manual manual: reason: Not handled by Digiarch Extract yet. - process: Open on Mac and save as pdf, rename to .bin file and use macutils in Linux, ex. macunpack input.bin and rename output file to correct format and save as pdf. manpages.debian.org/buster/macutils/macutil.1.en.html + process: Open on Mac and save as pdf, rename to .bin file and use macutils in + Linux, ex. macunpack input.bin and rename output file to correct format and + save as pdf. manpages.debian.org/buster/macutils/macutil.1.en.html fmt/1768: name: C Source Code file action: ignore @@ -2188,8 +2213,8 @@ fmt/1879: reason: Not preservation-worthy fmt/1915: name: ActiveMime Object - description: ActiveMime Object, or mso, are attached to e-mail messages as a way for the - user to view the attached Office document as part of the e-mail itself. + description: ActiveMime Object, or mso, are attached to e-mail messages as a way + for the user to view the attached Office document as part of the e-mail itself. action: ignore ignore: template: not-preservable @@ -2200,29 +2225,29 @@ fmt/1934: convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg x-fmt/9: name: dBASE Database III action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/10: name: dBASE Database IV action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/13: name: Tab-separated values reidentify: reason: .TAB-files related to GIS is sometimes identified as plaintext - on_fail: "action" + on_fail: action action: convert convert: tool: copy @@ -2232,8 +2257,8 @@ x-fmt/17: convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/18: name: Comma Separated Values action: convert @@ -2251,21 +2276,21 @@ x-fmt/44: convert: tool: document outputs: - - pdf + - pdf x-fmt/45: name: Microsoft Word Document Template 97-2003 action: convert convert: tool: document outputs: - - pdf + - pdf x-fmt/49: name: AutoCAD Design Web Format 6.0 action: convert convert: tool: cad outputs: - - pdf + - pdf x-fmt/54: name: AutoCAD Font Mapping Table action: ignore @@ -2290,7 +2315,7 @@ x-fmt/64: convert: tool: document outputs: - - pdf + - pdf x-fmt/78: name: AutoCAD Plot Configuration File (2000) action: ignore @@ -2308,8 +2333,8 @@ x-fmt/92: convert: tool: document outputs: - - svg - - pdf + - svg + - pdf x-fmt/103: name: AutoCAD Compiled Shape/Font File action: ignore @@ -2331,7 +2356,7 @@ x-fmt/111: name: Plain Text File reidentify: reason: .TAB-files related to GIS is sometimes identified as plaintext - on_fail: "action" + on_fail: action action: convert convert: tool: copy @@ -2341,32 +2366,32 @@ x-fmt/114: convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/115: name: Lotus 1-2-3 Worksheet (3.0) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/116: name: Lotus 1-2-3 Worksheet (4-5) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/117: name: Lotus 1-2-3 Worksheet (1.0) action: convert convert: tool: spreadsheet outputs: - - ods - - pdf + - ods + - pdf x-fmt/119: name: Windows Metafile Image ignore_if: @@ -2375,7 +2400,7 @@ x-fmt/119: convert: tool: image outputs: - - png + - png x-fmt/128: name: Microsoft Excel Workspace action: ignore @@ -2406,7 +2431,7 @@ x-fmt/153: convert: tool: image outputs: - - png + - png x-fmt/169: name: PHP Script Page action: ignore @@ -2445,21 +2470,21 @@ x-fmt/227: convert: tool: gis outputs: - - gml3 + - gml3 x-fmt/231: name: ESRI MapInfo Export File action: convert convert: tool: gis outputs: - - gml3 + - gml3 x-fmt/235: name: ESRI Arc/View ShapeFile action: convert convert: tool: gis outputs: - - gml3 + - gml3 x-fmt/240: name: Microsoft Acces Database File (2000) action: ignore @@ -2485,50 +2510,50 @@ x-fmt/252: convert: tool: document outputs: - - pdf + - pdf x-fmt/253: name: Microsoft Publisher 95 action: convert convert: tool: document outputs: - - pdf + - pdf x-fmt/254: name: Microsoft Publisher 97 action: convert convert: tool: document outputs: - - pdf + - pdf x-fmt/255: name: Microsoft Publisher 98 action: convert convert: tool: document outputs: - - pdf + - pdf x-fmt/256: name: Microsoft Publisher 2000 action: convert convert: tool: document outputs: - - pdf + - pdf x-fmt/257: name: Microsoft Publisher 2002 action: convert convert: tool: document outputs: - - pdf + - pdf x-fmt/258: name: Microsoft Visio Drawing 2000 action: convert convert: tool: document outputs: - - svg - - pdf + - svg + - pdf x-fmt/263: name: ZIP Format alternatives: @@ -2565,11 +2590,12 @@ x-fmt/280: name: XML Schema Definition reidentify: reason: If the XSD is not custom made, then we ignore the XSD file. - on_fail: "action" + on_fail: action action: ignore ignore: template: not-preservable - reason: A prescriptive XML template, for validating XML files with. Not preservation-worthy if not with a GML file. + reason: A prescriptive XML template, for validating XML files with. Not preservation-worthy + if not with a GML file. x-fmt/315: name: Document Type Definition action: convert @@ -2590,26 +2616,26 @@ x-fmt/332: reason: It's a file containing formatting information for Lotus spreadsheets. Not preservation-worthy x-fmt/340: - name: 'Lotus WordPro Document' + name: Lotus WordPro Document action: convert convert: tool: symphovert outputs: - - odt + - odt x-fmt/345: name: Microsoft Works Document action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf x-fmt/346: name: Microstation CAD Drawing reidentify: reason: Pronom identifies the format on extension alone chunk_size: 2048 - on_fail: "action" + on_fail: action action: ignore ignore: template: not-convertable @@ -2620,36 +2646,36 @@ x-fmt/375: convert: tool: document outputs: - - svg - - pdf + - svg + - pdf x-fmt/382: name: Macromedia FLV 1 action: convert convert: tool: video outputs: - - mp4 + - mp4 x-fmt/384: name: Quicktime action: convert convert: tool: video outputs: - - mp4 + - mp4 x-fmt/385: name: MPEG-1 Program Stream action: convert convert: tool: video outputs: - - mp4 + - mp4 x-fmt/386: name: MPEG-2 Program Stream action: convert convert: tool: video outputs: - - mp4 + - mp4 x-fmt/387: name: Exchangeable Image File Format (Uncompressed) 2.2 ignore_if: @@ -2658,7 +2684,7 @@ x-fmt/387: convert: tool: image outputs: - - png + - png x-fmt/388: name: Exchangeable Image File Format (Uncompressed) 2.1 ignore_if: @@ -2667,7 +2693,7 @@ x-fmt/388: convert: tool: image outputs: - - png + - png x-fmt/390: name: Exchangeable Image File Format (Compressed) 2.1 ignore_if: @@ -2676,7 +2702,7 @@ x-fmt/390: convert: tool: image outputs: - - png + - png x-fmt/391: name: Exchangeable Image File Format (Compressed) 2.2 ignore_if: @@ -2685,7 +2711,7 @@ x-fmt/391: convert: tool: image outputs: - - png + - png x-fmt/392: name: JP2 (JPEG 2000 part 1) ignore_if: @@ -2694,23 +2720,23 @@ x-fmt/392: convert: tool: image outputs: - - png + - png x-fmt/393: name: WordPerfect for MS-DOS Document 5.0 action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf x-fmt/394: name: WordPerfect for MS-DOS/Windows Document 5.1 action: convert convert: tool: document outputs: - - odt - - pdf + - odt + - pdf x-fmt/398: name: Exchangeable Image File Format (Compressed) 2.0 ignore_if: @@ -2719,7 +2745,7 @@ x-fmt/398: convert: tool: image outputs: - - png + - png x-fmt/411: name: Windows Portable Executable action: ignore @@ -2743,9 +2769,9 @@ x-fmt/414: action: manual manual: reason: Cannot extract with digiarch - process: Find the dir where .cab file is located. Create a subdir in the same dir - as the .cab file with the uuid of the .cab file as name. Open the .cab file with - windows. Copy everything into the subdir. Make sure that it is just like + process: Find the dir where .cab file is located. Create a subdir in the same + dir as the .cab file with the uuid of the .cab file as name. Open the .cab file + with windows. Copy everything into the subdir. Make sure that it is just like 'digiarch extract' x-fmt/415: name: Java Compiled Object Code @@ -2770,7 +2796,7 @@ x-fmt/418: convert: tool: image outputs: - - png + - png ignore: template: not-preservable reason: An ICO file contains an icon, which is typically used to represent a Windows @@ -2810,7 +2836,7 @@ x-fmt/429: convert: tool: browser outputs: - - pdf + - pdf x-fmt/430: name: MSG file action: extract @@ -2821,7 +2847,7 @@ x-fmt/430: convert: tool: msg outputs: - - eml + - eml x-fmt/441: name: AutoCAD Database File Locking Information action: ignore @@ -2847,8 +2873,8 @@ x-fmt/455: convert: tool: cad2d outputs: - - pdf - - svg + - pdf + - svg xfmt/157: name: Microsoft Windows Enhanced Metafile 1.0 ignore_if: @@ -2857,12 +2883,13 @@ xfmt/157: convert: tool: image outputs: - - png + - png fmt/1549: name: Bentley Microstation Hidden Line File reidentify: - reason: Pronom sometimes wrongly identifies .dgn as .hln due to small byteheader definition - on_fail: "action" + reason: Pronom sometimes wrongly identifies .dgn as .hln due to small byteheader + definition + on_fail: action action: manual manual: reason: Never able to find this type of file. From 4548f3a2ccca4115830002bed6d8ff09c8631006 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:05:43 +0200 Subject: [PATCH 05/10] fileformats.schema - change remove convert.outputs array and use a single convert.output property instead --- fileformats.schema.json | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fileformats.schema.json b/fileformats.schema.json index 6e1f8c7..52c31e1 100644 --- a/fileformats.schema.json +++ b/fileformats.schema.json @@ -329,7 +329,7 @@ }, "then": { "required": [ - "outputs" + "output" ] }, "properties": { @@ -337,14 +337,10 @@ "description": "The converter tool to use.", "type": "string" }, - "outputs": { - "description": "A list of extension targets for the converter.", - "type": "array", - "items": { - "type": "string", - "pattern": "^(.[a-z0-9]+)+$" - }, - "minItems": 1 + "output": { + "description": "The output taget for the converter.", + "type": "string", + "pattern": "^[a-z0-9-]+$" } } }, From c8f81d57be084a6f1e5754fb6abdd572468f7ece Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:06:04 +0200 Subject: [PATCH 06/10] fileformats - use a single convert output --- fileformats.yml | 794 ++++++++++++++---------------------------------- 1 file changed, 233 insertions(+), 561 deletions(-) diff --git a/fileformats.yml b/fileformats.yml index 9ea256d..199e8a2 100644 --- a/fileformats.yml +++ b/fileformats.yml @@ -30,8 +30,7 @@ aca-fmt/1: action: convert convert: tool: symphovert - outputs: - - ods + output: ods aca-fmt/2: name: Microsoft Word Markup action: manual @@ -44,9 +43,7 @@ aca-fmt/3: action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt aca-fmt/4: name: MindManager Mind Map action: ignore @@ -87,8 +84,7 @@ aca-fmt/9: on_success: convert convert: tool: tnef - outputs: - - html + output: html aca-fmt/10: name: MS Access 95 action: manual @@ -143,37 +139,32 @@ aca-fmt/18: action: convert convert: tool: document - outputs: - - pdf + output: pdf aca-fmt/20: name: MapInfo TAB file action: convert convert: tool: gis - outputs: - - tab + output: tab aca-fmt/21: name: OpenDocument Spreadsheet (unspecified version) action: convert convert: tool: document - outputs: - - pdf + output: pdf aca-fmt/22: name: Windows Compressed Enhanced Metafile action: convert convert: tool: document - outputs: - - jpg + output: jpg aca-fmt/23: name: Microsoft Word XML Document description: A Microsoft Word document saved as a standalone XML file action: convert convert: tool: document - outputs: - - odt + output: odt aca-fmt/24: name: vCard action: ignore @@ -237,8 +228,7 @@ fmt/3: action: convert convert: tool: image - outputs: - - png + output: png fmt/4: name: Graphics Interchange Format 89a ignore_if: @@ -246,15 +236,13 @@ fmt/4: action: convert convert: tool: image - outputs: - - png + output: png fmt/5: name: Audio/Video Interleaved Format action: convert convert: tool: video - outputs: - - mp4 + output: mp4 fmt/11: name: Portable Network Graphics 1.0 ignore_if: @@ -262,8 +250,7 @@ fmt/11: action: convert convert: tool: image - outputs: - - png + output: png fmt/12: name: Portable Network Graphics 1.1 ignore_if: @@ -271,8 +258,7 @@ fmt/12: action: convert convert: tool: image - outputs: - - png + output: png fmt/13: name: Portable Network Graphics 1.2 ignore_if: @@ -285,206 +271,163 @@ fmt/14: action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/15: name: Acrobat PDF 1.1 - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/16: name: Acrobat PDF 1.2 - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/17: name: Acrobat PDF 1.3 - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/18: name: Acrobat PDF 1.4 - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/19: name: Acrobat PDF 1.5 - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/20: name: Acrobat PDF 1.6 - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/21: name: AutoCAD Drawing 1.0 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/22: name: AutoCAD Drawing 1.2 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/23: name: AutoCAD Drawing 1.3 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/24: name: AutoCAD Drawing 1.4 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/25: name: AutoCAD Drawing 2.0 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/26: name: AutoCAD Drawing 2.1 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/27: name: AutoCAD Drawing 2.2 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/28: name: AutoCAD Drawing 2.5 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/29: name: AutoCAD Drawing 2.6 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/30: name: AutoCAD Drawing R9 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/31: name: AutoCAD Drawing R10 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/32: name: AutoCAD Drawing R11/12 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/33: name: AutoCAD Drawing R13 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/34: name: AutoCAD Drawing R14 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/35: name: AutoCAD Drawing 2000-2002 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/36: name: AutoCAD Drawing 2004-2005 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/37: name: Microsoft Word for Windows Document 1.0 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/38: name: Microsoft Word for Windows Document 2.0 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/39: name: Microsoft Word Document 95 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/40: name: Microsoft Word Document 97-2003 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/41: name: Raw JPEG Stream ignore_if: @@ -492,8 +435,7 @@ fmt/41: action: convert convert: tool: image - outputs: - - png + output: png fmt/42: name: JPEG File Interchange Format 1.00 ignore_if: @@ -501,8 +443,7 @@ fmt/42: action: convert convert: tool: image - outputs: - - png + output: png fmt/43: name: JPEG File Interchange Format 1.01 ignore_if: @@ -510,8 +451,7 @@ fmt/43: action: convert convert: tool: image - outputs: - - png + output: png fmt/44: name: JPEG File Interchange Format 1.02 ignore_if: @@ -519,268 +459,205 @@ fmt/44: action: convert convert: tool: image - outputs: - - png + output: png fmt/45: name: Rich Text Format 1.0-1.4 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/50: name: Rich Text Format 1.5-1.6 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/52: name: Rich Text Format 1.7 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/53: name: Rich Text Format 1.8 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/55: name: Microsoft Excel 2.x Worksheet (xls) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/56: name: Microsoft Excel 3.0 Worksheet (xls) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/57: name: Microsoft Excel 4.0 Worksheet (xls) 4S action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/58: name: Microsoft Excel 4.0 Workbook (xls) 4W action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/59: name: Microsoft Excel 5.0/95 Workbook (xls) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/61: name: Microsoft Excel 97 Workbook (xls) 8 action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/63: name: Drawing Interchange File Format (ASCII) action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/64: name: Drawing Interchange File Format (ASCII) 1.0 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/65: name: Drawing Interchange File Format (ASCII) 1.2 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/66: name: Drawing Interchange File Format (ASCII) 1.3 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/67: name: Drawing Interchange File Format (ASCII) 1.4 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/68: name: Drawing Interchange File Format (ASCII) 2.0 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/69: name: Drawing Interchange File Format (ASCII) 2.1 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/70: name: Drawing Interchange File Format (ASCII) 2.2 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/71: name: Drawing Interchange File Format (ASCII) 2.5 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/72: name: Drawing Interchange File Format (ASCII) 2.6 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/73: name: Drawing Interchange File Format (ASCII) R9 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/74: name: Drawing Interchange File Format (ASCII) R10 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/75: name: Drawing Interchange File Format (ASCII) R11/12 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/76: name: Drawing Interchange File Format (ASCII) R13 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/77: name: Drawing Interchange File Format (ASCII) R14 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/78: name: Drawing Interchange File Format (ASCII) 2000-2002 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/79: name: Drawing Interchange File Format (ASCII) 2004/2005/2006 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/80: name: Drawing Interchange File Format (Binary) R10 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/81: name: Drawing Interchange File Format (Binary) R11/12 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/82: name: Drawing Interchange File Format (Binary) R13 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/83: name: Drawing Interchange File Format (Binary) R14 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/84: name: Drawing Interchange File Format (Binary) 2000-2002 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/85: name: Drawing Interchange File Format (Binary) 2004-2006 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/90: name: PCX 5 ignore_if: @@ -788,29 +665,25 @@ fmt/90: action: convert convert: tool: image - outputs: - - png + output: png fmt/91: name: Scalable Vector Graphics 1.0 action: convert convert: tool: browser - outputs: - - pdf + output: pdf fmt/92: name: Scalable Vector Graphics 1.1 action: convert convert: tool: browser - outputs: - - pdf + output: pdf fmt/95: name: Acrobat PDF/A - Portable Document Format 1a action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/96: name: Hypertext Markup Language action: convert @@ -876,8 +749,7 @@ fmt/115: action: convert convert: tool: image - outputs: - - png + output: png fmt/116: name: Windows Bitmap 3.0 ignore_if: @@ -885,8 +757,7 @@ fmt/116: action: convert convert: tool: image - outputs: - - png + output: png fmt/117: name: Windows Bitmap 3.0 NT ignore_if: @@ -894,22 +765,19 @@ fmt/117: action: convert convert: tool: image - outputs: - - png + output: png fmt/122: name: Encapsulated PostScript File Format 1.2 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/123: name: Encapsulated PostScript File Format 2.0 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/124: name: Encapsulated PostScript File Format 3 ignore_if: @@ -917,45 +785,37 @@ fmt/124: action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/125: name: Microsoft Powerpoint Presentation 95 action: convert convert: tool: presentation - outputs: - - odp - - pdf + output: odp fmt/126: name: Microsoft Powerpoint Presentation 97-2003 action: convert convert: tool: presentation - outputs: - - odp - - pdf + output: odp fmt/132: name: Windows Media Audio action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/133: name: Windows Media Video action: convert convert: tool: video - outputs: - - mp4 + output: mp4 fmt/134: name: MPEG 1/2 Audio Layer 3 action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/136: name: OpenDocument Text 1.0 action: convert @@ -971,50 +831,43 @@ fmt/141: action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/142: name: Waveform Audio (WAVEFORMATEX) action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/146: name: Acrobat PDF/X - Portable Document Format - Exchange 1a:2003 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/155: name: Geographic Tagged Image File Format (GeoTIFF) action: convert convert: tool: image - outputs: - - tif + output: tif fmt/157: name: Acrobat PDF/X - Portable Document Format - Exchange 1a:2001 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/158: name: Acrobat PDF/X - Portable Document Format - Exchange 3:2002 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/163: name: Microsoft Works Word Processor 1-3 for DOS and 2 for Windows action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/189: name: Microsoft Office Open XML action: convert @@ -1024,9 +877,7 @@ fmt/189: formats have become the default file format of Microsoft Office. convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/199: name: MPEG-4 Media File rename: @@ -1036,8 +887,7 @@ fmt/199: action: convert convert: tool: video - outputs: - - mp4 + output: mp4 fmt/206: name: Structured Query Language Data action: convert @@ -1060,17 +910,13 @@ fmt/214: action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/215: name: Microsoft Powerpoint for Windows 2007 onwards action: convert convert: tool: presentation - outputs: - - odp - - pdf + output: odp fmt/217: name: PaintShop Pro Browser Cache File action: ignore @@ -1083,8 +929,7 @@ fmt/233: action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/244: name: Keyhole Markup Language (XML) description: "KML is an open standard officially named the OpenGIS\xAE KML Encoding\ @@ -1092,22 +937,19 @@ fmt/244: action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 fmt/258: name: Microsoft Works Word Processor 5-6 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/276: name: Acrobat PDF 1.7 - Portable Document Format 1.7 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/277: name: ESRI Arc/View Shapefile Index action: ignore @@ -1119,31 +961,25 @@ fmt/279: action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/290: name: OpenDocument Text 1.1 action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt fmt/291: name: OpenDocument Text 1.2 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/292: name: OpenDocument Presentation 1.1 action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt fmt/294: name: OpenDocument Spreadsheet 1.1 action: convert @@ -1159,17 +995,13 @@ fmt/296: action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg fmt/297: name: OpenDocument Graphics action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg fmt/319: name: ESRI Spatial Index File action: ignore @@ -1201,8 +1033,7 @@ fmt/340: action: convert convert: tool: symphovert - outputs: - - odt + output: odt fmt/345: name: Microsoft Windows Enhanced Metafile 3.0 ignore_if: @@ -1210,8 +1041,7 @@ fmt/345: action: convert convert: tool: image - outputs: - - png + output: png fmt/353: name: Tagged Image File Format ignore_if: @@ -1219,36 +1049,31 @@ fmt/353: action: convert convert: tool: image - outputs: - - tif + output: tif fmt/354: name: Acrobat PDF/A - Portable Document Format 1b action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/355: name: Rich Text Format 1.9 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/356: name: Adaptive Multi-Rate Audio action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/357: name: 3GPP Audio/Video File action: convert convert: tool: video - outputs: - - mp4 + output: mp4 fmt/388: name: Internet Calendar and Scheduling format action: convert @@ -1278,61 +1103,49 @@ fmt/412: action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/413: name: Scalable Vector Graphics Tiny 1.2 action: convert convert: tool: browser - outputs: - - pdf + output: pdf fmt/414: name: Audio Interchange File Format action: convert convert: tool: audio - outputs: - - flac + output: flac fmt/422: name: Adobe Illustrator 6.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/428: name: CorelDraw Drawing action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg fmt/433: name: Drawing Interchange File Format (ASCII) 2007/2008/2009 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/434: name: AutoCAD Drawing 2010/2011/2012 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/435: name: Drawing Interchange File Format (ASCII) 2010/2012/2014 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/440: name: Microsoft Project 2007 action: ignore @@ -1346,32 +1159,25 @@ fmt/441: action: convert convert: tool: video - outputs: - - mp4 + output: mp4 fmt/442: name: Microsoft Visio (generic) action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg fmt/443: name: Microsoft Visio Drawing 2003-2010 action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg fmt/445: name: Microsoft Excel Macro-Enabled 2007 action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/468: name: ISO 9660 Disk Image File action: extract @@ -1383,8 +1189,7 @@ fmt/471: action: convert convert: tool: browser - outputs: - - pdf + output: pdf fmt/473: name: Microsoft Office Owner File action: ignore @@ -1410,36 +1215,31 @@ fmt/476: action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/477: name: Acrobat PDF/A - Portable Document Format action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/478: name: Acrobat PDF/A - Portable Document Format (2u) action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/479: name: Acrobat PDF/A - Portable Document Format (3a) action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/480: name: Acrobat PDF/A - Portable Document Format (3b) action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/484: name: 7Zip format action: extract @@ -1451,23 +1251,19 @@ fmt/487: action: convert convert: tool: presentation - outputs: - - odp - - pdf + output: odp fmt/488: name: Acrobat PDF/X - Portable Document Format - Exchange PDF/X-4 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/493: name: Acrobat PDF/E - Portable Document Format for Engineering PDF/E-1 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/494: name: Microsoft Office Encrypted Document (2007 Onwards) action: ignore @@ -1528,8 +1324,7 @@ fmt/523: action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/524: name: Microsoft Office Theme action: ignore @@ -1559,17 +1354,13 @@ fmt/531: action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/532: name: Drawing Interchange File Format (ASCII) 2013/2014/2015/2016/2017 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/550: name: Adobe InDesign Document CS4 action: ignore @@ -1591,71 +1382,61 @@ fmt/557: action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/558: name: Adobe Illustrator 9.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/559: name: Adobe Illustrator 10.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/560: name: Adobe Illustrator 11.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/561: name: Adobe Illustrator 12.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/562: name: Adobe Illustrator 13.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/563: name: Adobe Illustrator 14.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/564: name: Adobe Illustrator 15.0 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/573: name: WebM video action: convert convert: tool: video - outputs: - - mp4 + output: mp4 fmt/583: name: Vector Markup Language action: convert convert: tool: browser - outputs: - - pdf + output: pdf fmt/584: name: Windows Media Metafile action: convert @@ -1673,24 +1454,19 @@ fmt/595: action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/597: name: Microsoft Word Template 2007 onwards action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/598: name: Microsoft Excel Template 2007 onwards action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/599: name: Microsoft Word Macro-Enabled Document Template 2007 onwards action: ignore @@ -1702,16 +1478,13 @@ fmt/607: action: convert convert: tool: sas - outputs: - - ods - - pdf + output: ods fmt/609: name: Microsoft Word (Generic) 6.0-2003 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/613: name: RAR Archive version 5 action: extract @@ -1729,17 +1502,13 @@ fmt/629: action: convert convert: tool: presentation - outputs: - - odp - - pdf + output: odp fmt/631: name: Microsoft PowerPoint Template 2007 action: convert convert: tool: presentation - outputs: - - odp - - pdf + output: odp fmt/634: name: Microsoft Compiled HTML Help action: ignore @@ -1759,15 +1528,13 @@ fmt/645: action: convert convert: tool: image - outputs: - - png + output: png fmt/657: name: Open XML Paper Specification action: convert convert: tool: xps - outputs: - - pdf + output: pdf fmt/670: name: 'PKCS #7 Cryptographic Message File' action: ignore @@ -1786,8 +1553,7 @@ fmt/724: action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 fmt/725: name: Microsoft Project 2010 action: ignore @@ -1799,16 +1565,13 @@ fmt/744: action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/749: name: AppleWorks Word Processor 6 action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt fmt/754: name: Microsoft Word Document (Password Protected) 97-2003 action: ignore @@ -1847,8 +1610,7 @@ fmt/881: action: convert convert: tool: image - outputs: - - png + output: png fmt/886: name: HTML Components action: ignore @@ -1873,9 +1635,7 @@ fmt/901: action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/938: name: Python Script File action: ignore @@ -1926,8 +1686,7 @@ fmt/1047: action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 fmt/1079: name: Microsoft Program Database 7.00 action: ignore @@ -1961,8 +1720,7 @@ fmt/1129: action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/1132: name: Netscape Bookmark File Format action: ignore @@ -1974,8 +1732,7 @@ fmt/1216: action: convert convert: tool: symphovert - outputs: - - odp + output: odp fmt/1256: name: MapInfo Workspace File action: ignore @@ -2016,72 +1773,55 @@ fmt/1389: action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/1390: name: Drawing Interchange Format (Binary) 2007-2009 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/1391: name: Drawing Interchange Format (Binary) 2010-2012 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/1392: name: Drawing Interchange Format (Binary) 2013-2017 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/1393: name: Drawing Interchange Format (Binary) 2018-2021 action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg fmt/1439: name: Apple iWork Pages 09 action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt fmt/1451: name: PDF Portfolio 1.7 action: convert convert: tool: pdf - outputs: - - pdfa-3 + output: pdfa-3 fmt/1452: name: Lotus 1-2-3 Worksheet (97) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/1453: name: Lotus 1-2-3 Worksheet (9.8 Millennium) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods fmt/1482: name: Access Report Snapshot action: ignore @@ -2096,50 +1836,43 @@ fmt/1507: action: convert convert: tool: image - outputs: - - png + output: png fmt/1511: name: Microsoft Publisher 1 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/1512: name: Microsoft Publisher 2003 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/1513: name: Microsoft Publisher 2007 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/1514: name: Microsoft Publisher 2010 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/1515: name: Microsoft Publisher 2013 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/1516: name: Microsoft Publisher 2016-2019 action: convert convert: tool: document - outputs: - - pdf + output: pdf fmt/1600: name: ESRI ArcInfo DAT File (Internal) reidentify: @@ -2164,8 +1897,7 @@ fmt/1729: action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 fmt/1730: name: '' reidentify: @@ -2224,25 +1956,19 @@ fmt/1934: action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg x-fmt/9: name: dBASE Database III action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/10: name: dBASE Database IV action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/13: name: Tab-separated values reidentify: @@ -2256,9 +1982,7 @@ x-fmt/17: action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/18: name: Comma Separated Values action: convert @@ -2275,22 +1999,19 @@ x-fmt/44: action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/45: name: Microsoft Word Document Template 97-2003 action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/49: name: AutoCAD Design Web Format 6.0 action: convert convert: tool: cad - outputs: - - pdf + output: pdf x-fmt/54: name: AutoCAD Font Mapping Table action: ignore @@ -2314,8 +2035,7 @@ x-fmt/64: action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/78: name: AutoCAD Plot Configuration File (2000) action: ignore @@ -2332,9 +2052,7 @@ x-fmt/92: action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg x-fmt/103: name: AutoCAD Compiled Shape/Font File action: ignore @@ -2365,33 +2083,25 @@ x-fmt/114: action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/115: name: Lotus 1-2-3 Worksheet (3.0) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/116: name: Lotus 1-2-3 Worksheet (4-5) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/117: name: Lotus 1-2-3 Worksheet (1.0) action: convert convert: tool: spreadsheet - outputs: - - ods - - pdf + output: ods x-fmt/119: name: Windows Metafile Image ignore_if: @@ -2399,8 +2109,7 @@ x-fmt/119: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/128: name: Microsoft Excel Workspace action: ignore @@ -2430,8 +2139,7 @@ x-fmt/153: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/169: name: PHP Script Page action: ignore @@ -2469,22 +2177,19 @@ x-fmt/227: action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 x-fmt/231: name: ESRI MapInfo Export File action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 x-fmt/235: name: ESRI Arc/View ShapeFile action: convert convert: tool: gis - outputs: - - gml3 + output: gml3 x-fmt/240: name: Microsoft Acces Database File (2000) action: ignore @@ -2509,51 +2214,43 @@ x-fmt/252: action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/253: name: Microsoft Publisher 95 action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/254: name: Microsoft Publisher 97 action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/255: name: Microsoft Publisher 98 action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/256: name: Microsoft Publisher 2000 action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/257: name: Microsoft Publisher 2002 action: convert convert: tool: document - outputs: - - pdf + output: pdf x-fmt/258: name: Microsoft Visio Drawing 2000 action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg x-fmt/263: name: ZIP Format alternatives: @@ -2620,16 +2317,13 @@ x-fmt/340: action: convert convert: tool: symphovert - outputs: - - odt + output: odt x-fmt/345: name: Microsoft Works Document action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt x-fmt/346: name: Microstation CAD Drawing reidentify: @@ -2645,37 +2339,31 @@ x-fmt/375: action: convert convert: tool: document - outputs: - - svg - - pdf + output: svg x-fmt/382: name: Macromedia FLV 1 action: convert convert: tool: video - outputs: - - mp4 + output: mp4 x-fmt/384: name: Quicktime action: convert convert: tool: video - outputs: - - mp4 + output: mp4 x-fmt/385: name: MPEG-1 Program Stream action: convert convert: tool: video - outputs: - - mp4 + output: mp4 x-fmt/386: name: MPEG-2 Program Stream action: convert convert: tool: video - outputs: - - mp4 + output: mp4 x-fmt/387: name: Exchangeable Image File Format (Uncompressed) 2.2 ignore_if: @@ -2683,8 +2371,7 @@ x-fmt/387: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/388: name: Exchangeable Image File Format (Uncompressed) 2.1 ignore_if: @@ -2692,8 +2379,7 @@ x-fmt/388: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/390: name: Exchangeable Image File Format (Compressed) 2.1 ignore_if: @@ -2701,8 +2387,7 @@ x-fmt/390: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/391: name: Exchangeable Image File Format (Compressed) 2.2 ignore_if: @@ -2710,8 +2395,7 @@ x-fmt/391: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/392: name: JP2 (JPEG 2000 part 1) ignore_if: @@ -2719,24 +2403,19 @@ x-fmt/392: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/393: name: WordPerfect for MS-DOS Document 5.0 action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt x-fmt/394: name: WordPerfect for MS-DOS/Windows Document 5.1 action: convert convert: tool: document - outputs: - - odt - - pdf + output: odt x-fmt/398: name: Exchangeable Image File Format (Compressed) 2.0 ignore_if: @@ -2744,8 +2423,7 @@ x-fmt/398: action: convert convert: tool: image - outputs: - - png + output: png x-fmt/411: name: Windows Portable Executable action: ignore @@ -2795,8 +2473,7 @@ x-fmt/418: action: ignore convert: tool: image - outputs: - - png + output: png ignore: template: not-preservable reason: An ICO file contains an icon, which is typically used to represent a Windows @@ -2835,8 +2512,7 @@ x-fmt/429: action: convert convert: tool: browser - outputs: - - pdf + output: pdf x-fmt/430: name: MSG file action: extract @@ -2846,8 +2522,7 @@ x-fmt/430: on_success: convert convert: tool: msg - outputs: - - eml + output: eml x-fmt/441: name: AutoCAD Database File Locking Information action: ignore @@ -2872,9 +2547,7 @@ x-fmt/455: action: convert convert: tool: cad2d - outputs: - - pdf - - svg + output: svg xfmt/157: name: Microsoft Windows Enhanced Metafile 1.0 ignore_if: @@ -2882,8 +2555,7 @@ xfmt/157: action: convert convert: tool: image - outputs: - - png + output: png fmt/1549: name: Bentley Microstation Hidden Line File reidentify: From be2a3f2fff5b8007fb4ac1cf608494a5ce181d2b Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:13:43 +0200 Subject: [PATCH 07/10] custom_signatures - sort keys 1. puid 2. signature 3. description 4. bof 5. plain_bof 6. eof 7. plain_bof 8. operator 9. extension --- custom_signatures.yml | 253 ++++++++++++++++++++++-------------------- 1 file changed, 135 insertions(+), 118 deletions(-) diff --git a/custom_signatures.yml b/custom_signatures.yml index b929895..29092e0 100644 --- a/custom_signatures.yml +++ b/custom_signatures.yml @@ -1,170 +1,187 @@ -#file: noinspection SpellCheckingInspection -- bof: (?i)^576F726450726F0DFB000000000000000005985C8172030040CCC1BFFFBDF970 - extension: .lwp - puid: x-fmt/340 +- puid: x-fmt/340 signature: Lotus WordPro Document -- bof: (?i)^00001A000(3|4|5)10040000000000 - extension: '.123' - puid: aca-fmt/1 + bof: (?i)^576F726450726F0DFB000000000000000005985C8172030040CCC1BFFFBDF970 + extension: .lwp +- puid: aca-fmt/1 signature: Lotus 1-2-3 Spreadsheet -- bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,20}576f72642e446f63756d656e74 - extension: .doc - puid: aca-fmt/2 + bof: (?i)^00001A000(3|4|5)10040000000000 + extension: '.123' +- puid: aca-fmt/2 signature: Microsoft Word Markup -- bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,18}457863656C2E5368656574 - extension: .xls - puid: aca-fmt/3 + bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,20}576f72642e446f63756d656e74 + extension: .doc +- puid: aca-fmt/3 signature: Microsoft Excel Markup -- bof: (?i)75726e3a736368656d61732d6d6963726f736f66742d636f6d3a6f66666963653a657863656c + bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,18}457863656C2E5368656574 extension: .xls - puid: aca-fmt/3 +- puid: aca-fmt/3 signature: Microsoft Excel Markup -- bof: (?i)^1a000003000014000000 - extension: .ntf - puid: aca-fmt/5 + bof: (?i)75726e3a736368656d61732d6d6963726f736f66742d636f6d3a6f66666963653a657863656c + extension: .xls +- puid: aca-fmt/5 signature: Lotus Notes Template -- bof: (?i)^0300000041505052 - extension: .adx - puid: aca-fmt/6 + bof: (?i)^1a000003000014000000 + extension: .ntf +- puid: aca-fmt/6 signature: Lotus Approach Index File -- bof: (?i)^1a000004000029000000 + bof: (?i)^0300000041505052 + extension: .adx +- puid: aca-fmt/8 + signature: Lotus Notes Database + bof: (?i)^1a000004000029000000 eof: (?i)bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb - extension: .nsf operator: AND - puid: aca-fmt/8 - signature: Lotus Notes Database -- bof: (?i)4D696E644d616E61676572 - extension: .mmap - puid: aca-fmt/4 + extension: .nsf +- puid: aca-fmt/4 signature: MindManager Mind Map -- bof: (?i)010000002E010000(43|03)000000 - extension: .id - puid: aca-fmt/7 + bof: (?i)4D696E644d616E61676572 + extension: .mmap +- puid: aca-fmt/7 signature: ID File -- bof: (?i)474946383961 - extension: .gif - puid: fmt/4 + bof: (?i)010000002E010000(43|03)000000 + extension: .id +- puid: fmt/4 signature: GIF 1989a -- bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E80001088007001800000049504D2E4D6963726F736F6674204D61696C2E4E6F746500310801 - extension: .dat - puid: aca-fmt/9 + bof: (?i)474946383961 + extension: .gif +- puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) -- bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E800010 + bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E80001088007001800000049504D2E4D6963726F736F6674204D61696C2E4E6F746500310801 extension: .dat - puid: aca-fmt/9 +- puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) -- bof: (?i)^789F3E22 + bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E800010 extension: .dat - puid: aca-fmt/9 +- puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) -- bof: (?i)41636365737356657273696F6E.{0,1024}30362E - extension: .mdb - puid: aca-fmt/10 + bof: (?i)^789F3E22 + extension: .dat +- puid: aca-fmt/10 signature: MS Access 95 -- bof: (?i)41636365737356657273696F6E.{0,1024}30372E + bof: (?i)41636365737356657273696F6E.{0,1024}30362E extension: .mdb - puid: aca-fmt/11 +- puid: aca-fmt/11 signature: MS Access 97 -- bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300038002E00 + bof: (?i)41636365737356657273696F6E.{0,1024}30372E extension: .mdb - puid: aca-fmt/12 +- puid: aca-fmt/12 signature: MS Access 2000 -- bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300039002E00 + bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300038002E00 extension: .mdb - puid: aca-fmt/13 +- puid: aca-fmt/13 signature: MS Access 2002/3 -- bof: (?i)^000100005374616E64617264204A65742044420000000000 + bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300039002E00 extension: .mdb - puid: aca-fmt/14 +- puid: aca-fmt/14 signature: MS Access database unspecified version (Jet 3 DB) -- bof: (?i)^000100005374616E64617264204A65742044420001000000 + bof: (?i)^000100005374616E64617264204A65742044420000000000 extension: .mdb - puid: aca-fmt/15 +- puid: aca-fmt/15 signature: MS Access database unspecified version (Jet 4 DB) -- bof: (?i)000100005374616E646172642041434520444200 + bof: (?i)^000100005374616E64617264204A65742044420001000000 extension: .mdb - puid: aca-fmt/16 +- puid: aca-fmt/16 signature: MS Access database unspecified version (ACE DB) -- bof: (?i)00A0E150E161BA2A6AB1A2A2FA5A9B5A7B5D90F1723131F2B0F17234F57639CA4A9A0A8ADA4A8AD161A390D1137A5A9B5A4A8ADB2B6DCBDBDF42B2F55C8CCD8C7CBD0D3D7FDC2C2F90 - extension: .map - puid: aca-fmt/17 + bof: (?i)000100005374616E646172642041434520444200 + extension: .mdb +- puid: aca-fmt/17 signature: MapInfo MAP file -- bof: (?i)^504B0304{26}6D696D65747970656170706C69636174696F6E2F766E642E6F617369732E6F70656E646F63756D656E742E74657874 - extension: .odt - puid: aca-fmt/18 + bof: (?i)00A0E150E161BA2A6AB1A2A2FA5A9B5A7B5D90F1723131F2B0F17234F57639CA4A9A0A8ADA4A8AD161A390D1137A5A9B5A4A8ADB2B6DCBDBDF42B2F55C8CCD8C7CBD0D3D7FDC2C2F90 + extension: .map +- puid: aca-fmt/18 signature: OpenDocument Text (unspecified version) -- bof: (?i)41007000700072006F0061006300680044006F006300 - extension: .apr - puid: aca-fmt/19 + bof: (?i)^504B0304{26}6D696D65747970656170706C69636174696F6E2F766E642E6F617369732E6F70656E646F63756D656E742E74657874 + extension: .odt +- puid: aca-fmt/19 signature: Lotus Approach View File -- bof: (?i)^217461626C650D0A2176657273696F6E.{20,512}446566696E6974696F6E205461626C65 - extension: .tab - puid: aca-fmt/20 + bof: (?i)41007000700072006F0061006300680044006F006300 + extension: .apr +- puid: aca-fmt/20 signature: MapInfo TAB file description: https://www.loc.gov/preservation/digital/formats/fdd/fdd000300.shtml -- bof: (?i)^6d696d65747970656170706c69636174696f6e2f766e642e6f617369732e6f70656e646f63756d656e742e7370726561647368656574 - extension: .ods - plain_bof: mimetypeapplication/vnd.oasis.opendocument.spreadsheet - puid: aca-fmt/21 + bof: (?i)^217461626C650D0A2176657273696F6E.{20,512}446566696E6974696F6E205461626C65 + extension: .tab +- puid: aca-fmt/21 signature: OpenDocument Spreadsheet (unspecified version) -- bof: (?i)^1F8B08 - extension: .emz - plain_bof: "" - puid: aca-fmt/22 + bof: (?i)^6d696d65747970656170706c69636174696f6e2f766e642e6f617369732e6f70656e646f63756d656e742e7370726561647368656574 + plain_bof: mimetypeapplication/vnd.oasis.opendocument.spreadsheet + extension: .ods +- puid: aca-fmt/22 signature: Windows Compressed Enhanced Metafile, usually image file - description: .emz files are actually .gz files, which are identified with a 10-byte header, containing a magic number (1f 8b), a compression ID (08 for DEFLATE which is normal), and a variety of timestamps and flags. .emz files are compressed image files that we can convert directly with libreoffice. -- bof: (?i)^3c3f786d6c(3[^e]|[^3].)*3e(0a|20)*3c3f6d736f2d6170706c69636174696f6e2070726f6769643d22576f72642e446f63756d656e74223f3e - plain_bof: | - - - puid: aca-fmt/23 + description: .emz files are actually .gz files, which are identified with a 10-byte + header, containing a magic number (1f 8b), a compression ID (08 for DEFLATE which + is normal), and a variety of timestamps and flags. .emz files are compressed image + files that we can convert directly with libreoffice. + bof: (?i)^1F8B08 + plain_bof: '' + extension: .emz +- puid: aca-fmt/23 signature: Microsoft Word XML Document - description: Microsoft Word allows exporting document as standalone XML files, which PRONOM incorrectly identifies as plain XML (fmt/101) -- bof: (?i)(42|62)(45|65)(47|67)(49|69)(4e|6e)3a(56|76)(43|63)(41|61)(52|72)(44|64) - operator: AND - eof: (?i)(45|65)(4e|6e)(44|64)3a(56|76)(43|63)(41|61)(52|72)(44|64)((0d)?0a)* - plain_bof: | - BEGIN:VCARD + description: Microsoft Word allows exporting document as standalone XML files, which + PRONOM incorrectly identifies as plain XML (fmt/101) + bof: (?i)^3c3f786d6c(3[^e]|[^3].)*3e(0a|20)*3c3f6d736f2d6170706c69636174696f6e2070726f6769643d22576f72642e446f63756d656e74223f3e + plain_bof: ' + + + + ' +- puid: aca-fmt/24 + signature: vCard (Unspecified version) + description: Only looks for BEGIN:VCARD and END:VCARD, as Pronom assumes VERSION + declaration must come directly after BEGIN:VCARD, which is not always the case + in the wild. + bof: (?i)(42|62)(45|65)(47|67)(49|69)(4e|6e)3a(56|76)(43|63)(41|61)(52|72)(44|64) + plain_bof: 'BEGIN:VCARD + * + END:VCARD + + ' + eof: (?i)(45|65)(4e|6e)(44|64)3a(56|76)(43|63)(41|61)(52|72)(44|64)((0d)?0a)* + operator: AND extension: .vcf - puid: aca-fmt/24 - signature: vCard (Unspecified version) - description: Only looks for BEGIN:VCARD and END:VCARD, as Pronom assumes VERSION declaration must come directly after BEGIN:VCARD, which is not always the case in the wild. -- bof: (?i)5B496E7465726E657453686F72746375745D0D0A55524C3D - plain_bof: | - [InternetShortcut] - URL= - extension: .url - puid: aca-fmt/25 +- puid: aca-fmt/25 signature: URL file description: Internet shortcut file. Looks for the tag "[InternetShortcut]\nURL=" -- bof: (?i)3c4e444c3e.*3c2f4e444c3e - plain_bof: | - + bof: (?i)5B496E7465726E657453686F72746375745D0D0A55524C3D + plain_bof: '[InternetShortcut] + + URL= + + ' + extension: .url +- puid: aca-fmt/26 + signature: Lotus Notes Doclink File + description: Link file used by Lotus Notes, a business productivity and collaboration + application suite; saved in an XML format and contains a reference to a Lotus + Notes document; used for sharing links to documents over email and in Web pages. + bof: (?i)3c4e444c3e.*3c2f4e444c3e + plain_bof: ' + * + + + ' extension: .ndl - puid: aca-fmt/26 - signature: Lotus Notes Doclink File - description: Link file used by Lotus Notes, a business productivity and collaboration application suite; saved in an XML format and contains a reference to a Lotus Notes document; used for sharing links to documents over email and in Web pages. -- bof: (?i)000000000000000000000000000000000000000000008040000000000000000000000000000000000000000000000000000000000000 - extension: .dgn - puid: aca-fmt/28 +- puid: aca-fmt/28 signature: Bentley Microstation V7 File description: CAD file. -- bof: (?i)^(..){0,250}616172687573737461647361726B69763D22687474703A2F2F7777772E616172687573737461647361726B69762E646B2F676D6C2F616172687573737461647361726B697622 - extension: .xsd - plain_bof: aarhusstadsarkiv="http://www.aarhusstadsarkiv.dk/gml/aarhusstadsarkiv" - puid: aca-fmt/29 + bof: (?i)000000000000000000000000000000000000000000008040000000000000000000000000000000000000000000000000000000000000 + extension: .dgn +- puid: aca-fmt/29 signature: XML Schema Definition (Custom XSD) -- bof: (?i)^50545653595354454D202020564953554D202020202020204772617068696B706172616D65746572 - extension: .gpa - plain_bof: PTVSYSTEM VISUM Graphikparameter - puid: aca-fmt/30 + bof: (?i)^(..){0,250}616172687573737461647361726B69763D22687474703A2F2F7777772E616172687573737461647361726B69762E646B2F676D6C2F616172687573737461647361726B697622 + plain_bof: aarhusstadsarkiv="http://www.aarhusstadsarkiv.dk/gml/aarhusstadsarkiv" + extension: .xsd +- puid: aca-fmt/30 signature: PTV Visum Graphics Parameters file -- bof: (?i)^3B3B204853462056 - extension: .easm - plain_bof: ;; HSF V - puid: aca-fmt/31 + bof: (?i)^50545653595354454D202020564953554D202020202020204772617068696B706172616D65746572 + plain_bof: PTVSYSTEM VISUM Graphikparameter + extension: .gpa +- puid: aca-fmt/31 signature: eDrawings Assembly File + bof: (?i)^3B3B204853462056 + plain_bof: ;; HSF V + extension: .easm From 6072401be0c67fac58e51b5c77c0f4d4dc4fe48c Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:39:56 +0200 Subject: [PATCH 08/10] fileformats:aca-fmt/3 - set action to manual --- fileformats.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fileformats.yml b/fileformats.yml index 199e8a2..8ebe9de 100644 --- a/fileformats.yml +++ b/fileformats.yml @@ -40,10 +40,12 @@ aca-fmt/2: process: Open in MS Word and save as pdf. aca-fmt/3: name: Microsoft Excel Markup - action: convert - convert: - tool: document - output: odt + action: manual + manual: + reason: Cannot be recognized by LibreOffice. + process: | + 1. Open with Microsoft Excel + 2. Save as ODS aca-fmt/4: name: MindManager Mind Map action: ignore From 598c5656211a2e4b65062fcef12f35ac5cb0e063 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:51:27 +0200 Subject: [PATCH 09/10] Revert "custom_signatures - sort keys" This reverts commit be2a3f2fff5b8007fb4ac1cf608494a5ce181d2b. --- custom_signatures.yml | 253 ++++++++++++++++++++---------------------- 1 file changed, 118 insertions(+), 135 deletions(-) diff --git a/custom_signatures.yml b/custom_signatures.yml index 29092e0..b929895 100644 --- a/custom_signatures.yml +++ b/custom_signatures.yml @@ -1,187 +1,170 @@ -- puid: x-fmt/340 - signature: Lotus WordPro Document - bof: (?i)^576F726450726F0DFB000000000000000005985C8172030040CCC1BFFFBDF970 +#file: noinspection SpellCheckingInspection +- bof: (?i)^576F726450726F0DFB000000000000000005985C8172030040CCC1BFFFBDF970 extension: .lwp -- puid: aca-fmt/1 - signature: Lotus 1-2-3 Spreadsheet - bof: (?i)^00001A000(3|4|5)10040000000000 + puid: x-fmt/340 + signature: Lotus WordPro Document +- bof: (?i)^00001A000(3|4|5)10040000000000 extension: '.123' -- puid: aca-fmt/2 - signature: Microsoft Word Markup - bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,20}576f72642e446f63756d656e74 + puid: aca-fmt/1 + signature: Lotus 1-2-3 Spreadsheet +- bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,20}576f72642e446f63756d656e74 extension: .doc -- puid: aca-fmt/3 - signature: Microsoft Excel Markup - bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,18}457863656C2E5368656574 + puid: aca-fmt/2 + signature: Microsoft Word Markup +- bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,18}457863656C2E5368656574 extension: .xls -- puid: aca-fmt/3 + puid: aca-fmt/3 signature: Microsoft Excel Markup - bof: (?i)75726e3a736368656d61732d6d6963726f736f66742d636f6d3a6f66666963653a657863656c +- bof: (?i)75726e3a736368656d61732d6d6963726f736f66742d636f6d3a6f66666963653a657863656c extension: .xls -- puid: aca-fmt/5 - signature: Lotus Notes Template - bof: (?i)^1a000003000014000000 + puid: aca-fmt/3 + signature: Microsoft Excel Markup +- bof: (?i)^1a000003000014000000 extension: .ntf -- puid: aca-fmt/6 - signature: Lotus Approach Index File - bof: (?i)^0300000041505052 + puid: aca-fmt/5 + signature: Lotus Notes Template +- bof: (?i)^0300000041505052 extension: .adx -- puid: aca-fmt/8 - signature: Lotus Notes Database - bof: (?i)^1a000004000029000000 + puid: aca-fmt/6 + signature: Lotus Approach Index File +- bof: (?i)^1a000004000029000000 eof: (?i)bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb - operator: AND extension: .nsf -- puid: aca-fmt/4 - signature: MindManager Mind Map - bof: (?i)4D696E644d616E61676572 + operator: AND + puid: aca-fmt/8 + signature: Lotus Notes Database +- bof: (?i)4D696E644d616E61676572 extension: .mmap -- puid: aca-fmt/7 - signature: ID File - bof: (?i)010000002E010000(43|03)000000 + puid: aca-fmt/4 + signature: MindManager Mind Map +- bof: (?i)010000002E010000(43|03)000000 extension: .id -- puid: fmt/4 - signature: GIF 1989a - bof: (?i)474946383961 + puid: aca-fmt/7 + signature: ID File +- bof: (?i)474946383961 extension: .gif -- puid: aca-fmt/9 - signature: Microsoft email attachments archive (winmail) - bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E80001088007001800000049504D2E4D6963726F736F6674204D61696C2E4E6F746500310801 + puid: fmt/4 + signature: GIF 1989a +- bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E80001088007001800000049504D2E4D6963726F736F6674204D61696C2E4E6F746500310801 extension: .dat -- puid: aca-fmt/9 + puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) - bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E800010 +- bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E800010 extension: .dat -- puid: aca-fmt/9 + puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) - bof: (?i)^789F3E22 +- bof: (?i)^789F3E22 extension: .dat -- puid: aca-fmt/10 + puid: aca-fmt/9 + signature: Microsoft email attachments archive (winmail) +- bof: (?i)41636365737356657273696F6E.{0,1024}30362E + extension: .mdb + puid: aca-fmt/10 signature: MS Access 95 - bof: (?i)41636365737356657273696F6E.{0,1024}30362E +- bof: (?i)41636365737356657273696F6E.{0,1024}30372E extension: .mdb -- puid: aca-fmt/11 + puid: aca-fmt/11 signature: MS Access 97 - bof: (?i)41636365737356657273696F6E.{0,1024}30372E +- bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300038002E00 extension: .mdb -- puid: aca-fmt/12 + puid: aca-fmt/12 signature: MS Access 2000 - bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300038002E00 +- bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300039002E00 extension: .mdb -- puid: aca-fmt/13 + puid: aca-fmt/13 signature: MS Access 2002/3 - bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300039002E00 +- bof: (?i)^000100005374616E64617264204A65742044420000000000 extension: .mdb -- puid: aca-fmt/14 + puid: aca-fmt/14 signature: MS Access database unspecified version (Jet 3 DB) - bof: (?i)^000100005374616E64617264204A65742044420000000000 +- bof: (?i)^000100005374616E64617264204A65742044420001000000 extension: .mdb -- puid: aca-fmt/15 + puid: aca-fmt/15 signature: MS Access database unspecified version (Jet 4 DB) - bof: (?i)^000100005374616E64617264204A65742044420001000000 +- bof: (?i)000100005374616E646172642041434520444200 extension: .mdb -- puid: aca-fmt/16 + puid: aca-fmt/16 signature: MS Access database unspecified version (ACE DB) - bof: (?i)000100005374616E646172642041434520444200 - extension: .mdb -- puid: aca-fmt/17 - signature: MapInfo MAP file - bof: (?i)00A0E150E161BA2A6AB1A2A2FA5A9B5A7B5D90F1723131F2B0F17234F57639CA4A9A0A8ADA4A8AD161A390D1137A5A9B5A4A8ADB2B6DCBDBDF42B2F55C8CCD8C7CBD0D3D7FDC2C2F90 +- bof: (?i)00A0E150E161BA2A6AB1A2A2FA5A9B5A7B5D90F1723131F2B0F17234F57639CA4A9A0A8ADA4A8AD161A390D1137A5A9B5A4A8ADB2B6DCBDBDF42B2F55C8CCD8C7CBD0D3D7FDC2C2F90 extension: .map -- puid: aca-fmt/18 - signature: OpenDocument Text (unspecified version) - bof: (?i)^504B0304{26}6D696D65747970656170706C69636174696F6E2F766E642E6F617369732E6F70656E646F63756D656E742E74657874 + puid: aca-fmt/17 + signature: MapInfo MAP file +- bof: (?i)^504B0304{26}6D696D65747970656170706C69636174696F6E2F766E642E6F617369732E6F70656E646F63756D656E742E74657874 extension: .odt -- puid: aca-fmt/19 - signature: Lotus Approach View File - bof: (?i)41007000700072006F0061006300680044006F006300 + puid: aca-fmt/18 + signature: OpenDocument Text (unspecified version) +- bof: (?i)41007000700072006F0061006300680044006F006300 extension: .apr -- puid: aca-fmt/20 + puid: aca-fmt/19 + signature: Lotus Approach View File +- bof: (?i)^217461626C650D0A2176657273696F6E.{20,512}446566696E6974696F6E205461626C65 + extension: .tab + puid: aca-fmt/20 signature: MapInfo TAB file description: https://www.loc.gov/preservation/digital/formats/fdd/fdd000300.shtml - bof: (?i)^217461626C650D0A2176657273696F6E.{20,512}446566696E6974696F6E205461626C65 - extension: .tab -- puid: aca-fmt/21 - signature: OpenDocument Spreadsheet (unspecified version) - bof: (?i)^6d696d65747970656170706c69636174696f6e2f766e642e6f617369732e6f70656e646f63756d656e742e7370726561647368656574 - plain_bof: mimetypeapplication/vnd.oasis.opendocument.spreadsheet +- bof: (?i)^6d696d65747970656170706c69636174696f6e2f766e642e6f617369732e6f70656e646f63756d656e742e7370726561647368656574 extension: .ods -- puid: aca-fmt/22 - signature: Windows Compressed Enhanced Metafile, usually image file - description: .emz files are actually .gz files, which are identified with a 10-byte - header, containing a magic number (1f 8b), a compression ID (08 for DEFLATE which - is normal), and a variety of timestamps and flags. .emz files are compressed image - files that we can convert directly with libreoffice. - bof: (?i)^1F8B08 - plain_bof: '' + plain_bof: mimetypeapplication/vnd.oasis.opendocument.spreadsheet + puid: aca-fmt/21 + signature: OpenDocument Spreadsheet (unspecified version) +- bof: (?i)^1F8B08 extension: .emz -- puid: aca-fmt/23 - signature: Microsoft Word XML Document - description: Microsoft Word allows exporting document as standalone XML files, which - PRONOM incorrectly identifies as plain XML (fmt/101) - bof: (?i)^3c3f786d6c(3[^e]|[^3].)*3e(0a|20)*3c3f6d736f2d6170706c69636174696f6e2070726f6769643d22576f72642e446f63756d656e74223f3e - plain_bof: ' - + plain_bof: "" + puid: aca-fmt/22 + signature: Windows Compressed Enhanced Metafile, usually image file + description: .emz files are actually .gz files, which are identified with a 10-byte header, containing a magic number (1f 8b), a compression ID (08 for DEFLATE which is normal), and a variety of timestamps and flags. .emz files are compressed image files that we can convert directly with libreoffice. +- bof: (?i)^3c3f786d6c(3[^e]|[^3].)*3e(0a|20)*3c3f6d736f2d6170706c69636174696f6e2070726f6769643d22576f72642e446f63756d656e74223f3e + plain_bof: | + - - ' -- puid: aca-fmt/24 - signature: vCard (Unspecified version) - description: Only looks for BEGIN:VCARD and END:VCARD, as Pronom assumes VERSION - declaration must come directly after BEGIN:VCARD, which is not always the case - in the wild. - bof: (?i)(42|62)(45|65)(47|67)(49|69)(4e|6e)3a(56|76)(43|63)(41|61)(52|72)(44|64) - plain_bof: 'BEGIN:VCARD - + puid: aca-fmt/23 + signature: Microsoft Word XML Document + description: Microsoft Word allows exporting document as standalone XML files, which PRONOM incorrectly identifies as plain XML (fmt/101) +- bof: (?i)(42|62)(45|65)(47|67)(49|69)(4e|6e)3a(56|76)(43|63)(41|61)(52|72)(44|64) + operator: AND + eof: (?i)(45|65)(4e|6e)(44|64)3a(56|76)(43|63)(41|61)(52|72)(44|64)((0d)?0a)* + plain_bof: | + BEGIN:VCARD * - END:VCARD - - ' - eof: (?i)(45|65)(4e|6e)(44|64)3a(56|76)(43|63)(41|61)(52|72)(44|64)((0d)?0a)* - operator: AND extension: .vcf -- puid: aca-fmt/25 - signature: URL file - description: Internet shortcut file. Looks for the tag "[InternetShortcut]\nURL=" - bof: (?i)5B496E7465726E657453686F72746375745D0D0A55524C3D - plain_bof: '[InternetShortcut] - + puid: aca-fmt/24 + signature: vCard (Unspecified version) + description: Only looks for BEGIN:VCARD and END:VCARD, as Pronom assumes VERSION declaration must come directly after BEGIN:VCARD, which is not always the case in the wild. +- bof: (?i)5B496E7465726E657453686F72746375745D0D0A55524C3D + plain_bof: | + [InternetShortcut] URL= - - ' extension: .url -- puid: aca-fmt/26 - signature: Lotus Notes Doclink File - description: Link file used by Lotus Notes, a business productivity and collaboration - application suite; saved in an XML format and contains a reference to a Lotus - Notes document; used for sharing links to documents over email and in Web pages. - bof: (?i)3c4e444c3e.*3c2f4e444c3e - plain_bof: ' - + puid: aca-fmt/25 + signature: URL file + description: Internet shortcut file. Looks for the tag "[InternetShortcut]\nURL=" +- bof: (?i)3c4e444c3e.*3c2f4e444c3e + plain_bof: | + * - - - ' extension: .ndl -- puid: aca-fmt/28 + puid: aca-fmt/26 + signature: Lotus Notes Doclink File + description: Link file used by Lotus Notes, a business productivity and collaboration application suite; saved in an XML format and contains a reference to a Lotus Notes document; used for sharing links to documents over email and in Web pages. +- bof: (?i)000000000000000000000000000000000000000000008040000000000000000000000000000000000000000000000000000000000000 + extension: .dgn + puid: aca-fmt/28 signature: Bentley Microstation V7 File description: CAD file. - bof: (?i)000000000000000000000000000000000000000000008040000000000000000000000000000000000000000000000000000000000000 - extension: .dgn -- puid: aca-fmt/29 - signature: XML Schema Definition (Custom XSD) - bof: (?i)^(..){0,250}616172687573737461647361726B69763D22687474703A2F2F7777772E616172687573737461647361726B69762E646B2F676D6C2F616172687573737461647361726B697622 - plain_bof: aarhusstadsarkiv="http://www.aarhusstadsarkiv.dk/gml/aarhusstadsarkiv" +- bof: (?i)^(..){0,250}616172687573737461647361726B69763D22687474703A2F2F7777772E616172687573737461647361726B69762E646B2F676D6C2F616172687573737461647361726B697622 extension: .xsd -- puid: aca-fmt/30 - signature: PTV Visum Graphics Parameters file - bof: (?i)^50545653595354454D202020564953554D202020202020204772617068696B706172616D65746572 - plain_bof: PTVSYSTEM VISUM Graphikparameter + plain_bof: aarhusstadsarkiv="http://www.aarhusstadsarkiv.dk/gml/aarhusstadsarkiv" + puid: aca-fmt/29 + signature: XML Schema Definition (Custom XSD) +- bof: (?i)^50545653595354454D202020564953554D202020202020204772617068696B706172616D65746572 extension: .gpa -- puid: aca-fmt/31 - signature: eDrawings Assembly File - bof: (?i)^3B3B204853462056 - plain_bof: ;; HSF V + plain_bof: PTVSYSTEM VISUM Graphikparameter + puid: aca-fmt/30 + signature: PTV Visum Graphics Parameters file +- bof: (?i)^3B3B204853462056 extension: .easm + plain_bof: ;; HSF V + puid: aca-fmt/31 + signature: eDrawings Assembly File From cb6e57bc860b5f2e34d133e290cd5afc4a9bed10 Mon Sep 17 00:00:00 2001 From: Matteo Campinoti Date: Fri, 4 Oct 2024 13:53:00 +0200 Subject: [PATCH 10/10] custom_signatures - sort keys 1. puid 2. signature 3. description 4. bof 5. plain_bof 6. eof 7. plain_bof 8. operator 9. extension --- custom_signatures.yml | 269 ++++++++++++++++++++++-------------------- 1 file changed, 143 insertions(+), 126 deletions(-) diff --git a/custom_signatures.yml b/custom_signatures.yml index 7b5aa08..c17b6ec 100644 --- a/custom_signatures.yml +++ b/custom_signatures.yml @@ -1,180 +1,197 @@ -#file: noinspection SpellCheckingInspection -- bof: (?i)^576F726450726F0DFB000000000000000005985C8172030040CCC1BFFFBDF970 - extension: .lwp - puid: x-fmt/340 +- puid: x-fmt/340 signature: Lotus WordPro Document -- bof: (?i)^00001A000(3|4|5)10040000000000 - extension: '.123' - puid: aca-fmt/1 + bof: (?i)^576F726450726F0DFB000000000000000005985C8172030040CCC1BFFFBDF970 + extension: .lwp +- puid: aca-fmt/1 signature: Lotus 1-2-3 Spreadsheet -- bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,20}576f72642e446f63756d656e74 - extension: .doc - puid: aca-fmt/2 + bof: (?i)^00001A000(3|4|5)10040000000000 + extension: '.123' +- puid: aca-fmt/2 signature: Microsoft Word Markup -- bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,18}457863656C2E5368656574 - extension: .xls - puid: aca-fmt/3 + bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,20}576f72642e446f63756d656e74 + extension: .doc +- puid: aca-fmt/3 signature: Microsoft Excel Markup -- bof: (?i)75726e3a736368656d61732d6d6963726f736f66742d636f6d3a6f66666963653a657863656c + bof: (?i)(50|70)726F67(49|69)64[0-9A-F]{2,18}457863656C2E5368656574 extension: .xls - puid: aca-fmt/3 +- puid: aca-fmt/3 signature: Microsoft Excel Markup -- bof: (?i)^1a000003000014000000 - extension: .ntf - puid: aca-fmt/5 + bof: (?i)75726e3a736368656d61732d6d6963726f736f66742d636f6d3a6f66666963653a657863656c + extension: .xls +- puid: aca-fmt/5 signature: Lotus Notes Template -- bof: (?i)^0300000041505052 - extension: .adx - puid: aca-fmt/6 + bof: (?i)^1a000003000014000000 + extension: .ntf +- puid: aca-fmt/6 signature: Lotus Approach Index File -- bof: (?i)^1a000004000029000000 + bof: (?i)^0300000041505052 + extension: .adx +- puid: aca-fmt/8 + signature: Lotus Notes Database + bof: (?i)^1a000004000029000000 eof: (?i)bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb - extension: .nsf operator: AND - puid: aca-fmt/8 - signature: Lotus Notes Database -- bof: (?i)4D696E644d616E61676572 - extension: .mmap - puid: aca-fmt/4 + extension: .nsf +- puid: aca-fmt/4 signature: MindManager Mind Map -- bof: (?i)010000002E010000(43|03)000000 - extension: .id - puid: aca-fmt/7 + bof: (?i)4D696E644d616E61676572 + extension: .mmap +- puid: aca-fmt/7 signature: ID File -- bof: (?i)474946383961 - extension: .gif - puid: fmt/4 + bof: (?i)010000002E010000(43|03)000000 + extension: .id +- puid: fmt/4 signature: GIF 1989a -- bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E80001088007001800000049504D2E4D6963726F736F6674204D61696C2E4E6F746500310801 - extension: .dat - puid: aca-fmt/9 + bof: (?i)474946383961 + extension: .gif +- puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) -- bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E800010 + bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E80001088007001800000049504D2E4D6963726F736F6674204D61696C2E4E6F746500310801 extension: .dat - puid: aca-fmt/9 +- puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) -- bof: (?i)^789F3E22 + bof: (?i)010690080004000000000001000100010790060008000000E404000000000000E800010 extension: .dat - puid: aca-fmt/9 +- puid: aca-fmt/9 signature: Microsoft email attachments archive (winmail) -- bof: (?i)41636365737356657273696F6E.{0,1024}30362E - extension: .mdb - puid: aca-fmt/10 + bof: (?i)^789F3E22 + extension: .dat +- puid: aca-fmt/10 signature: MS Access 95 -- bof: (?i)41636365737356657273696F6E.{0,1024}30372E + bof: (?i)41636365737356657273696F6E.{0,1024}30362E extension: .mdb - puid: aca-fmt/11 +- puid: aca-fmt/11 signature: MS Access 97 -- bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300038002E00 + bof: (?i)41636365737356657273696F6E.{0,1024}30372E extension: .mdb - puid: aca-fmt/12 +- puid: aca-fmt/12 signature: MS Access 2000 -- bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300039002E00 + bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300038002E00 extension: .mdb - puid: aca-fmt/13 +- puid: aca-fmt/13 signature: MS Access 2002/3 -- bof: (?i)^000100005374616E64617264204A65742044420000000000 + bof: (?i)410063006300650073007300560065007200730069006F006E.{0,2048}300039002E00 extension: .mdb - puid: aca-fmt/14 +- puid: aca-fmt/14 signature: MS Access database unspecified version (Jet 3 DB) -- bof: (?i)^000100005374616E64617264204A65742044420001000000 + bof: (?i)^000100005374616E64617264204A65742044420000000000 extension: .mdb - puid: aca-fmt/15 +- puid: aca-fmt/15 signature: MS Access database unspecified version (Jet 4 DB) -- bof: (?i)000100005374616E646172642041434520444200 + bof: (?i)^000100005374616E64617264204A65742044420001000000 extension: .mdb - puid: aca-fmt/16 +- puid: aca-fmt/16 signature: MS Access database unspecified version (ACE DB) -- bof: (?i)00A0E150E161BA2A6AB1A2A2FA5A9B5A7B5D90F1723131F2B0F17234F57639CA4A9A0A8ADA4A8AD161A390D1137A5A9B5A4A8ADB2B6DCBDBDF42B2F55C8CCD8C7CBD0D3D7FDC2C2F90 - extension: .map - puid: aca-fmt/17 + bof: (?i)000100005374616E646172642041434520444200 + extension: .mdb +- puid: aca-fmt/17 signature: MapInfo MAP file -- bof: (?i)^504B0304{26}6D696D65747970656170706C69636174696F6E2F766E642E6F617369732E6F70656E646F63756D656E742E74657874 - extension: .odt - puid: aca-fmt/18 + bof: (?i)00A0E150E161BA2A6AB1A2A2FA5A9B5A7B5D90F1723131F2B0F17234F57639CA4A9A0A8ADA4A8AD161A390D1137A5A9B5A4A8ADB2B6DCBDBDF42B2F55C8CCD8C7CBD0D3D7FDC2C2F90 + extension: .map +- puid: aca-fmt/18 signature: OpenDocument Text (unspecified version) -- bof: (?i)41007000700072006F0061006300680044006F006300 - extension: .apr - puid: aca-fmt/19 + bof: (?i)^504B0304{26}6D696D65747970656170706C69636174696F6E2F766E642E6F617369732E6F70656E646F63756D656E742E74657874 + extension: .odt +- puid: aca-fmt/19 signature: Lotus Approach View File -- bof: (?i)^217461626C650D0A2176657273696F6E.{20,512}446566696E6974696F6E205461626C65 - extension: .tab - puid: aca-fmt/20 + bof: (?i)41007000700072006F0061006300680044006F006300 + extension: .apr +- puid: aca-fmt/20 signature: MapInfo TAB file description: https://www.loc.gov/preservation/digital/formats/fdd/fdd000300.shtml -- bof: (?i)^6d696d65747970656170706c69636174696f6e2f766e642e6f617369732e6f70656e646f63756d656e742e7370726561647368656574 - extension: .ods - plain_bof: mimetypeapplication/vnd.oasis.opendocument.spreadsheet - puid: aca-fmt/21 + bof: (?i)^217461626C650D0A2176657273696F6E.{20,512}446566696E6974696F6E205461626C65 + extension: .tab +- puid: aca-fmt/21 signature: OpenDocument Spreadsheet (unspecified version) -- bof: (?i)^1F8B08 - extension: .emz - plain_bof: "" - puid: aca-fmt/22 + bof: (?i)^6d696d65747970656170706c69636174696f6e2f766e642e6f617369732e6f70656e646f63756d656e742e7370726561647368656574 + plain_bof: mimetypeapplication/vnd.oasis.opendocument.spreadsheet + extension: .ods +- puid: aca-fmt/22 signature: Windows Compressed Enhanced Metafile, usually image file - description: .emz files are actually .gz files, which are identified with a 10-byte header, containing a magic number (1f 8b), a compression ID (08 for DEFLATE which is normal), and a variety of timestamps and flags. .emz files are compressed image files that we can convert directly with libreoffice. -- bof: (?i)^3c3f786d6c(3[^e]|[^3].)*3e(0a|20)*3c3f6d736f2d6170706c69636174696f6e2070726f6769643d22576f72642e446f63756d656e74223f3e - plain_bof: | - - - puid: aca-fmt/23 + description: .emz files are actually .gz files, which are identified with a 10-byte + header, containing a magic number (1f 8b), a compression ID (08 for DEFLATE which + is normal), and a variety of timestamps and flags. .emz files are compressed image + files that we can convert directly with libreoffice. + bof: (?i)^1F8B08 + plain_bof: '' + extension: .emz +- puid: aca-fmt/23 signature: Microsoft Word XML Document - description: Microsoft Word allows exporting document as standalone XML files, which PRONOM incorrectly identifies as plain XML (fmt/101) -- bof: (?i)(42|62)(45|65)(47|67)(49|69)(4e|6e)3a(56|76)(43|63)(41|61)(52|72)(44|64) - operator: AND - eof: (?i)(45|65)(4e|6e)(44|64)3a(56|76)(43|63)(41|61)(52|72)(44|64)((0d)?0a)* - plain_bof: | - BEGIN:VCARD + description: Microsoft Word allows exporting document as standalone XML files, which + PRONOM incorrectly identifies as plain XML (fmt/101) + bof: (?i)^3c3f786d6c(3[^e]|[^3].)*3e(0a|20)*3c3f6d736f2d6170706c69636174696f6e2070726f6769643d22576f72642e446f63756d656e74223f3e + plain_bof: ' + + + + ' +- puid: aca-fmt/24 + signature: vCard (Unspecified version) + description: Only looks for BEGIN:VCARD and END:VCARD, as Pronom assumes VERSION + declaration must come directly after BEGIN:VCARD, which is not always the case + in the wild. + bof: (?i)(42|62)(45|65)(47|67)(49|69)(4e|6e)3a(56|76)(43|63)(41|61)(52|72)(44|64) + plain_bof: 'BEGIN:VCARD + * + END:VCARD + + ' + eof: (?i)(45|65)(4e|6e)(44|64)3a(56|76)(43|63)(41|61)(52|72)(44|64)((0d)?0a)* + operator: AND extension: .vcf - puid: aca-fmt/24 - signature: vCard (Unspecified version) - description: Only looks for BEGIN:VCARD and END:VCARD, as Pronom assumes VERSION declaration must come directly after BEGIN:VCARD, which is not always the case in the wild. -- bof: (?i)5B496E7465726E657453686F72746375745D0D0A55524C3D - plain_bof: | - [InternetShortcut] - URL= - extension: .url - puid: aca-fmt/25 +- puid: aca-fmt/25 signature: URL file description: Internet shortcut file. Looks for the tag "[InternetShortcut]\nURL=" -- bof: (?i)3c4e444c3e.*3c2f4e444c3e - plain_bof: | - + bof: (?i)5B496E7465726E657453686F72746375745D0D0A55524C3D + plain_bof: '[InternetShortcut] + + URL= + + ' + extension: .url +- puid: aca-fmt/26 + signature: Lotus Notes Doclink File + description: Link file used by Lotus Notes, a business productivity and collaboration + application suite; saved in an XML format and contains a reference to a Lotus + Notes document; used for sharing links to documents over email and in Web pages. + bof: (?i)3c4e444c3e.*3c2f4e444c3e + plain_bof: ' + * + + + ' extension: .ndl - puid: aca-fmt/26 - signature: Lotus Notes Doclink File - description: Link file used by Lotus Notes, a business productivity and collaboration application suite; saved in an XML format and contains a reference to a Lotus Notes document; used for sharing links to documents over email and in Web pages. -- bof: (?i)000000000000000000000000000000000000000000008040000000000000000000000000000000000000000000000000000000000000 - extension: .dgn - puid: aca-fmt/28 +- puid: aca-fmt/28 signature: Bentley Microstation V7 File description: CAD file. -- bof: (?i)^(..){0,250}616172687573737461647361726B69763D22687474703A2F2F7777772E616172687573737461647361726B69762E646B2F676D6C2F616172687573737461647361726B697622 - extension: .xsd - plain_bof: aarhusstadsarkiv="http://www.aarhusstadsarkiv.dk/gml/aarhusstadsarkiv" - puid: aca-fmt/29 + bof: (?i)000000000000000000000000000000000000000000008040000000000000000000000000000000000000000000000000000000000000 + extension: .dgn +- puid: aca-fmt/29 signature: XML Schema Definition (Custom XSD) -- bof: (?i)^50545653595354454D202020564953554D202020202020204772617068696B706172616D65746572 - extension: .gpa - plain_bof: PTVSYSTEM VISUM Graphikparameter - puid: aca-fmt/30 + bof: (?i)^(..){0,250}616172687573737461647361726B69763D22687474703A2F2F7777772E616172687573737461647361726B69762E646B2F676D6C2F616172687573737461647361726B697622 + plain_bof: aarhusstadsarkiv="http://www.aarhusstadsarkiv.dk/gml/aarhusstadsarkiv" + extension: .xsd +- puid: aca-fmt/30 signature: PTV Visum Graphics Parameters file -- bof: (?i)^3B3B204853462056 - extension: .easm - plain_bof: ;; HSF V - puid: aca-fmt/31 + bof: (?i)^50545653595354454D202020564953554D202020202020204772617068696B706172616D65746572 + plain_bof: PTVSYSTEM VISUM Graphikparameter + extension: .gpa +- puid: aca-fmt/31 signature: eDrawings Assembly File -- bof: (?i)^43616C63756C757820526F61642050726F6A6563742066696C65 - extension: .CRO - plain_bof: Calculux Road Project file - puid: aca-fmt/32 + bof: (?i)^3B3B204853462056 + plain_bof: ;; HSF V + extension: .easm +- puid: aca-fmt/32 signature: Calculux Road Project file -- bof: (?i)^50545653595354454D202020564953554D2020202020202056657273696F6E - extension: .ver - plain_bof: PTVSYSTEM VISUM Version - puid: aca-fmt/33 + bof: (?i)^43616C63756C757820526F61642050726F6A6563742066696C65 + plain_bof: Calculux Road Project file + extension: .CRO +- puid: aca-fmt/33 signature: PTV Visum File + bof: (?i)^50545653595354454D202020564953554D2020202020202056657273696F6E + plain_bof: PTVSYSTEM VISUM Version + extension: .ver