From a938cadb1bf3c0afbbf1a68d358a6d216824010f Mon Sep 17 00:00:00 2001
From: Brandon Phillips
Date: Thu, 11 Apr 2019 18:12:25 -0600
Subject: [PATCH] feat(core): support withTempFile to do local file manipulation

---
 README.md                             | 12 ++++++-
 package.json                          |  2 ++
 src/drivers/memory/MemoryDisk.test.ts | 43 +++++++++++++++++++++++++
 src/lib/Disk.ts                       | 45 +++++++++++++++++++++++++++
 src/lib/utils.ts                      | 42 +++++++++++++++++++++++++
 yarn.lock                             | 14 ++++++++-
 6 files changed, 156 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 08ab80c..0efcf44 100644
--- a/README.md
+++ b/README.md
@@ -189,6 +189,17 @@ A disk that uses a remote AWS S3 bucket.
 - `isTemporaryUrlValid(temporaryUrl: string, against: number | Date = Date.now()): boolean | null`
     to determine if a temporary URL generated with `getTemporaryUrl` is valid (i.e. unexpired). Will return `null` if the
     URL can't be determined either way or if the disk does not support temporary URLs.
+- `async withTempFile(path: string, execute: ((path: string) => Promise<void> | void) | null, extraOptions?: import('tmp').FileOptions): Promise<string>`
+    to stream the contents of a file from the disk to a temporary file on the local filesystem for performing operations
+    that are easier (or more performant, etc.) to do with data on the local filesystem as opposed to data held in memory
+    (e.g. the result of `disk.read(path)`).
+    - The caller can pass an async `execute` callback which will get a string path to the temp
+      file that contains the disk file's contents. Once `execute` returns/resolves, the file will be automatically
+      deleted. If an `execute` callback is not provided, the function will resolve with the path to the temp file.
+      **IMPORTANT:** when using this approach, it's the caller's responsibility to `unlink` the file when they're
+      done with it.
+    - This functionality is achieved using `tmp`. You can pass any additional `FileOptions` through to `tmp` (e.g.
+      `prefix`) using the third parameter `extraOptions`.
### [`MemoryDisk`](./src/drivers/memory/MemoryDisk.ts) Class (extends [`Disk`](#disk-abstract-class)) @@ -254,7 +265,6 @@ This library also exports some helper methods: ## TODO -- [ ] Hoist down the `withTempFile` logic from `@carimus/node-uploads` to this package. - [ ] Make the `MemoryDisk` test generic to run on any `Disk` and figure out how to run it safely with `LocalDisk` and `S3Disk`: - `S3Disk`: credentials and bucket from environment with cleanup `afterEach` and don't fail if env variables diff --git a/package.json b/package.json index a15f29d..6c697d0 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "fs-extra": "^7.0.1", "ramda": "0.25.0", "stream-to-array": "^2.3.0", + "tmp": "^0.1.0", "url-join": "^4.0.0", "verror": "^1.10.0" }, @@ -53,6 +54,7 @@ "@types/jest": "^24.0.11", "@types/ramda": "types/npm-ramda#dist", "@types/stream-to-array": "^2.3.0", + "@types/tmp": "^0.1.0", "@types/url-join": "^4.0.0", "@types/verror": "^1.10.3", "@typescript-eslint/eslint-plugin": "^1.5.0", diff --git a/src/drivers/memory/MemoryDisk.test.ts b/src/drivers/memory/MemoryDisk.test.ts index 0f53d98..2fa6602 100644 --- a/src/drivers/memory/MemoryDisk.test.ts +++ b/src/drivers/memory/MemoryDisk.test.ts @@ -1,6 +1,11 @@ +import * as fs from 'fs'; +import { promisify } from 'util'; import { MemoryDisk } from './MemoryDisk'; import { streamToBuffer } from '../..'; +const readFileFromLocalFilesystem = promisify(fs.readFile); +const deleteFromLocalFilesystem = promisify(fs.unlink); + test("memory disk's basic methods work", async () => { const disk = new MemoryDisk(); expect(await disk.list()).toHaveLength(0); @@ -86,3 +91,41 @@ test('memory disk can generate URLs if one is provided in config', async () => { diskWithUrlsAndTempFallback.getTemporaryUrl('test.txt', 1000, true), ).toBe('http://localhost:1234/test.txt'); }); + +test('memory disk can create temp files for local manipulation', async () => { + const disk = new MemoryDisk(); + + // Write a file to the disk + 
const path = 'foo.txt'; + const originalFileData = Buffer.from('this is a test', 'utf8'); + await disk.write(path, originalFileData); + + // Get the temp file for it and check to make sure their contents match + const tempPath = await disk.withTempFile(path, async (path: string) => { + const tempFileData = await readFileFromLocalFilesystem(path); + expect(tempFileData.toString('base64')).toBe( + originalFileData.toString('base64'), + ); + }); + + // Ensure that once the callback is completed, the file doesn't exist since we didn't tell it not to cleanup + expect(tempPath).toBeTruthy(); + await expect(readFileFromLocalFilesystem(tempPath)).rejects.toBeTruthy(); + + // Do the same stuff again but using the bypass cleanup approach to take cleanup into our own hands + const persistentTempPath = await disk.withTempFile(path); + expect(persistentTempPath).toBeTruthy(); + const persistentTempFileData = await readFileFromLocalFilesystem( + persistentTempPath, + ); + expect(persistentTempFileData.toString('base64')).toBe( + originalFileData.toString('base64'), + ); + // Note that we use `.resolves.toBeUndefined()` to verify the file is deleted (unlink resolves with void/undefined) + expect( + deleteFromLocalFilesystem(persistentTempPath), + ).resolves.toBeUndefined(); + expect( + readFileFromLocalFilesystem(persistentTempPath), + ).rejects.toBeTruthy(); +}); diff --git a/src/lib/Disk.ts b/src/lib/Disk.ts index 1b33b0f..0334821 100644 --- a/src/lib/Disk.ts +++ b/src/lib/Disk.ts @@ -1,5 +1,7 @@ import * as stream from 'stream'; +import * as fs from 'fs'; import { DiskConfig, DiskListingObject, DiskObjectType } from './types'; +import { pipeStreams, withTempFile } from './utils'; import joinUrl = require('url-join'); /** @@ -210,4 +212,47 @@ export abstract class Disk { ): boolean | null { return null; } + + /** + * Download the file to the local disk as a temporary file for operations that require local data manipulation + * and which can't handle Buffers, i.e. 
+     * operations expected to be performed on large files where it's easier to deal with the data in chunks off of
+     * the local filesystem instead of keeping it in a Buffer in memory in its entirety.
+     *
+     * This method streams the data directly to the local filesystem so large files shouldn't cause any memory issues.
+     *
+     * If an `execute` callback is not provided, the cleanup step is skipped and the file at the resolved path is kept.
+     * IMPORTANT: in such a scenario, the caller is responsible for deleting the file when they're done with it.
+     *
+     * @param path The path of the file on this disk whose contents are streamed into the temp file
+     * @param execute An optionally async function that receives the temp file's path once its contents are written
+     * @param extraOptions Additional options passed through to the `withTempFile` util (typed as `tmp` file options)
+     * @return The temp file's path
+     */
+    public async withTempFile(
+        path: string,
+        execute: ((path: string) => Promise<void> | void) | null = null,
+        extraOptions?: import('tmp').FileOptions,
+    ): Promise<string> {
+        // Create a temp file, write the disk file's data to it, and pass its path to `execute` (if one was provided).
+        return withTempFile(
+            async (tmpFilePath: string) => {
+                // Get the disk file's read stream BEFORE opening the temp file for writing so that a failed
+                // read doesn't leave behind an open write stream that nothing would ever end.
+                const diskFileReadStream = await this.createReadStream(path);
+                // Create a write stream to the temp file that auto closes once the read stream is fully piped into it.
+                const tempFileWriteStream = fs.createWriteStream(tmpFilePath, {
+                    autoClose: true,
+                });
+                await pipeStreams(diskFileReadStream, tempFileWriteStream);
+                // Run the caller callback if it was provided.
+                if (execute) {
+                    await execute(tmpFilePath);
+                }
+            },
+            // Skip clean up if no execute callback is provided.
+            !execute,
+            extraOptions,
+        );
+    }
 }
diff --git a/src/lib/utils.ts b/src/lib/utils.ts
index 07b5427..5afcb1f 100644
--- a/src/lib/utils.ts
+++ b/src/lib/utils.ts
@@ -1,5 +1,6 @@
 import { Readable, Writable } from 'stream';
 import toArray = require('stream-to-array');
+import tmp = require('tmp');
 
 /**
  * Stream a readable stream into memory.
@@ -52,3 +53,49 @@ export async function pipeStreams(
         });
     });
 }
+
+/**
+ * Create a temp file and do something with it.
+ *
+ * The file is created through `tmp.file` with its file descriptor discarded (unless `extraOptions` overrides
+ * `discardDescriptor`), so only the file's name (path) is made available to `execute`.
+ *
+ * @param execute An optionally async function that will receive the temp file's name (path)
+ * @param skipCleanup If true, this function won't delete the file, not even when `execute` throws.
+ *     NOTE(review): whether `tmp` itself removes the file at process end depends on its configuration -- confirm.
+ * @param extraOptions Additional options to pass into `tmp.file`
+ * @return The temporary file's path which won't exist after this resolves unless `skipCleanup` was `true`
+ */
+export async function withTempFile(
+    execute: (name: string) => Promise<void> | void,
+    skipCleanup: boolean = false,
+    extraOptions: import('tmp').FileOptions = {},
+): Promise<string> {
+    // Receive the temp file's name (path) and cleanup function from `tmp`, rejecting if it reports an error.
+    const { name, cleanupCallback } = await new Promise<{
+        name: string;
+        cleanupCallback: () => void;
+    }>((resolve, reject) => {
+        tmp.file(
+            { discardDescriptor: true, ...extraOptions },
+            (err, tmpName, _fd, removeTmpFile) => {
+                if (err) {
+                    reject(err);
+                } else {
+                    resolve({ name: tmpName, cleanupCallback: removeTmpFile });
+                }
+            },
+        );
+    });
+    try {
+        // Run the execute callback with the name (path)
+        await execute(name);
+    } finally {
+        // Delete the file (unless requested not to) even when `execute` throws so failures don't leak temp files.
+        if (!skipCleanup) {
+            cleanupCallback();
+        }
+    }
+    // Return the temporary file's name (path)
+    return name;
+}
diff --git a/yarn.lock b/yarn.lock
index 456d3d1..53bef79 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -651,6 +651,11 @@
   dependencies:
     "@types/node" "*"
 
+"@types/tmp@^0.1.0":
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/@types/tmp/-/tmp-0.1.0.tgz#19cf73a7bcf641965485119726397a096f0049bd"
+  integrity sha512-6IwZ9HzWbCq6XoQWhxLpDjuADodH/MKXRUIDFudvgjcVdjFknvmR+DNsoUeer4XPrEnrZs04Jj+kfV9pFsrhmA==
+
 "@types/url-join@^4.0.0":
   version "4.0.0"
   resolved "https://registry.yarnpkg.com/@types/url-join/-/url-join-4.0.0.tgz#72eff71648a429c7d4acf94e03780e06671369bd"
@@ -6618,7 +6623,7 @@ right-pad@^1.0.1:
   resolved "https://registry.yarnpkg.com/right-pad/-/right-pad-1.0.1.tgz#8ca08c2cbb5b55e74dafa96bf7fd1a27d568c8d0"
   integrity sha1-jKCMLLtbVedNr6lr9/0aJ9VoyNA=
 
-rimraf@2, rimraf@2.6.3, rimraf@^2.2.8, rimraf@^2.5.2, rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.2, rimraf@~2.6.2:
+rimraf@2, rimraf@2.6.3, rimraf@^2.2.8, rimraf@^2.5.2, rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.2, rimraf@^2.6.3, rimraf@~2.6.2:
   version "2.6.3"
   resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.6.3.tgz#b2d104fe0d8fb27cf9e0a1cda8262dd3833c6cab"
   integrity sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==
@@ -7404,6 +7409,13 @@ tmp@^0.0.33:
   dependencies:
     os-tmpdir "~1.0.2"
 
+tmp@^0.1.0:
+  version "0.1.0"
+  resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.1.0.tgz#ee434a4e22543082e294ba6201dcc6eafefa2877"
+  integrity sha512-J7Z2K08jbGcdA1kkQpJSqLF6T0tdQqpR2pnSUXsIchbPdTI9v3e85cLW0d6WDhwuAleOV71j2xWs8qMPfK7nKw==
+  dependencies:
+    rimraf "^2.6.3"
+
 tmpl@1.0.x:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/tmpl/-/tmpl-1.0.4.tgz#23640dd7b42d00433911140820e5cf440e521dd1"