Skip to content

Commit

Permalink
refactor: mimetype detection to use chunks instead of whole buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
aleortega committed Sep 27, 2024
1 parent 5d1f3ec commit c35d097
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 21 deletions.
109 changes: 91 additions & 18 deletions content/src/controller/utils.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { ContentItem } from '@dcl/catalyst-storage'
import { InvalidRequestError, Pagination } from '../types'
import { fromStream } from 'file-type'
import { Readable } from 'stream'
import { fromBuffer } from 'file-type'
import { Readable, Transform } from 'stream'

export function paginationObject(url: URL, maxPageSize: number = 1000): Pagination {
const pageSize = url.searchParams.has('pageSize') ? parseInt(url.searchParams.get('pageSize')!, 10) : 100
Expand Down Expand Up @@ -39,23 +39,96 @@ export function asEnumValue<T extends { [key: number]: string }>(
}
}

export async function createContentFileHeaders(content: ContentItem, hashId: string): Promise<Record<string, string>> {
const stream: Readable = await content.asRawStream()
const mime = await fromStream(stream)
const mimeType = mime?.mime || 'application/octet-stream'

const headers: Record<string, string> = {
'Content-Type': mimeType,
ETag: JSON.stringify(hashId), // by spec, the ETag must be a double-quoted string
'Access-Control-Expose-Headers': 'ETag',
'Cache-Control': 'public,max-age=31536000,s-maxage=31536000,immutable'
}
if (content.encoding) {
headers['Content-Encoding'] = content.encoding
/**
 * Creates a Transform stream that sniffs the MIME type of the data written to it
 * and reports it through a single `'mime-detected'` event (payload: the MIME string).
 *
 * Detection starts as soon as `maxChunkSize` bytes have been received, or when the
 * input ends if it is shorter than that. Only that leading sample is ever buffered:
 * it is pushed downstream once when the threshold is reached, and every later chunk
 * is discarded. The stream is therefore a detection sink that drains its source,
 * not a faithful pass-through.
 *
 * `'application/octet-stream'` is reported when the input is empty, when `fromBuffer`
 * does not recognise the sample or rejects, or when it has not answered within
 * `detectionTimeoutMs`.
 *
 * @param maxChunkSize - number of leading bytes to collect before sniffing
 * @param detectionTimeoutMs - how long to wait for `fromBuffer` before falling back
 * @returns a Transform that emits `'mime-detected'` exactly once per detection
 */
function createMimeTypeDetectStream(maxChunkSize = 4100, detectionTimeoutMs = 1000) {
  const fallbackMimeType = 'application/octet-stream'
  const pendingChunks: Buffer[] = []
  let pendingLength = 0
  let detectionStarted = false
  let resultEmitted = false
  let timeout: ReturnType<typeof setTimeout> | null = null

  const clearTimeoutFn = () => {
    if (timeout) {
      clearTimeout(timeout)
      timeout = null
    }
  }

  const detectMimeType = (emit: (event: string, mimeType: string) => void, sample: Buffer) => {
    detectionStarted = true

    // Whichever of timeout / sniff result / sniff failure arrives first wins;
    // the others become no-ops so listeners never see a second event.
    const emitOnce = (mimeType: string) => {
      if (resultEmitted) {
        return
      }
      resultEmitted = true
      clearTimeoutFn()
      emit('mime-detected', mimeType)
    }

    // Nothing to sniff: report the generic type instead of staying silent forever.
    if (sample.length === 0) {
      emitOnce(fallbackMimeType)
      return
    }

    // If it looks like a JSON structure, fall back on application/json directly.
    const initialData = sample.toString('utf-8', 0, 1)
    if (initialData === '{' || initialData === '[') {
      emitOnce('application/json')
      return
    }

    // Armed only for the asynchronous path, and guarded by `resultEmitted`
    // (not by "detection started"), so it can actually fire.
    timeout = setTimeout(() => emitOnce(fallbackMimeType), detectionTimeoutMs)

    fromBuffer(sample.subarray(0, maxChunkSize))
      .then((mime) => emitOnce(mime?.mime || fallbackMimeType))
      .catch(() => emitOnce(fallbackMimeType))
  }

  return new Transform({
    transform(chunk: Buffer, _encoding, callback) {
      // After detection has started, chunks are dropped rather than accumulated,
      // so memory use stays bounded by the sample size.
      if (!detectionStarted) {
        pendingChunks.push(chunk)
        pendingLength += chunk.length

        if (pendingLength >= maxChunkSize) {
          const sample = Buffer.concat(pendingChunks, pendingLength)
          pendingChunks.length = 0
          detectMimeType(this.emit.bind(this), sample)
          this.push(sample)
        }
      }

      callback()
    },

    flush(callback) {
      // Input ended before the threshold was reached (possibly with no data at all).
      if (!detectionStarted) {
        const sample = Buffer.concat(pendingChunks, pendingLength)
        pendingChunks.length = 0
        detectMimeType(this.emit.bind(this), sample)
      }
      callback()
    }
  })
}

/**
 * Builds the HTTP response headers for a stored content file.
 *
 * The raw content stream is piped through a MIME detection stream; the detected
 * type becomes `Content-Type`. `Content-Encoding` and `Content-Length` are added
 * only when `content.encoding` / `content.size` are truthy.
 *
 * @param content - the stored item; its raw stream is opened here and destroyed
 *   once the headers are known (or reading fails)
 * @param hashId - content hash, used as the ETag value
 * @returns the headers to send with the file
 * @throws rejects with the stream error if the raw content stream fails before
 *   the MIME type has been determined
 */
export async function createContentFileHeaders(content: ContentItem, hashId: string): Promise<Record<string, string>> {
  const fallbackMimeType = 'application/octet-stream'
  const stream: Readable = await content.asRawStream()

  const mimeDetectStream = createMimeTypeDetectStream(4100, 1000) // 1-second timeout

  return new Promise((resolve, reject) => {
    let settled = false
    let receivedData = false

    // Once the outcome is known there is no reason to keep reading the file.
    const releaseSource = () => {
      stream.unpipe(mimeDetectStream)
      stream.destroy()
    }

    const settle = (mimeType: string) => {
      if (settled) {
        return
      }
      settled = true

      const headers: Record<string, string> = {
        'Content-Type': mimeType,
        ETag: JSON.stringify(hashId), // by spec, the ETag must be a double-quoted string
        'Access-Control-Expose-Headers': 'ETag',
        'Cache-Control': 'public,max-age=31536000,s-maxage=31536000,immutable'
      }

      if (content.encoding) {
        headers['Content-Encoding'] = content.encoding
      }
      if (content.size) {
        headers['Content-Length'] = content.size.toString()
      }

      releaseSource()
      resolve(headers)
    }

    mimeDetectStream.on('mime-detected', settle)

    // A failure inside the detector only costs us the precise type.
    mimeDetectStream.on('error', () => settle(fallbackMimeType))

    // Empty content gives the detector nothing to inspect; do not wait on it.
    stream.once('data', () => {
      receivedData = true
    })
    stream.once('end', () => {
      if (!receivedData) {
        settle(fallbackMimeType)
      }
    })

    // pipe() does not forward source errors, so they must be handled here;
    // otherwise the promise would never settle. The listener stays attached
    // so a late error after settling is swallowed instead of being uncaught.
    stream.on('error', (error) => {
      if (settled) {
        return
      }
      settled = true
      releaseSource()
      reject(error)
    })

    // Pipe the raw content stream through the MIME detection stream.
    // Listeners above are attached in the same tick, so no chunk is missed.
    stream.pipe(mimeDetectStream)
  })
}
48 changes: 45 additions & 3 deletions content/test/integration/controller/entity-metadata.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ describe('Integration - Get wearable image and thumbnail', () => {

it('when entity has thumbnail, it should return the content and set the headers', async () => {
const deployResult = await buildDeployData(['wearable'], {
metadata: { thumbnail: 'some-binary-file.png' },
contentPaths: ['test/integration/resources/some-binary-file.png']
metadata: { thumbnail: 'heavy-image.jpg' },
contentPaths: ['test/integration/resources/heavy-image.jpg']
})

await server.deployEntity(deployResult.deployData)
Expand All @@ -90,7 +90,49 @@ describe('Integration - Get wearable image and thumbnail', () => {

for (const response of responses) {
expect(response.status).toEqual(200)
expect(response.headers.get('content-type')).toEqual('image/png')
expect(response.headers.get('content-type')).toEqual('image/jpeg')
expect(response.headers.get('ETag')).toBeTruthy()
expect(response.headers.get('Cache-Control')).toBeTruthy()
}
})

it('when entity has thumbnail, it should return the content and set the headers (GLB detection)', async () => {
  // Deploy a wearable whose thumbnail is a binary glTF file.
  const { deployData } = await buildDeployData(['wearable'], {
    metadata: { thumbnail: 'new.glb' },
    contentPaths: ['test/integration/resources/new.glb']
  })
  await server.deployEntity(deployData)

  // Request the thumbnail with both GET and HEAD; the headers must agree.
  const thumbnailUrl = (): string => `${server.getUrl()}/queries/items/wearable/thumbnail`
  const getRequest = fetch(thumbnailUrl())
  const headRequest = fetch(thumbnailUrl(), { method: 'HEAD' })
  const responses = await Promise.all([getRequest, headRequest])

  responses.forEach((response) => {
    expect(response.status).toEqual(200)
    expect(response.headers.get('content-type')).toEqual('model/gltf-binary')
    expect(response.headers.get('ETag')).toBeTruthy()
    expect(response.headers.get('Cache-Control')).toBeTruthy()
  })
})

it('when entity has thumbnail, it should return the content and set the headers (JSON detection)', async () => {
const deployResult = await buildDeployData(['wearable'], {
metadata: { thumbnail: 'another_scene.json' },
contentPaths: ['test/integration/resources/another_scene.json']
})

await server.deployEntity(deployResult.deployData)

const responses = await Promise.all([
fetch(`${server.getUrl()}/queries/items/wearable/thumbnail`),
fetch(`${server.getUrl()}/queries/items/wearable/thumbnail`, { method: 'HEAD' })
])

for (const response of responses) {
expect(response.status).toEqual(200)
expect(response.headers.get('content-type')).toEqual('application/json')
expect(response.headers.get('ETag')).toBeTruthy()
expect(response.headers.get('Cache-Control')).toBeTruthy()
}
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added content/test/integration/resources/new.glb
Binary file not shown.

0 comments on commit c35d097

Please sign in to comment.