From 6c4a45e4969b130e703c9fedbc9740c5fecf5614 Mon Sep 17 00:00:00 2001 From: Gagik Amaryan Date: Tue, 8 Oct 2024 14:59:06 +0200 Subject: [PATCH] fix(shell-api): Fix support for sharded time series collections with getShardDistribution MONGOSH-1447 (#2189) Before this fix: when the getShardDistribution command is invoked on a sharded time series collection, the output would say that the collection is not actually sharded. If the user invoked the same command on the related bucket collection, it would produce an incorrect output. --- packages/shell-api/src/collection.spec.ts | 12 ++- packages/shell-api/src/collection.ts | 109 ++++++++++++++------ packages/shell-api/src/shard.spec.ts | 116 ++++++++++++++++++++++ 3 files changed, 205 insertions(+), 32 deletions(-) diff --git a/packages/shell-api/src/collection.spec.ts b/packages/shell-api/src/collection.spec.ts index a9cf11da0..205da41f3 100644 --- a/packages/shell-api/src/collection.spec.ts +++ b/packages/shell-api/src/collection.spec.ts @@ -2258,8 +2258,16 @@ describe('Collection', function () { it('throws when collection is not sharded', async function () { const serviceProviderCursor = stubInterface(); serviceProviderCursor.limit.returns(serviceProviderCursor); - serviceProviderCursor.tryNext.resolves(null); - serviceProvider.find.returns(serviceProviderCursor as any); + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + serviceProviderCursor.tryNext.returns(null as any); + serviceProvider.find.returns(serviceProviderCursor); + + const tryNext = sinon.stub(); + tryNext.onCall(0).resolves({ storageStats: {} }); + tryNext.onCall(1).resolves(null); + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + serviceProvider.aggregate.returns({ tryNext } as any); + const error = await collection.getShardDistribution().catch((e) => e); expect(error).to.be.instanceOf(MongoshInvalidInputError); diff --git a/packages/shell-api/src/collection.ts b/packages/shell-api/src/collection.ts index 7c8128b91..cacfdd8f6 100644 --- a/packages/shell-api/src/collection.ts +++ b/packages/shell-api/src/collection.ts @@ -2076,31 +2076,73 @@ export default class Collection extends ShellApiWithMongoClass { }); } - @returnsPromise - @topologies([Topologies.Sharded]) - @apiVersions([]) - async getShardDistribution(): Promise { - this._emitCollectionApiCall('getShardDistribution', {}); - - await getConfigDB(this._database); // Warns if not connected to mongos - - const result = {} as Document; - const config = this._mongo.getDB('config'); + /** + * Helper for getting collection info for sharded collections. + * @throws If the collection is not sharded. + * @returns collection info based on given collStats. + */ + async _getShardedCollectionInfo( + config: Database, + collStats: Document[] + ): Promise { const ns = `${this._database._name}.${this._name}`; - - const configCollectionsInfo = await config + const existingConfigCollectionsInfo = await config .getCollection('collections') .findOne({ _id: ns, ...onlyShardedCollectionsInConfigFilter, }); - if (!configCollectionsInfo) { + + if (existingConfigCollectionsInfo !== null) { + return existingConfigCollectionsInfo; + } + + // If the collection info is not found, check if it is timeseries and use the bucket + const timeseriesShardStats = collStats.find( + (extractedShardStats) => + typeof extractedShardStats.storageStats.timeseries !== 'undefined' + ); + + if (!timeseriesShardStats) { throw new MongoshInvalidInputError( `Collection ${this._name} is not sharded`, ShellApiErrors.NotConnectedToShardedCluster ); } + const { storageStats } = timeseriesShardStats; + + const timeseries: Document = storageStats.timeseries; + const timeseriesBucketNs: string = timeseries.bucketsNs; + + const timeseriesCollectionInfo = await config + .getCollection('collections') + .findOne({ + _id: timeseriesBucketNs, + ...onlyShardedCollectionsInConfigFilter, + }); + + if (!timeseriesCollectionInfo) { + throw new MongoshRuntimeError( + `Error finding collection information for ${timeseriesBucketNs}`, + CommonErrors.CommandFailed + ); + } + + return timeseriesCollectionInfo; + } + + @returnsPromise + @topologies([Topologies.Sharded]) + @apiVersions([]) + async getShardDistribution(): Promise { + this._emitCollectionApiCall('getShardDistribution', {}); + + await getConfigDB(this._database); // Warns if not connected to mongos + + const result = {} as Document; + const config = this._mongo.getDB('config'); + const collStats = await ( await this.aggregate({ $collStats: { storageStats: {} } }) ).toArray(); @@ -2115,12 +2157,15 @@ export default class Collection extends ShellApiWithMongoClass { avgObjSize: number; }[] = []; + const configCollectionsInfo = await this._getShardedCollectionInfo( + config, + collStats + ); + await Promise.all( - collStats.map((extShardStats) => + collStats.map((extractedShardStats) => (async (): Promise => { - // Extract and store only the relevant subset of the stats for this shard - const { shard } = extShardStats; - + const { shard } = extractedShardStats; // If we have an UUID, use that for lookups. If we have only the ns, // use that. (On 5.0+ servers, config.chunk has uses the UUID, before // that it had the ns). @@ -2131,39 +2176,43 @@ export default class Collection extends ShellApiWithMongoClass { const [host, numChunks] = await Promise.all([ config .getCollection('shards') - .findOne({ _id: extShardStats.shard }), + .findOne({ _id: extractedShardStats.shard }), config.getCollection('chunks').countDocuments(countChunksQuery), ]); const shardStats = { shardId: shard, host: host !== null ? host.host : null, - size: extShardStats.storageStats.size, - count: extShardStats.storageStats.count, + size: extractedShardStats.storageStats.size, + count: extractedShardStats.storageStats.count, numChunks: numChunks, - avgObjSize: extShardStats.storageStats.avgObjSize, + avgObjSize: extractedShardStats.storageStats.avgObjSize, }; const key = `Shard ${shardStats.shardId} at ${shardStats.host}`; - const estChunkData = + // In sharded timeseries collections we do not have a count + // so we intentionally pass NaN as a result to the client. + const shardStatsCount: number = shardStats.count ?? NaN; + + const estimatedChunkDataPerChunk = shardStats.numChunks === 0 ? 0 : shardStats.size / shardStats.numChunks; - const estChunkCount = + const estimatedDocsPerChunk = shardStats.numChunks === 0 ? 0 - : Math.floor(shardStats.count / shardStats.numChunks); + : Math.floor(shardStatsCount / shardStats.numChunks); result[key] = { data: dataFormat(coerceToJSNumber(shardStats.size)), - docs: shardStats.count, + docs: shardStatsCount, chunks: shardStats.numChunks, - 'estimated data per chunk': dataFormat(estChunkData), - 'estimated docs per chunk': estChunkCount, + 'estimated data per chunk': dataFormat(estimatedChunkDataPerChunk), + 'estimated docs per chunk': estimatedDocsPerChunk, }; totals.size += coerceToJSNumber(shardStats.size); - totals.count += coerceToJSNumber(shardStats.count); + totals.count += coerceToJSNumber(shardStatsCount); totals.numChunks += coerceToJSNumber(shardStats.numChunks); conciseShardsStats.push(shardStats); @@ -2326,7 +2375,7 @@ export default class Collection extends ShellApiWithMongoClass { return await this._mongo._serviceProvider.getSearchIndexes( this._database._name, this._name, - indexName as string | undefined, + indexName, { ...(await this._database._baseOptions()), ...options } ); } @@ -2355,7 +2404,7 @@ export default class Collection extends ShellApiWithMongoClass { this._name, [ { - name: (indexName as string | undefined) ?? 'default', + name: indexName ?? 'default', // Omitting type when it is 'search' for compat with older servers ...(type && type !== 'search' && { type: type as 'search' | 'vectorSearch' }), diff --git a/packages/shell-api/src/shard.spec.ts b/packages/shell-api/src/shard.spec.ts index 9193ae66d..c8ec6a3cd 100644 --- a/packages/shell-api/src/shard.spec.ts +++ b/packages/shell-api/src/shard.spec.ts @@ -2554,6 +2554,122 @@ describe('Shard', function () { }); }); }); + + describe('collection.getShardDistribution()', function () { + let db: Database; + const dbName = 'get-shard-distribution-test'; + const ns = `${dbName}.test`; + + beforeEach(async function () { + db = sh._database.getSiblingDB(dbName); + await db.getCollection('test').insertOne({ key: 1 }); + await db.getCollection('test').createIndex({ key: 1 }); + }); + + afterEach(async function () { + await db.dropDatabase(); + }); + + context('unsharded collections', function () { + it('throws an error', async function () { + const caughtError = await db + .getCollection('test') + .getShardDistribution() + .catch((e) => e); + expect(caughtError.message).includes( + 'Collection test is not sharded' + ); + }); + }); + + context('sharded collections', function () { + beforeEach(async function () { + expect((await sh.enableSharding(dbName)).ok).to.equal(1); + expect( + (await sh.shardCollection(ns, { key: 1 })).collectionsharded + ).to.equal(ns); + }); + + it('returns the correct StatsResult', async function () { + const result = await db.getCollection('test').getShardDistribution(); + const shardDistributionValue = result.value as Document; + + expect(result.type).to.equal('StatsResult'); + + const shardFields = Object.keys(shardDistributionValue).filter( + (field) => field !== 'Totals' + ); + expect(shardFields.length).to.equal(1); + const shardField = shardFields[0]; + expect( + shardDistributionValue[shardField]['estimated docs per chunk'] + ).to.equal(1); + + expect(shardDistributionValue.Totals.docs).to.equal(1); + expect(shardDistributionValue.Totals.chunks).to.equal(1); + }); + }); + + // We explicitly test sharded time series collections as it fallbacks to the bucket information + context('sharded timeseries collections', function () { + skipIfServerVersion(mongos, '< 5.1'); + + const timeseriesCollectionName = 'getShardDistributionTS'; + const timeseriesNS = `${dbName}.${timeseriesCollectionName}`; + + beforeEach(async function () { + expect((await sh.enableSharding(dbName)).ok).to.equal(1); + + expect( + ( + await sh.shardCollection( + timeseriesNS, + { 'metadata.bucketId': 1 }, + { + timeseries: { + timeField: 'timestamp', + metaField: 'metadata', + granularity: 'hours', + }, + } + ) + ).collectionsharded + ).to.equal(timeseriesNS); + await db.getCollection(timeseriesCollectionName).insertOne({ + metadata: { + bucketId: 1, + type: 'temperature', + }, + timestamp: new Date('2021-05-18T00:00:00.000Z'), + temp: 12, + }); + }); + + it('returns the correct StatsResult', async function () { + const result = await db + .getCollection(timeseriesCollectionName) + .getShardDistribution(); + const shardDistributionValue = result.value as Document; + + expect(result.type).to.equal('StatsResult'); + + const shardFields = Object.keys(shardDistributionValue).filter( + (field) => field !== 'Totals' + ); + expect(shardFields.length).to.equal(1); + const shardField = shardFields[0]; + + // Timeseries will have count NaN + expect( + shardDistributionValue[shardField]['estimated docs per chunk'] + ).to.be.NaN; + + expect(shardDistributionValue.Totals.docs).to.be.NaN; + expect(shardDistributionValue.Totals.chunks).to.equal(1); + }); + }); + }); + describe('collection.stats()', function () { let db: Database; let hasTotalSize: boolean;