diff --git a/workspaces/confluence/.changeset/khaki-lizards-tickle.md b/workspaces/confluence/.changeset/khaki-lizards-tickle.md new file mode 100644 index 0000000000..7aad56f272 --- /dev/null +++ b/workspaces/confluence/.changeset/khaki-lizards-tickle.md @@ -0,0 +1,5 @@ +--- +'@backstage-community/plugin-search-backend-module-confluence-collator': minor +--- + +Add a `query` configuration option that accepts a CQL query, combined with the configured `spaces`, to select more precisely which documents are indexed diff --git a/workspaces/confluence/plugins/search-backend-module-confluence-collator/README.md b/workspaces/confluence/plugins/search-backend-module-confluence-collator/README.md index 1f1a8942ba..9d27a54200 100644 --- a/workspaces/confluence/plugins/search-backend-module-confluence-collator/README.md +++ b/workspaces/confluence/plugins/search-backend-module-confluence-collator/README.md @@ -36,7 +36,8 @@ backend.start(); Before you are able to start index confluence spaces to search, you need to go through the [search getting started guide](https://backstage.io/docs/features/search/getting-started). -When you have your `packages/backend/src/plugins/search.ts` file ready to make modifications, add the following code snippet to add the `ConfluenceCollatorFactory`. Note that you can optionally modify the `spaces`, otherwise it will resolve and index **all** spaces authorized by the token. +When you have your `packages/backend/src/plugins/search.ts` file ready to make modifications, add the following code snippet to add the `ConfluenceCollatorFactory`. Note that you can optionally set +the `spaces` or [`query`](https://developer.atlassian.com/cloud/confluence/advanced-searching-using-cql) options; otherwise it will resolve and index **all** spaces and documents authorized by the token. 
```ts indexBuilder.addCollator({ @@ -62,6 +63,7 @@ confluence: auth: token: '${CONFLUENCE_TOKEN}' spaces: [] # Warning, it is highly recommended to safely list the spaces that you want to index, either all documents will be indexed. + query: '' # If your spaces contain documents you don't want to index, you can use a CQL query to select them more precisely. It is combined with the spaces listed above. ``` The sections below will go into more details about the Base URL and Auth Methods. diff --git a/workspaces/confluence/plugins/search-backend-module-confluence-collator/api-report.md b/workspaces/confluence/plugins/search-backend-module-confluence-collator/api-report.md index 974648c925..ec8f84c2ad 100644 --- a/workspaces/confluence/plugins/search-backend-module-confluence-collator/api-report.md +++ b/workspaces/confluence/plugins/search-backend-module-confluence-collator/api-report.md @@ -36,6 +36,7 @@ export type ConfluenceCollatorFactoryOptions = { username?: string; password?: string; spaces?: string[]; + query?: string; parallelismLimit?: number; logger: LoggerService; }; diff --git a/workspaces/confluence/plugins/search-backend-module-confluence-collator/config.d.ts b/workspaces/confluence/plugins/search-backend-module-confluence-collator/config.d.ts index fee9642a7f..bcb7cea686 100644 --- a/workspaces/confluence/plugins/search-backend-module-confluence-collator/config.d.ts +++ b/workspaces/confluence/plugins/search-backend-module-confluence-collator/config.d.ts @@ -56,6 +56,10 @@ export interface Config { * Spaces to index */ spaces?: string[]; + /** + * CQL query that selects the pages to index. It is combined with the `spaces` parameter above when finding documents. + */ + query?: string; /** * An abstract value that controls the concurrency level of the * collation process. 
Increasing this value will both increase the diff --git a/workspaces/confluence/plugins/search-backend-module-confluence-collator/src/collators/ConfluenceCollatorFactory.ts b/workspaces/confluence/plugins/search-backend-module-confluence-collator/src/collators/ConfluenceCollatorFactory.ts index 14d2c7595c..95a7c7ad20 100644 --- a/workspaces/confluence/plugins/search-backend-module-confluence-collator/src/collators/ConfluenceCollatorFactory.ts +++ b/workspaces/confluence/plugins/search-backend-module-confluence-collator/src/collators/ConfluenceCollatorFactory.ts @@ -48,6 +48,7 @@ export type ConfluenceCollatorFactoryOptions = { username?: string; password?: string; spaces?: string[]; + query?: string; parallelismLimit?: number; logger: LoggerService; }; @@ -118,6 +119,7 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { private readonly username: string | undefined; private readonly password: string | undefined; private readonly spaces: string[] | undefined; + private readonly query: string | undefined; private readonly parallelismLimit: number | undefined; private readonly logger: LoggerService; public readonly type: string = 'confluence'; @@ -130,6 +132,7 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { this.username = options.username; this.password = options.password; this.spaces = options.spaces; + this.query = options.query; this.parallelismLimit = options.parallelismLimit; this.logger = options.logger.child({ documentType: this.type }); } @@ -142,6 +145,8 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { const username = config.getOptionalString('confluence.auth.username'); const password = config.getOptionalString('confluence.auth.password'); const spaces = config.getOptionalStringArray('confluence.spaces') ?? []; + const query = config.getOptionalString('confluence.query') ?? 
''; + const parallelismLimit = config.getOptionalNumber( 'confluence.parallelismLimit', ); @@ -173,6 +178,7 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { username, password, spaces, + query, parallelismLimit, }); } @@ -182,19 +188,8 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { } async *execute(): AsyncGenerator { - let spacesList: string[] = await this.getSpacesConfig(); - - if (spacesList.length === 0) { - this.logger.info( - 'No confluence.spaces configured in app-config.yaml, fetching all spaces', - ); - - spacesList = await this.discoverSpaces(); - } - - this.logger.info(`Indexing spaces: ${JSON.stringify(spacesList)}`); - - const documentsList = await this.getDocumentsFromSpaces(spacesList); + const query = await this.getConfluenceQuery(); + const documentsList = await this.getDocuments(query); this.logger.debug(`Document list: ${JSON.stringify(documentsList)}`); @@ -228,25 +223,6 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { } } - private async discoverSpaces(): Promise { - const data = await this.get( - `${this.baseUrl}/rest/api/space?&limit=1000&type=global&status=current`, - ); - - if (!data.results) { - return []; - } - - const spacesList = []; - for (const result of data.results) { - spacesList.push(result.key); - } - - this.logger.debug(`Discovered spaces: ${JSON.stringify(spacesList)}`); - - return spacesList; - } - private async getSpacesConfig(): Promise { const spaceList: string[] = []; if (this.spaces?.length === 0) { @@ -255,13 +231,31 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { return this.spaces || []; } - private async getDocumentsFromSpace(space: string): Promise { + /** + * Builds the CQL expression that selects the documents to index: the + * configured spaces OR-ed together, AND-ed with the optional `query`. + * A part that is not configured is left out of the expression. + */ + private async getConfluenceQuery(): Promise<string> { + const spaceList = await this.getSpacesConfig(); + const spaceQuery = spaceList.map(s => `space="${s}"`).join(' or '); + const additionalQuery = this.query?.trim() ?? ''; + if
(spaceQuery !== '' && additionalQuery !== '') { + return `(${spaceQuery}) and (${additionalQuery})`; + } + // Never send an empty CQL expression: with nothing configured, select every + // page so that everything is indexed, as the README documents. + return spaceQuery || additionalQuery || 'type = page'; + } + + private async getDocuments(query: string): Promise { const documentsList = []; - this.logger.info(`Exploring space: "${space}"`); + this.logger.info(`Exploring documents using query: ${query}`); let next = true; - let requestUrl = `${this.baseUrl}/rest/api/content?limit=1000&status=current&spaceKey=${space}`; + // CQL contains spaces, quotes and operators, so percent-encode it for the URL. + let requestUrl = `${this.baseUrl}/rest/api/content/search?limit=1000&status=current&cql=${encodeURIComponent(query)}`; while (next) { const data = await this.get(requestUrl); if (!data.results) { @@ -280,16 +274,6 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory { return documentsList; } - private async getDocumentsFromSpaces(spaces: string[]): Promise { - const documentsList = []; - - for (const space of spaces) { - documentsList.push(...(await this.getDocumentsFromSpace(space))); - } - - return documentsList; - } - private async getDocumentInfo( documentUrl: string, ): Promise {