Skip to content

Commit

Permalink
workspace(confluence): add query parameter to confluence collator plu…
Browse files Browse the repository at this point in the history
…gin (backstage#906)

* add query parameter to confluence collator plugin

In order to allow more precise selection of documents, introduce a
`query` parameter that allows users to provide a CQL query to the
plugin. When provided this parameter is combined with the `spaces`
parameter to more accurately select the documents to index from
a given confluence instance.

* bump minor version instead of patch and link to Atlassian CQL docs in README
* update API report

Signed-off-by: Tamim Khan <[email protected]>
  • Loading branch information
tamimkh authored Sep 3, 2024
1 parent f374a98 commit 33f9933
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 46 deletions.
5 changes: 5 additions & 0 deletions workspaces/confluence/.changeset/khaki-lizards-tickle.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@backstage-community/plugin-search-backend-module-confluence-collator': minor
---

Add query parameter that allows providing a CQL query that is combined with spaces to more finely select the documents to index
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ backend.start();

Before you can start indexing Confluence spaces for search, you need to go through the [search getting started guide](https://backstage.io/docs/features/search/getting-started).

When you have your `packages/backend/src/plugins/search.ts` file ready to make modifications, add the following code snippet to add the `ConfluenceCollatorFactory`. Note that you can optionally modify the `spaces`, otherwise it will resolve and index **all** spaces authorized by the token.
When you have your `packages/backend/src/plugins/search.ts` file ready to make modifications, add the following code snippet to add the `ConfluenceCollatorFactory`. Note that you can optionally modify
the `spaces` or [`query`](https://developer.atlassian.com/cloud/confluence/advanced-searching-using-cql), otherwise it will resolve and index **all** spaces and documents authorized by the token.

```ts
indexBuilder.addCollator({
Expand All @@ -62,6 +63,7 @@ confluence:
auth:
token: '${CONFLUENCE_TOKEN}'
spaces: [] # Warning: it is highly recommended to explicitly list the spaces that you want to index; otherwise all documents will be indexed.
query: '' # If your spaces contain documents you don't want to index, you can use a CQL query to more precisely select them. This is combined with the spaces parameter above
```
The sections below will go into more details about the Base URL and Auth Methods.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export type ConfluenceCollatorFactoryOptions = {
username?: string;
password?: string;
spaces?: string[];
query?: string;
parallelismLimit?: number;
logger: LoggerService;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ export interface Config {
* Spaces to index
*/
spaces?: string[];
/**
* CQL query to select the pages to index. It is combined with the spaces parameter above when finding documents.
*/
query?: string;
/**
* An abstract value that controls the concurrency level of the
* collation process. Increasing this value will both increase the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export type ConfluenceCollatorFactoryOptions = {
username?: string;
password?: string;
spaces?: string[];
query?: string;
parallelismLimit?: number;
logger: LoggerService;
};
Expand Down Expand Up @@ -118,6 +119,7 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
private readonly username: string | undefined;
private readonly password: string | undefined;
private readonly spaces: string[] | undefined;
private readonly query: string | undefined;
private readonly parallelismLimit: number | undefined;
private readonly logger: LoggerService;
public readonly type: string = 'confluence';
Expand All @@ -130,6 +132,7 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
this.username = options.username;
this.password = options.password;
this.spaces = options.spaces;
this.query = options.query;
this.parallelismLimit = options.parallelismLimit;
this.logger = options.logger.child({ documentType: this.type });
}
Expand All @@ -142,6 +145,8 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
const username = config.getOptionalString('confluence.auth.username');
const password = config.getOptionalString('confluence.auth.password');
const spaces = config.getOptionalStringArray('confluence.spaces') ?? [];
const query = config.getOptionalString('confluence.query') ?? '';

const parallelismLimit = config.getOptionalNumber(
'confluence.parallelismLimit',
);
Expand Down Expand Up @@ -173,6 +178,7 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
username,
password,
spaces,
query,
parallelismLimit,
});
}
Expand All @@ -182,19 +188,8 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
}

async *execute(): AsyncGenerator<IndexableConfluenceDocument> {
let spacesList: string[] = await this.getSpacesConfig();

if (spacesList.length === 0) {
this.logger.info(
'No confluence.spaces configured in app-config.yaml, fetching all spaces',
);

spacesList = await this.discoverSpaces();
}

this.logger.info(`Indexing spaces: ${JSON.stringify(spacesList)}`);

const documentsList = await this.getDocumentsFromSpaces(spacesList);
const query = await this.getConfluenceQuery();
const documentsList = await this.getDocuments(query);

this.logger.debug(`Document list: ${JSON.stringify(documentsList)}`);

Expand Down Expand Up @@ -228,25 +223,6 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
}
}

/**
 * Asks the Confluence REST API for the spaces it exposes and returns their keys.
 *
 * Issues a single request for up to 1000 current, global spaces.
 *
 * @returns The `key` of every entry in the response's `results`, or an empty
 *   array when the response carries no `results`.
 */
private async discoverSpaces(): Promise<string[]> {
  const endpoint = `${this.baseUrl}/rest/api/space?&limit=1000&type=global&status=current`;
  const response = await this.get(endpoint);

  // Only collect and log keys when the response actually has a result set.
  if (response.results) {
    const keys: string[] = [...response.results].map(
      (entry: { key: string }) => entry.key,
    );

    this.logger.debug(`Discovered spaces: ${JSON.stringify(keys)}`);

    return keys;
  }

  return [];
}

private async getSpacesConfig(): Promise<string[]> {
const spaceList: string[] = [];
if (this.spaces?.length === 0) {
Expand All @@ -255,13 +231,24 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
return this.spaces || [];
}

private async getDocumentsFromSpace(space: string): Promise<string[]> {
/**
 * Builds the CQL expression used to select the documents to index.
 *
 * Each configured space becomes a `space="KEY"` clause and the clauses are
 * joined with `or`. When an additional query is configured, it is AND-ed with
 * the space clauses. If only one of the two is configured, that part is
 * returned on its own so no empty `()` group ends up in the expression.
 *
 * @returns The combined CQL string; an empty string when neither spaces nor
 *   an additional query are configured.
 */
private async getConfluenceQuery(): Promise<string> {
  const spaceList = await this.getSpacesConfig();
  const spaceQuery = spaceList.map(s => `space="${s}"`).join(' or ');

  // `query` is optional on the constructor options, so it may be undefined
  // here; treat undefined and blank strings the same as "not configured".
  const additionalQuery = this.query?.trim() ?? '';

  if (additionalQuery === '') {
    // NOTE(review): with no spaces configured either, this yields an empty
    // CQL string — confirm the content search endpoint accepts that.
    return spaceQuery;
  }

  if (spaceQuery === '') {
    // Avoid emitting `() and (...)`, which is not a well-formed expression.
    return additionalQuery;
  }

  return `(${spaceQuery}) and (${additionalQuery})`;
}

private async getDocuments(query: string): Promise<string[]> {
const documentsList = [];

this.logger.info(`Exploring space: "${space}"`);
this.logger.info(`Exploring documents using query: ${query}`);

let next = true;
let requestUrl = `${this.baseUrl}/rest/api/content?limit=1000&status=current&spaceKey=${space}`;
let requestUrl = `${this.baseUrl}/rest/api/content/search?limit=1000&status=current&cql=${query}`;
while (next) {
const data = await this.get<ConfluenceDocumentList>(requestUrl);
if (!data.results) {
Expand All @@ -280,16 +267,6 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
return documentsList;
}

/**
 * Gathers the documents of several spaces into one flat list.
 *
 * Spaces are processed one after another, in the order given, and each
 * space's documents are appended in the order `getDocumentsFromSpace`
 * returned them.
 *
 * @param spaces - Space identifiers to pass to `getDocumentsFromSpace`.
 * @returns The concatenated results for all spaces.
 */
private async getDocumentsFromSpaces(spaces: string[]): Promise<string[]> {
  const collected: string[] = [];

  for (const spaceKey of spaces) {
    const found = await this.getDocumentsFromSpace(spaceKey);
    for (const doc of found) {
      collected.push(doc);
    }
  }

  return collected;
}

private async getDocumentInfo(
documentUrl: string,
): Promise<IndexableConfluenceDocument[]> {
Expand Down

0 comments on commit 33f9933

Please sign in to comment.