diff --git a/Makefile b/Makefile index 585203a34c4..e47609c73c0 100644 --- a/Makefile +++ b/Makefile @@ -353,6 +353,7 @@ reindex: itsJustJavascript node --enable-source-maps itsJustJavascript/baker/algolia/indexToAlgolia.js node --enable-source-maps itsJustJavascript/baker/algolia/indexChartsToAlgolia.js node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorersToAlgolia.js + node --enable-source-maps itsJustJavascript/baker/algolia/indexExplorerViewsToAlgolia.js clean: rm -rf node_modules itsJustJavascript diff --git a/baker/algolia/configureAlgolia.ts b/baker/algolia/configureAlgolia.ts index 60766e198f4..5bfef4f47bd 100644 --- a/baker/algolia/configureAlgolia.ts +++ b/baker/algolia/configureAlgolia.ts @@ -131,6 +131,22 @@ export const configureAlgolia = async () => { disableTypoToleranceOnAttributes: ["text"], }) + const explorerViewsIndex = client.initIndex( + getIndexName(SearchIndexName.ExplorerViews) + ) + + await explorerViewsIndex.setSettings({ + ...baseSettings, + searchableAttributes: [ + "unordered(viewTitle)", + "unordered(viewSettings)", + ], + customRanking: ["desc(score)", "asc(viewIndexWithinExplorer)"], + attributeForDistinct: "viewTitleAndExplorerSlug", + distinct: true, + minWordSizefor1Typo: 6, + }) + const synonyms = [ ["kids", "children"], ["pork", "pigmeat"], diff --git a/baker/algolia/indexExplorerViewsToAlgolia.ts b/baker/algolia/indexExplorerViewsToAlgolia.ts new file mode 100644 index 00000000000..0d666ff7fab --- /dev/null +++ b/baker/algolia/indexExplorerViewsToAlgolia.ts @@ -0,0 +1,233 @@ +import { Knex } from "knex" +import * as db from "../../db/db.js" +import { ExplorerBlockGraphers } from "./indexExplorersToAlgolia.js" +import { DecisionMatrix } from "../../explorer/ExplorerDecisionMatrix.js" +import { tsvFormat } from "d3-dsv" +import { + ExplorerChoiceParams, + ExplorerControlType, +} from "../../explorer/ExplorerConstants.js" +import { GridBoolean } from "../../gridLang/GridLangConstants.js" +import { 
getAnalyticsPageviewsByUrlObj } from "../../db/model/Pageview.js"
+import { keyBy } from "lodash"
+import { ALGOLIA_INDEXING } from "../../settings/serverSettings.js"
+import { getAlgoliaClient } from "./configureAlgolia.js"
+import { getIndexName } from "../../site/search/searchClient.js"
+import { SearchIndexName } from "../../site/search/searchTypes.js"
+
+// One search record per explorer view, i.e. per choice combination returned by
+// `DecisionMatrix.allDecisionsAsQueryParams()`, before explorer-level
+// information is attached.
+interface ExplorerViewEntry {
+    viewTitle: string
+    viewSubtitle: string
+    // Search-ready strings derived from the view's choices
+    viewSettings: string[]
+    // Appended verbatim to `/explorers/<explorerSlug>` to build the link to the view
+    viewQueryParams: string
+
+    viewGrapherId?: number
+
+    // Potential ranking criteria
+    viewIndexWithinExplorer: number
+    titleLength: number
+    numNonDefaultSettings: number
+    // viewViews_7d: number
+}
+
+// An explorer view record enriched with information about its explorer. This
+// is the shape that gets pushed to the Algolia index.
+interface ExplorerViewEntryWithExplorerInfo extends ExplorerViewEntry {
+    explorerSlug: string
+    explorerTitle: string
+    // 7-day pageviews of `/explorers/<explorerSlug>`; 0 if there is no pageview entry
+    explorerViews_7d: number
+    viewTitleAndExplorerSlug: string // used for deduplication: `viewTitle | explorerSlug`
+
+    // Ranking score, see the scoring function in `getExplorerViewRecords`
+    score: number
+
+    objectID?: string
+}
+
+// Creates search-ready strings from a choice.
+// Special handling is pretty much only necessary for checkboxes: If they are not ticked, then their name is not included.
+// Imagine a "Per capita" checkbox, for example. If it's not ticked, then we don't want searches for "per capita" to wrongfully match it.
+const explorerChoiceToViewSettings = (
+    choices: ExplorerChoiceParams,
+    decisionMatrix: DecisionMatrix
+): string[] => {
+    return Object.entries(choices).flatMap(([choiceName, choiceValue]) => {
+        const choiceControlType =
+            decisionMatrix.choiceNameToControlTypeMap.get(choiceName)
+
+        let setting = choiceValue
+        if (choiceControlType === ExplorerControlType.Checkbox)
+            setting = choiceValue === GridBoolean.true ? choiceName : ""
+
+        // Leave out empty entries (unticked checkboxes, blank values) instead of
+        // sending empty strings to the search index.
+        return setting ? [setting] : []
+    })
+}
+
+// Builds the view records for a single explorer.
+// Reads the explorer's config from the `explorers` table, expands its
+// "graphers" block into a decision matrix and emits one record per choice
+// combination. Views that reference a grapher id get their title and subtitle
+// from the `charts` table (published charts only).
+// Throws if the explorer cannot be found or has no "graphers" block.
+const getExplorerViewRecordsForExplorerSlug = async (
+    knex: Knex,
+    slug: string
+): Promise<ExplorerViewEntry[]> => {
+    const explorerRow = await knex
+        .table("explorers")
+        .select("config")
+        .where({ slug })
+        .first()
+
+    // `.first()` resolves to undefined when no row matches
+    if (!explorerRow)
+        throw new Error(`Explorer ${slug} not found in the database`)
+
+    const explorerConfig = JSON.parse(explorerRow.config) as {
+        blocks?: { type: string }[]
+    }
+
+    const explorerGrapherBlock = explorerConfig.blocks?.find(
+        (block) => block.type === "graphers"
+    ) as ExplorerBlockGraphers | undefined
+
+    if (explorerGrapherBlock === undefined)
+        throw new Error(`Explorer ${slug} has no grapher block`)
+
+    // TODO: Maybe make DecisionMatrix accept JSON directly
+    const tsv = tsvFormat(explorerGrapherBlock.block)
+    const explorerDecisionMatrix = new DecisionMatrix(tsv)
+
+    console.log(
+        `Processing explorer ${slug} (${explorerDecisionMatrix.numRows} rows)`
+    )
+
+    const defaultSettings = explorerDecisionMatrix.defaultSettings
+
+    const records = explorerDecisionMatrix
+        .allDecisionsAsQueryParams()
+        .map((choice, i) => {
+            // Selecting the choice updates `selectedRow` and
+            // `availableChoiceOptions`, which are read below.
+            explorerDecisionMatrix.setValuesFromChoiceParams(choice)
+
+            // Check which choices are non-default, i.e. are not the first available option in a dropdown/radio
+            const nonDefaultSettings = Object.entries(
+                explorerDecisionMatrix.availableChoiceOptions
+            ).filter(([choiceName, choiceOptions]) => {
+                // The default of a choice is:
+                // - either the option marked as `default` in the decision matrix
+                // - or the first available option in the decision matrix
+                const defaultValue =
+                    defaultSettings[choiceName] !== undefined
+                        ? defaultSettings[choiceName]
+                        : choiceOptions[0]
+
+                // Choices with a single option can never be non-default
+                return (
+                    choiceOptions.length > 1 &&
+                    choice[choiceName] !== defaultValue
+                )
+            })
+
+            const record: ExplorerViewEntry = {
+                viewTitle: explorerDecisionMatrix.selectedRow.title,
+                viewSubtitle: explorerDecisionMatrix.selectedRow.subtitle,
+                viewSettings: explorerChoiceToViewSettings(
+                    choice,
+                    explorerDecisionMatrix
+                ),
+                viewGrapherId: explorerDecisionMatrix.selectedRow.grapherId,
+                viewQueryParams: explorerDecisionMatrix.toString(),
+
+                viewIndexWithinExplorer: i,
+                // Default to 0 so that `score` never becomes NaN for rows without a title
+                titleLength:
+                    explorerDecisionMatrix.selectedRow.title?.length ?? 0,
+                numNonDefaultSettings: nonDefaultSettings.length,
+            }
+            return record
+        })
+
+    // Enrich `grapherId`-powered views with title/subtitle
+    const grapherIds = records.flatMap((record) =>
+        record.viewGrapherId !== undefined ? [record.viewGrapherId] : []
+    )
+
+    if (grapherIds.length) {
+        console.log(
+            `Fetching grapher info from ${grapherIds.length} graphers for explorer ${slug}`
+        )
+        const grapherRows = await knex
+            .table("charts")
+            .select(
+                "id",
+                knex.raw("config->>'$.title' as title"),
+                knex.raw("config->>'$.subtitle' as subtitle")
+            )
+            .whereIn("id", grapherIds)
+            .andWhereRaw("config->>'$.isPublished' = 'true'")
+        const grapherIdToTitle = keyBy(grapherRows, "id")
+
+        for (const record of records) {
+            if (record.viewGrapherId === undefined) continue
+
+            const grapherInfo = grapherIdToTitle[record.viewGrapherId]
+            if (grapherInfo === undefined) {
+                // Unknown or unpublished chart: the record keeps whatever the
+                // decision matrix row provided.
+                console.warn(
+                    `Grapher id ${record.viewGrapherId} not found for explorer ${slug}`
+                )
+                continue
+            }
+            record.viewTitle = grapherInfo.title
+            record.viewSubtitle = grapherInfo.subtitle
+            record.titleLength = grapherInfo.title?.length ?? 0
+        }
+    }
+
+    // TODO: Handle indicator-based explorers
+
+    return records
+}
+
+// Builds the view records of all published explorers, each enriched with
+// explorer info, a deduplication key, a ranking score and an `objectID`.
+const getExplorerViewRecords = async (
+    knex: Knex
+): Promise<ExplorerViewEntryWithExplorerInfo[]> => {
+    const publishedExplorers = Object.values(
+        await db.getPublishedExplorersBySlug(knex)
+    )
+
+    const pageviews = await getAnalyticsPageviewsByUrlObj(knex)
+
+    const records: ExplorerViewEntryWithExplorerInfo[] = []
+    // Explorers are processed one after another, which keeps the log output of
+    // each explorer together.
+    for (const explorerInfo of publishedExplorers) {
+        const explorerViewRecords = await getExplorerViewRecordsForExplorerSlug(
+            knex,
+            explorerInfo.slug
+        )
+
+        const explorerPageviews =
+            pageviews[`/explorers/${explorerInfo.slug}`]?.views_7d ?? 0
+
+        explorerViewRecords.forEach((record, i) => {
+            records.push({
+                ...record,
+                explorerSlug: explorerInfo.slug,
+                explorerTitle: explorerInfo.title,
+                explorerViews_7d: explorerPageviews,
+                viewTitleAndExplorerSlug: `${record.viewTitle} | ${explorerInfo.slug}`,
+                // Scoring function: popular explorers rank higher; views that
+                // deviate from the defaults and views with long titles rank lower
+                score:
+                    explorerPageviews * 10 -
+                    record.numNonDefaultSettings * 50 -
+                    record.titleLength,
+
+                objectID: `${explorerInfo.slug}-${i}`,
+            })
+        })
+    }
+
+    return records
+}
+
+// Entry point: replaces the contents of the explorer views index with freshly
+// built records. Does nothing if `ALGOLIA_INDEXING` is off or no Algolia
+// client is available.
+const indexExplorerViewsToAlgolia = async (): Promise<void> => {
+    if (!ALGOLIA_INDEXING) return
+
+    const client = getAlgoliaClient()
+    if (!client) {
+        console.error(
+            `Failed indexing explorer views (Algolia client not initialized)`
+        )
+        return
+    }
+
+    const index = client.initIndex(getIndexName(SearchIndexName.ExplorerViews))
+
+    await db.getConnection()
+    try {
+        const records = await getExplorerViewRecords(db.knexInstance())
+        await index.replaceAllObjects(records)
+    } finally {
+        // Close the connections even if building or uploading the records failed
+        await db.closeTypeOrmAndKnexConnections()
+    }
+}
+
+// A rejection of the indexing promise below ends up here and fails the process
+process.on("unhandledRejection", (e) => {
+    console.error(e)
+    process.exit(1)
+})
+
+indexExplorerViewsToAlgolia()
diff --git a/baker/algolia/indexExplorersToAlgolia.ts b/baker/algolia/indexExplorersToAlgolia.ts index 8a4eefe75bf..0bff7250cb0 100644 --- a/baker/algolia/indexExplorersToAlgolia.ts +++ b/baker/algolia/indexExplorersToAlgolia.ts @@ -23,7 +23,7 @@ type ExplorerBlockColumns = { block: { name: string; additionalInfo?: string }[] } -type ExplorerBlockGraphers = { +export type ExplorerBlockGraphers = { type: "graphers" block: { title?: string diff --git a/explorer/ExplorerDecisionMatrix.ts b/explorer/ExplorerDecisionMatrix.ts index 
0b0ff8b802e..2f2f454df5b 100644 --- a/explorer/ExplorerDecisionMatrix.ts +++ b/explorer/ExplorerDecisionMatrix.ts @@ -86,7 +86,7 @@ export class DecisionMatrix { table: CoreTable @observable currentParams: ExplorerChoiceParams = {} constructor(delimited: string, hash = "") { - this.choices = makeChoicesMap(delimited) + this.choiceNameToControlTypeMap = makeChoicesMap(delimited) this.table = new CoreTable(parseDelimited(dropColumnTypes(delimited)), [ // todo: remove col def? { @@ -141,7 +141,7 @@ export class DecisionMatrix { ) } - private choices: Map + choiceNameToControlTypeMap: Map hash: string toConstrainedOptions(): ExplorerChoiceParams { @@ -243,7 +243,7 @@ export class DecisionMatrix { } @computed private get choiceNames(): ChoiceName[] { - return Array.from(this.choices.keys()) + return Array.from(this.choiceNameToControlTypeMap.keys()) } @computed private get allChoiceOptions(): ChoiceMap { @@ -256,7 +256,7 @@ export class DecisionMatrix { return choiceMap } - @computed private get availableChoiceOptions(): ChoiceMap { + @computed get availableChoiceOptions(): ChoiceMap { const result: ChoiceMap = {} this.choiceNames.forEach((choiceName) => { result[choiceName] = this.allChoiceOptions[choiceName].filter( @@ -317,7 +317,7 @@ export class DecisionMatrix { } // The first row with defaultView column value of "true" determines the default view to use - private get defaultSettings() { + get defaultSettings() { const hits = this.rowsWith({ [GrapherGrammar.defaultView.keyword]: "true", }) @@ -373,7 +373,7 @@ export class DecisionMatrix { constrainedOptions ) ) - const type = this.choices.get(title)! + const type = this.choiceNameToControlTypeMap.get(title)! 
return { title, diff --git a/site/search/Autocomplete.tsx b/site/search/Autocomplete.tsx index 3f30becbde1..cadd2af7ab7 100644 --- a/site/search/Autocomplete.tsx +++ b/site/search/Autocomplete.tsx @@ -70,7 +70,12 @@ const getItemUrl: AutocompleteSource["getItemUrl"] = ({ item }) => const prependSubdirectoryToAlgoliaItemUrl = (item: BaseItem): string => { const indexName = parseIndexName(item.__autocomplete_indexName as string) const subdirectory = indexNameToSubdirectoryMap[indexName] - return `${subdirectory}/${item.slug}` + switch (indexName) { + case SearchIndexName.ExplorerViews: + return `${subdirectory}/${item.explorerSlug}${item.viewQueryParams}` + default: + return `${subdirectory}/${item.slug}` + } } const FeaturedSearchesSource: AutocompleteSource = { @@ -130,6 +135,14 @@ const AlgoliaSource: AutocompleteSource = { distinct: true, }, }, + { + indexName: getIndexName(SearchIndexName.ExplorerViews), + query, + params: { + hitsPerPage: 1, + distinct: true, + }, + }, { indexName: getIndexName(SearchIndexName.Explorers), query, @@ -149,11 +162,20 @@ const AlgoliaSource: AutocompleteSource = { item.__autocomplete_indexName as string ) const indexLabel = - index === SearchIndexName.Charts - ? "Chart" - : index === SearchIndexName.Explorers - ? "Explorer" - : pageTypeDisplayNames[item.type as PageType] + index === SearchIndexName.Charts ? ( + "Chart" + ) : index === SearchIndexName.Explorers ? ( + "Explorer" + ) : index === SearchIndexName.ExplorerViews ? ( + <> + in {item.explorerTitle} Data Explorer + + ) : ( + pageTypeDisplayNames[item.type as PageType] + ) + + const mainAttribute = + index === SearchIndexName.ExplorerViews ? "viewTitle" : "title" return (
= { diff --git a/site/search/searchTypes.ts b/site/search/searchTypes.ts index bb23b325138..1491bb6c190 100644 --- a/site/search/searchTypes.ts +++ b/site/search/searchTypes.ts @@ -68,6 +68,7 @@ export type IChartHit = Hit & ChartRecord export enum SearchIndexName { Explorers = "explorers", + ExplorerViews = "explorer-views", Charts = "charts", Pages = "pages", } @@ -85,4 +86,5 @@ export const indexNameToSubdirectoryMap: Record = { [SearchIndexName.Pages]: "", [SearchIndexName.Charts]: "/grapher", [SearchIndexName.Explorers]: "/explorers", + [SearchIndexName.ExplorerViews]: "/explorers", }