Skip to content

Commit

Permalink
Working GM_GEO_LOOKUP_TABLE
Browse files Browse the repository at this point in the history
  • Loading branch information
motin committed Mar 20, 2019
1 parent 79af127 commit 7638eb4
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 14 deletions.
23 changes: 23 additions & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Gapminder-specific custom functions for Google Spreadsheets.

* [GM_AGGR](#gm_aggr)
* [GM_DATA](#gm_data)
* [GM_GEO_LOOKUP_TABLE](#gm_geo_lookup_table)
* [GM_GROWTH](#gm_growth)
* [GM_ID](#gm_id)
* [GM_IMPORT](#gm_import)
Expand Down Expand Up @@ -80,6 +81,28 @@ Takes 2-4 seconds: =GM\_DATA(B7:D, "pop", "year", "countries\_etc", 'data:pop:ye
**Returns:** `any`[][]
A two-dimensional array containing the cell/column contents described above in the summary.

___
<a id="gm_geo_lookup_table"></a>

### GM_GEO_LOOKUP_TABLE

**GM_GEO_LOOKUP_TABLE**(geography: *`string`*): `string`[][]

*Defined in GM_GEO_LOOKUP_TABLE.ts:18*

Inserts a table with Gapminder’s geo ids together with their aliases (all spellings we have seen before), including lowercased variants without diacritics and special characters, to allow for somewhat fuzzy matching.

To be used as the source range for VLOOKUP where the dataset is too large for GM\_ID or GM\_NAME to be used directly.

**Parameters:**

| Name | Type | Description |
| ------ | ------ | ------ |
| geography | `string` | Should be one of the sets listed in the gapminder geo ontology such as "countries\_etc" |

**Returns:** `string`[][]
A two-dimensional array containing the cell/column contents described above in the summary.

___
<a id="gm_growth"></a>

Expand Down
36 changes: 36 additions & 0 deletions src/GM_GEO_LOOKUP_TABLE.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import test, { ExecutionContext, Macro } from "ava";
import { GM_GEO_LOOKUP_TABLE } from "./GM_GEO_LOOKUP_TABLE";
import { MinimalUrlFetchApp } from "./lib/MinimalUrlFetchApp";
import { MinimalUtilities } from "./lib/MinimalUtilities";
// Expose the minimal UrlFetchApp / Utilities implementations from ./lib as globals before any test runs.
// NOTE(review): presumably these stand in for the Google Apps Script services of the same name,
// which are not available under the Node test runner — confirm.
(global as any).UrlFetchApp = MinimalUrlFetchApp;
(global as any).Utilities = MinimalUtilities;

/**
 * AVA macro: calls GM_GEO_LOOKUP_TABLE for the test case's geography and
 * deep-compares the first five rows of the result (header row included)
 * against the rows the test case expects.
 *
 * @hidden
 */
const testGmGeoLookupTable: Macro<any> = (t: ExecutionContext, testCase) => {
  const actualLeadingRows = GM_GEO_LOOKUP_TABLE(testCase.geography).slice(0, 5);
  // Debugging aid: t.log({ actualLeadingRows, expected: testCase.expectedTopFiveRowsOfOutput });
  t.deepEqual(actualLeadingRows, testCase.expectedTopFiveRowsOfOutput);
};

/**
 * Test cases for the testGmGeoLookupTable macro: a geography plus the first
 * five rows (header row included) that GM_GEO_LOOKUP_TABLE is expected to return for it.
 *
 * NOTE(review): the last two expected rows pair the "africa (total)" aliases with
 * geo "americas" — presumably this mirrors the upstream data at the time of writing; confirm.
 *
 * @hidden
 */
/* tslint:disable:object-literal-sort-keys */
const gmGeoLookupTableTestCases = [
  {
    geography: "world_4region",
    expectedTopFiveRowsOfOutput: [
      ["alias", "geo", "name"],
      ["Africa", "africa", "Africa"],
      ["africa", "africa", "Africa"],
      ["africa (total)", "americas", "The Americas"],
      ["Africa (total)", "americas", "The Americas"]
    ]
  }
];
/* tslint:enable:object-literal-sort-keys */

// Register one AVA test per case; the title carries the case's index in the table.
gmGeoLookupTableTestCases.forEach((testCase, caseIndex) => {
  test("testGmGeoLookupTable - " + caseIndex, testGmGeoLookupTable, testCase);
});
57 changes: 57 additions & 0 deletions src/GM_GEO_LOOKUP_TABLE.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import {
GeoAliasesAndSynonymsDataRow,
GeoAliasesAndSynonymsLookupTable,
GeoAliasesAndSynonymsWorksheetData,
geoAliasesAndSynonymsWorksheetDataToGeoLookupTable,
getGeoAliasesAndSynonymsWorksheetData
} from "./gsheetsData/geoAliasesAndSynonyms";

/**
 * Inserts a table with Gapminder’s geo ids together with their aliases (all spellings we have seen before),
 * including lower cased variants without diacritics and special characters to allow for somewhat fuzzy matching.
 *
 * To be used as the source range for VLOOKUP where the dataset is too large for GM_ID or GM_NAME to be used directly.
 *
 * The result starts with the header row ["alias", "geo", "name"]; the remaining rows are ordered by geo id
 * and then by alias.
 *
 * @param geography Should be one of the sets listed in the gapminder geo ontology such as "countries_etc".
 * A falsy value (e.g. an empty string) selects "countries_etc".
 * @return A two-dimensional array containing the cell/column contents described above in the summary.
 */
export function GM_GEO_LOOKUP_TABLE(geography: string): string[][] {
  const selectedGeography = geography || "countries_etc";
  const worksheetData: GeoAliasesAndSynonymsWorksheetData = getGeoAliasesAndSynonymsWorksheetData(
    selectedGeography
  );

  // Passing null as the normalizer requests the helper's default key normalization (the "fuzzy" keys).
  const fuzzyKeyedTable: GeoAliasesAndSynonymsLookupTable = geoAliasesAndSynonymsWorksheetDataToGeoLookupTable(
    worksheetData,
    null
  );
  // The identity normalizer keeps every geo id and alias exactly as spelled in the worksheet.
  const exactKeyedTable: GeoAliasesAndSynonymsLookupTable = geoAliasesAndSynonymsWorksheetDataToGeoLookupTable(
    worksheetData,
    lookupKey => lookupKey
  );
  // Exact-spelling entries are spread last, so they win whenever both tables share a key.
  const combinedTable: GeoAliasesAndSynonymsLookupTable = {
    ...fuzzyKeyedTable,
    ...exactKeyedTable
  };

  const rows: string[][] = [];
  for (const alias of Object.keys(combinedTable)) {
    const matchedRow: GeoAliasesAndSynonymsDataRow = combinedTable[alias];
    rows.push([alias, matchedRow.geo, matchedRow.name]);
  }

  // "accent" sensitivity: base letters and accents are significant, case differences are not.
  const collatorOptions: Intl.CollatorOptions = { sensitivity: "accent" };
  rows.sort((left, right) => {
    const byGeoId = left[1].localeCompare(right[1], "en", collatorOptions);
    return byGeoId !== 0
      ? byGeoId
      : left[0].localeCompare(right[0], "en", collatorOptions);
  });

  const headerRow = ["alias", "geo", "name"];
  return [headerRow, ...rows];
}
40 changes: 26 additions & 14 deletions src/gsheetsData/geoAliasesAndSynonyms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { ListGeoAliasesAndSynonyms } from "./types/listGeoAliasesAndSynonyms";
/**
* @hidden
*/
interface GeoAliasesAndSynonymsDataRow {
export interface GeoAliasesAndSynonymsDataRow {
/* tslint:disable:object-literal-sort-keys */
alias: string;
geo: string;
Expand All @@ -27,32 +27,39 @@ interface MissingGeoAliasDataRow {
/**
* @hidden
*/
interface GeoAliasesAndSynonymsWorksheetData {
export interface GeoAliasesAndSynonymsWorksheetData {
rows: GeoAliasesAndSynonymsDataRow[];
}

/**
* @hidden
*/
interface GeoAliasesAndSynonymsLookupTable {
export interface GeoAliasesAndSynonymsLookupTable {
[alias: string]: GeoAliasesAndSynonymsDataRow;
}

/**
* Builds the alias lookup table for a geography: loads the worksheet data through
* getGeoAliasesAndSynonymsWorksheetData and indexes it with
* geoAliasesAndSynonymsWorksheetDataToGeoLookupTable, passing null as the normalizer.
* NOTE(review): null appears to select keyNormalizerForSlightlySmarterLookups as the key normalizer — confirm in the callee.
*
* @hidden
*/
export function getGeoAliasesAndSynonymsLookupTable(geography) {
const data = getGeoAliasesAndSynonymsWorksheetData(geography);
return geoAliasesAndSynonymsWorksheetDataToGeoLookupTable(data, null);
}

/**
* @hidden
*/
export function getGeoAliasesAndSynonymsWorksheetData(geography) {
if (!geoAliasesAndSynonymsDocWorksheetReferencesByGeopgraphy[geography]) {
throw new Error(`Unknown Gapminder geography: "${geography}"`);
}
const worksheetDataResponse: ListGeoAliasesAndSynonyms.Response = fetchWorksheetData(
geoAliasesAndSynonymsDocSpreadsheetId,
geoAliasesAndSynonymsDocWorksheetReferencesByGeopgraphy[geography]
);
const data = gsheetsDataApiFeedsListGeoAliasesAndSynonymsResponseToWorksheetData(
return gsheetsDataApiFeedsListGeoAliasesAndSynonymsResponseToWorksheetData(
worksheetDataResponse
);
return geoAliasesAndSynonymsWorksheetDataToGeoLookupTable(data);
}

/**
Expand All @@ -79,7 +86,9 @@ function gsheetsDataApiFeedsListGeoAliasesAndSynonymsResponseToWorksheetData(
* By trimming the lookup keys, we allow slightly fuzzy matching, such as "Foo " == "foo" and "Fóo*" == "Foo"
* @hidden
*/
export function keyNormalizerForSlightlySmarterLookups(lookupKey) {
export function keyNormalizerForSlightlySmarterLookups(
lookupKey: string
): string {
const trimmedLowerCasedWithoutDiacritics = removeDiacritics(
lookupKey.trim().toLowerCase()
);
Expand All @@ -89,16 +98,16 @@ export function keyNormalizerForSlightlySmarterLookups(lookupKey) {
/**
* @hidden
*/
function geoAliasesAndSynonymsWorksheetDataToGeoLookupTable(
data: GeoAliasesAndSynonymsWorksheetData
export function geoAliasesAndSynonymsWorksheetDataToGeoLookupTable(
data: GeoAliasesAndSynonymsWorksheetData,
normalizer: (lookupKey: string) => string
): GeoAliasesAndSynonymsLookupTable {
if (!normalizer) {
normalizer = keyNormalizerForSlightlySmarterLookups;
}
return data.rows.reduce((lookupTableAccumulator, currentValue) => {
lookupTableAccumulator[
keyNormalizerForSlightlySmarterLookups(currentValue.geo)
] = currentValue;
lookupTableAccumulator[
keyNormalizerForSlightlySmarterLookups(currentValue.alias)
] = currentValue;
lookupTableAccumulator[normalizer(currentValue.geo)] = currentValue;
lookupTableAccumulator[normalizer(currentValue.alias)] = currentValue;
return lookupTableAccumulator;
}, {});
}
Expand All @@ -110,6 +119,9 @@ export function matchColumnValuesUsingGeoAliasesAndSynonyms(
columnValues,
geography
) {
if (!geography) {
geography = "countries_etc";
}
const lookupTable = getGeoAliasesAndSynonymsLookupTable(geography);
return columnValues.map(
(inputRow): GeoAliasesAndSynonymsDataRow | MissingGeoAliasDataRow => {
Expand Down
14 changes: 14 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import { GM_AGGR } from "./GM_AGGR";
import { GM_DATA } from "./GM_DATA";
import { GM_GEO_LOOKUP_TABLE } from "./GM_GEO_LOOKUP_TABLE";
import { GM_GROWTH } from "./GM_GROWTH";
import { GM_ID } from "./GM_ID";
import { GM_IMPORT } from "./GM_IMPORT";
Expand Down Expand Up @@ -96,6 +97,19 @@ import { menuRefreshDataDependencies } from "./menuRefreshDataDependencies";
);
};

/**
* Inserts a table with Gapminder’s geo ids together with their aliases (all spellings we have seen before), including lower cased
* variants without diacritics and special characters to allow for somewhat fuzzy matching.
*
* To be used as the source range for VLOOKUP where the dataset is too large for GM_ID or GM_NAME to be used directly.
*
* @param {"countries_etc"} geography (Optional with default "countries_etc") Should be one of the sets listed in the gapminder geo ontology such as "countries_etc"
* @return A two-dimensional array with a header row followed by one row per alias (columns: alias, geo, name).
* @customfunction
*/
(global as any).GM_GEO_LOOKUP_TABLE = function(geography: string) {
return GM_GEO_LOOKUP_TABLE(geography);
};

/**
* Inserts the growth per time unit of a common Gapminder concept column, including a header row, matched against the input table range.
*
Expand Down

0 comments on commit 7638eb4

Please sign in to comment.