From 927cd70e82ca6d6552a36506d3c1f4d6278abf9e Mon Sep 17 00:00:00 2001 From: Richard Kettelerij Date: Wed, 4 Dec 2024 13:02:42 +0100 Subject: [PATCH] feat: add search endpoint - expand query parameter parsing --- internal/search/datasources/datasource.go | 5 +- .../search/datasources/postgres/postgres.go | 19 +- internal/search/domain/spatialref.go | 12 ++ internal/search/main.go | 164 +++++++++++++++--- internal/search/main_test.go | 131 ++++++++++---- 5 files changed, 264 insertions(+), 67 deletions(-) create mode 100644 internal/search/domain/spatialref.go diff --git a/internal/search/datasources/datasource.go b/internal/search/datasources/datasource.go index 955d2bb..34fa7a4 100644 --- a/internal/search/datasources/datasource.go +++ b/internal/search/datasources/datasource.go @@ -2,12 +2,15 @@ package datasources import ( "context" + + "github.com/PDOK/gomagpie/internal/search/domain" ) // Datasource knows how make different kinds of queries/actions on the underlying actual datastore. // This abstraction allows the rest of the system to stay datastore agnostic. type Datasource interface { - Suggest(ctx context.Context, suggestForThis string) ([]string, error) + Suggest(ctx context.Context, searchTerm string, collections map[string]map[string]string, + srid domain.SRID, limit int) ([]string, error) // Close closes (connections to) the datasource gracefully Close() diff --git a/internal/search/datasources/postgres/postgres.go b/internal/search/datasources/postgres/postgres.go index 5830b27..4e81662 100644 --- a/internal/search/datasources/postgres/postgres.go +++ b/internal/search/datasources/postgres/postgres.go @@ -4,6 +4,7 @@ import ( "context" "fmt" + "github.com/PDOK/gomagpie/internal/search/domain" "github.com/jackc/pgx/v5" pgxgeom "github.com/twpayne/pgx-geom" @@ -36,17 +37,17 @@ func (p *Postgres) Close() { _ = p.db.Close(p.ctx) } -func (p *Postgres) Suggest(ctx context.Context, suggestForThis string) ([]string, error) { +func (p *Postgres) Suggest(ctx context.Context, searchTerm string, _ map[string]map[string]string, _ domain.SRID, limit int) ([]string, error) { queryCtx, cancel := context.WithTimeout(ctx, p.queryTimeout) defer cancel() // Prepare dynamic full-text search query // Split terms by spaces and append :* to each term - terms := strings.Fields(suggestForThis) + terms := strings.Fields(searchTerm) for i, term := range terms { terms[i] = term + ":*" } - searchTerm := strings.Join(terms, " & ") + searchTermForPostgres := strings.Join(terms, " & ") sqlQuery := fmt.Sprintf( `SELECT @@ -56,17 +57,15 @@ func (p *Postgres) Suggest(ctx context.Context, suggestForThis string) ([]string FROM ( SELECT display_name, ts_rank_cd(ts, to_tsquery('%[1]s'), 1) AS rank, - ts_headline('dutch', suggest, to_tsquery('%[2]s')) AS highlighted_text - FROM - %[3]s - WHERE ts @@ to_tsquery('%[4]s') LIMIT 500 + ts_headline('dutch', suggest, to_tsquery('%[1]s')) AS highlighted_text + FROM %[2]s + WHERE ts @@ to_tsquery('%[1]s') LIMIT 500 ) r GROUP BY display_name - ORDER BY rank DESC, display_name ASC LIMIT 50`, - searchTerm, searchTerm, p.searchIndex, searchTerm) + ORDER BY rank DESC, display_name ASC LIMIT $1`, searchTermForPostgres, p.searchIndex) // Execute query - rows, err := p.db.Query(queryCtx, sqlQuery) + rows, err := p.db.Query(queryCtx, sqlQuery, limit) if err != nil { return nil, fmt.Errorf("query '%s' failed: %w", sqlQuery, err) } diff --git a/internal/search/domain/spatialref.go b/internal/search/domain/spatialref.go new file mode 100644 index 0000000..5a666a6 --- /dev/null +++ b/internal/search/domain/spatialref.go @@ -0,0 +1,12 @@ +package domain + +const ( + CrsURIPrefix = "http://www.opengis.net/def/crs/" + UndefinedSRID = 0 + WGS84SRIDPostgis = 4326 // Use the same SRID as used during ETL + WGS84CodeOGC = "CRS84" +) + +// SRID Spatial Reference System Identifier: a unique value to unambiguously identify a spatial coordinate system. +// For example '28992' in https://www.opengis.net/def/crs/EPSG/0/28992 +type SRID int diff --git a/internal/search/main.go b/internal/search/main.go index 28b4610..ba63fca 100644 --- a/internal/search/main.go +++ b/internal/search/main.go @@ -1,18 +1,37 @@ package search import ( + "context" + "errors" + "fmt" "log" "net/http" "net/url" + "regexp" + "strconv" "strings" "time" "github.com/PDOK/gomagpie/internal/engine" ds "github.com/PDOK/gomagpie/internal/search/datasources" "github.com/PDOK/gomagpie/internal/search/datasources/postgres" + "github.com/PDOK/gomagpie/internal/search/domain" ) -const timeout = time.Second * 15 +const ( + queryParam = "q" + limitParam = "limit" + crsParam = "crs" + + limitDefault = 10 + limitMax = 50 + + timeout = time.Second * 15 +) + +var ( + deepObjectParamRegex = regexp.MustCompile(`\w+\[\w+]`) +) type Search struct { engine *engine.Engine @@ -31,31 +50,42 @@ func NewSearch(e *engine.Engine, dbConn string, searchIndex string) *Search { // Suggest autosuggest locations based on user input func (s *Search) Suggest() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - params := parseQueryParams(r.URL.Query()) - searchQuery := params["q"] - delete(params, "q") - format := params["f"] - delete(params, "f") - crs := params["crs"] - delete(params, "crs") - limit := params["limit"] - delete(params, "limit") - - log.Printf("crs %s, limit %d, format %s, query %s, params %v", crs, limit, format, searchQuery, params) - - suggestions, err := s.datasource.Suggest(r.Context(), searchQuery.(string)) // TODO check before casting + collections, searchTerm, outputSRID, limit, err := parseQueryParams(r.URL.Query()) if err != nil { - engine.RenderProblem(engine.ProblemServerError, w, err.Error()) + engine.RenderProblem(engine.ProblemBadRequest, w, err.Error()) + return + } + suggestions, err := s.datasource.Suggest(r.Context(), searchTerm, collections, outputSRID, limit) + if err != nil { + handleQueryError(w, err) + return + } + format := s.engine.CN.NegotiateFormat(r) + switch format { + case engine.FormatGeoJSON, engine.FormatJSON: + serveJSON(suggestions, engine.MediaTypeGeoJSON, w) + default: + engine.RenderProblem(engine.ProblemNotAcceptable, w, fmt.Sprintf("format '%s' is not supported", format)) return } - serveJSON(suggestions, engine.MediaTypeGeoJSON, w) } } -func parseQueryParams(query url.Values) map[string]any { - result := make(map[string]any, len(query)) +func parseQueryParams(query url.Values) (collectionsWithParams map[string]map[string]string, searchTerm string, outputSRID domain.SRID, limit int, err error) { + err = validateNoUnknownParams(query) + if err != nil { + return + } + searchTerm, searchTermErr := parseSearchTerm(query) + collectionsWithParams = parseCollectionDeepObjectParams(query) + outputSRID, outputSRIDErr := parseCrsToSRID(query, crsParam) + limit, limitErr := parseLimit(query) + err = errors.Join(searchTermErr, limitErr, outputSRIDErr) + return +} - deepObjectParams := make(map[string]map[string]string) +func parseCollectionDeepObjectParams(query url.Values) map[string]map[string]string { + deepObjectParams := make(map[string]map[string]string, len(query)) for key, values := range query { if strings.Contains(key, "[") { // Extract deepObject parameters @@ -67,15 +97,17 @@ func parseQueryParams(query url.Values) map[string]any { deepObjectParams[mainKey] = make(map[string]string) } deepObjectParams[mainKey][subKey] = values[0] - } else { - // Extract regular (flat) parameters - result[key] = values[0] } } - for mainKey, subParams := range deepObjectParams { - result[mainKey] = subParams + return deepObjectParams +} + +func parseSearchTerm(query url.Values) (searchTerm string, err error) { + searchTerm = query.Get(queryParam) + if searchTerm == "" { + err = fmt.Errorf("no search term provided, '%s' query parameter is required", queryParam) } - return result + return } func newDatasource(e *engine.Engine, dbConn string, searchIndex string) ds.Datasource { @@ -86,3 +118,85 @@ func newDatasource(e *engine.Engine, dbConn string, searchIndex string) ds.Datas e.RegisterShutdownHook(datasource.Close) return datasource } + +// log error, but send generic message to client to prevent possible information leakage from datasource +func handleQueryError(w http.ResponseWriter, err error) { + msg := "failed to fulfill search request" + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + // provide more context when user hits the query timeout + msg += ": querying took too long (timeout encountered). Simplify your request and try again, or contact support" + } + log.Printf("%s, error: %v\n", msg, err) + engine.RenderProblem(engine.ProblemServerError, w, msg) // don't include sensitive information in details msg +} + +// implements req 7.6 (https://docs.ogc.org/is/17-069r4/17-069r4.html#query_parameters) +func validateNoUnknownParams(query url.Values) error { + copyParams := clone(query) + copyParams.Del(engine.FormatParam) + copyParams.Del(queryParam) + copyParams.Del(limitParam) + copyParams.Del(crsParam) + for key := range query { + if deepObjectParamRegex.MatchString(key) { + copyParams.Del(key) + } + } + if len(copyParams) > 0 { + return fmt.Errorf("unknown query parameter(s) found: %v", copyParams.Encode()) + } + return nil +} + +func clone(params url.Values) url.Values { + copyParams := url.Values{} + for k, v := range params { + copyParams[k] = v + } + return copyParams +} + +func parseCrsToSRID(params url.Values, paramName string) (domain.SRID, error) { + param := params.Get(paramName) + if param == "" { + return domain.UndefinedSRID, nil + } + param = strings.TrimSpace(param) + if !strings.HasPrefix(param, domain.CrsURIPrefix) { + return domain.UndefinedSRID, fmt.Errorf("%s param should start with %s, got: %s", paramName, domain.CrsURIPrefix, param) + } + var srid domain.SRID + lastIndex := strings.LastIndex(param, "/") + if lastIndex != -1 { + crsCode := param[lastIndex+1:] + if crsCode == domain.WGS84CodeOGC { + return domain.WGS84SRIDPostgis, nil // CRS84 is WGS84, just like EPSG:4326 (only axis order differs but SRID is the same) + } + val, err := strconv.Atoi(crsCode) + if err != nil { + return 0, fmt.Errorf("expected numerical CRS code, received: %s", crsCode) + } + srid = domain.SRID(val) + } + return srid, nil +} + +func parseLimit(params url.Values) (int, error) { + limit := limitDefault + var err error + if params.Get(limitParam) != "" { + limit, err = strconv.Atoi(params.Get(limitParam)) + if err != nil { + err = errors.New("limit must be numeric") + } + // "If the value of the limit parameter is larger than the maximum value, this SHALL NOT result + // in an error (instead use the maximum as the parameter value)." + if limit > limitMax { + limit = limitMax + } + } + if limit < 0 { + err = errors.New("limit can't be negative") + } + return limit, err +} diff --git a/internal/search/main_test.go b/internal/search/main_test.go index aedf1fa..20cb570 100644 --- a/internal/search/main_test.go +++ b/internal/search/main_test.go @@ -24,6 +24,8 @@ import ( "github.com/testcontainers/testcontainers-go/wait" ) +const testSearchIndex = "search_index" + func init() { // change working dir to root _, filename, _, _ := runtime.Caller(0) @@ -40,7 +42,7 @@ func TestSuggest(t *testing.T) { } ctx := context.Background() - // given postgres available + // given available postgres dbPort, postgisContainer, err := setupPostgis(ctx, t) if err != nil { t.Error(err) @@ -49,42 +51,110 @@ func TestSuggest(t *testing.T) { dbConn := fmt.Sprintf("postgres://postgres:postgres@127.0.0.1:%d/%s?sslmode=disable", dbPort.Int(), "test_db") + // given available engine + eng, err := engine.NewEngine("internal/etl/testdata/config.yaml", false, false) + assert.NoError(t, err) + + // given search endpoint + searchEndpoint := NewSearch(eng, dbConn, testSearchIndex) + // given empty search index - err = etl.CreateSearchIndex(dbConn, "search_index") + err = etl.CreateSearchIndex(dbConn, testSearchIndex) assert.NoError(t, err) - // given imported gpkg + // given imported geopackage cfg, err := config.NewConfig("internal/etl/testdata/config.yaml") assert.NoError(t, err) table := config.FeatureTable{Name: "addresses", FID: "fid", Geom: "geom"} - err = etl.ImportFile(cfg, "search_index", "internal/etl/testdata/addresses-crs84.gpkg", table, 1000, dbConn) - assert.NoError(t, err) - - // given engine available - e, err := engine.NewEngine("internal/etl/testdata/config.yaml", false, false) + err = etl.ImportFile(cfg, testSearchIndex, "internal/etl/testdata/addresses-crs84.gpkg", table, 1000, dbConn) assert.NoError(t, err) - // given server available - rr, ts := createMockServer() - defer ts.Close() - - // when perform autosuggest - searchEndpoint := NewSearch(e, dbConn, "search_index") - handler := searchEndpoint.Suggest() - req, err := createRequest("http://localhost:8080/search/suggest?q=\"Oudeschild\"") - assert.NoError(t, err) - handler.ServeHTTP(rr, req) - - // then - assert.Equal(t, 200, rr.Code) - assert.JSONEq(t, `[ - "Barentszstraat, 1792AD Oudeschild", - "Bolwerk, 1792AS Oudeschild", - "Commandeurssingel, 1792AV Oudeschild", - "De Houtmanstraat, 1792BC Oudeschild", - "De Ruyterstraat, 1792AP Oudeschild", - "De Wittstraat, 1792BP Oudeschild" - ]`, rr.Body.String()) + // run test cases + type fields struct { + url string + } + type want struct { + body string + statusCode int + } + tests := []struct { + name string + fields fields + want want + }{ + { + name: "Suggest: Oudeschild", + fields: fields{ + url: "http://localhost:8080/search/suggest?q=\"Oudeschild\"&limit=50", + }, + want: want{ + body: `[ + "Barentszstraat, 1792AD Oudeschild", + "Bolwerk, 1792AS Oudeschild", + "Commandeurssingel, 1792AV Oudeschild", + "De Houtmanstraat, 1792BC Oudeschild", + "De Ruyterstraat, 1792AP Oudeschild", + "De Wittstraat, 1792BP Oudeschild" + ]`, + statusCode: http.StatusOK, + }, + }, + { + name: "Suggest: Den ", + fields: fields{ + url: "http://localhost:8080/search/suggest?q=\"Den\"&limit=50", + }, + want: want{ + body: `[ + "Abbewaal, 1791WZ Den Burg", + "Achterom, 1791AN Den Burg", + "Akenbuurt, 1791PJ Den Burg", + "Amaliaweg, 1797SW Den Hoorn", + "Bakkenweg, 1797RJ Den Hoorn", + "Beatrixlaan, 1791GE Den Burg", + "Ada van Hollandstraat, 1791DH Den Burg", + "Anne Frankstraat, 1791DT Den Burg" + ]`, + statusCode: http.StatusOK, + }, + }, + { + name: "Suggest: Den. With deepCopy params", + fields: fields{ + url: "http://localhost:8080/search/suggest?q=\"Den\"&weg[version]=2&weg[relevance]=0.8&adres[version]=1&adres[relevance]=1&limit=10&f=json&crs=http%3A%2F%2Fwww.opengis.net%2Fdef%2Fcrs%2FEPSG%2F0%2F28992", + }, + want: want{ + body: `[ + "Abbewaal, 1791WZ Den Burg", + "Achterom, 1791AN Den Burg", + "Akenbuurt, 1791PJ Den Burg", + "Amaliaweg, 1797SW Den Hoorn", + "Bakkenweg, 1797RJ Den Hoorn", + "Beatrixlaan, 1791GE Den Burg", + "Ada van Hollandstraat, 1791DH Den Burg", + "Anne Frankstraat, 1791DT Den Burg" + ]`, + statusCode: http.StatusOK, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // given available server + rr, ts := createMockServer() + defer ts.Close() + + // when + handler := searchEndpoint.Suggest() + req, err := createRequest(tt.fields.url) + assert.NoError(t, err) + handler.ServeHTTP(rr, req) + + // then + assert.Equal(t, tt.want.statusCode, rr.Code) + assert.JSONEq(t, tt.want.body, rr.Body.String()) + }) + } } func setupPostgis(ctx context.Context, t *testing.T) (nat.Port, testcontainers.Container, error) { @@ -154,7 +224,6 @@ func createRequest(url string) (*http.Request, error) { if req == nil || err != nil { return req, err } - rctx := chi.NewRouteContext() - req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, chi.NewRouteContext())) return req, err }