diff --git a/arches/app/media/js/viewmodels/map.js b/arches/app/media/js/viewmodels/map.js
index eac6659c0a3..d3f6dbf9539 100644
--- a/arches/app/media/js/viewmodels/map.js
+++ b/arches/app/media/js/viewmodels/map.js
@@ -12,6 +12,261 @@ define([
     const viewModel = function(params) {
         var self = this;

+        const searchLayerIds = [
+            'searchtiles-unclustered-polygon-fill',
+            'searchtiles-unclustered-point',
+            'searchtiles-clusters',
+            'searchtiles-clusters-halo',
+            'searchtiles-cluster-count',
+            'searchtiles-unclustered-polypoint'
+        ];
+        const searchLayerDefinitions = [
+            {
+                "id": "searchtiles-unclustered-polygon-fill",
+                "type": "fill",
+                "paint": {
+                    "fill-color": "#fa6003",
+                    "fill-opacity": 0.3,
+                    "fill-outline-color": "#fa6003"
+                },
+                "filter": ["==", "$type", "Polygon"],
+                "source": "search-layer-source",
+                "source-layer": "search_layer",
+                "minzoom": 10,
+                "tolerance": 0.75
+            },
+            {
+                "id": "searchtiles-unclustered-point",
+                "type": "circle",
+                "paint": {
+                    "circle-color": "#fa6003",
+                    "circle-radius": 6,
+                    "circle-opacity": 1
+                },
+                "filter": ["!", ["has", "point_count"]],
+                "source": "search-layer-source",
+                "source-layer": "search_layer"
+            },
+            {
+                "id": "searchtiles-clusters",
+                "type": "circle",
+                "paint": {
+                    "circle-color": "#fa6003",
+                    "circle-radius": [
+                        "step", ["get", "point_count"],
+                        10, 100, 20, 750, 30, 1500, 40, 2500, 50, 5000, 65
+                    ],
+                    "circle-opacity": [
+                        "case", ["boolean", ["has", "point_count"], true], 1, 0
+                    ]
+                },
+                "filter": ["all", ["==", "$type", "Point"], ["!=", "highlight", true]],
+                "source": "search-layer-source",
+                "source-layer": "search_layer"
+            },
+            {
+                "id": "searchtiles-clusters-halo",
+                "type": "circle",
+                "paint": {
+                    "circle-color": "#fa6003",
+                    "circle-radius": [
+                        "step", ["get", "point_count"],
+                        20, 100, 30, 750, 40, 1500, 50, 2500, 60, 5000, 75
+                    ],
+                    "circle-opacity": [
+                        "case", ["boolean", ["has", "point_count"], true], 0.5, 0
+                    ]
+                },
+                "filter": ["all", ["==", "$type", "Point"], ["!=", "highlight", true]],
+                "maxzoom": 14,
+                "source": "search-layer-source",
+                "source-layer": "search_layer"
+            },
+            {
+                "id": "searchtiles-cluster-count",
+                "type": "symbol",
+                "paint": {
+                    "text-color": "#fff"
+                },
+                "filter": ["has", "point_count"],
+                "layout": {
+                    "text-font": ["DIN Offc Pro Medium", "Arial Unicode MS Bold"],
+                    "text-size": 14,
+                    "text-field": "{point_count}"
+                },
+                "maxzoom": 14,
+                "source": "search-layer-source",
+                "source-layer": "search_layer"
+            },
+            {
+                "id": "searchtiles-unclustered-polypoint",
+                "type": "circle",
+                "paint": {
+                    "circle-color": "#fa6003",
+                    "circle-radius": 0,
+                    "circle-opacity": 0,
+                    "circle-stroke-color": "#fff",
+                    "circle-stroke-width": 0
+                },
+                "filter": ["!", ["has", "point_count"]],
+                "layout": {
+                    "visibility": "none"
+                },
+                "source": "search-layer-source",
+                "source-layer": "search_layer"
+            }
+        ];
+        this.searchQueryId = params.searchQueryId;
+        this.searchQueryId.subscribe(function (searchId) {
+            if (searchId) {
+                self.addSearchLayer(searchId);
+            } else {
+                // optionally, remove the search layer if searchId becomes undefined
+                self.removeSearchLayer();
+            }
+        });
+
+        this.addSearchLayer = function (searchId) {
+            console.log(searchId);
+            if (!self.map()) return;
+            const tileUrlTemplate = `http://localhost:8000/search-layer/{z}/{x}/{y}.pbf?searchid=${encodeURIComponent(searchId)}`;
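+            // NOTE: the hard-coded localhost origin is presumably a development
+            // placeholder; a relative URL (or one built from arches.urls) would
+            // be needed outside local development.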
+
+            // Remove the existing source and layers if they already exist
+            searchLayerIds.forEach(layerId => {
+                if (self.map().getLayer(layerId)) {
+                    self.map().removeLayer(layerId);
+                }
+                if (self.map().getSource(layerId)) {
+                    self.map().removeSource(layerId);
+                }
+            });
+            if (self.map().getSource('search-layer-source')) {
+                self.map().removeSource('search-layer-source');
+            }
+
+            // Add the vector tile source
+            self.map().addSource('search-layer-source', {
+                type: 'vector',
+                tiles: [tileUrlTemplate],
+                minzoom: 0,
+                maxzoom: 22,
+            });
+
+            // Add the layers that display the data
+            searchLayerDefinitions.forEach(mapLayer => {
+                self.map().addLayer(mapLayer);
+            });
+
+            // Optionally, fit the map to the data bounds
+            // self.fitMapToDataBounds(searchId);
+        };
+
+        this.removeSearchLayer = function () {
+            searchLayerDefinitions.forEach(mapLayer => {
+                if (self.map().getLayer(mapLayer.id)) {
+                    self.map().removeLayer(mapLayer.id);
+                }
+            });
+            if (self.map().getSource('search-layer-source')) {
+                self.map().removeSource('search-layer-source');
+            }
+        };
+
         var geojsonSourceFactory = function() {
             return {
@@ -62,6 +317,10 @@ define([
                 map.fitBounds(ko.unwrap(params.bounds), boundingOptions);
             }
+            // If searchQueryId is already available, add the search layer
+            if (self.searchQueryId()) {
+                self.addSearchLayer(self.searchQueryId());
+            }
         });

         this.bounds = ko.observable(ko.unwrap(params.bounds) || arches.hexBinBounds);
diff --git a/arches/app/media/js/views/components/search/map-filter.js b/arches/app/media/js/views/components/search/map-filter.js
index 2f00cce545d..d3ca1e20018 100644
--- a/arches/app/media/js/views/components/search/map-filter.js
+++ b/arches/app/media/js/views/components/search/map-filter.js
@@ -40,6 +40,7 @@ define([
             options.name = "Map Filter";
             BaseFilter.prototype.initialize.call(this, options);
+            options.searchQueryId = this.searchQueryId;
             options.sources = {
                 "geojson-search-buffer-data": {
                     "type": "geojson",
@@ -369,14 +370,6 @@ define([
                     this.updateFilter();
                 }, this);

-                this.searchAggregations.subscribe(this.updateSearchResultsLayers, this);
-                if (ko.isObservable(bins)) {
-                    bins.subscribe(this.updateSearchResultsLayers, this);
-                }
-                if (this.searchAggregations()) {
-                    this.updateSearchResultsLayers();
-                }
-
                 this.mouseoverInstanceId.subscribe(updateSearchResultPointLayer);
             }, this);
         },
diff --git a/arches/app/media/js/views/components/search/standard-search-view.js b/arches/app/media/js/views/components/search/standard-search-view.js
index e311bcf4382..7134caf1939 100644
--- a/arches/app/media/js/views/components/search/standard-search-view.js
+++ b/arches/app/media/js/views/components/search/standard-search-view.js
@@ -15,6 +15,8 @@ define([
             this.selectedPopup = ko.observable('');
             this.sharedStateObject.selectedPopup = this.selectedPopup;
+            this.searchQueryId = ko.observable(null);
+            this.sharedStateObject.searchQueryId = this.searchQueryId;
             var firstEnabledFilter = _.find(this.sharedStateObject.searchFilterConfigs, function(filter) {
                 return filter.config.layoutType === 'tabbed';
             }, this);
@@ -51,6 +53,47 @@
             this.searchFilterVms[componentName](this);
         },
+        doQuery: function() {
+            const queryObj = JSON.parse(this.queryString());
+            if (self.updateRequest) { self.updateRequest.abort(); }
+            self.updateRequest = $.ajax({
+                type: "GET",
+                url: arches.urls.search_results,
+                data: queryObj,
+                context: this,
+                success: function(response) {
+                    _.each(this.sharedStateObject.searchResults, function(value, key, results) {
+                        if (key !== 'timestamp') {
+                            delete this.sharedStateObject.searchResults[key];
+                        }
+                    }, this);
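+                    // copy everything except the timestamp observable from the
+                    // new response into the shared results object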
+                    _.each(response, function(value, key, response) {
+                        if (key !== 'timestamp') {
+                            this.sharedStateObject.searchResults[key] = value;
+                        }
+                    }, this);
+                    this.sharedStateObject.searchResults.timestamp(response.timestamp);
+                    this.searchQueryId(this.sharedStateObject.searchResults.searchqueryid);
+                    this.sharedStateObject.userIsReviewer(response.reviewer);
+                    this.sharedStateObject.userid(response.userid);
+                    this.sharedStateObject.total(response.total_results);
+                    this.sharedStateObject.hits(response.results.hits.hits.length);
+                    this.sharedStateObject.alert(false);
+                },
+                error: function(response, status, error) {
+                    const alert = new AlertViewModel('ep-alert-red', arches.translations.requestFailed.title, response.responseJSON?.message);
+                    if (self.updateRequest.statusText !== 'abort') {
+                        this.alert(alert);
+                    }
+                    this.sharedStateObject.loading(false);
+                },
+                complete: function(request, status) {
+                    self.updateRequest = undefined;
+                    window.history.pushState({}, '', '?' + $.param(queryObj).split('+').join('%20'));
+                    this.sharedStateObject.loading(false);
+                }
+            });
+        },
     });

     return ko.components.register(componentName, {
diff --git a/arches/app/search/components/standard_search_view.py b/arches/app/search/components/standard_search_view.py
index 6af783b1af6..59ac59f5fbb 100644
--- a/arches/app/search/components/standard_search_view.py
+++ b/arches/app/search/components/standard_search_view.py
@@ -19,6 +19,7 @@ from django.utils.translation import gettext as _
 from datetime import datetime
 import logging
+import json


 details = {
@@ -136,6 +137,86 @@ def append_dsl(self, search_query_object, **kwargs):
         if load_tiles:
             search_query_object["query"].include("tiles")

+    def set_search_pit(self, search_query_object, se, cache, **kwargs):
+        query_obj = kwargs.get("search_request_object", self.request.GET)
+        resourceids_only_query_hash_key = create_searchresults_cache_key(
+            self.request, query_obj, resourceids_only=True
+        )
+        pit_response = se.es.open_point_in_time(
+            index=RESOURCES_INDEX, keep_alive="2m"  # Adjust as needed
+        )
+        pit_id = pit_response["pit_id"]
+
+        # Perform the search
+        search_params = {
+            # Your search query parameters
+        }
+
+        search_response = search_query_object["query"].search(
+            index=RESOURCES_INDEX,
+            body=search_params,
+            pit={"id": pit_id, "keep_alive": "2m"},
+            size=1000,  # Adjust as needed
+        )
+        # TODO: how can I cache the search query itself? The QueryObject is really
+        # hard to serialize; could just re-instantiate the filters from the
+        # search_layer to regenerate the QueryObject from scratch
+
+        # Cache the pit_id and search parameters
+        cache.set(
+            resourceids_only_query_hash_key,
+            json.dumps({"pit_id": pit_id, "search_params": search_params}),
+            timeout=120,
+        )
+        return resourceids_only_query_hash_key
+
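+    # The FIRST (unlimited) pass: cache every matching resource id under a
+    # hash of the query; handle_search_results_query below then runs a paged
+    # SECOND query for the richer, visible results.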
+    def execute_resourceids_only_query(
+        self, search_query_object, response_object, cache, **kwargs
+    ):
+        # cached_response_json = cache.get(cache_key)
+        query_obj = kwargs.get("search_request_object", self.request.GET)
+        resourceinstanceid = kwargs.get("resourceinstanceid", None)
+        resourceids_only_query_hash_key = create_searchresults_cache_key(
+            self.request, query_obj, resourceids_only=True
+        )
+        # did we already cache result resourceids for this query under this query hash?
+        cached_result_resourceids = cache.get(resourceids_only_query_hash_key)
+        if cached_result_resourceids:
+            # we already did the work here; we'll return the hash key
+            return resourceids_only_query_hash_key
+        else:
+            print(
+                f"no cached resourceids for hashkey {resourceids_only_query_hash_key}"
+            )
+
+        if resourceinstanceid is None:
+            results = search_query_object["query"].search(
+                index=RESOURCES_INDEX, limit=10000, scroll="1m"
+            )
+            scroll_id = results["_scroll_id"]
+            scroll_size = results["hits"]["total"]["value"]
+            total_results = results["hits"]["total"]["value"]
+            if query_obj.get("paging-filter", None) is None:
+                while scroll_size > 0:
+                    page = search_query_object["query"].se.es.scroll(
+                        scroll_id=scroll_id, scroll="3m"
+                    )
+                    scroll_size = len(page["hits"]["hits"])
+                    results["hits"]["hits"] += page["hits"]["hits"]
+        else:
+            results = search_query_object["query"].search(
+                index=RESOURCES_INDEX, id=resourceinstanceid
+            )
+            total_results = 1
+
+        if results is not None:
+            all_resourceids = [hit["_id"] for hit in results["hits"]["hits"]]
+            cache.set(
+                resourceids_only_query_hash_key,
+                json.dumps(all_resourceids),
+                settings.SEARCH_RESULTS_CACHE_TIMEOUT,
+            )
+        return resourceids_only_query_hash_key
+
 def execute_query(self, search_query_object, response_object, **kwargs):
     for_export = get_str_kwarg_as_bool("export", self.request.GET)
     pages = self.request.GET.get("pages", None)
@@ -232,6 +313,53 @@ def handle_search_results_query(
         if returnDsl:
             return response_object, search_query_object

+        # at this point we want to FIRST do an unlimited query to get all
+        # resourceids of the results, THEN SECOND do a second query to get a
+        # rich set of results only for the page
+        unpaged_query = None
+        search_query_object["query"].include("tiles")
+        for_export = get_str_kwarg_as_bool("export", sorted_query_obj)
+        if not for_export:
+            resourceids_only_query_hash_key = self.execute_resourceids_only_query(
+                search_query_object,
+                response_object,
+                cache,
+                search_request_object=sorted_query_obj,
+                resourceinstanceid=resourceinstanceid,
+            )
+
+        # now I know the resourceids have been cached under the
+        # resourceids_only_query_hash_key; I should set a start/end limit
+        # for the second query
+        paging_filter = search_filter_factory.get_filter("paging-filter")
+        if paging_filter:
+            paging_filter.append_dsl(
+                search_query_object,
+                permitted_nodegroups=permitted_nodegroups,
+                include_provisional=include_provisional,
+                load_tiles=load_tiles,
+                for_export=for_export,
+                querystring=sorted_query_obj.get("paging-filter", "{}"),
+                search_request_object=sorted_query_obj,
+            )
+
+        search_query_object["query"].include("graph_id")
+        # if geom_only or for_export or map_manager or load_tiles:
+        search_query_object["query"].include("geometries")
+        search_query_object["query"].include("points")
+        # if not geom_only:
+        for prop in essential_result_properties:
+            search_query_object["query"].include(prop)
+        # if load_tiles:
+        #     search_query_object["query"].include("tiles")
+        search_query_object["query"].include("resourceinstanceid")
+
+        self.execute_paged_query(
+            search_query_object,
+            response_object,
+            search_request_object=sorted_query_obj,
+            resourceinstanceid=resourceinstanceid,
+        )
+
         for filter_type, querystring in list(sorted_query_obj.items()):
             search_filter = search_filter_factory.get_filter(filter_type)
             if search_filter:
diff --git a/arches/app/views/search_layer.py b/arches/app/views/search_layer.py
new file mode 100644
index 00000000000..2db577a43e4
--- /dev/null
+++ b/arches/app/views/search_layer.py
@@ -0,0 +1,249 @@
+import math
+from django.views import View
+
+from django.core.cache import caches
+from arches.app.models.system_settings import settings
+from django.utils.translation import gettext as _
+
+from arches.app.search.search_engine_factory import SearchEngineFactory
+from arches.app.search.elasticsearch_dsl_builder import (
+    Query,
+    Bool,
+    GeoShape,
+    Nested,
+    GeoTileGridAgg,
+    NestedAgg,
+    Aggregation,
+)
+
+# from django.db import connection
+from django.http import Http404, HttpResponse
+from arches.app.utils.betterJSONSerializer import JSONDeserializer
+from pprint import pprint
+
+# from django.contrib.gis.geos import Polygon
+from datetime import datetime, timedelta
+from time import time
+import mercantile
+import mapbox_vector_tile
+
+ZOOM_THRESHOLD = 14
+EXTENT = 4096
+
+
+class SearchLayer(View):
+    def get(self, request, zoom, x, y):
+        start = time()
+        print(f"ZOOM: {zoom}")
+        searchid = request.GET.get("searchid", None)
+        if not searchid:
+            print("NO SEARCHID FOUND ON REQUEST")
+            raise Http404(_("Missing 'searchid' query parameter."))
+
+        EARTHCIRCUM = 40075016.6856
+        PIXELSPERTILE = 256
+        cache = caches["default"]
+        pit_id = cache.get(searchid + "_pit")
+        query_dsl = cache.get(searchid + "_dsl")
+        # pprint(query_dsl)
+        # {"pit_id": pit_id, "dsl": query.dsl}
+        if pit_id is None or query_dsl is None:
+            print(f"no resourceids found in cache for searchid: {searchid}")
+            raise Http404(_("Missing resourceids from search cache."))
+
+        se = SearchEngineFactory().create()
+        query_dsl = JSONDeserializer().deserialize(query_dsl, indent=4)
+        new_query = Query(se, limit=0)
+        new_query.prepare()
+        new_query.dsl = query_dsl
+        # spatial_query = Bool()
+        # if int(y) == 203:
+        #     print("\n\n\nwhats my new query\n\n\n")
+        #     pprint(new_query.__str__())
+        tile_x = int(x)
+        tile_y = int(y)
+        tile_z = int(zoom)
+        tile_bounds = mercantile.bounds(tile_x, tile_y, tile_z)
+        bbox = (
+            tile_bounds.west,
+            tile_bounds.south,
+            tile_bounds.east,
+            tile_bounds.north,
+        )
+        geo_bbox_query = {
+            "geo_bounding_box": {
+                "points.point": {
+                    "top_left": {"lat": tile_bounds.north, "lon": tile_bounds.west},
+                    "bottom_right": {"lat": tile_bounds.south, "lon": tile_bounds.east},
+                }
+            }
+        }
+
+        if int(zoom) < ZOOM_THRESHOLD:
+            geotile_agg = GeoTileGridAgg(
+                precision=int(zoom), field="points.point", size=10000
+            )
+            centroid_agg = Aggregation(
+                type="geo_centroid", name="centroid", field="points.point"
+            )
+            geotile_agg.add_aggregation(centroid_agg)
+            nested_agg = NestedAgg(path="points", name="geo_aggs")
+            nested_agg.add_aggregation(geotile_agg)
+
+            # Build the filter aggregation
+            geo_filter_agg = Aggregation(
+                type="filter",
+                name="geo_filter",
+                filter=Nested(path="points", query=geo_bbox_query).dsl,
+            )
+
+            # Add the geotile_grid aggregation under the filter aggregation
+            geo_filter_agg.add_aggregation(geotile_agg)
+
+            # Update the nested aggregation
+            nested_agg = NestedAgg(path="points", name="geo_aggs")
+            nested_agg.add_aggregation(geo_filter_agg)
+            new_query.add_aggregation(nested_agg)
+
+            # pit doesn't allow scroll context or index
+            new_query.dsl["source_includes"] = []
+            new_query.dsl["size"] = 0
+            # if int(y) == 203:
+            #     pprint(new_query.dsl)
+            results = se.es.search(
+                pit={"id": pit_id, "keep_alive": "2m"}, _source=False, **new_query.dsl
+            )
+            elapsed = time() - start
+            # print(
+            #     "_______Time to finish search_layer search 1 (total: {0}) = {1}".format(
+            #         results["hits"]["total"]["value"], timedelta(seconds=elapsed)
+            #     )
+            # )
+            # print("search done")
+            # print(results["hits"]["total"])
+            # pprint(results)
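+            # Each geotile bucket carries a doc_count plus the geo_centroid
+            # requested above; flatten them into single point features with a
+            # "count" property.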
done") + # print(results["hits"]["total"]) + # pprint(results) + features = [] + buckets = results["aggregations"]["geo_aggs"]["geo_filter"]["zoomed_grid"][ + "buckets" + ] + # print(f"Number of buckets: {len(buckets)}") + + for bucket in buckets: + centroid = bucket["centroid"]["location"] + lon = centroid["lon"] + lat = centroid["lat"] + doc_count = bucket["doc_count"] + # px, py = lnglat_to_tile_px(lon, lat, tile_x, tile_y, tile_z, EXTENT) + + feature = { + "geometry": {"type": "Point", "coordinates": [lon, lat]}, + "properties": {"count": doc_count}, + } + + features.append(feature) + + layers = [ + { + "name": "clusters", # Layer name + "features": features, + "version": 2, + "extent": EXTENT, + } + ] + else: + # Fetch individual features + # Add the spatial filter to the query + points_spatial_query = Nested(path="points", query=geo_bbox_query) + # new_query.add_query(spatial_query) + + geometries_spatial_query = Nested(path="geometries", query=geo_bbox_query) + spatial_bool_query = Bool() + spatial_bool_query.should(points_spatial_query) + spatial_bool_query.should(geometries_spatial_query) + new_query.add_query(spatial_bool_query) + + new_query.dsl["size"] = 10000 + + new_query.include("points.point") + new_query.include("geometries.geom") + # new_query.include("resourceinstanceid") + # Add other fields if needed + + # Execute the search + results = se.es.search( + pit={"id": pit_id, "keep_alive": "2m"}, **new_query.dsl + ) + + # Process the hits to generate features + features = [] + point_features = [] + geometry_features = [] + + for hit in results["hits"]["hits"]: + source = hit["_source"] + resource_id = hit.get("_id") + + # Handle points + points = source.get("points", []) + for point in points: + point_geom = point.get("point") + if point_geom: + lon = point_geom.get("lon") + lat = point_geom.get("lat") + if lon and lat: + feature = { + "geometry": { + "type": "Point", + "coordinates": [lon, lat], + }, + "properties": { + "resourceinstanceid": resource_id, + "count": 1, + }, + } + point_features.append(feature) + geometries = source.get("geometries", []) + for geometry in geometries: + geom = geometry.get("geom") + if geom: + geom_type = geom.get("type") + coordinates = geom.get("coordinates") + if coordinates: + feature = { + "geometry": { + "type": geom_type, + "coordinates": coordinates, + }, + "properties": {"resourceinstanceid": resource_id}, + } + pprint(feature) + geometry_features.append(feature) + + # Build layers + layers = [] + + if point_features: + point_layer = { + "name": "points", + "features": point_features, + "version": 2, + "extent": EXTENT, + } + layers.append(point_layer) + + if geometry_features: + geometry_layer = { + "name": "geometries", + "features": geometry_features, + "version": 2, + "extent": EXTENT, + } + layers.append(geometry_layer) + + tile = mapbox_vector_tile.encode( + layers, quantize_bounds=bbox, y_coord_down=True, extents=EXTENT + ) + return HttpResponse(tile, content_type="application/vnd.mapbox-vector-tile") + + +def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user): + return f"searchlayer_mvt_{searchid_hash}_{zoom}_{x}_{y}_{user}" diff --git a/arches/urls.py b/arches/urls.py index e2772c1999c..29fb817f887 100644 --- a/arches/urls.py +++ b/arches/urls.py @@ -56,6 +56,7 @@ ResourceActivityStreamPageView, ResourceActivityStreamCollectionView, ) +from arches.app.views.search_layer import SearchLayer from arches.app.views.plugin import PluginView from arches.app.views.workflow_history import WorkflowHistoryView from 
+def create_searchlayer_mvt_cache_key(searchid_hash, zoom, x, y, user):
+    return f"searchlayer_mvt_{searchid_hash}_{zoom}_{x}_{y}_{user}"
diff --git a/arches/urls.py b/arches/urls.py
index e2772c1999c..29fb817f887 100644
--- a/arches/urls.py
+++ b/arches/urls.py
@@ -56,6 +56,7 @@
     ResourceActivityStreamPageView,
     ResourceActivityStreamCollectionView,
 )
+from arches.app.views.search_layer import SearchLayer
 from arches.app.views.plugin import PluginView
 from arches.app.views.workflow_history import WorkflowHistoryView
 from arches.app.views.concept import RDMView
@@ -675,6 +676,11 @@
         api.MVT.as_view(),
         name="mvt",
     ),
+    path(
+        "search-layer/<int:zoom>/<int:x>/<int:y>.pbf",
+        SearchLayer.as_view(),
+        name="search_layer",
+    ),
     re_path(r"^images$", api.Images.as_view(), name="images"),
     re_path(
         r"^ontology_properties$",