Skip to content

Commit

Permalink
Add first class support to Wikidata in the backend (#2075)
Browse files Browse the repository at this point in the history
* Main commit which adds support to wikidata POIs.

* Added TODO

* Add support for iNature reference merge

* Remove update mechanism

* Add support for wikidata in website links

* Improve text for merge algorithm

* Add tests for wikidata and iNature merger

* Improve coverage, remove todo comments.
  • Loading branch information
HarelM authored Nov 13, 2024
1 parent 36c817b commit f03ad4d
Show file tree
Hide file tree
Showing 23 changed files with 480 additions and 153 deletions.
17 changes: 6 additions & 11 deletions IsraelHiking.API/Controllers/PointsOfInterestController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -240,19 +240,14 @@ public Task<IFeature> GetClosestPoint(string location, string source, string lan
[Route("updates/{lastModified}/")]
[Route("updates/{lastModified}/{modifiedUntil}")]
[HttpGet]
public async Task<UpdatesResponse> GetPointOfInterestUpdates(DateTime lastModified, DateTime? modifiedUntil)
[Obsolete("Remove by 5.2025")]
public UpdatesResponse GetPointOfInterestUpdates(DateTime lastModified, DateTime? modifiedUntil)
{
var response = await _pointsOfInterestProvider.GetUpdates(lastModified, modifiedUntil ?? DateTime.Now);
var imageUrls = new List<string>();
foreach (var feature in response.Features)
return new UpdatesResponse
{
var currentImageUrls = feature.Attributes.GetNames()
.Where(a => a.StartsWith(FeatureAttributes.IMAGE_URL))
.Select(k => feature.Attributes[k].ToString());
imageUrls.AddRange(currentImageUrls.ToList());
}
response.Images = await _imageUrlStoreExecutor.GetAllImagesForUrls(imageUrls.ToArray());
return response;
Features = Array.Empty<IFeature>(),
Images = Array.Empty<ImageItem>()
};
}

/// <summary>
Expand Down
88 changes: 80 additions & 8 deletions IsraelHiking.API/Executors/FeaturesMergeExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,13 @@ public FeaturesMergeExecutor(IOptions<ConfigurationData> options,
public List<IFeature> Merge(List<IFeature> osmFeatures, List<IFeature> externalFeatures)
{
AddAlternativeTitleToNatureReserves(osmFeatures);
externalFeatures = MergeWikipediaAndWikidataIntoWikidata(externalFeatures);
externalFeatures = MergeWikipediaToOsmByWikipediaTags(osmFeatures, externalFeatures);
_logger.LogInformation($"Starting to sort features by importance: {osmFeatures.Count}");
externalFeatures = MergeWikidataToOsmByWikidataTags(osmFeatures, externalFeatures);
externalFeatures = MergeINatureToOsmByINatureTags(osmFeatures, externalFeatures);
_logger.LogInformation($"Starting to sort OSM features by importance: {osmFeatures.Count}");
osmFeatures = osmFeatures.OrderBy(f => f, new FeatureComparer()).ToList();
_logger.LogInformation($"Finished sorting features by importance: {osmFeatures.Count}");
_logger.LogInformation($"Finished sorting OSM features by importance: {osmFeatures.Count}");
osmFeatures = MergePlaceNodes(osmFeatures);
var namesAttributes = new List<string> {FeatureAttributes.NAME, FeatureAttributes.MTB_NAME};
namesAttributes.AddRange(Languages.Array.Select(language => FeatureAttributes.NAME + ":" + language));
Expand All @@ -95,7 +98,7 @@ public List<IFeature> Merge(List<IFeature> osmFeatures, List<IFeature> externalF

private List<IFeature> MergeOsmElementsByName(List<IFeature> orderedOsmFeatures, string nameAttribute)
{
_logger.LogInformation($"Starting OSM merging by {nameAttribute}.");
_logger.LogInformation($"Starting OSM merging by {nameAttribute}, current items count: {orderedOsmFeatures.Count}");
var featureIdsToRemove = new ConcurrentBag<string>();
var groupedByName = orderedOsmFeatures.Where(f => f.Attributes.Exists(nameAttribute))
.GroupBy(f => f.Attributes[nameAttribute].ToString()).ToList();
Expand Down Expand Up @@ -130,17 +133,16 @@ private List<IFeature> MergeOsmElementsByName(List<IFeature> orderedOsmFeatures,
features.RemoveAt(0);
}
});
_logger.LogInformation($"Finished processing geometries, removing items.");
var list = featureIdsToRemove.ToHashSet();
orderedOsmFeatures = orderedOsmFeatures.Where(f => list.Contains(f.GetId()) == false).ToList();
_logger.LogInformation($"Finished OSM merging by name: {orderedOsmFeatures.Count}");
_logger.LogInformation($"Finished OSM merging by name, removed {list.Count} items, remaining OSM items: {orderedOsmFeatures.Count}");
return orderedOsmFeatures;
}

private List<IFeature> MergeExternalFeaturesToOsm(List<IFeature> osmFeatures, List<IFeature> externalFeatures)
{
var featureIdsToRemove = new HashSet<string>();
_logger.LogInformation("Starting external features merging by title into OSM.");
_logger.LogInformation($"Starting external features merging by title into OSM. Current OSM items: {osmFeatures.Count}, external features: {externalFeatures.Count}");
var titlesDictionary = new Dictionary<string, List<IFeature>>();
foreach (var osmFeature in osmFeatures)
{
Expand Down Expand Up @@ -172,7 +174,7 @@ private List<IFeature> MergeExternalFeaturesToOsm(List<IFeature> osmFeatures, Li
}
}
externalFeatures = externalFeatures.Where(f => featureIdsToRemove.Contains(f.GetId()) == false).ToList();
_logger.LogInformation("Finished external features merging by title into OSM. " + externalFeatures.Count);
_logger.LogInformation("Finished external features merging by title into OSM. Remaining external features: " + externalFeatures.Count);
return externalFeatures;
}

Expand All @@ -196,7 +198,7 @@ private List<IFeature> MergePlaceNodes(List<IFeature> osmFeatures)
}
});
var list = featureIdsToRemove.ToList();
WriteToBothLoggers($"Finished places merging. Merged places: {list.Count}");
WriteToBothLoggers($"Finished places merging. Removed places entities: {list.Count}");
return osmFeatures.Where(f => list.Contains(f.GetId()) == false).ToList();
}

Expand Down Expand Up @@ -522,6 +524,28 @@ private bool IsFeaturesTagsMismatched(IFeature target, IFeature source, string t
source.Attributes.GetNames().Contains(tagName));
}

/// <summary>
/// Merges Wikipedia features into Wikidata features when one of the Wikidata feature's
/// name attributes has the same value as the Wikipedia feature's name.
/// </summary>
/// <param name="externalFeatures">All external features; only the Wikidata and Wikipedia sourced ones take part in the merge</param>
/// <returns>The external features without the Wikipedia features that were merged into a Wikidata feature</returns>
private List<IFeature> MergeWikipediaAndWikidataIntoWikidata(List<IFeature> externalFeatures)
{
    WriteToBothLoggers("Starting joining Wikipedia and wikidata.");
    var wikidataTargets = externalFeatures
        .Where(feature => feature.Attributes[FeatureAttributes.POI_SOURCE].Equals(Sources.WIKIDATA))
        .ToList();
    // NOTE(review): ToDictionary throws when two Wikipedia features share the same name - confirm names are unique.
    var wikipediaByName = externalFeatures
        .Where(feature => feature.Attributes[FeatureAttributes.POI_SOURCE].Equals(Sources.WIKIPEDIA))
        .ToDictionary(feature => feature.Attributes[FeatureAttributes.NAME], feature => feature);
    var mergedWikipediaIds = new HashSet<string>();
    foreach (var wikidataTarget in wikidataTargets)
    {
        // Every attribute whose key starts with the name prefix is checked.
        foreach (var attributeName in wikidataTarget.Attributes.GetNames())
        {
            if (!attributeName.StartsWith(FeatureAttributes.NAME))
            {
                continue;
            }
            // The value is read only when its turn comes, so it reflects any change made by an earlier merge in this loop.
            var nameValue = wikidataTarget.Attributes[attributeName].ToString();
            if (!wikipediaByName.TryGetValue(nameValue, out var wikipediaFeature))
            {
                continue;
            }
            mergedWikipediaIds.Add(wikipediaFeature.GetId());
            MergeFeatures(wikidataTarget, wikipediaFeature);
        }
    }
    WriteToBothLoggers($"Finished joining Wikipedia and wikidata. Merged features: {mergedWikipediaIds.Count}");
    return externalFeatures.Where(feature => !mergedWikipediaIds.Contains(feature.GetId())).ToList();
}

private List<IFeature> MergeWikipediaToOsmByWikipediaTags(List<IFeature> osmFeatures, List<IFeature> externalFeatures)
{
WriteToBothLoggers("Starting joining Wikipedia markers.");
Expand Down Expand Up @@ -549,6 +573,54 @@ private List<IFeature> MergeWikipediaToOsmByWikipediaTags(List<IFeature> osmFeat
WriteToBothLoggers($"Finished joining Wikipedia markers. Merged features: {featureIdsToRemove.Count}");
return externalFeatures.Where(f => featureIdsToRemove.Contains(f.GetId()) == false).ToList();
}

/// <summary>
/// Merges Wikidata features into the OSM features that reference them through the wikidata attribute.
/// </summary>
/// <param name="osmFeatures">The OSM features; only those that have a wikidata attribute and an OSM POI source are used as merge targets</param>
/// <param name="externalFeatures">All external features; only the Wikidata sourced ones are merge candidates</param>
/// <returns>The external features without the Wikidata features that were merged into an OSM feature</returns>
private List<IFeature> MergeWikidataToOsmByWikidataTags(List<IFeature> osmFeatures, List<IFeature> externalFeatures)
{
    WriteToBothLoggers("Starting joining Wikidata markers.");
    var featureIdsToRemove = new HashSet<string>();
    // Index the Wikidata features by ID once, so every OSM feature is a single lookup
    // instead of a scan over the entire Wikidata list.
    var wikidataFeaturesById = new Dictionary<string, IFeature>();
    foreach (var wikidataFeature in externalFeatures.Where(f => f.Attributes[FeatureAttributes.POI_SOURCE].Equals(Sources.WIKIDATA)))
    {
        // TryAdd keeps the first feature seen for an ID, so duplicates resolve to the earliest one in list order.
        wikidataFeaturesById.TryAdd(wikidataFeature.Attributes[FeatureAttributes.ID].ToString(), wikidataFeature);
    }
    var osmWikiFeatures = osmFeatures.Where(f =>
            f.Attributes.Exists(FeatureAttributes.WIKIDATA) &&
            f.Attributes[FeatureAttributes.POI_SOURCE].Equals(Sources.OSM))
        .ToList();
    foreach (var osmWikiFeature in osmWikiFeatures)
    {
        var wikidataId = osmWikiFeature.Attributes[FeatureAttributes.WIKIDATA].ToString();
        if (!wikidataFeaturesById.TryGetValue(wikidataId, out var wikiFeatureToRemove))
        {
            continue;
        }
        // Several OSM features may point at the same Wikidata item; each one gets merged, the ID is recorded once.
        featureIdsToRemove.Add(wikiFeatureToRemove.GetId());
        MergeFeatures(osmWikiFeature, wikiFeatureToRemove);
    }
    WriteToBothLoggers($"Finished joining Wikidata markers. Merged features: {featureIdsToRemove.Count}");
    return externalFeatures.Where(f => featureIdsToRemove.Contains(f.GetId()) == false).ToList();
}

/// <summary>
/// Merges iNature features into the OSM features that reference them through the iNature reference attribute.
/// The reference value is matched against the iNature feature's name.
/// </summary>
/// <param name="osmFeatures">The OSM features; only those that have an iNature reference attribute and an OSM POI source are used as merge targets</param>
/// <param name="externalFeatures">All external features; only the iNature sourced ones are merge candidates</param>
/// <returns>The external features without the iNature features that were merged into an OSM feature</returns>
private List<IFeature> MergeINatureToOsmByINatureTags(List<IFeature> osmFeatures, List<IFeature> externalFeatures)
{
    WriteToBothLoggers("Starting joining iNature markers.");
    var featureIdsToRemove = new HashSet<string>();
    // Index the iNature features by name once, so every OSM feature is a single lookup
    // instead of a scan over the entire iNature list.
    var iNatureFeaturesByName = new Dictionary<string, IFeature>();
    foreach (var iNatureFeature in externalFeatures.Where(f => f.Attributes[FeatureAttributes.POI_SOURCE].Equals(Sources.INATURE)))
    {
        // TryAdd keeps the first feature seen for a name, so duplicates resolve to the earliest one in list order.
        iNatureFeaturesByName.TryAdd(iNatureFeature.Attributes[FeatureAttributes.NAME].ToString(), iNatureFeature);
    }
    var osmINatureFeatures = osmFeatures.Where(f =>
            f.Attributes.Exists(FeatureAttributes.INATURE_REF) &&
            f.Attributes[FeatureAttributes.POI_SOURCE].Equals(Sources.OSM))
        .ToList();
    foreach (var osmINatureFeature in osmINatureFeatures)
    {
        var iNaturePage = osmINatureFeature.Attributes[FeatureAttributes.INATURE_REF].ToString();
        if (!iNatureFeaturesByName.TryGetValue(iNaturePage, out var iNatureFeatureToRemove))
        {
            continue;
        }
        // Several OSM features may reference the same iNature page; each one gets merged, the ID is recorded once.
        featureIdsToRemove.Add(iNatureFeatureToRemove.GetId());
        MergeFeatures(osmINatureFeature, iNatureFeatureToRemove);
    }
    WriteToBothLoggers($"Finished joining iNature markers. Merged features: {featureIdsToRemove.Count}");
    return externalFeatures.Where(f => featureIdsToRemove.Contains(f.GetId()) == false).ToList();
}

private void AddAlternativeTitleToNatureReserves(List<IFeature> features)
{
Expand Down
1 change: 1 addition & 0 deletions IsraelHiking.API/RegisterApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public static void AddIHMApi(this IServiceCollection services)
services.AddTransient<IPointsOfInterestAdapter, NakebPointsOfInterestAdapter>();
services.AddTransient<IPointsOfInterestAdapter, INaturePointsOfInterestAdapter>();
services.AddTransient<IPointsOfInterestAdapter, WikipediaPointsOfInterestAdapter>();
services.AddTransient<IPointsOfInterestAdapter, WikidataPointsOfInterestAdapter>();
services.AddTransient<CsvPointsOfInterestAdapter>();
services.AddSingleton<IPointsOfInterestAdapterFactory, PointsOfInterestAdapterFactory>();
// last one is the least important
Expand Down
23 changes: 2 additions & 21 deletions IsraelHiking.API/Services/Osm/DatabasesUpdaterService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,27 +145,8 @@ private async Task RebuildPointsOfInterest(RebuildContext rebuildContext)
var externalFeatures = sources.Select(s => _externalSourcesRepository.GetExternalPoisBySource(s)).SelectMany(t => t.Result).ToList();
var features = _featuresMergeExecutor.Merge(osmFeaturesTask.Result, externalFeatures);
_unauthorizedImageUrlsRemover.RemoveImages(features);
var exitingFeatures = await _pointsOfInterestRepository.GetAllPointsOfInterest(true);
_logger.LogInformation($"Adding deleted features to new ones, total merged features: {features.Count} total existing features including deleted: {exitingFeatures.Count} of them: {exitingFeatures.Count(f => f.Attributes.Exists(FeatureAttributes.POI_DELETED))}");
var newFeaturesDictionary = features.ToDictionary(f => f.GetId(), f => f);
var deletedFeatures = exitingFeatures.Where(f => f.GetLastModified() <= rebuildContext.StartTime && !newFeaturesDictionary.ContainsKey(f.GetId())).ToArray();
foreach (var deletedFeatureToMark in deletedFeatures)
{
if (!deletedFeatureToMark.Attributes.Exists(FeatureAttributes.POI_DELETED))
{
deletedFeatureToMark.Attributes.Add(FeatureAttributes.POI_DELETED, true);
deletedFeatureToMark.SetLastModified(DateTime.Now);
_logger.LogInformation("Removed feature id: " + deletedFeatureToMark.GetId());
}
}
var featuresToStore = features.Concat(deletedFeatures).ToList();
_logger.LogInformation($"Added deleted features to new ones: {deletedFeatures.Length} total features to store: {featuresToStore.Count}");
await _pointsOfInterestRepository.StorePointsOfInterestDataToSecondaryIndex(featuresToStore);
_logger.LogInformation("Getting all features added since rebuild started: " + rebuildContext.StartTime.ToLongTimeString());
var addedFeaturesAfterRebuildStart = await _pointsOfInterestRepository.GetPointsOfInterestUpdates(rebuildContext.StartTime, DateTime.Now);
_logger.LogInformation("Got all features added since rebuild started: " + addedFeaturesAfterRebuildStart.Count);
await _pointsOfInterestRepository.StorePointsOfInterestDataToSecondaryIndex(addedFeaturesAfterRebuildStart);
_logger.LogInformation("Finished storing all features");
await _pointsOfInterestRepository.StorePointsOfInterestDataToSecondaryIndex(features);
_logger.LogInformation("Finished storing all features " + features.Count);
await _pointsOfInterestRepository.SwitchPointsOfInterestIndices();
_logger.LogInformation("Finished rebuilding POIs database.");
}
Expand Down
8 changes: 0 additions & 8 deletions IsraelHiking.API/Services/Poi/IPointsOfInterestProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,6 @@ public interface IPointsOfInterestProvider
/// <returns></returns>
public Task<IFeature> GetClosestPoint(Coordinate location, string source, string language = "");

/// <summary>
/// Get all the points that were updated since the given date, and up until a given date
/// </summary>
/// <param name="lastModifiedDate">The last modification date that the client has</param>
/// <param name="modifiedUntil">The end time of the updates to reduce response size</param>
/// <returns></returns>
public Task<UpdatesResponse> GetUpdates(DateTime lastModifiedDate, DateTime modifiedUntil);

/// <summary>
/// Get all points from the OSM repository
/// </summary>
Expand Down
Loading

0 comments on commit f03ad4d

Please sign in to comment.