Skip to content

Commit

Permalink
Merge pull request #5879 from ibi-group/cluster-geocoder
Browse files Browse the repository at this point in the history
Rework clustering logic for sandbox geocoder
  • Loading branch information
leonardehrenfried authored Jun 3, 2024
2 parents c71c2a7 + 6a73f01 commit 0695d7d
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import java.time.LocalDate;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -213,21 +215,21 @@ class StopClusters {
)
void stopClustersWithTypos(String searchTerm) {
var results = index.queryStopClusters(searchTerm).toList();
var ids = results.stream().map(StopCluster::id).toList();
var ids = results.stream().map(primaryId()).toList();
assertEquals(List.of(ALEXANDERPLATZ_STATION.getId()), ids);
}

@Test
void fuzzyStopClusters() {
var result1 = index.queryStopClusters("arts").map(StopCluster::id).toList();
var result1 = index.queryStopClusters("arts").map(primaryId()).toList();
assertEquals(List.of(ARTS_CENTER.getId()), result1);
}

@Test
void deduplicatedStopClusters() {
var result = index.queryStopClusters("lich").toList();
assertEquals(1, result.size());
assertEquals(LICHTERFELDE_OST_1.getName().toString(), result.getFirst().name());
assertEquals(LICHTERFELDE_OST_1.getName().toString(), result.getFirst().primary().name());
}

@ParameterizedTest
Expand Down Expand Up @@ -259,7 +261,7 @@ void deduplicatedStopClusters() {
}
)
void stopClustersWithSpace(String query) {
var result = index.queryStopClusters(query).map(StopCluster::id).toList();
var result = index.queryStopClusters(query).map(primaryId()).toList();
assertEquals(List.of(FIVE_POINTS_STATION.getId()), result);
}

Expand All @@ -268,24 +270,28 @@ void stopClustersWithSpace(String query) {
void fuzzyStopCode(String query) {
var result = index.queryStopClusters(query).toList();
assertEquals(1, result.size());
assertEquals(ARTS_CENTER.getName().toString(), result.getFirst().name());
assertEquals(ARTS_CENTER.getName().toString(), result.getFirst().primary().name());
}

@Test
void modes() {
var result = index.queryStopClusters("westh").toList();
assertEquals(1, result.size());
var stop = result.getFirst();
assertEquals(WESTHAFEN.getName().toString(), stop.name());
assertEquals(List.of(FERRY.name(), BUS.name()), stop.modes());
var cluster = result.getFirst();
assertEquals(WESTHAFEN.getName().toString(), cluster.primary().name());
assertEquals(List.of(FERRY.name(), BUS.name()), cluster.primary().modes());
}

@Test
void agenciesAndFeedPublisher() {
var result = index.queryStopClusters("alexanderplatz").toList().getFirst();
assertEquals(ALEXANDERPLATZ_STATION.getName().toString(), result.name());
assertEquals(List.of(StopClusterMapper.toAgency(BVG)), result.agencies());
assertEquals("A Publisher", result.feedPublisher().name());
var cluster = index.queryStopClusters("alexanderplatz").toList().getFirst();
assertEquals(ALEXANDERPLATZ_STATION.getName().toString(), cluster.primary().name());
assertEquals(List.of(StopClusterMapper.toAgency(BVG)), cluster.primary().agencies());
assertEquals("A Publisher", cluster.primary().feedPublisher().name());
}
}

/**
 * Returns a function that reduces a {@link StopCluster} to the id of its primary location.
 * The tests in this class pass it to {@code map(...)} so that query results can be compared
 * against expected ids.
 */
private static @Nonnull Function<StopCluster, FeedScopedId> primaryId() {
  return cluster -> {
    // Go through the primary location first; the cluster itself no longer exposes an id.
    var primaryLocation = cluster.primary();
    return primaryLocation.id();
  };
}
}
100 changes: 40 additions & 60 deletions src/ext/java/org/opentripplanner/ext/geocoder/LuceneIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
Expand Down Expand Up @@ -40,7 +40,7 @@
import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
import org.apache.lucene.search.suggest.document.SuggestIndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.opentripplanner.ext.geocoder.StopCluster.Coordinate;
import org.opentripplanner.framework.collection.ListUtils;
import org.opentripplanner.framework.i18n.I18NString;
import org.opentripplanner.standalone.api.OtpServerRequestContext;
import org.opentripplanner.transit.model.framework.FeedScopedId;
Expand All @@ -52,22 +52,22 @@ public class LuceneIndex implements Serializable {

private static final String TYPE = "type";
private static final String ID = "id";
private static final String SECONDARY_IDS = "secondary_ids";
private static final String SUGGEST = "suggest";
private static final String NAME = "name";
private static final String NAME_NGRAM = "name_ngram";
private static final String CODE = "code";
private static final String LAT = "latitude";
private static final String LON = "longitude";
private static final String MODE = "mode";
private static final String AGENCY_IDS = "agency_ids";

private final TransitService transitService;
private final Analyzer analyzer;
private final SuggestIndexSearcher searcher;
private final StopClusterMapper stopClusterMapper;

public LuceneIndex(TransitService transitService) {
this.transitService = transitService;
StopClusterMapper stopClusterMapper = new StopClusterMapper(transitService);
this.stopClusterMapper = new StopClusterMapper(transitService);

this.analyzer =
new PerFieldAnalyzerWrapper(
Expand Down Expand Up @@ -95,12 +95,11 @@ public LuceneIndex(TransitService transitService) {
directoryWriter,
StopLocation.class,
stopLocation.getId().toString(),
stopLocation.getName(),
stopLocation.getCode(),
List.of(),
ListUtils.ofNullable(stopLocation.getName()),
ListUtils.ofNullable(stopLocation.getCode()),
stopLocation.getCoordinate().latitude(),
stopLocation.getCoordinate().longitude(),
Set.of(),
Set.of()
stopLocation.getCoordinate().longitude()
)
);

Expand All @@ -111,12 +110,11 @@ public LuceneIndex(TransitService transitService) {
directoryWriter,
StopLocationsGroup.class,
stopLocationsGroup.getId().toString(),
stopLocationsGroup.getName(),
null,
List.of(),
ListUtils.ofNullable(stopLocationsGroup.getName()),
List.of(),
stopLocationsGroup.getCoordinate().latitude(),
stopLocationsGroup.getCoordinate().longitude(),
Set.of(),
Set.of()
stopLocationsGroup.getCoordinate().longitude()
)
);

Expand All @@ -129,13 +127,12 @@ public LuceneIndex(TransitService transitService) {
addToIndex(
directoryWriter,
StopCluster.class,
stopCluster.id().toString(),
I18NString.of(stopCluster.name()),
stopCluster.code(),
stopCluster.primaryId(),
stopCluster.secondaryIds(),
stopCluster.names(),
stopCluster.codes(),
stopCluster.coordinate().lat(),
stopCluster.coordinate().lon(),
stopCluster.modes(),
stopCluster.agencyIds()
stopCluster.coordinate().lon()
)
);
}
Expand Down Expand Up @@ -183,30 +180,16 @@ public Stream<StopCluster> queryStopClusters(String query) {
}

private StopCluster toStopCluster(Document document) {
var clusterId = FeedScopedId.parse(document.get(ID));
var name = document.get(NAME);
var code = document.get(CODE);
var lat = document.getField(LAT).numericValue().doubleValue();
var lon = document.getField(LON).numericValue().doubleValue();
var modes = Arrays.asList(document.getValues(MODE));
var agencies = Arrays
.stream(document.getValues(AGENCY_IDS))
.map(id -> transitService.getAgencyForId(FeedScopedId.parse(id)))
.filter(Objects::nonNull)
.map(StopClusterMapper::toAgency)
var primaryId = FeedScopedId.parse(document.get(ID));
var primary = stopClusterMapper.toLocation(primaryId);

var secondaryIds = Arrays
.stream(document.getValues(SECONDARY_IDS))
.map(FeedScopedId::parse)
.map(stopClusterMapper::toLocation)
.toList();
var feedPublisher = StopClusterMapper.toFeedPublisher(
transitService.getFeedInfo(clusterId.getFeedId())
);
return new StopCluster(
clusterId,
code,
name,
new Coordinate(lat, lon),
modes,
agencies,
feedPublisher
);

return new StopCluster(primary, secondaryIds);
}

static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
Expand All @@ -230,36 +213,33 @@ private static void addToIndex(
IndexWriter writer,
Class<?> type,
String id,
I18NString name,
@Nullable String code,
Collection<String> secondaryIds,
Collection<I18NString> names,
Collection<String> codes,
double latitude,
double longitude,
Collection<String> modes,
Collection<String> agencyIds
double longitude
) {
String typeName = type.getSimpleName();

Document document = new Document();
document.add(new StoredField(ID, id));
for (var secondaryId : secondaryIds) {
document.add(new StoredField(SECONDARY_IDS, secondaryId));
}
document.add(new TextField(TYPE, typeName, Store.YES));
document.add(new TextField(NAME, Objects.toString(name), Store.YES));
document.add(new TextField(NAME_NGRAM, Objects.toString(name), Store.YES));
document.add(new ContextSuggestField(SUGGEST, Objects.toString(name), 1, typeName));
for (var name : names) {
document.add(new TextField(NAME, Objects.toString(name), Store.YES));
document.add(new TextField(NAME_NGRAM, Objects.toString(name), Store.YES));
document.add(new ContextSuggestField(SUGGEST, Objects.toString(name), 1, typeName));
}
document.add(new StoredField(LAT, latitude));
document.add(new StoredField(LON, longitude));

if (code != null) {
for (var code : codes) {
document.add(new TextField(CODE, code, Store.YES));
document.add(new ContextSuggestField(SUGGEST, code, 1, typeName));
}

for (var mode : modes) {
document.add(new TextField(MODE, mode, Store.YES));
}
for (var ids : agencyIds) {
document.add(new TextField(AGENCY_IDS, ids, Store.YES));
}

try {
writer.addDocument(document);
} catch (IOException ex) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
package org.opentripplanner.ext.geocoder;

import java.util.Collection;
import javax.annotation.Nullable;
import org.opentripplanner.transit.model.framework.FeedScopedId;
import org.opentripplanner.framework.i18n.I18NString;

/**
* A package-private helper type for transporting data before serializing.
*/
record LuceneStopCluster(
FeedScopedId id,
@Nullable String code,
String name,
StopCluster.Coordinate coordinate,
Collection<String> modes,
Collection<String> agencyIds
String primaryId,
Collection<String> secondaryIds,
Collection<I18NString> names,
Collection<String> codes,
StopCluster.Coordinate coordinate
) {}
36 changes: 27 additions & 9 deletions src/ext/java/org/opentripplanner/ext/geocoder/StopCluster.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.Collection;
import java.util.List;
import java.util.Objects;
import javax.annotation.Nullable;
import org.opentripplanner.transit.model.framework.FeedScopedId;

Expand All @@ -14,15 +15,7 @@
* - if a stop has a parent station only the parent is returned
* - if stops are closer than 10 meters to each other and have an identical name, only one is returned
*/
record StopCluster(
FeedScopedId id,
@Nullable String code,
String name,
Coordinate coordinate,
Collection<String> modes,
List<Agency> agencies,
@Nullable FeedPublisher feedPublisher
) {
record StopCluster(Location primary, Collection<Location> secondaries) {
/**
* Easily serializable version of a coordinate
*/
Expand All @@ -37,4 +30,29 @@ public record Agency(FeedScopedId id, String name) {}
* Easily serializable version of a feed publisher
*/
public record FeedPublisher(String name) {}

/**
 * The kind of a {@link Location}, carried in its {@code type} component.
 */
public enum LocationType {
// NOTE(review): presumably a parent station that groups several stops — confirm against the mapper.
STATION,
// NOTE(review): presumably an individual stop without a parent station — confirm against the mapper.
STOP,
}

/**
 * One location that is part of a {@link StopCluster}, either as its primary or as one of its
 * secondaries.
 *
 * @param id            identifier of the location, never {@code null}
 * @param code          code of the location, may be {@code null}
 * @param type          whether the location is a station or a stop, never {@code null}
 * @param name          name of the location, never {@code null}
 * @param coordinate    position of the location, never {@code null}
 * @param modes         modes associated with the location, never {@code null}
 * @param agencies      agencies associated with the location, never {@code null}
 * @param feedPublisher publisher of the feed the location comes from, may be {@code null}
 */
public record Location(
  FeedScopedId id,
  @Nullable String code,
  LocationType type,
  String name,
  Coordinate coordinate,
  Collection<String> modes,
  List<Agency> agencies,
  @Nullable FeedPublisher feedPublisher
) {
  /**
   * Validates that every component not marked {@code @Nullable} is present.
   *
   * @throws NullPointerException if a required component is {@code null}; the message names the
   *                              offending component
   */
  public Location {
    // Each check carries the component name so a failure says which value was missing.
    Objects.requireNonNull(id, "id");
    Objects.requireNonNull(name, "name");
    Objects.requireNonNull(type, "type");
    Objects.requireNonNull(coordinate, "coordinate");
    Objects.requireNonNull(modes, "modes");
    Objects.requireNonNull(agencies, "agencies");
  }
}
}
Loading

0 comments on commit 0695d7d

Please sign in to comment.