Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework clustering logic for sandbox geocoder #5879

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import java.time.LocalDate;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -213,21 +215,21 @@ class StopClusters {
)
void stopClustersWithTypos(String searchTerm) {
var results = index.queryStopClusters(searchTerm).toList();
var ids = results.stream().map(StopCluster::id).toList();
var ids = results.stream().map(primaryId()).toList();
assertEquals(List.of(ALEXANDERPLATZ_STATION.getId()), ids);
}

@Test
void fuzzyStopClusters() {
var result1 = index.queryStopClusters("arts").map(StopCluster::id).toList();
var result1 = index.queryStopClusters("arts").map(primaryId()).toList();
assertEquals(List.of(ARTS_CENTER.getId()), result1);
}

@Test
void deduplicatedStopClusters() {
var result = index.queryStopClusters("lich").toList();
assertEquals(1, result.size());
assertEquals(LICHTERFELDE_OST_1.getName().toString(), result.getFirst().name());
assertEquals(LICHTERFELDE_OST_1.getName().toString(), result.getFirst().primary().name());
}

@ParameterizedTest
Expand Down Expand Up @@ -259,7 +261,7 @@ void deduplicatedStopClusters() {
}
)
void stopClustersWithSpace(String query) {
var result = index.queryStopClusters(query).map(StopCluster::id).toList();
var result = index.queryStopClusters(query).map(primaryId()).toList();
assertEquals(List.of(FIVE_POINTS_STATION.getId()), result);
}

Expand All @@ -268,24 +270,28 @@ void stopClustersWithSpace(String query) {
void fuzzyStopCode(String query) {
var result = index.queryStopClusters(query).toList();
assertEquals(1, result.size());
assertEquals(ARTS_CENTER.getName().toString(), result.getFirst().name());
assertEquals(ARTS_CENTER.getName().toString(), result.getFirst().primary().name());
}

@Test
void modes() {
var result = index.queryStopClusters("westh").toList();
assertEquals(1, result.size());
var stop = result.getFirst();
assertEquals(WESTHAFEN.getName().toString(), stop.name());
assertEquals(List.of(FERRY.name(), BUS.name()), stop.modes());
var cluster = result.getFirst();
assertEquals(WESTHAFEN.getName().toString(), cluster.primary().name());
assertEquals(List.of(FERRY.name(), BUS.name()), cluster.primary().modes());
}

@Test
void agenciesAndFeedPublisher() {
var result = index.queryStopClusters("alexanderplatz").toList().getFirst();
assertEquals(ALEXANDERPLATZ_STATION.getName().toString(), result.name());
assertEquals(List.of(StopClusterMapper.toAgency(BVG)), result.agencies());
assertEquals("A Publisher", result.feedPublisher().name());
var cluster = index.queryStopClusters("alexanderplatz").toList().getFirst();
assertEquals(ALEXANDERPLATZ_STATION.getName().toString(), cluster.primary().name());
assertEquals(List.of(StopClusterMapper.toAgency(BVG)), cluster.primary().agencies());
assertEquals("A Publisher", cluster.primary().feedPublisher().name());
}
}

/**
 * Builds the mapping used by the assertions in this test class to reduce a {@link StopCluster}
 * to the id of its primary location.
 *
 * @return a function returning {@code cluster.primary().id()} for the cluster it is applied to
 */
private static @Nonnull Function<StopCluster, FeedScopedId> primaryId() {
  return cluster -> {
    var primaryLocation = cluster.primary();
    return primaryLocation.id();
  };
}
}
100 changes: 40 additions & 60 deletions src/ext/java/org/opentripplanner/ext/geocoder/LuceneIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
Expand Down Expand Up @@ -40,7 +40,7 @@
import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
import org.apache.lucene.search.suggest.document.SuggestIndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.opentripplanner.ext.geocoder.StopCluster.Coordinate;
import org.opentripplanner.framework.collection.ListUtils;
import org.opentripplanner.framework.i18n.I18NString;
import org.opentripplanner.standalone.api.OtpServerRequestContext;
import org.opentripplanner.transit.model.framework.FeedScopedId;
Expand All @@ -52,22 +52,22 @@ public class LuceneIndex implements Serializable {

private static final String TYPE = "type";
private static final String ID = "id";
private static final String SECONDARY_IDS = "secondary_ids";
private static final String SUGGEST = "suggest";
private static final String NAME = "name";
private static final String NAME_NGRAM = "name_ngram";
private static final String CODE = "code";
private static final String LAT = "latitude";
private static final String LON = "longitude";
private static final String MODE = "mode";
private static final String AGENCY_IDS = "agency_ids";

private final TransitService transitService;
private final Analyzer analyzer;
private final SuggestIndexSearcher searcher;
private final StopClusterMapper stopClusterMapper;

public LuceneIndex(TransitService transitService) {
this.transitService = transitService;
StopClusterMapper stopClusterMapper = new StopClusterMapper(transitService);
this.stopClusterMapper = new StopClusterMapper(transitService);

this.analyzer =
new PerFieldAnalyzerWrapper(
Expand Down Expand Up @@ -95,12 +95,11 @@ public LuceneIndex(TransitService transitService) {
directoryWriter,
StopLocation.class,
stopLocation.getId().toString(),
stopLocation.getName(),
stopLocation.getCode(),
List.of(),
ListUtils.ofNullable(stopLocation.getName()),
ListUtils.ofNullable(stopLocation.getCode()),
stopLocation.getCoordinate().latitude(),
stopLocation.getCoordinate().longitude(),
Set.of(),
Set.of()
stopLocation.getCoordinate().longitude()
)
);

Expand All @@ -111,12 +110,11 @@ public LuceneIndex(TransitService transitService) {
directoryWriter,
StopLocationsGroup.class,
stopLocationsGroup.getId().toString(),
stopLocationsGroup.getName(),
null,
List.of(),
ListUtils.ofNullable(stopLocationsGroup.getName()),
List.of(),
stopLocationsGroup.getCoordinate().latitude(),
stopLocationsGroup.getCoordinate().longitude(),
Set.of(),
Set.of()
stopLocationsGroup.getCoordinate().longitude()
)
);

Expand All @@ -129,13 +127,12 @@ public LuceneIndex(TransitService transitService) {
addToIndex(
directoryWriter,
StopCluster.class,
stopCluster.id().toString(),
I18NString.of(stopCluster.name()),
stopCluster.code(),
stopCluster.primaryId(),
stopCluster.secondaryIds(),
stopCluster.names(),
stopCluster.codes(),
stopCluster.coordinate().lat(),
stopCluster.coordinate().lon(),
stopCluster.modes(),
stopCluster.agencyIds()
stopCluster.coordinate().lon()
)
);
}
Expand Down Expand Up @@ -183,30 +180,16 @@ public Stream<StopCluster> queryStopClusters(String query) {
}

private StopCluster toStopCluster(Document document) {
var clusterId = FeedScopedId.parse(document.get(ID));
var name = document.get(NAME);
var code = document.get(CODE);
var lat = document.getField(LAT).numericValue().doubleValue();
var lon = document.getField(LON).numericValue().doubleValue();
var modes = Arrays.asList(document.getValues(MODE));
var agencies = Arrays
.stream(document.getValues(AGENCY_IDS))
.map(id -> transitService.getAgencyForId(FeedScopedId.parse(id)))
.filter(Objects::nonNull)
.map(StopClusterMapper::toAgency)
var primaryId = FeedScopedId.parse(document.get(ID));
var primary = stopClusterMapper.toLocation(primaryId);

var secondaryIds = Arrays
.stream(document.getValues(SECONDARY_IDS))
.map(FeedScopedId::parse)
.map(stopClusterMapper::toLocation)
.toList();
var feedPublisher = StopClusterMapper.toFeedPublisher(
transitService.getFeedInfo(clusterId.getFeedId())
);
return new StopCluster(
clusterId,
code,
name,
new Coordinate(lat, lon),
modes,
agencies,
feedPublisher
);

return new StopCluster(primary, secondaryIds);
}

static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
Expand All @@ -230,36 +213,33 @@ private static void addToIndex(
IndexWriter writer,
Class<?> type,
String id,
I18NString name,
@Nullable String code,
Collection<String> secondaryIds,
Collection<I18NString> names,
Collection<String> codes,
double latitude,
double longitude,
Collection<String> modes,
Collection<String> agencyIds
double longitude
) {
String typeName = type.getSimpleName();

Document document = new Document();
document.add(new StoredField(ID, id));
for (var secondaryId : secondaryIds) {
document.add(new StoredField(SECONDARY_IDS, secondaryId));
}
document.add(new TextField(TYPE, typeName, Store.YES));
document.add(new TextField(NAME, Objects.toString(name), Store.YES));
document.add(new TextField(NAME_NGRAM, Objects.toString(name), Store.YES));
document.add(new ContextSuggestField(SUGGEST, Objects.toString(name), 1, typeName));
for (var name : names) {
document.add(new TextField(NAME, Objects.toString(name), Store.YES));
document.add(new TextField(NAME_NGRAM, Objects.toString(name), Store.YES));
document.add(new ContextSuggestField(SUGGEST, Objects.toString(name), 1, typeName));
}
document.add(new StoredField(LAT, latitude));
document.add(new StoredField(LON, longitude));

if (code != null) {
for (var code : codes) {
document.add(new TextField(CODE, code, Store.YES));
document.add(new ContextSuggestField(SUGGEST, code, 1, typeName));
}

for (var mode : modes) {
document.add(new TextField(MODE, mode, Store.YES));
}
for (var ids : agencyIds) {
document.add(new TextField(AGENCY_IDS, ids, Store.YES));
}

try {
writer.addDocument(document);
} catch (IOException ex) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
package org.opentripplanner.ext.geocoder;

import java.util.Collection;
import javax.annotation.Nullable;
import org.opentripplanner.transit.model.framework.FeedScopedId;
import org.opentripplanner.framework.i18n.I18NString;

/**
* A package-private helper type for transporting data before serializing.
*/
record LuceneStopCluster(
FeedScopedId id,
@Nullable String code,
String name,
StopCluster.Coordinate coordinate,
Collection<String> modes,
Collection<String> agencyIds
String primaryId,
Collection<String> secondaryIds,
Collection<I18NString> names,
Collection<String> codes,
StopCluster.Coordinate coordinate
) {}
36 changes: 27 additions & 9 deletions src/ext/java/org/opentripplanner/ext/geocoder/StopCluster.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.Collection;
import java.util.List;
import java.util.Objects;
import javax.annotation.Nullable;
import org.opentripplanner.transit.model.framework.FeedScopedId;

Expand All @@ -14,15 +15,7 @@
* - if a stop has a parent station only the parent is returned
* - if stops are closer than 10 meters to each other and have an identical name, only one is returned
*/
record StopCluster(
FeedScopedId id,
@Nullable String code,
String name,
Coordinate coordinate,
Collection<String> modes,
List<Agency> agencies,
@Nullable FeedPublisher feedPublisher
) {
record StopCluster(Location primary, Collection<Location> secondaries) {
/**
* Easily serializable version of a coordinate
*/
Expand All @@ -37,4 +30,29 @@ public record Agency(FeedScopedId id, String name) {}
* Easily serializable version of a feed publisher
*/
public record FeedPublisher(String name) {}

/**
 * The kind of place a {@link Location} describes.
 */
public enum LocationType {
// NOTE(review): presumably a (parent) station that groups several stops — confirm against the mapper.
STATION,
// NOTE(review): presumably an individual stop — confirm against the mapper.
STOP,
}

/**
 * A single location (see {@link LocationType}) that is part of a stop cluster, either as its
 * primary entry or as one of its secondaries.
 *
 * <p>{@code code} and {@code feedPublisher} are optional and may be {@code null}; all other
 * components are mandatory and are null-checked when the record is created.
 *
 * @param id            identifier of the location
 * @param code          optional code of the location, may be {@code null}
 * @param type          kind of the location
 * @param name          name of the location
 * @param coordinate    position of the location
 * @param modes         names of the modes associated with the location
 * @param agencies      agencies associated with the location
 * @param feedPublisher optional publisher of the feed, may be {@code null}
 */
public record Location(
  FeedScopedId id,
  @Nullable String code,
  LocationType type,
  String name,
  Coordinate coordinate,
  Collection<String> modes,
  List<Agency> agencies,
  @Nullable FeedPublisher feedPublisher
) {
  /**
   * Validates the mandatory components.
   *
   * @throws NullPointerException if a mandatory component is {@code null}; the message is the
   *                              name of the offending component
   */
  public Location {
    // Name each component so a failure points at the missing value rather than just a line number.
    Objects.requireNonNull(id, "id");
    Objects.requireNonNull(name, "name");
    Objects.requireNonNull(type, "type");
    Objects.requireNonNull(coordinate, "coordinate");
    Objects.requireNonNull(modes, "modes");
    Objects.requireNonNull(agencies, "agencies");
  }
}
}
Loading
Loading