From 074f4d80dfd735d520e4e1bf08d8e3a6db185331 Mon Sep 17 00:00:00 2001 From: jcpitre Date: Mon, 9 Sep 2024 23:42:11 -0400 Subject: [PATCH 1/7] Removed missing required field false positive. Added foreign key validation on location_group_id. --- .../notice/ForbiddenGeograaphyIdNotice.java | 52 +++++++++++++++ .../table/GtfsStopTimeSchema.java | 22 ++++++- ...StopTimesGeographyIdPresenceValidator.java | 64 +++++++++++++++++++ .../validator/NoticeFieldsTest.java | 4 +- 4 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java create mode 100644 main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java new file mode 100644 index 0000000000..bbb27f4456 --- /dev/null +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java @@ -0,0 +1,52 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.mobilitydata.gtfsvalidator.notice; + +import static org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice.SectionRef.FILE_REQUIREMENTS; +import static org.mobilitydata.gtfsvalidator.notice.SeverityLevel.ERROR; + +import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice; +import org.mobilitydata.gtfsvalidator.annotation.GtfsValidationNotice.SectionRefs; + +/** + * A stop_time entry has more than one geographical id defined. + * + *

In stop_times.txt, you can have only one of stop_id, location_group_id or location_id defined + * for given entry. + */ +@GtfsValidationNotice(severity = ERROR, sections = @SectionRefs(FILE_REQUIREMENTS)) +public class ForbiddenGeograaphyIdNotice extends ValidationNotice { + + /** The row of the faulty record. */ + private final int csvRowNumber; + + /** The sThe id that already exists. */ + private final String stopId; + + /** The id that already exists. */ + private final String locationGroupId; + + /** The id that already exists. */ + private final String locationId; + + public ForbiddenGeograaphyIdNotice( + int csvRowNumber, String stopId, String locationGroupId, String locationId) { + this.csvRowNumber = csvRowNumber; + this.stopId = stopId; + this.locationGroupId = locationGroupId; + this.locationId = locationId; + } +} diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java index 7f6a05589c..5904507c06 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java @@ -52,11 +52,17 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @FieldType(FieldTypeEnum.ID) @Index - @Required + @ConditionallyRequired @ForeignKey(table = "stops.txt", field = "stop_id") String stopId(); @FieldType(FieldTypeEnum.ID) + @ConditionallyRequired + @ForeignKey(table = "location_groups.txt", field = "location_group_id") + String locationGroupId(); + + @FieldType(FieldTypeEnum.ID) + @ConditionallyRequired String locationId(); @PrimaryKey(isSequenceUsedForSorting = true, translationRecordIdType = RECORD_SUB_ID) @@ -67,6 +73,12 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @CachedField String stopHeadsign(); + @ConditionallyRequired + GtfsTime startPickupDropOffWindow(); + + @ConditionallyRequired + GtfsTime endPickupDropOffWindow(); + GtfsPickupDropOff pickupType(); GtfsPickupDropOff dropOffType(); @@ -83,4 +95,12 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @DefaultValue("1") @RecommendedColumn GtfsStopTimeTimepoint timepoint(); + + @FieldType(FieldTypeEnum.ID) + @ForeignKey(table = "booking_rules.txt", field = "booking_rule_id") + String pickupBookingRuleId(); + + @FieldType(FieldTypeEnum.ID) + @ForeignKey(table = "booking_rules.txt", field = "booking_rule_id") + String dropOffBookingRuleId(); } diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java new file mode 100644 index 0000000000..51430c5068 --- /dev/null +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java @@ -0,0 +1,64 @@ +/* + * Copyright 2021 Google LLC, MobilityData IO + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.mobilitydata.gtfsvalidator.validator; + +import org.mobilitydata.gtfsvalidator.annotation.GtfsValidator; +import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeograaphyIdNotice; +import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; +import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; +import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; + +/** + * Validates that only one of stop_id, location_group_id or location_id is defined in a given record + * of stop_times.txt + * + *

Generated notice: {@link MissingRequiredFieldNotice}. + * + *

Generated notice: {@link ForbiddenGeograaphyIdNotice}. + */ +@GtfsValidator +public class StopTimesGeographyIdPresenceValidator extends SingleEntityValidator { + + @Override + public void validate(GtfsStopTime stopTime, NoticeContainer noticeContainer) { + int presenceCount = 0; + if (stopTime.hasStopId()) { + presenceCount++; + } + if (stopTime.hasLocationGroupId()) { + presenceCount++; + } + + if (stopTime.hasLocationId()) { + presenceCount++; + } + + if (presenceCount == 0) { + // None of the 3 geography IDs are present, but we need at least stop_id + noticeContainer.addValidationNotice( + new MissingRequiredFieldNotice( + GtfsStopTime.FILENAME, stopTime.csvRowNumber(), GtfsStopTime.STOP_ID_FIELD_NAME)); + } else if (presenceCount > 1) { + // More than one geography ID is present, but only one is allowed + noticeContainer.addValidationNotice( + new ForbiddenGeograaphyIdNotice( + stopTime.csvRowNumber(), + stopTime.stopId(), + stopTime.locationGroupId(), + stopTime.locationId())); + } + } +} diff --git a/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/NoticeFieldsTest.java b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/NoticeFieldsTest.java index 6eab0628ec..0e55eda83f 100644 --- a/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/NoticeFieldsTest.java +++ b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/NoticeFieldsTest.java @@ -197,7 +197,9 @@ public void testNoticeClassFieldNames() { "fileNameA", "fileNameB", "pathwayMode", - "isBidirectional"); + "isBidirectional", + "locationGroupId", + "locationId"); } private static List discoverValidationNoticeFieldNames() { From d9f4f7a196e2e63ceb13d20053ce9cd14d794d58 Mon Sep 17 00:00:00 2001 From: jcpitre Date: Tue, 10 Sep 2024 11:48:07 -0400 Subject: [PATCH 2/7] Added unit tests --- ...e.java => ForbiddenGeographyIdNotice.java} | 4 +- ...StopTimesGeographyIdPresenceValidator.java | 6 +- ...TimesGeographyIdPresenceValidatorTest.java | 100 ++++++++++++++++++ 3 files changed, 105 insertions(+), 5 deletions(-) rename core/src/main/java/org/mobilitydata/gtfsvalidator/notice/{ForbiddenGeograaphyIdNotice.java => ForbiddenGeographyIdNotice.java} (94%) create mode 100644 main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java similarity index 94% rename from core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java rename to core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java index bbb27f4456..28825f32bf 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeograaphyIdNotice.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java @@ -28,7 +28,7 @@ * for given entry. */ @GtfsValidationNotice(severity = ERROR, sections = @SectionRefs(FILE_REQUIREMENTS)) -public class ForbiddenGeograaphyIdNotice extends ValidationNotice { +public class ForbiddenGeographyIdNotice extends ValidationNotice { /** The row of the faulty record. */ private final int csvRowNumber; @@ -42,7 +42,7 @@ public class ForbiddenGeograaphyIdNotice extends ValidationNotice { /** The id that already exists. */ private final String locationId; - public ForbiddenGeograaphyIdNotice( + public ForbiddenGeographyIdNotice( int csvRowNumber, String stopId, String locationGroupId, String locationId) { this.csvRowNumber = csvRowNumber; this.stopId = stopId; diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java index 51430c5068..945110d47e 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java @@ -16,7 +16,7 @@ package org.mobilitydata.gtfsvalidator.validator; import org.mobilitydata.gtfsvalidator.annotation.GtfsValidator; -import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeograaphyIdNotice; +import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeographyIdNotice; import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; @@ -27,7 +27,7 @@ * *

Generated notice: {@link MissingRequiredFieldNotice}. * - *

Generated notice: {@link ForbiddenGeograaphyIdNotice}. + *

Generated notice: {@link ForbiddenGeographyIdNotice}. */ @GtfsValidator public class StopTimesGeographyIdPresenceValidator extends SingleEntityValidator { @@ -54,7 +54,7 @@ public void validate(GtfsStopTime stopTime, NoticeContainer noticeContainer) { } else if (presenceCount > 1) { // More than one geography ID is present, but only one is allowed noticeContainer.addValidationNotice( - new ForbiddenGeograaphyIdNotice( + new ForbiddenGeographyIdNotice( stopTime.csvRowNumber(), stopTime.stopId(), stopTime.locationGroupId(), diff --git a/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java new file mode 100644 index 0000000000..d03d2be952 --- /dev/null +++ b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java @@ -0,0 +1,100 @@ +/* + * Copyright 2020 Google LLC, MobilityData IO + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.mobilitydata.gtfsvalidator.validator; + +import static com.google.common.truth.Truth.assertThat; + +import java.util.List; +import org.junit.Test; +import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeographyIdNotice; +import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; +import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; +import org.mobilitydata.gtfsvalidator.notice.ValidationNotice; +import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; + +public class StopTimesGeographyIdPresenceValidatorTest { + + @Test + public void NoGeographyIdShouldGenerateMissingRequiredFieldNotice() { + assertThat(validationNoticesFor(new GtfsStopTime.Builder().setCsvRowNumber(2).build())) + .containsExactly(new MissingRequiredFieldNotice("stop_times.txt", 2, "stop_id")); + } + + @Test + public void OneGeographyIdShouldGenerateNothing() { + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder().setCsvRowNumber(2).setStopId("stop_id").build())) + .isEmpty(); + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder().setCsvRowNumber(2).setLocationGroupId("id").build())) + .isEmpty(); + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder().setCsvRowNumber(2).setLocationId("id").build())) + .isEmpty(); + } + + @Test + public void MultipleGeographyIdShouldGenerateNotice() { + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setStopId("stop_id") + .setLocationGroupId("location_group_id") + .setCsvRowNumber(2) + .build())) + .containsExactly(new ForbiddenGeographyIdNotice(2, "stop_id", "location_group_id", "")); + + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setStopId("stop_id") + .setLocationId("location_id") + .setCsvRowNumber(2) + .build())) + .containsExactly(new ForbiddenGeographyIdNotice(2, "stop_id", "", "location_id")); + + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setLocationGroupId("location_group_id") + .setLocationId("location_id") + .setCsvRowNumber(2) + .build())) + .containsExactly(new ForbiddenGeographyIdNotice(2, "", "location_group_id", "location_id")); + + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setStopId("stop_id") + .setLocationGroupId("location_group_id") + .setLocationId("location_id") + .setCsvRowNumber(2) + .build())) + .containsExactly( + new ForbiddenGeographyIdNotice(2, "stop_id", "location_group_id", "location_id")); + } + + private List validationNoticesFor(GtfsStopTime entity) { + StopTimesGeographyIdPresenceValidator validator = new StopTimesGeographyIdPresenceValidator(); + NoticeContainer noticeContainer = new NoticeContainer(); + validator.validate(entity, noticeContainer); + return noticeContainer.getValidationNotices(); + } +} From b69a81712c6cac5b47a83104d08dd4a0d531793b Mon Sep 17 00:00:00 2001 From: jcpitre Date: Tue, 10 Sep 2024 13:37:41 -0400 Subject: [PATCH 3/7] Modified according to PR comments --- .../gtfsvalidator/notice/ForbiddenGeographyIdNotice.java | 2 +- .../mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java | 2 -- .../validator/StopTimesGeographyIdPresenceValidator.java | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java index 28825f32bf..41647bbbee 100644 --- a/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java +++ b/core/src/main/java/org/mobilitydata/gtfsvalidator/notice/ForbiddenGeographyIdNotice.java @@ -1,5 +1,5 @@ /* - * Copyright 2020 Google LLC + * Copyright 2024 MobilityData * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java index 5904507c06..80561fe0ee 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java @@ -73,10 +73,8 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @CachedField String stopHeadsign(); - @ConditionallyRequired GtfsTime startPickupDropOffWindow(); - @ConditionallyRequired GtfsTime endPickupDropOffWindow(); GtfsPickupDropOff pickupType(); diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java index 945110d47e..72538e1473 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java @@ -1,5 +1,5 @@ /* - * Copyright 2021 Google LLC, MobilityData IO + * Copyright 2024 MobilityData * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 87da004ab61b604a01a2c666602e9b7db7243333 Mon Sep 17 00:00:00 2001 From: jcpitre Date: Tue, 10 Sep 2024 16:53:51 -0400 Subject: [PATCH 4/7] Temporarily removed a validator to investigate increase in run time --- ...StopTimesGeographyIdPresenceValidator.java | 64 ----------- ...TimesGeographyIdPresenceValidatorTest.java | 100 ------------------ 2 files changed, 164 deletions(-) delete mode 100644 main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java delete mode 100644 main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java deleted file mode 100644 index 72538e1473..0000000000 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2024 MobilityData - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.mobilitydata.gtfsvalidator.validator; - -import org.mobilitydata.gtfsvalidator.annotation.GtfsValidator; -import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeographyIdNotice; -import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; -import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; -import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; - -/** - * Validates that only one of stop_id, location_group_id or location_id is defined in a given record - * of stop_times.txt - * - *

Generated notice: {@link MissingRequiredFieldNotice}. - * - *

Generated notice: {@link ForbiddenGeographyIdNotice}. - */ -@GtfsValidator -public class StopTimesGeographyIdPresenceValidator extends SingleEntityValidator { - - @Override - public void validate(GtfsStopTime stopTime, NoticeContainer noticeContainer) { - int presenceCount = 0; - if (stopTime.hasStopId()) { - presenceCount++; - } - if (stopTime.hasLocationGroupId()) { - presenceCount++; - } - - if (stopTime.hasLocationId()) { - presenceCount++; - } - - if (presenceCount == 0) { - // None of the 3 geography IDs are present, but we need at least stop_id - noticeContainer.addValidationNotice( - new MissingRequiredFieldNotice( - GtfsStopTime.FILENAME, stopTime.csvRowNumber(), GtfsStopTime.STOP_ID_FIELD_NAME)); - } else if (presenceCount > 1) { - // More than one geography ID is present, but only one is allowed - noticeContainer.addValidationNotice( - new ForbiddenGeographyIdNotice( - stopTime.csvRowNumber(), - stopTime.stopId(), - stopTime.locationGroupId(), - stopTime.locationId())); - } - } -} diff --git a/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java deleted file mode 100644 index d03d2be952..0000000000 --- a/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright 2020 Google LLC, MobilityData IO - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.mobilitydata.gtfsvalidator.validator; - -import static com.google.common.truth.Truth.assertThat; - -import java.util.List; -import org.junit.Test; -import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeographyIdNotice; -import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; -import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; -import org.mobilitydata.gtfsvalidator.notice.ValidationNotice; -import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; - -public class StopTimesGeographyIdPresenceValidatorTest { - - @Test - public void NoGeographyIdShouldGenerateMissingRequiredFieldNotice() { - assertThat(validationNoticesFor(new GtfsStopTime.Builder().setCsvRowNumber(2).build())) - .containsExactly(new MissingRequiredFieldNotice("stop_times.txt", 2, "stop_id")); - } - - @Test - public void OneGeographyIdShouldGenerateNothing() { - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder().setCsvRowNumber(2).setStopId("stop_id").build())) - .isEmpty(); - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder().setCsvRowNumber(2).setLocationGroupId("id").build())) - .isEmpty(); - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder().setCsvRowNumber(2).setLocationId("id").build())) - .isEmpty(); - } - - @Test - public void MultipleGeographyIdShouldGenerateNotice() { - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder() - .setStopId("stop_id") - .setLocationGroupId("location_group_id") - .setCsvRowNumber(2) - .build())) - .containsExactly(new ForbiddenGeographyIdNotice(2, "stop_id", "location_group_id", "")); - - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder() - .setStopId("stop_id") - .setLocationId("location_id") - .setCsvRowNumber(2) - .build())) - .containsExactly(new ForbiddenGeographyIdNotice(2, "stop_id", "", "location_id")); - - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder() - .setLocationGroupId("location_group_id") - .setLocationId("location_id") - .setCsvRowNumber(2) - .build())) - .containsExactly(new ForbiddenGeographyIdNotice(2, "", "location_group_id", "location_id")); - - assertThat( - validationNoticesFor( - new GtfsStopTime.Builder() - .setStopId("stop_id") - .setLocationGroupId("location_group_id") - .setLocationId("location_id") - .setCsvRowNumber(2) - .build())) - .containsExactly( - new ForbiddenGeographyIdNotice(2, "stop_id", "location_group_id", "location_id")); - } - - private List validationNoticesFor(GtfsStopTime entity) { - StopTimesGeographyIdPresenceValidator validator = new StopTimesGeographyIdPresenceValidator(); - NoticeContainer noticeContainer = new NoticeContainer(); - validator.validate(entity, noticeContainer); - return noticeContainer.getValidationNotices(); - } -} From 315fc5fbd63117469a03bbd222080d85626123cd Mon Sep 17 00:00:00 2001 From: jcpitre Date: Thu, 12 Sep 2024 09:13:39 -0400 Subject: [PATCH 5/7] Temporarily removed a new foreign key annotations to investigate increase in run time --- .../gtfsvalidator/table/GtfsStopTimeSchema.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java index 80561fe0ee..af81c5764a 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java @@ -58,7 +58,6 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @FieldType(FieldTypeEnum.ID) @ConditionallyRequired - @ForeignKey(table = "location_groups.txt", field = "location_group_id") String locationGroupId(); @FieldType(FieldTypeEnum.ID) @@ -94,11 +93,8 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @RecommendedColumn GtfsStopTimeTimepoint timepoint(); - @FieldType(FieldTypeEnum.ID) - @ForeignKey(table = "booking_rules.txt", field = "booking_rule_id") String pickupBookingRuleId(); - @FieldType(FieldTypeEnum.ID) - @ForeignKey(table = "booking_rules.txt", field = "booking_rule_id") + String dropOffBookingRuleId(); } From df91474134ff7536c88b780037264409978280f2 Mon Sep 17 00:00:00 2001 From: jcpitre Date: Thu, 12 Sep 2024 09:57:40 -0400 Subject: [PATCH 6/7] Put back everything and increase heap to 12G --- .../table/GtfsStopTimeSchema.java | 6 +- ...StopTimesGeographyIdPresenceValidator.java | 64 +++++++++++ ...TimesGeographyIdPresenceValidatorTest.java | 100 ++++++++++++++++++ .../harvest_latest_versions.py | 5 +- scripts/queue_runner.sh | 4 +- 5 files changed, 175 insertions(+), 4 deletions(-) create mode 100644 main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java create mode 100644 main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java index af81c5764a..80561fe0ee 100644 --- a/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/table/GtfsStopTimeSchema.java @@ -58,6 +58,7 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @FieldType(FieldTypeEnum.ID) @ConditionallyRequired + @ForeignKey(table = "location_groups.txt", field = "location_group_id") String locationGroupId(); @FieldType(FieldTypeEnum.ID) @@ -93,8 +94,11 @@ public interface GtfsStopTimeSchema extends GtfsEntity { @RecommendedColumn GtfsStopTimeTimepoint timepoint(); + @FieldType(FieldTypeEnum.ID) + @ForeignKey(table = "booking_rules.txt", field = "booking_rule_id") String pickupBookingRuleId(); - + @FieldType(FieldTypeEnum.ID) + @ForeignKey(table = "booking_rules.txt", field = "booking_rule_id") String dropOffBookingRuleId(); } diff --git a/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java new file mode 100644 index 0000000000..72538e1473 --- /dev/null +++ b/main/src/main/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidator.java @@ -0,0 +1,64 @@ +/* + * Copyright 2024 MobilityData + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.mobilitydata.gtfsvalidator.validator; + +import org.mobilitydata.gtfsvalidator.annotation.GtfsValidator; +import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeographyIdNotice; +import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; +import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; +import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; + +/** + * Validates that only one of stop_id, location_group_id or location_id is defined in a given record + * of stop_times.txt + * + *

Generated notice: {@link MissingRequiredFieldNotice}. + * + *

Generated notice: {@link ForbiddenGeographyIdNotice}. + */ +@GtfsValidator +public class StopTimesGeographyIdPresenceValidator extends SingleEntityValidator { + + @Override + public void validate(GtfsStopTime stopTime, NoticeContainer noticeContainer) { + int presenceCount = 0; + if (stopTime.hasStopId()) { + presenceCount++; + } + if (stopTime.hasLocationGroupId()) { + presenceCount++; + } + + if (stopTime.hasLocationId()) { + presenceCount++; + } + + if (presenceCount == 0) { + // None of the 3 geography IDs are present, but we need at least stop_id + noticeContainer.addValidationNotice( + new MissingRequiredFieldNotice( + GtfsStopTime.FILENAME, stopTime.csvRowNumber(), GtfsStopTime.STOP_ID_FIELD_NAME)); + } else if (presenceCount > 1) { + // More than one geography ID is present, but only one is allowed + noticeContainer.addValidationNotice( + new ForbiddenGeographyIdNotice( + stopTime.csvRowNumber(), + stopTime.stopId(), + stopTime.locationGroupId(), + stopTime.locationId())); + } + } +} diff --git a/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java new file mode 100644 index 0000000000..d03d2be952 --- /dev/null +++ b/main/src/test/java/org/mobilitydata/gtfsvalidator/validator/StopTimesGeographyIdPresenceValidatorTest.java @@ -0,0 +1,100 @@ +/* + * Copyright 2020 Google LLC, MobilityData IO + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.mobilitydata.gtfsvalidator.validator; + +import static com.google.common.truth.Truth.assertThat; + +import java.util.List; +import org.junit.Test; +import org.mobilitydata.gtfsvalidator.notice.ForbiddenGeographyIdNotice; +import org.mobilitydata.gtfsvalidator.notice.MissingRequiredFieldNotice; +import org.mobilitydata.gtfsvalidator.notice.NoticeContainer; +import org.mobilitydata.gtfsvalidator.notice.ValidationNotice; +import org.mobilitydata.gtfsvalidator.table.GtfsStopTime; + +public class StopTimesGeographyIdPresenceValidatorTest { + + @Test + public void NoGeographyIdShouldGenerateMissingRequiredFieldNotice() { + assertThat(validationNoticesFor(new GtfsStopTime.Builder().setCsvRowNumber(2).build())) + .containsExactly(new MissingRequiredFieldNotice("stop_times.txt", 2, "stop_id")); + } + + @Test + public void OneGeographyIdShouldGenerateNothing() { + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder().setCsvRowNumber(2).setStopId("stop_id").build())) + .isEmpty(); + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder().setCsvRowNumber(2).setLocationGroupId("id").build())) + .isEmpty(); + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder().setCsvRowNumber(2).setLocationId("id").build())) + .isEmpty(); + } + + @Test + public void MultipleGeographyIdShouldGenerateNotice() { + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setStopId("stop_id") + .setLocationGroupId("location_group_id") + .setCsvRowNumber(2) + .build())) + .containsExactly(new ForbiddenGeographyIdNotice(2, "stop_id", "location_group_id", "")); + + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setStopId("stop_id") + .setLocationId("location_id") + .setCsvRowNumber(2) + .build())) + .containsExactly(new ForbiddenGeographyIdNotice(2, "stop_id", "", "location_id")); + + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setLocationGroupId("location_group_id") + .setLocationId("location_id") + .setCsvRowNumber(2) + .build())) + .containsExactly(new ForbiddenGeographyIdNotice(2, "", "location_group_id", "location_id")); + + assertThat( + validationNoticesFor( + new GtfsStopTime.Builder() + .setStopId("stop_id") + .setLocationGroupId("location_group_id") + .setLocationId("location_id") + .setCsvRowNumber(2) + .build())) + .containsExactly( + new ForbiddenGeographyIdNotice(2, "stop_id", "location_group_id", "location_id")); + } + + private List validationNoticesFor(GtfsStopTime entity) { + StopTimesGeographyIdPresenceValidator validator = new StopTimesGeographyIdPresenceValidator(); + NoticeContainer noticeContainer = new NoticeContainer(); + validator.validate(entity, noticeContainer); + return noticeContainer.getValidationNotices(); + } +} diff --git a/scripts/mobility-database-harvester/harvest_latest_versions.py b/scripts/mobility-database-harvester/harvest_latest_versions.py index 0ddef96fba..dcf3de331c 100644 --- a/scripts/mobility-database-harvester/harvest_latest_versions.py +++ b/scripts/mobility-database-harvester/harvest_latest_versions.py @@ -134,7 +134,10 @@ def harvest_latest_versions(to_sample): for index, latest_url in catalogs_gtfs[LATEST_URL].items(): source_file_name = latest_url.replace(URL_PREFIX, "").replace(URL_SUFFIX, "") - latest_versions[source_file_name] = latest_url + # Only keep the source that contains gb-unknown-uk-aggregate-feed-gtfs-2014 for testing + # Remove before merging + if "gb-unknown-uk-aggregate-feed-gtfs-2014" in source_file_name: + latest_versions[source_file_name] = latest_url # Some sources/datasets are too big for the workflow so we are excluding them. for source in SOURCES_TO_EXCLUDE: diff --git a/scripts/queue_runner.sh b/scripts/queue_runner.sh index c8a4183af0..3a2748ce4c 100644 --- a/scripts/queue_runner.sh +++ b/scripts/queue_runner.sh @@ -12,10 +12,10 @@ do ID=$(jq '.id' <<< "$item") URL=$(jq '.url' <<< "$item") path_name=${ID//\"/} - java -Xmx10G -Xms8G -jar gtfs-validator-snapshot/gtfs-validator*.jar --url $URL --output_base $OUTPUT_BASE/output/$path_name --validation_report_name latest.json --system_errors_report_name latest_errors.json --skip_validator_update + java -Xmx12G -Xms8G -jar gtfs-validator-snapshot/gtfs-validator*.jar --url $URL --output_base $OUTPUT_BASE/output/$path_name --validation_report_name latest.json --system_errors_report_name latest_errors.json --skip_validator_update if [ "$master" = "--include-master" ]; then - java -Xmx10G -Xms8G -jar gtfs-validator-master/gtfs-validator*.jar --url $URL --output_base $OUTPUT_BASE/output/$path_name --validation_report_name reference.json --system_errors_report_name reference_errors.json --skip_validator_update + java -Xmx12G -Xms8G -jar gtfs-validator-master/gtfs-validator*.jar --url $URL --output_base $OUTPUT_BASE/output/$path_name --validation_report_name reference.json --system_errors_report_name reference_errors.json --skip_validator_update fi; wait done From 92c8c678875959b2a7b0d053c30d52a09efa9dee Mon Sep 17 00:00:00 2001 From: jcpitre Date: Thu, 12 Sep 2024 10:25:17 -0400 Subject: [PATCH 7/7] Removed temporary testing code. --- .../mobility-database-harvester/harvest_latest_versions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/mobility-database-harvester/harvest_latest_versions.py b/scripts/mobility-database-harvester/harvest_latest_versions.py index dcf3de331c..0ddef96fba 100644 --- a/scripts/mobility-database-harvester/harvest_latest_versions.py +++ b/scripts/mobility-database-harvester/harvest_latest_versions.py @@ -134,10 +134,7 @@ def harvest_latest_versions(to_sample): for index, latest_url in catalogs_gtfs[LATEST_URL].items(): source_file_name = latest_url.replace(URL_PREFIX, "").replace(URL_SUFFIX, "") - # Only keep the source that contains gb-unknown-uk-aggregate-feed-gtfs-2014 for testing - # Remove before merging - if "gb-unknown-uk-aggregate-feed-gtfs-2014" in source_file_name: - latest_versions[source_file_name] = latest_url + latest_versions[source_file_name] = latest_url # Some sources/datasets are too big for the workflow so we are excluding them. for source in SOURCES_TO_EXCLUDE: