From d8e682bc67d52214231a9b29cd53058e9f3e9416 Mon Sep 17 00:00:00 2001 From: Ryan Goetz Date: Thu, 25 Jul 2024 08:26:36 -1000 Subject: [PATCH 1/5] Find an activity's stacktrace from a span --- .../merlin/driver/engine/DirectiveDetail.java | 10 +++++++ .../driver/engine/SimulationEngine.java | 28 ++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) create mode 100644 merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/DirectiveDetail.java diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/DirectiveDetail.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/DirectiveDetail.java new file mode 100644 index 0000000000..ab2531069c --- /dev/null +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/DirectiveDetail.java @@ -0,0 +1,10 @@ +package gov.nasa.jpl.aerie.merlin.driver.engine; + +import gov.nasa.jpl.aerie.merlin.driver.ActivityDirectiveId; +import gov.nasa.jpl.aerie.merlin.driver.SerializedActivity; +import gov.nasa.jpl.aerie.merlin.protocol.types.SerializedValue; + +import java.util.List; +import java.util.Optional; + +public record DirectiveDetail(Optional directiveId, List activityStackTrace) {} diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java index 585012c67f..d72059e897 100644 --- a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java @@ -53,6 +53,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.function.Consumer; +import java.util.stream.Collectors; /** * A representation of the work remaining to do during a simulation, and its accumulated results. @@ -676,29 +677,36 @@ void extractOutput(final SerializableTopic topic, final Event ev, final SpanI } } + /** * Get an Activity Directive Id from a SpanId, if the span is a descendent of a directive. */ - public Optional getDirectiveIdFromSpan( + public DirectiveDetail getDirectiveDetailsFromSpan( final Topic activityTopic, final Iterable> serializableTopics, final SpanId spanId ) { // Collect per-span information from the event graph. - final var spanInfo = new SpanInfo(); - for (final var point : this.timeline) { - if (!(point instanceof TemporalEventSource.TimePoint.Commit p)) continue; - - final var trait = new SpanInfo.Trait(serializableTopics, activityTopic); - p.events().evaluate(trait, trait::atom).accept(spanInfo); - } + final var spanInfo = computeSpanInfo(activityTopic, serializableTopics, this.timeline); - // Identify the nearest ancestor directive + // Identify the nearest ancestor directive by walking up the parent + // span tree. Save the activity trace along the way Optional directiveSpanId = Optional.of(spanId); + final var activityStackTrace = new LinkedList(); while (directiveSpanId.isPresent() && !spanInfo.isDirective(directiveSpanId.get())) { + activityStackTrace.add(spanInfo.input().get(directiveSpanId.get())); directiveSpanId = this.getSpan(directiveSpanId.get()).parent(); } - return directiveSpanId.map(spanInfo::getDirective); + + // Add final top level parent activity to the stack trace if present + if( directiveSpanId.isPresent()){ + activityStackTrace.add(spanInfo.input().get(directiveSpanId.get())); + } + + return new DirectiveDetail( + directiveSpanId.map(spanInfo::getDirective), + // remove null activities from the stack trace and reverse order + activityStackTrace.stream().filter(a -> a != null).collect(Collectors.toList()).reversed()); } public record SimulationActivityExtract( From c1b4786ed1794b50f28702157e903d01cbd48cbe Mon Sep 17 00:00:00 2001 From: Ryan Goetz Date: Thu, 25 Jul 2024 08:27:27 -1000 Subject: [PATCH 2/5] Pass along stack trace to the SimulationException --- .../aerie/merlin/driver/CachedSimulationEngine.java | 11 ++++++++--- .../merlin/driver/CheckpointSimulationDriver.java | 11 ++++++++--- .../jpl/aerie/merlin/driver/SimulationDriver.java | 11 ++++++++--- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CachedSimulationEngine.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CachedSimulationEngine.java index 9b706ec425..2ce636965a 100644 --- a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CachedSimulationEngine.java +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CachedSimulationEngine.java @@ -40,9 +40,14 @@ public static CachedSimulationEngine empty(final MissionModel missionModel, f } catch (SpanException ex) { // Swallowing the spanException as the internal `spanId` is not user meaningful info. final var topics = missionModel.getTopics(); - final var directiveId = engine.getDirectiveIdFromSpan(activityTopic, topics, ex.spanId); - if (directiveId.isPresent()) { - throw new SimulationException(Duration.ZERO, simulationStartTime, directiveId.get(), ex.cause); + final var directiveDetail = engine.getDirectiveDetailsFromSpan(activityTopic, topics, ex.spanId); + if (directiveDetail.directiveId().isPresent()) { + throw new SimulationException( + Duration.ZERO, + simulationStartTime, + directiveDetail.directiveId().get(), + directiveDetail.activityStackTrace(), + ex.cause); } throw new SimulationException(Duration.ZERO, simulationStartTime, ex.cause); } catch (Throwable ex) { diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CheckpointSimulationDriver.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CheckpointSimulationDriver.java index ba8af75fff..6f6d42cee8 100644 --- a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CheckpointSimulationDriver.java +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/CheckpointSimulationDriver.java @@ -305,9 +305,14 @@ public static SimulationResultsComputerInputs simulateWithCheckpoints( elapsedTime = engine.getElapsedTime(); // Swallowing the spanException as the internal `spanId` is not user meaningful info. final var topics = missionModel.getTopics(); - final var directiveId = engine.getDirectiveIdFromSpan(activityTopic, topics, ex.spanId); - if (directiveId.isPresent()) { - throw new SimulationException(elapsedTime, simulationStartTime, directiveId.get(), ex.cause); + final var directiveDetail = engine.getDirectiveDetailsFromSpan(activityTopic, topics, ex.spanId); + if (directiveDetail.directiveId().isPresent()) { + throw new SimulationException( + elapsedTime, + simulationStartTime, + directiveDetail.directiveId().get(), + directiveDetail.activityStackTrace(), + ex.cause); } throw new SimulationException(elapsedTime, simulationStartTime, ex.cause); } catch (Throwable ex) { diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationDriver.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationDriver.java index e92e748758..29f7ea7dcf 100644 --- a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationDriver.java +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationDriver.java @@ -109,9 +109,14 @@ public static SimulationResults simulate( } catch (SpanException ex) { // Swallowing the spanException as the internal `spanId` is not user meaningful info. final var topics = missionModel.getTopics(); - final var directiveId = engine.getDirectiveIdFromSpan(activityTopic, topics, ex.spanId); - if(directiveId.isPresent()) { - throw new SimulationException(engine.getElapsedTime(), simulationStartTime, directiveId.get(), ex.cause); + final var directiveDetail = engine.getDirectiveDetailsFromSpan(activityTopic, topics, ex.spanId); + if(directiveDetail.directiveId().isPresent()) { + throw new SimulationException( + engine.getElapsedTime(), + simulationStartTime, + directiveDetail.directiveId().get(), + directiveDetail.activityStackTrace(), + ex.cause); } throw new SimulationException(engine.getElapsedTime(), simulationStartTime, ex.cause); } catch (Throwable ex) { From 684f1234f43f16c9f77ee18c66018c1a637b550e Mon Sep 17 00:00:00 2001 From: Ryan Goetz Date: Thu, 25 Jul 2024 08:28:49 -1000 Subject: [PATCH 3/5] Add additional messages to SimulationException * Added effected activity type * Added a stack trace showing that hierarchy --- .../merlin/driver/SimulationException.java | 18 +++++++++++++++++- .../server/services/SimulationAgent.java | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationException.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationException.java index acd8e1fadf..87548c924e 100644 --- a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationException.java +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/SimulationException.java @@ -7,7 +7,9 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.temporal.ChronoField; +import java.util.List; import java.util.Optional; +import java.util.stream.Collectors; import static gov.nasa.jpl.aerie.merlin.protocol.types.Duration.HOUR; import static gov.nasa.jpl.aerie.merlin.protocol.types.Duration.MICROSECOND; @@ -28,20 +30,34 @@ public class SimulationException extends RuntimeException { public final Instant instant; public final Throwable cause; public final Optional directiveId; + public final Optional activityType; + public final Optional activityStackTrace; public SimulationException(final Duration elapsedTime, final Instant startTime, final Throwable cause) { super("Exception occurred " + formatDuration(elapsedTime) + " into the simulation at " + formatInstant(addDurationToInstant(startTime, elapsedTime)), cause); this.directiveId = Optional.empty(); + this.activityType = Optional.empty(); + this.activityStackTrace = Optional.empty(); this.elapsedTime = elapsedTime; this.instant = addDurationToInstant(startTime, elapsedTime); this.cause = cause; } - public SimulationException(final Duration elapsedTime, final Instant startTime, final ActivityDirectiveId directiveId, final Throwable cause) { + public SimulationException( + final Duration elapsedTime, + final Instant startTime, + final ActivityDirectiveId directiveId, + final List activityStackTrace, + final Throwable cause) { super("Exception occurred " + formatDuration(elapsedTime) + " into the simulation at " + formatInstant(addDurationToInstant(startTime, elapsedTime)) + " while simulating activity directive with id " +directiveId.id(), cause); this.directiveId = Optional.of(directiveId); + this.activityType = activityStackTrace.isEmpty() ? Optional.empty() : Optional.of(activityStackTrace.getFirst().getTypeName()); + this.activityStackTrace = activityStackTrace.isEmpty() ? Optional.empty(): Optional.of(activityStackTrace.stream().map( serializedActivity -> { + final var index = activityStackTrace.indexOf(serializedActivity); + return (index > 0 ? "|" : "") +"-".repeat(index) + serializedActivity.getTypeName(); + }).collect(Collectors.joining("\n"))); this.elapsedTime = elapsedTime; this.instant = addDurationToInstant(startTime, elapsedTime); this.cause = cause; diff --git a/merlin-server/src/main/java/gov/nasa/jpl/aerie/merlin/server/services/SimulationAgent.java b/merlin-server/src/main/java/gov/nasa/jpl/aerie/merlin/server/services/SimulationAgent.java index 0023d6a04e..b999b3e541 100644 --- a/merlin-server/src/main/java/gov/nasa/jpl/aerie/merlin/server/services/SimulationAgent.java +++ b/merlin-server/src/main/java/gov/nasa/jpl/aerie/merlin/server/services/SimulationAgent.java @@ -97,6 +97,8 @@ public void simulate( .add("elapsedTime", SimulationException.formatDuration(ex.elapsedTime)) .add("utcTimeDoy", SimulationException.formatInstant(ex.instant)); ex.directiveId.ifPresent(directiveId -> errorMsgBuilder.add("executingDirectiveId", directiveId.id())); + ex.activityType.ifPresent(activityType -> errorMsgBuilder.add("executingActivityType", activityType)); + ex.activityStackTrace.ifPresent(activityStackTrace -> errorMsgBuilder.add("activityStackTrace", activityStackTrace)); writer.failWith(b -> b .type("SIMULATION_EXCEPTION") .message(ex.cause.getMessage()) From 3cff3f43c73a106b0ebe3d452ebd43f1519e4674 Mon Sep 17 00:00:00 2001 From: Ryan Goetz Date: Thu, 25 Jul 2024 08:34:35 -1000 Subject: [PATCH 4/5] Updated the e2e test * Create a Parent Activity, which will be used to spawn children 2 levels deep. This allows for the error message to have a more detail stacktrace * Look for the stacktrace and effected activity within the error message. --- .../nasa/jpl/aerie/e2e/SimulationTests.java | 31 +++++++++++++++---- .../activities/DaemonTaskActivity.java | 25 +++++++++++++++ .../aerie/foomissionmodel/package-info.java | 2 ++ 3 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/activities/DaemonTaskActivity.java diff --git a/e2e-tests/src/test/java/gov/nasa/jpl/aerie/e2e/SimulationTests.java b/e2e-tests/src/test/java/gov/nasa/jpl/aerie/e2e/SimulationTests.java index 2ee2202b6f..d509cd36e2 100644 --- a/e2e-tests/src/test/java/gov/nasa/jpl/aerie/e2e/SimulationTests.java +++ b/e2e-tests/src/test/java/gov/nasa/jpl/aerie/e2e/SimulationTests.java @@ -469,8 +469,13 @@ void directiveIdIncluded() throws IOException { final var exception = reason.data(); // The error includes the directive id that was executing + // and the executing activity type and stack trace assertTrue(exception.containsKey("executingDirectiveId")); + assertTrue(exception.containsKey("executingActivityType")); assertEquals(dirId, exception.getInt("executingDirectiveId")); + assertEquals("DaemonCheckerActivity", exception.getString("executingActivityType")); + assertTrue(exception.containsKey("activityStackTrace")); + assertEquals("DaemonCheckerActivity", exception.getString("activityStackTrace")); // The error message is correct assertEquals("Minutes elapsed is incorrect. TimeTrackerDaemon may have stopped.\n\tExpected: 1 Actual: 59", @@ -496,7 +501,7 @@ void directiveIdOnDescendant() throws IOException { // Setup: Insert a directive that will throw int dirId = hasura.insertActivity( fooPlan, - "DaemonCheckerSpawner", + "DaemonTaskActivity", "01:00:00", Json.createObjectBuilder().add("minutesElapsed", 1).add("spawnDelay", 1).build()); @@ -512,21 +517,29 @@ void directiveIdOnDescendant() throws IOException { final var exception = reason.data(); // The error includes the directive id that was executing + // and the executing activity type and stack trace assertTrue(exception.containsKey("executingDirectiveId")); assertEquals(dirId, exception.getInt("executingDirectiveId")); + assertTrue(exception.containsKey("executingActivityType")); + assertEquals(dirId, exception.getInt("executingDirectiveId")); + assertEquals("DaemonTaskActivity", exception.getString("executingActivityType")); + assertTrue(exception.containsKey("activityStackTrace")); + assertEquals("DaemonTaskActivity\n" + + "|-DaemonCheckerSpawner\n" + + "|--DaemonCheckerActivity", exception.getString("activityStackTrace")); // The error message is correct - assertEquals("Minutes elapsed is incorrect. TimeTrackerDaemon may have stopped.\n\tExpected: 1 Actual: 60", + assertEquals("Minutes elapsed is incorrect. TimeTrackerDaemon may have stopped.\n\tExpected: 1 Actual: 61", reason.message()); // The error was thrown at the correct time assertTrue(exception.containsKey("utcTimeDoy")); - assertEquals("2023-001T01:01:00", exception.getString("utcTimeDoy")); + assertEquals("2023-001T01:02:00", exception.getString("utcTimeDoy")); // The trace starts at the original exception and doesn't include the intermediary SpanException and SimulationException final var expectedStart = """ java.lang.RuntimeException: Minutes elapsed is incorrect. TimeTrackerDaemon may have stopped. - \tExpected: 1 Actual: 60 + \tExpected: 1 Actual: 61 \tat gov.nasa.jpl.aerie.foomissionmodel.activities.DaemonCheckerActivity.run(DaemonCheckerActivity.java"""; assertTrue(reason.trace().startsWith(expectedStart)); } @@ -551,8 +564,11 @@ void noDirectiveIdOnDaemon() throws IOException { final var exception = reason.data(); - // The error does not include any directive ids + // The error does not include any directive ids, + // executingActivityType, and activityStackTrace assertFalse(exception.containsKey("executingDirectiveId")); + assertFalse(exception.containsKey("executingActivityType")); + assertFalse(exception.containsKey("activityStackTrace")); // The error message is correct assertEquals("Daemon task exception raised.", reason.message()); @@ -594,8 +610,11 @@ void noDirectiveIdDaemonMidDirective() throws IOException { final var exception = reason.data(); - // The error does not include any directive ids + // The error does not include any directive ids, + // executingActivityType, and activityStackTrace assertFalse(exception.containsKey("executingDirectiveId")); + assertFalse(exception.containsKey("executingActivityType")); + assertFalse(exception.containsKey("activityStackTrace")); // The error message is correct assertEquals("Daemon task exception raised.", reason.message()); diff --git a/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/activities/DaemonTaskActivity.java b/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/activities/DaemonTaskActivity.java new file mode 100644 index 0000000000..a9be5dac1f --- /dev/null +++ b/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/activities/DaemonTaskActivity.java @@ -0,0 +1,25 @@ +package gov.nasa.jpl.aerie.foomissionmodel.activities; + +import gov.nasa.jpl.aerie.foomissionmodel.Mission; +import gov.nasa.jpl.aerie.merlin.framework.annotations.ActivityType; +import gov.nasa.jpl.aerie.merlin.protocol.types.Duration; + +import static gov.nasa.jpl.aerie.foomissionmodel.generated.ActivityActions.call; +import static gov.nasa.jpl.aerie.merlin.framework.ModelActions.delay; + +/** + * An activity that spawns a DaemonCheckerSpawner which spawns a DaemonCheckerActivity after delaying. + * Useful for testing the behavior of exceptions thrown by child activities and the stack trace back + * to this activity. + * + * @param minutesElapsed The expected number of minutes elapsed when the DaemonCheckerActivity begins + * @param spawnDelay The number of minutes to delay between the start of this activity and spawning the DaemonCheckerActivity + */ +@ActivityType("DaemonTaskActivity") +public record DaemonTaskActivity(int minutesElapsed, int spawnDelay) { + @ActivityType.EffectModel + public void run(final Mission mission) { + delay(Duration.of(spawnDelay, Duration.MINUTE)); + call(mission, new DaemonCheckerSpawner(minutesElapsed,spawnDelay)); + } +} diff --git a/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/package-info.java b/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/package-info.java index 582e3422f5..1c09bdc0d5 100644 --- a/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/package-info.java +++ b/examples/foo-missionmodel/src/main/java/gov/nasa/jpl/aerie/foomissionmodel/package-info.java @@ -16,6 +16,7 @@ @WithActivityType(ZeroDurationUncontrollableActivity.class) @WithActivityType(DaemonCheckerActivity.class) @WithActivityType(DaemonCheckerSpawner.class) +@WithActivityType(DaemonTaskActivity.class) @WithActivityType(DecompositionTestActivities.ParentActivity.class) @WithActivityType(DecompositionTestActivities.ChildActivity.class) @@ -30,6 +31,7 @@ import gov.nasa.jpl.aerie.foomissionmodel.activities.ControllableDurationActivity; import gov.nasa.jpl.aerie.foomissionmodel.activities.DaemonCheckerActivity; import gov.nasa.jpl.aerie.foomissionmodel.activities.DaemonCheckerSpawner; +import gov.nasa.jpl.aerie.foomissionmodel.activities.DaemonTaskActivity; import gov.nasa.jpl.aerie.foomissionmodel.activities.DecompositionTestActivities; import gov.nasa.jpl.aerie.foomissionmodel.activities.FooActivity; import gov.nasa.jpl.aerie.foomissionmodel.activities.LateRiserActivity; From e2a8247b568d277e9c9d1520be8a075a19c2a741 Mon Sep 17 00:00:00 2001 From: Ryan Goetz <70245883+goetzrrGit@users.noreply.github.com> Date: Mon, 29 Jul 2024 11:31:18 -0700 Subject: [PATCH 5/5] Update merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java minor update with spacing Co-authored-by: Mythicaeda --- .../nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java index d72059e897..8344f129b0 100644 --- a/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java +++ b/merlin-driver/src/main/java/gov/nasa/jpl/aerie/merlin/driver/engine/SimulationEngine.java @@ -699,7 +699,7 @@ public DirectiveDetail getDirectiveDetailsFromSpan( } // Add final top level parent activity to the stack trace if present - if( directiveSpanId.isPresent()){ + if (directiveSpanId.isPresent()) { activityStackTrace.add(spanInfo.input().get(directiveSpanId.get())); }