From 1642eafd90b805fa382a6f13a1597150f2231a68 Mon Sep 17 00:00:00 2001 From: Krishnanand V P <44740264+incrypto32@users.noreply.github.com> Date: Fri, 11 Oct 2024 11:36:50 +0100 Subject: [PATCH] store: Add more debug logs when subgraph is marked unhealthy (#5662) --- store/postgres/src/deployment.rs | 29 ++++++++++++++++++++++---- store/postgres/src/deployment_store.rs | 10 ++++++++- store/postgres/src/relational.rs | 3 ++- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/store/postgres/src/deployment.rs b/store/postgres/src/deployment.rs index 998070658eb..efe05a666b9 100644 --- a/store/postgres/src/deployment.rs +++ b/store/postgres/src/deployment.rs @@ -14,8 +14,11 @@ use diesel::{ sql_types::{Nullable, Text}, }; use graph::{ - blockchain::block_stream::FirehoseCursor, data::subgraph::schema::SubgraphError, env::ENV_VARS, + blockchain::block_stream::FirehoseCursor, + data::subgraph::schema::SubgraphError, + env::ENV_VARS, schema::EntityType, + slog::{debug, Logger}, }; use graph::{ data::store::scalar::ToPrimitive, @@ -890,16 +893,24 @@ pub fn update_deployment_status( /// is healthy as of that block; errors are inserted according to the /// `block_ptr` they contain pub(crate) fn insert_subgraph_errors( + logger: &Logger, conn: &mut PgConnection, id: &DeploymentHash, deterministic_errors: &[SubgraphError], latest_block: BlockNumber, ) -> Result<(), StoreError> { + debug!( + logger, + "Inserting deterministic errors to the db"; + "subgraph" => id.to_string(), + "errors" => deterministic_errors.len() + ); + for error in deterministic_errors { insert_subgraph_error(conn, error)?; } - check_health(conn, id, latest_block) + check_health(logger, conn, id, latest_block) } #[cfg(debug_assertions)] @@ -918,6 +929,7 @@ pub(crate) fn error_count( /// Checks if the subgraph is healthy or unhealthy as of the given block, or the subgraph latest /// block if `None`, based on the presence of deterministic errors. Has no effect on failed subgraphs. fn check_health( + logger: &Logger, conn: &mut PgConnection, id: &DeploymentHash, block: BlockNumber, @@ -927,7 +939,15 @@ fn check_health( let has_errors = has_deterministic_errors(conn, id, block)?; let (new, old) = match has_errors { - true => (SubgraphHealth::Unhealthy, SubgraphHealth::Healthy), + true => { + debug!( + logger, + "Subgraph has deterministic errors. Marking as unhealthy"; + "subgraph" => id.to_string(), + "block" => block + ); + (SubgraphHealth::Unhealthy, SubgraphHealth::Healthy) + } false => (SubgraphHealth::Healthy, SubgraphHealth::Unhealthy), }; @@ -979,6 +999,7 @@ pub(crate) fn entities_with_causality_region( /// Reverts the errors and updates the subgraph health if necessary. pub(crate) fn revert_subgraph_errors( + logger: &Logger, conn: &mut PgConnection, id: &DeploymentHash, reverted_block: BlockNumber, @@ -997,7 +1018,7 @@ pub(crate) fn revert_subgraph_errors( // The result will be the same at `reverted_block` or `reverted_block - 1` since the errors at // `reverted_block` were just deleted, but semantically we care about `reverted_block - 1` which // is the block being reverted to. - check_health(conn, id, reverted_block - 1)?; + check_health(&logger, conn, id, reverted_block - 1)?; // If the deployment is failed in both `failed` and `status` columns, // update both values respectively to `false` and `healthy`. Basically diff --git a/store/postgres/src/deployment_store.rs b/store/postgres/src/deployment_store.rs index 238d51397b2..5d418987e35 100644 --- a/store/postgres/src/deployment_store.rs +++ b/store/postgres/src/deployment_store.rs @@ -1138,6 +1138,7 @@ impl DeploymentStore { if !batch.deterministic_errors.is_empty() { deployment::insert_subgraph_errors( + &self.logger, conn, &site.deployment, &batch.deterministic_errors, @@ -1145,6 +1146,12 @@ impl DeploymentStore { )?; if batch.is_non_fatal_errors_active { + debug!( + logger, + "Updating non-fatal errors for subgraph"; + "subgraph" => site.deployment.to_string(), + "block" => batch.block_ptr.number, + ); deployment::update_non_fatal_errors( conn, &site.deployment, @@ -1273,6 +1280,7 @@ impl DeploymentStore { firehose_cursor: &FirehoseCursor, truncate: bool, ) -> Result { + let logger = self.logger.cheap_clone(); let event = deployment::with_lock(conn, &site, |conn| { conn.transaction(|conn| -> Result<_, StoreError> { // The revert functions want the number of the first block that we need to get rid of @@ -1303,7 +1311,7 @@ impl DeploymentStore { // importantly creation of dynamic data sources. We ensure in the // rest of the code that we only record history for those meta data // changes that might need to be reverted - Layout::revert_metadata(conn, &site, block)?; + Layout::revert_metadata(&logger, conn, &site, block)?; Ok(event) }) diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index 8ceb8d9c714..593ad386889 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -930,12 +930,13 @@ impl Layout { /// For metadata, reversion always means deletion since the metadata that /// is subject to reversion is only ever created but never updated pub fn revert_metadata( + logger: &Logger, conn: &mut PgConnection, site: &Site, block: BlockNumber, ) -> Result<(), StoreError> { crate::dynds::revert(conn, site, block)?; - crate::deployment::revert_subgraph_errors(conn, &site.deployment, block)?; + crate::deployment::revert_subgraph_errors(logger, conn, &site.deployment, block)?; Ok(()) }