Skip to content

Commit

Permalink
store: Add more debug logs when subgraph is marked unhealthy (#5662)
Browse files Browse the repository at this point in the history
  • Loading branch information
incrypto32 authored Oct 11, 2024
1 parent fbb4589 commit 1642eaf
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 6 deletions.
29 changes: 25 additions & 4 deletions store/postgres/src/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ use diesel::{
sql_types::{Nullable, Text},
};
use graph::{
blockchain::block_stream::FirehoseCursor, data::subgraph::schema::SubgraphError, env::ENV_VARS,
blockchain::block_stream::FirehoseCursor,
data::subgraph::schema::SubgraphError,
env::ENV_VARS,
schema::EntityType,
slog::{debug, Logger},
};
use graph::{
data::store::scalar::ToPrimitive,
Expand Down Expand Up @@ -890,16 +893,24 @@ pub fn update_deployment_status(
/// is healthy as of that block; errors are inserted according to the
/// `block_ptr` they contain
pub(crate) fn insert_subgraph_errors(
logger: &Logger,
conn: &mut PgConnection,
id: &DeploymentHash,
deterministic_errors: &[SubgraphError],
latest_block: BlockNumber,
) -> Result<(), StoreError> {
debug!(
logger,
"Inserting deterministic errors to the db";
"subgraph" => id.to_string(),
"errors" => deterministic_errors.len()
);

for error in deterministic_errors {
insert_subgraph_error(conn, error)?;
}

check_health(conn, id, latest_block)
check_health(logger, conn, id, latest_block)
}

#[cfg(debug_assertions)]
Expand All @@ -918,6 +929,7 @@ pub(crate) fn error_count(
/// Checks if the subgraph is healthy or unhealthy as of the given block, based on the presence
/// of deterministic errors up to that block. Has no effect on failed subgraphs.
fn check_health(
logger: &Logger,
conn: &mut PgConnection,
id: &DeploymentHash,
block: BlockNumber,
Expand All @@ -927,7 +939,15 @@ fn check_health(
let has_errors = has_deterministic_errors(conn, id, block)?;

let (new, old) = match has_errors {
true => (SubgraphHealth::Unhealthy, SubgraphHealth::Healthy),
true => {
debug!(
logger,
"Subgraph has deterministic errors. Marking as unhealthy";
"subgraph" => id.to_string(),
"block" => block
);
(SubgraphHealth::Unhealthy, SubgraphHealth::Healthy)
}
false => (SubgraphHealth::Healthy, SubgraphHealth::Unhealthy),
};

Expand Down Expand Up @@ -979,6 +999,7 @@ pub(crate) fn entities_with_causality_region(

/// Reverts the errors and updates the subgraph health if necessary.
pub(crate) fn revert_subgraph_errors(
logger: &Logger,
conn: &mut PgConnection,
id: &DeploymentHash,
reverted_block: BlockNumber,
Expand All @@ -997,7 +1018,7 @@ pub(crate) fn revert_subgraph_errors(
// The result will be the same at `reverted_block` or `reverted_block - 1` since the errors at
// `reverted_block` were just deleted, but semantically we care about `reverted_block - 1` which
// is the block being reverted to.
check_health(conn, id, reverted_block - 1)?;
check_health(&logger, conn, id, reverted_block - 1)?;

// If the deployment is failed in both `failed` and `status` columns,
// update both values respectively to `false` and `healthy`. Basically
Expand Down
10 changes: 9 additions & 1 deletion store/postgres/src/deployment_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1138,13 +1138,20 @@ impl DeploymentStore {

if !batch.deterministic_errors.is_empty() {
deployment::insert_subgraph_errors(
&self.logger,
conn,
&site.deployment,
&batch.deterministic_errors,
batch.block_ptr.number,
)?;

if batch.is_non_fatal_errors_active {
debug!(
logger,
"Updating non-fatal errors for subgraph";
"subgraph" => site.deployment.to_string(),
"block" => batch.block_ptr.number,
);
deployment::update_non_fatal_errors(
conn,
&site.deployment,
Expand Down Expand Up @@ -1273,6 +1280,7 @@ impl DeploymentStore {
firehose_cursor: &FirehoseCursor,
truncate: bool,
) -> Result<StoreEvent, StoreError> {
let logger = self.logger.cheap_clone();
let event = deployment::with_lock(conn, &site, |conn| {
conn.transaction(|conn| -> Result<_, StoreError> {
// The revert functions want the number of the first block that we need to get rid of
Expand Down Expand Up @@ -1303,7 +1311,7 @@ impl DeploymentStore {
// importantly creation of dynamic data sources. We ensure in the
// rest of the code that we only record history for those meta data
// changes that might need to be reverted
Layout::revert_metadata(conn, &site, block)?;
Layout::revert_metadata(&logger, conn, &site, block)?;

Ok(event)
})
Expand Down
3 changes: 2 additions & 1 deletion store/postgres/src/relational.rs
Original file line number Diff line number Diff line change
Expand Up @@ -930,12 +930,13 @@ impl Layout {
/// For metadata, reversion always means deletion since the metadata that
/// is subject to reversion is only ever created but never updated
pub fn revert_metadata(
logger: &Logger,
conn: &mut PgConnection,
site: &Site,
block: BlockNumber,
) -> Result<(), StoreError> {
crate::dynds::revert(conn, site, block)?;
crate::deployment::revert_subgraph_errors(conn, &site.deployment, block)?;
crate::deployment::revert_subgraph_errors(logger, conn, &site.deployment, block)?;

Ok(())
}
Expand Down

0 comments on commit 1642eaf

Please sign in to comment.