diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 912dd985f66c06..e75bafdac96284 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -43,16 +43,16 @@ jobs: extra_pip_requirements: "apache-airflow~=2.4.3" extra_pip_extras: plugin-v2,test-airflow24 - python-version: "3.10" - extra_pip_requirements: 'apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt' + extra_pip_requirements: "apache-airflow~=2.6.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt" extra_pip_extras: plugin-v2 - python-version: "3.10" - extra_pip_requirements: 'apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt' + extra_pip_requirements: "apache-airflow~=2.7.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt" extra_pip_extras: plugin-v2 - python-version: "3.10" - extra_pip_requirements: 'apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt' + extra_pip_requirements: "apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt" extra_pip_extras: plugin-v2 - python-version: "3.11" - extra_pip_requirements: 'apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt' + extra_pip_requirements: "apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt" extra_pip_extras: plugin-v2 fail-fast: false steps: @@ -73,7 +73,7 @@ jobs: run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:build - name: pip freeze show list installed if: always() - run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze + run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v3 if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }} with: diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index f0b9038b610d22..37b6c93ec841ab 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -55,7 +55,7 @@ jobs: run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:dagster-plugin:lint :metadata-ingestion-modules:dagster-plugin:testQuick - name: pip freeze show list installed if: always() - run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && pip freeze + run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v3 if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }} with: diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml index 06a10883e70f54..99121f81099f2d 100644 --- a/.github/workflows/gx-plugin.yml +++ b/.github/workflows/gx-plugin.yml @@ -57,7 +57,7 @@ jobs: run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick - name: pip freeze show list installed if: always() - run: source 
metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze + run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v3 if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }} with: diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index cfb3693d89381f..c718febca398a9 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -83,7 +83,7 @@ jobs: - name: Debug info if: always() run: | - source metadata-ingestion/venv/bin/activate && pip freeze + source metadata-ingestion/venv/bin/activate && uv pip freeze set -x df -hl docker image ls diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index 09af0ad3f354a3..b0af00f92b7727 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -43,7 +43,7 @@ jobs: with: distribution: "zulu" java-version: 17 - - uses: gradle/gradle-build-action@v2 + - uses: gradle/actions/setup-gradle@v3 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: @@ -55,7 +55,7 @@ jobs: run: ./gradlew :metadata-ingestion-modules:prefect-plugin:lint :metadata-ingestion-modules:prefect-plugin:testQuick - name: pip freeze show list installed if: always() - run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && pip freeze + run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v3 if: ${{ always() && matrix.python-version == '3.10'}} with: diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 32dfba00d47dbf..d0d17fda263926 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -25,7 +25,7 @@ import java.util.Collections; import io.datahubproject.metadata.context.ActorContext; -import io.datahubproject.metadata.context.AuthorizerContext; +import io.datahubproject.metadata.context.AuthorizationContext; import io.datahubproject.metadata.context.EntityRegistryContext; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; @@ -183,10 +183,10 @@ protected OperationContext provideOperationContext( return OperationContext.builder() .operationContextConfig(systemConfig) .systemActorContext(systemActorContext) + // Authorizer.EMPTY is fine since it doesn't actually apply to system auth + .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build()) .searchContext(SearchContext.EMPTY) .entityRegistryContext(EntityRegistryContext.builder().build(EmptyEntityRegistry.EMPTY)) - // Authorizer.EMPTY doesn't actually apply to system auth - .authorizerContext(AuthorizerContext.builder().authorizer(Authorizer.EMPTY).build()) .build(systemAuthentication); } diff --git a/datahub-frontend/app/controllers/Application.java b/datahub-frontend/app/controllers/Application.java index d17e600aadc072..017847367de053 100644 --- a/datahub-frontend/app/controllers/Application.java +++ b/datahub-frontend/app/controllers/Application.java @@ -9,12 +9,15 @@ import akka.util.ByteString; import auth.Authenticator; import com.datahub.authentication.AuthenticationConstants; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; 
import com.linkedin.util.Pair; import com.typesafe.config.Config; import java.io.InputStream; import java.net.URI; import java.time.Duration; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Optional; @@ -33,6 +36,7 @@ import play.libs.ws.StandaloneWSClient; import play.libs.ws.ahc.StandaloneAhcWSClient; import play.mvc.Controller; +import play.mvc.Http.Cookie; import play.mvc.Http; import play.mvc.ResponseHeader; import play.mvc.Result; @@ -132,6 +136,9 @@ public CompletableFuture<Result> proxy(String path, Http.Request request) headers.put(Http.HeaderNames.X_FORWARDED_PROTO, List.of(schema)); } + // Get the current time to measure the duration of the request + Instant start = Instant.now(); + return _ws.url( String.format( "%s://%s:%s%s", protocol, metadataServiceHost, metadataServicePort, resolvedUri)) @@ -160,6 +167,15 @@ AuthenticationConstants.LEGACY_X_DATAHUB_ACTOR_HEADER, getDataHubActorHeader(req .execute() .thenApply( apiResponse -> { + // Log the query if it takes longer than the configured threshold and verbose logging is enabled + boolean verboseGraphQLLogging = _config.getBoolean("graphql.verbose.logging"); + int verboseGraphQLLongQueryMillis = _config.getInt("graphql.verbose.slowQueryMillis"); + Instant finish = Instant.now(); + long timeElapsed = Duration.between(start, finish).toMillis(); + if (verboseGraphQLLogging && timeElapsed >= verboseGraphQLLongQueryMillis) { + logSlowQuery(request, resolvedUri, timeElapsed); + } + final ResponseHeader header = new ResponseHeader( apiResponse.getStatus(), @@ -359,4 +375,34 @@ private String mapPath(@Nonnull final String path) { // Otherwise, return original path return path; } + + + /** + * Called if verbose logging is enabled and request takes longer than the slow query milliseconds defined in the config + * @param request GraphQL request that was made + * @param resolvedUri URI that was requested + * @param duration How long the query took to complete + */ + private void logSlowQuery(Http.Request request, String resolvedUri, float duration) { + StringBuilder jsonBody = new StringBuilder(); + Optional<Cookie> actorCookie = request.getCookie("actor"); + String actorValue = actorCookie.isPresent() ?
actorCookie.get().value() : "N/A"; + + try { + ObjectMapper mapper = new ObjectMapper(); + JsonNode jsonNode = request.body().asJson(); + ((ObjectNode) jsonNode).remove("query"); + jsonBody.append(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonNode)); + } + catch (Exception e) { + _logger.info("GraphQL Request Received: {}, Unable to parse JSON body", resolvedUri); + } + String jsonBodyStr = jsonBody.toString(); + _logger.info("Slow GraphQL Request Received: {}, Request query string: {}, Request actor: {}, Request JSON: {}, Request completed in {} ms", + resolvedUri, + request.queryString(), + actorValue, + jsonBodyStr, + duration); + } } diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index 63ff2c9166fbc9..be57a33b13564d 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -298,4 +298,10 @@ entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES} entityClient.restli.get.batchSize = 50 entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE} entityClient.restli.get.batchConcurrency = 2 -entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY} \ No newline at end of file +entityClient.restli.get.batchConcurrency = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_CONCURRENCY} + +# Enable verbose GraphQL logging +graphql.verbose.logging = false +graphql.verbose.logging = ${?GRAPHQL_VERBOSE_LOGGING} +graphql.verbose.slowQueryMillis = 2500 +graphql.verbose.slowQueryMillis = ${?GRAPHQL_VERBOSE_LONG_QUERY_MILLIS} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java index 4fb49d79a0aa70..ca60acaa805387 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java @@ -11,7 +11,6 @@ import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; import com.datahub.authorization.EntitySpec; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; @@ -21,7 +20,6 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.List; -import java.util.Set; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.reflect.ConstructorUtils; @@ -40,29 +38,25 @@ public class AuthorizationUtils { public static boolean canManageUsersAndGroups(@Nonnull QueryContext context) { return AuthUtil.isAuthorizedEntityType( - context.getActorUrn(), - context.getAuthorizer(), + context.getOperationContext(), MANAGE, List.of(CORP_USER_ENTITY_NAME, CORP_GROUP_ENTITY_NAME)); } public static boolean canManagePolicies(@Nonnull QueryContext context) { return AuthUtil.isAuthorizedEntityType( - context.getActorUrn(), context.getAuthorizer(), MANAGE, List.of(POLICY_ENTITY_NAME)); + context.getOperationContext(), MANAGE, List.of(POLICY_ENTITY_NAME)); } public static boolean canGeneratePersonalAccessToken(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), -
PoliciesConfig.GENERATE_PERSONAL_ACCESS_TOKENS_PRIVILEGE) - || AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), MANAGE_ACCESS_TOKENS); + context.getOperationContext(), PoliciesConfig.GENERATE_PERSONAL_ACCESS_TOKENS_PRIVILEGE) + || AuthUtil.isAuthorized(context.getOperationContext(), MANAGE_ACCESS_TOKENS); } public static boolean canManageTokens(@Nonnull QueryContext context) { return AuthUtil.isAuthorizedEntityType( - context.getActorUrn(), context.getAuthorizer(), MANAGE, List.of(ACCESS_TOKEN_ENTITY_NAME)); + context.getOperationContext(), MANAGE, List.of(ACCESS_TOKEN_ENTITY_NAME)); } /** @@ -78,13 +72,12 @@ public static boolean canCreateDomains(@Nonnull QueryContext context) { new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE.getType())))); - return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), orPrivilegeGroups, null); + return AuthUtil.isAuthorized(context.getOperationContext(), orPrivilegeGroups, null); } public static boolean canManageDomains(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE); } /** @@ -100,25 +93,22 @@ public static boolean canCreateTags(@Nonnull QueryContext context) { new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.MANAGE_TAGS_PRIVILEGE.getType())))); - return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), orPrivilegeGroups, null); + return AuthUtil.isAuthorized(context.getOperationContext(), orPrivilegeGroups, null); } public static boolean canManageTags(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_TAGS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_TAGS_PRIVILEGE); } public static boolean canDeleteEntity(@Nonnull Urn entityUrn, @Nonnull QueryContext context) { return AuthUtil.isAuthorizedEntityUrns( - context.getAuthorizer(), context.getActorUrn(), DELETE, List.of(entityUrn)); + context.getOperationContext(), DELETE, List.of(entityUrn)); } public static boolean canManageUserCredentials(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_USER_CREDENTIALS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_USER_CREDENTIALS_PRIVILEGE); } public static boolean canEditGroupMembers( @@ -130,12 +120,7 @@ public static boolean canEditGroupMembers( new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.EDIT_GROUP_MEMBERS_PRIVILEGE.getType())))); - return isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - CORP_GROUP_ENTITY_NAME, - groupUrnStr, - orPrivilegeGroups); + return isAuthorized(context, CORP_GROUP_ENTITY_NAME, groupUrnStr, orPrivilegeGroups); } public static boolean canCreateGlobalAnnouncements(@Nonnull QueryContext context) { @@ -149,27 +134,21 @@ public static boolean canCreateGlobalAnnouncements(@Nonnull QueryContext context ImmutableList.of( PoliciesConfig.MANAGE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE.getType())))); - return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), orPrivilegeGroups, null); + return AuthUtil.isAuthorized(context.getOperationContext(), orPrivilegeGroups, null); } public static boolean canManageGlobalAnnouncements(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - 
context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_GLOBAL_ANNOUNCEMENTS_PRIVILEGE); } public static boolean canManageGlobalViews(@Nonnull QueryContext context) { - return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_GLOBAL_VIEWS); + return AuthUtil.isAuthorized(context.getOperationContext(), PoliciesConfig.MANAGE_GLOBAL_VIEWS); } public static boolean canManageOwnershipTypes(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_GLOBAL_OWNERSHIP_TYPES); + context.getOperationContext(), PoliciesConfig.MANAGE_GLOBAL_OWNERSHIP_TYPES); } public static boolean canEditProperties(@Nonnull Urn targetUrn, @Nonnull QueryContext context) { @@ -183,11 +162,7 @@ public static boolean canEditProperties(@Nonnull Urn targetUrn, @Nonnull QueryCo ImmutableList.of(PoliciesConfig.EDIT_ENTITY_PROPERTIES_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static boolean canEditEntityQueries( @@ -202,11 +177,7 @@ public static boolean canEditEntityQueries( .allMatch( entityUrn -> isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups)); + context, entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups)); } public static boolean canCreateQuery( @@ -251,28 +222,7 @@ public static boolean canView(@Nonnull OperationContext opContext, @Nonnull Urn && !opContext.isSystemAuth() && VIEW_RESTRICTED_ENTITY_TYPES.contains(urn.getEntityType())) { - return opContext - .getViewAuthorizationContext() - .map( - viewAuthContext -> { - - // check cache - if (viewAuthContext.canView(Set.of(urn))) { - return true; - } - - if (!canViewEntity( - opContext.getSessionAuthentication().getActor().toUrnStr(), - opContext.getAuthorizerContext().getAuthorizer(), - urn)) { - return false; - } - - // cache viewable urn - viewAuthContext.addViewableUrns(Set.of(urn)); - return true; - }) - .orElse(false); + return canViewEntity(opContext, urn); } return true; } @@ -386,38 +336,32 @@ public static T restrictEntity(@Nonnull Object entity, Class clazz) { public static boolean canManageStructuredProperties(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_STRUCTURED_PROPERTIES_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_STRUCTURED_PROPERTIES_PRIVILEGE); } public static boolean canManageForms(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_DOCUMENTATION_FORMS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_DOCUMENTATION_FORMS_PRIVILEGE); } public static boolean canManageFeatures(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_FEATURES_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_FEATURES_PRIVILEGE); } public static boolean isAuthorized( - @Nonnull Authorizer authorizer, - @Nonnull String actor, + @Nonnull QueryContext context, @Nonnull String resourceType, @Nonnull 
String resource, @Nonnull DisjunctivePrivilegeGroup privilegeGroup) { final EntitySpec resourceSpec = new EntitySpec(resourceType, resource); - return AuthUtil.isAuthorized(authorizer, actor, privilegeGroup, resourceSpec); + return AuthUtil.isAuthorized(context.getOperationContext(), privilegeGroup, resourceSpec); } public static boolean isViewDatasetUsageAuthorized( final QueryContext context, final Urn resourceUrn) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), + context.getOperationContext(), PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE, new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString())); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java index 1f4ebbb88bf1a6..b1101ae3ee8657 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/MeResolver.java @@ -108,20 +108,19 @@ public CompletableFuture<AuthenticatedUser> get(DataFetchingEnvironment environm /** Returns true if the authenticated user has privileges to view analytics. */ private boolean canViewAnalytics(final QueryContext context) { - return isAuthorized(context.getActorUrn(), context.getAuthorizer(), ANALYTICS, READ); + return isAuthorized(context.getOperationContext(), ANALYTICS, READ); } /** Returns true if the authenticated user has privileges to manage policies. */ private boolean canManagePolicies(final QueryContext context) { return isAuthorizedEntityType( - context.getActorUrn(), context.getAuthorizer(), MANAGE, List.of(POLICY_ENTITY_NAME)); + context.getOperationContext(), MANAGE, List.of(POLICY_ENTITY_NAME)); } /** Returns true if the authenticated user has privileges to manage users & groups. */ private boolean canManageUsersGroups(final QueryContext context) { return isAuthorizedEntityType( - context.getActorUrn(), - context.getAuthorizer(), + context.getOperationContext(), MANAGE, List.of(CORP_USER_ENTITY_NAME, CORP_GROUP_ENTITY_NAME)); } @@ -129,46 +128,37 @@ private boolean canManageUsersGroups(final QueryContext context) { /** Returns true if the authenticated user has privileges to generate personal access tokens */ private boolean canGeneratePersonalAccessToken(final QueryContext context) { return isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.GENERATE_PERSONAL_ACCESS_TOKENS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.GENERATE_PERSONAL_ACCESS_TOKENS_PRIVILEGE); } /** Returns true if the authenticated user has privileges to view tests. */ private boolean canViewTests(final QueryContext context) { - return isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.VIEW_TESTS_PRIVILEGE); + return isAuthorized(context.getOperationContext(), PoliciesConfig.VIEW_TESTS_PRIVILEGE); } /** Returns true if the authenticated user has privileges to manage (add or remove) tests.
*/ private boolean canManageTests(final QueryContext context) { - return isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_TESTS_PRIVILEGE); + return isAuthorized(context.getOperationContext(), PoliciesConfig.MANAGE_TESTS_PRIVILEGE); } /** Returns true if the authenticated user has privileges to manage domains */ private boolean canManageDomains(final QueryContext context) { - return isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE); + return isAuthorized(context.getOperationContext(), PoliciesConfig.MANAGE_DOMAINS_PRIVILEGE); } /** Returns true if the authenticated user has privileges to manage access tokens */ private boolean canManageTokens(final QueryContext context) { - return isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_ACCESS_TOKENS); + return isAuthorized(context.getOperationContext(), PoliciesConfig.MANAGE_ACCESS_TOKENS); } /** Returns true if the authenticated user has privileges to manage glossaries */ private boolean canManageGlossaries(final QueryContext context) { - return isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); + return isAuthorized(context.getOperationContext(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); } /** Returns true if the authenticated user has privileges to manage user credentials */ private boolean canManageUserCredentials(@Nonnull QueryContext context) { return isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_USER_CREDENTIALS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_USER_CREDENTIALS_PRIVILEGE); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionUtils.java index 757ff38de60065..a632ab5487000e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionUtils.java @@ -18,10 +18,6 @@ public static boolean isAuthorizedToEditAssertionFromAssertee( new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_ASSERTIONS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - asserteeUrn.getEntityType(), - asserteeUrn.toString(), - orPrivilegeGroups); + context, asserteeUrn.getEntityType(), asserteeUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java index 1cf233221d4d33..d36611da0dc4db 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/DeleteAssertionResolver.java @@ -113,11 +113,7 @@ private boolean isAuthorizedToDeleteAssertionFromAssertee( new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_ASSERTIONS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - asserteeUrn.getEntityType(), - asserteeUrn.toString(), - orPrivilegeGroups); + context, 
asserteeUrn.getEntityType(), asserteeUrn.toString(), orPrivilegeGroups); } private Urn getAsserteeUrnFromInfo(final AssertionInfo info) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/BusinessAttributeAuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/BusinessAttributeAuthorizationUtils.java index 041f5e9ade77f0..364a5c982b0e03 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/BusinessAttributeAuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/businessattribute/BusinessAttributeAuthorizationUtils.java @@ -20,8 +20,7 @@ public static boolean canCreateBusinessAttribute(@Nonnull QueryContext context) new ConjunctivePrivilegeGroup( ImmutableList.of( PoliciesConfig.MANAGE_BUSINESS_ATTRIBUTE_PRIVILEGE.getType())))); - return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), orPrivilegeGroups, null); + return AuthUtil.isAuthorized(context.getOperationContext(), orPrivilegeGroups, null); } public static boolean canManageBusinessAttribute(@Nonnull QueryContext context) { @@ -31,7 +30,6 @@ public static boolean canManageBusinessAttribute(@Nonnull QueryContext context) new ConjunctivePrivilegeGroup( ImmutableList.of( PoliciesConfig.MANAGE_BUSINESS_ATTRIBUTE_PRIVILEGE.getType())))); - return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), orPrivilegeGroups, null); + return AuthUtil.isAuthorized(context.getOperationContext(), orPrivilegeGroups, null); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java index bcdd6460ae75ed..30c2fb672a0c30 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java @@ -14,9 +14,7 @@ public class ConnectionUtils { */ public static boolean canManageConnections(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.MANAGE_CONNECTIONS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_CONNECTIONS_PRIVILEGE); } private ConnectionUtils() {} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java index 3dd7cd9df63838..a04024a29dc97b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java @@ -21,11 +21,7 @@ public static boolean canEditDataContract(@Nonnull QueryContext context, Urn ent PoliciesConfig.EDIT_ENTITY_DATA_CONTRACT_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups); } private DataContractUtils() {} diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DataProductAuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DataProductAuthorizationUtils.java index f6fe11a587a39b..a8357fc0a1a3ff 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DataProductAuthorizationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/DataProductAuthorizationUtils.java @@ -30,11 +30,7 @@ public static boolean isAuthorizedToUpdateDataProductsForEntity( PoliciesConfig.EDIT_ENTITY_DATA_PRODUCTS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups); } public static boolean isAuthorizedToManageDataProducts( @@ -47,11 +43,7 @@ public static boolean isAuthorizedToManageDataProducts( ImmutableList.of(PoliciesConfig.MANAGE_DATA_PRODUCTS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - domainUrn.getEntityType(), - domainUrn.toString(), - orPrivilegeGroups); + context, domainUrn.getEntityType(), domainUrn.toString(), orPrivilegeGroups); } public static boolean isAuthorizedToEditDataProduct( @@ -60,10 +52,6 @@ public static boolean isAuthorizedToEditDataProduct( new DisjunctivePrivilegeGroup(ImmutableList.of(ALL_PRIVILEGES_GROUP)); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - dataProductUrn.getEntityType(), - dataProductUrn.toString(), - orPrivilegeGroups); + context, dataProductUrn.getEntityType(), dataProductUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java index 7d3603ec050e94..bab7ecaf302f54 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolver.java @@ -49,7 +49,7 @@ public CompletableFuture<DatasetStatsSummary> get(DataFetchingEnvironment enviro log.debug( "User {} is not authorized to view profile information for dataset {}", context.getActorUrn(), - resourceUrn.toString()); + resourceUrn); return null; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java index c568ff6db3a27d..a2230cf6b6e886 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/deprecation/UpdateDeprecationResolver.java @@ -100,11 +100,7 @@ private boolean isAuthorizedToUpdateDeprecationForEntity( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DEPRECATION_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, entityUrn.getEntityType(),
entityUrn.toString(), orPrivilegeGroups); } public static Boolean validateUpdateDeprecationInput( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java index b25f5598b44bc0..67ab9bb2878141 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/EntityPrivilegesResolver.java @@ -106,8 +106,7 @@ private EntityPrivileges getGlossaryNodePrivileges(Urn nodeUrn, QueryContext con } private boolean canEditEntityLineage(Urn urn, QueryContext context) { - return AuthUtil.isAuthorizedUrns( - context.getAuthorizer(), context.getActorUrn(), LINEAGE, UPDATE, List.of(urn)); + return AuthUtil.isAuthorizedUrns(context.getOperationContext(), LINEAGE, UPDATE, List.of(urn)); } private EntityPrivileges getDatasetPrivileges(Urn urn, QueryContext context) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java index 454ba693da95a7..68aef26bf4aa17 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/RaiseIncidentResolver.java @@ -123,10 +123,6 @@ private boolean isAuthorizedToCreateIncidentForResource( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_INCIDENTS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java index d51ceab31e60ec..dee92247ba311a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/incident/UpdateIncidentStatusResolver.java @@ -103,10 +103,6 @@ private boolean isAuthorizedToUpdateIncident(final Urn resourceUrn, final QueryC new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_INCIDENTS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtils.java index 24d0e946145054..be8d4fa7b8c68d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtils.java @@ -14,15 +14,12 @@ public class 
IngestionAuthUtils { public static boolean canManageIngestion(@Nonnull QueryContext context) { return AuthUtil.isAuthorizedEntityType( - context.getActorUrn(), - context.getAuthorizer(), - MANAGE, - List.of(INGESTION_SOURCE_ENTITY_NAME)); + context.getOperationContext(), MANAGE, List.of(INGESTION_SOURCE_ENTITY_NAME)); } public static boolean canManageSecrets(@Nonnull QueryContext context) { return isAuthorizedEntityType( - context.getActorUrn(), context.getAuthorizer(), MANAGE, List.of(SECRETS_ENTITY_NAME)); + context.getOperationContext(), MANAGE, List.of(SECRETS_ENTITY_NAME)); } private IngestionAuthUtils() {} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java index 09039e530631d0..d7c76c0235dcc0 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java @@ -4,9 +4,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; -import com.linkedin.datahub.graphql.generated.DataProcessInstance; -import com.linkedin.datahub.graphql.generated.DataProcessInstanceResult; -import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.*; import com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceMapper; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; @@ -33,6 +31,8 @@ import java.util.Objects; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** GraphQL Resolver used for fetching a list of Task Runs associated with a Data Job */ public class DataJobRunsResolver @@ -40,6 +40,8 @@ public class DataJobRunsResolver private static final String PARENT_TEMPLATE_URN_SEARCH_INDEX_FIELD_NAME = "parentTemplate"; private static final String CREATED_TIME_SEARCH_INDEX_FIELD_NAME = "created"; + private static final String HAS_RUN_EVENTS_FIELD_NAME = "hasRunEvents"; + private static final Logger log = LoggerFactory.getLogger(DataJobRunsResolver.class); private final EntityClient _entityClient; @@ -117,7 +119,12 @@ private Filter buildTaskRunsEntityFilter(final String entityUrn) { new Criterion() .setField(PARENT_TEMPLATE_URN_SEARCH_INDEX_FIELD_NAME) .setCondition(Condition.EQUAL) - .setValue(entityUrn))); + .setValue(entityUrn), + new Criterion() + .setField(HAS_RUN_EVENTS_FIELD_NAME) + .setCondition(Condition.EQUAL) + .setValue(Boolean.TRUE.toString()))); + final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java index d462fb0820aa03..928e33d44c84e7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/lineage/UpdateLineageResolver.java @@ -211,11 +211,7 @@ private boolean isAuthorized( @Nonnull final Urn urn, @Nonnull 
final DisjunctivePrivilegeGroup orPrivilegesGroup) { return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - urn.getEntityType(), - urn.toString(), - orPrivilegesGroup); + context, urn.getEntityType(), urn.toString(), orPrivilegesGroup); } private void checkLineageEdgePrivileges( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 8fc26e3cec9d06..4d4b898618bf9c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -78,8 +78,7 @@ private boolean isAuthorized(QueryContext context, String urn) { if (_entityName.equals(Constants.DATASET_ENTITY_NAME) && _aspectName.equals(Constants.DATASET_PROFILE_ASPECT_NAME)) { return AuthUtil.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), + context.getOperationContext(), PoliciesConfig.VIEW_DATASET_PROFILE_PRIVILEGE, new EntitySpec(_entityName, urn)); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java index 6e2fc77e703af3..917f1b1c1d574d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/DescriptionUtils.java @@ -336,11 +336,7 @@ public static boolean isAuthorizedToUpdateFieldDescription( PoliciesConfig.EDIT_DATASET_COL_DESCRIPTION_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static boolean isAuthorizedToUpdateDomainDescription( @@ -353,11 +349,7 @@ public static boolean isAuthorizedToUpdateDomainDescription( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DOCS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static boolean isAuthorizedToUpdateContainerDescription( @@ -370,11 +362,7 @@ public static boolean isAuthorizedToUpdateContainerDescription( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DOCS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static boolean isAuthorizedToUpdateDescription( @@ -387,11 +375,7 @@ public static boolean isAuthorizedToUpdateDescription( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DOCS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static void updateMlModelDescription( diff 
--git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java index 73e7f9ec1cca7c..1d3a9c229e63e2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeleteUtils.java @@ -25,7 +25,7 @@ private DeleteUtils() {} public static boolean isAuthorizedToDeleteEntity(@Nonnull QueryContext context, Urn entityUrn) { return AuthUtil.isAuthorizedEntityUrns( - context.getAuthorizer(), context.getActorUrn(), DELETE, List.of(entityUrn)); + context.getOperationContext(), DELETE, List.of(entityUrn)); } public static void updateStatusForResources( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java index 3912ffa6226bff..541224b02c1b52 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DeprecationUtils.java @@ -42,11 +42,7 @@ public static boolean isAuthorizedToUpdateDeprecationForEntity( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DEPRECATION_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups); } public static void updateDeprecationForResources( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java index 1114cf7344e8f4..1dcdd988f5e7c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java @@ -67,11 +67,7 @@ public static boolean isAuthorizedToUpdateDomainsForEntity( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DOMAINS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups); } public static void setDomainForResources( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/EmbedUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/EmbedUtils.java index 15c93904fc3bdd..5ebb434b21c9f4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/EmbedUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/EmbedUtils.java @@ -28,10 +28,6 @@ public static boolean isAuthorizedToUpdateEmbedForEntity( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_EMBED_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, 
entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java index 16df9911f3bec3..0d8e505a948e5a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java @@ -33,7 +33,7 @@ private GlossaryUtils() {} */ public static boolean canManageGlossaries(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); } /** @@ -79,11 +79,7 @@ public static boolean hasManagePrivilege( ImmutableList.of(new ConjunctivePrivilegeGroup(ImmutableList.of(privilege.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - parentNodeUrn.getEntityType(), - parentNodeUrn.toString(), - orPrivilegeGroups); + context, parentNodeUrn.getEntityType(), parentNodeUrn.toString(), orPrivilegeGroups); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java index 3eac819a9cc48d..cffd019307f34a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java @@ -251,11 +251,7 @@ public static boolean isAuthorizedToUpdateTags( : PoliciesConfig.EDIT_ENTITY_TAGS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static boolean isAuthorizedToUpdateTerms( @@ -277,11 +273,7 @@ public static boolean isAuthorizedToUpdateTerms( : PoliciesConfig.EDIT_ENTITY_GLOSSARY_TERMS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - targetUrn.getEntityType(), - targetUrn.toString(), - orPrivilegeGroups); + context, targetUrn.getEntityType(), targetUrn.toString(), orPrivilegeGroups); } public static void validateResourceAndLabel( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LinkUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LinkUtils.java index a2d4692db5b7b7..e6f9d09412119a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LinkUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LinkUtils.java @@ -115,11 +115,7 @@ public static boolean isAuthorizedToUpdateLinks(@Nonnull QueryContext context, U ImmutableList.of(PoliciesConfig.EDIT_ENTITY_DOC_LINKS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + context, resourceUrn.getEntityType(), resourceUrn.toString(), 
orPrivilegeGroups); } public static Boolean validateAddRemoveInput( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index ddb795189c0e3d..2f2b52f7ab5864 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -205,11 +205,7 @@ public static void validateAuthorizedToUpdateOwners( boolean authorized = AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); if (!authorized) { throw new AuthorizationException( "Unauthorized to update owners. Please contact your DataHub administrator."); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java index 6ef3222bc068f2..48f231fee5093a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/operation/ReportOperationResolver.java @@ -137,10 +137,6 @@ private boolean isAuthorizedToReportOperationForResource( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_OPERATIONS_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/PolicyAuthUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/PolicyAuthUtils.java index 7babe63745f727..775a4aaf6e090b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/PolicyAuthUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/PolicyAuthUtils.java @@ -12,7 +12,7 @@ public class PolicyAuthUtils { static boolean canManagePolicies(@Nonnull QueryContext context) { return AuthUtil.isAuthorizedEntityType( - context.getActorUrn(), context.getAuthorizer(), MANAGE, List.of(POLICY_ENTITY_NAME)); + context.getOperationContext(), MANAGE, List.of(POLICY_ENTITY_NAME)); } private PolicyAuthUtils() {} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java index 7a059ed9a1aeda..9a8e0fc3647274 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/SetTagColorResolver.java @@ -101,10 +101,6 @@ public static boolean isAuthorizedToSetTagColor(@Nonnull QueryContext context, U ImmutableList.of(PoliciesConfig.EDIT_TAG_COLOR_PRIVILEGE.getType())))); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - 
entityUrn.getEntityType(), - entityUrn.toString(), - orPrivilegeGroups); + context, entityUrn.getEntityType(), entityUrn.toString(), orPrivilegeGroups); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java index 020064ed643c88..80e5abd245b281 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/test/TestUtils.java @@ -22,13 +22,13 @@ public class TestUtils { /** Returns true if the authenticated user is able to view tests. */ public static boolean canViewTests(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.VIEW_TESTS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.VIEW_TESTS_PRIVILEGE); } /** Returns true if the authenticated user is able to manage tests. */ public static boolean canManageTests(@Nonnull QueryContext context) { return AuthUtil.isAuthorized( - context.getAuthorizer(), context.getActorUrn(), PoliciesConfig.MANAGE_TESTS_PRIVILEGE); + context.getOperationContext(), PoliciesConfig.MANAGE_TESTS_PRIVILEGE); } public static TestDefinition mapDefinition(final TestDefinitionInput testDefInput) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java index fe9b511f4a7dde..054dcec15af32d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/chart/ChartType.java @@ -242,11 +242,7 @@ private boolean isAuthorized( // Decide whether the current principal should be allowed to update the Dataset. 
final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.CHART_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.CHART_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final ChartUpdateInput updateInput) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java index e6b75f9482f59f..9f5025ccf303a2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.common.mappers; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.SearchFlags; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; @@ -64,6 +65,10 @@ public com.linkedin.metadata.query.SearchFlags apply( .map(c -> GroupingCriterionInputMapper.map(context, c)) .collect(Collectors.toList())))); } + if (searchFlags.getCustomHighlightingFields() != null) { + result.setCustomHighlightingFields( + new StringArray(searchFlags.getCustomHighlightingFields())); + } return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java index 16d2940a392447..27b97bfb2124f7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpgroup/CorpGroupType.java @@ -188,11 +188,7 @@ private boolean isAuthorizedToUpdate( // Decide whether the current principal should be allowed to update the Dataset. final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(input); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.CORP_GROUP_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.CORP_GROUP_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java index 3c2bfd7225edf5..5a812daa264bbf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/corpuser/CorpUserType.java @@ -183,8 +183,7 @@ private boolean isAuthorizedToUpdate( // information. 
return context.getActorUrn().equals(urn) || AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), + context, PoliciesConfig.CORP_GROUP_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java index 89a41732109964..6ad362e5905904 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dashboard/DashboardType.java @@ -241,11 +241,7 @@ private boolean isAuthorized( // Decide whether the current principal should be allowed to update the Dataset. final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.DASHBOARD_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.DASHBOARD_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java index f8248aedf289c0..3a697517bdecee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataflow/DataFlowType.java @@ -228,11 +228,7 @@ private boolean isAuthorized( // Decide whether the current principal should be allowed to update the Dataset. final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.DATA_FLOW_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.DATA_FLOW_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final DataFlowUpdateInput updateInput) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java index 1e1de615b5911b..b32832a28d5d57 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datajob/DataJobType.java @@ -229,11 +229,7 @@ private boolean isAuthorized( // Decide whether the current principal should be allowed to update the Dataset. 
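Each of these isAuthorized call sites evaluates a DisjunctivePrivilegeGroup, i.e. an OR across groups where each inner group is an AND of privileges. A dependency-free sketch of that evaluation rule, with illustrative privilege names:

    import java.util.List;
    import java.util.Set;

    final class PrivilegeGroupSketch {
      // A disjunctive group authorizes if ANY inner group is satisfied, and an inner
      // group is satisfied only if ALL of its privileges are granted (OR of ANDs).
      static boolean isAuthorized(Set<String> granted, List<List<String>> orOfAndGroups) {
        return orOfAndGroups.stream().anyMatch(granted::containsAll);
      }

      public static void main(String[] args) {
        List<List<String>> groups =
            List.of(List.of("EDIT_ENTITY"), List.of("EDIT_ENTITY_DOCS")); // illustrative names
        System.out.println(isAuthorized(Set.of("EDIT_ENTITY_DOCS"), groups)); // true
        System.out.println(isAuthorized(Set.of("VIEW_ENTITY"), groups)); // false
      }
    }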
final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.DATA_JOB_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.DATA_JOB_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final DataJobUpdateInput updateInput) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java index 46c810ac00d621..65b5d39e315692 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java @@ -283,11 +283,7 @@ private boolean isAuthorized( // Decide whether the current principal should be allowed to update the Dataset. final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.DATASET_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.DATASET_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final DatasetUpdateInput updateInput) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/ERModelRelationshipType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/ERModelRelationshipType.java index fd340aca119b59..ed52cc5486e92f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/ERModelRelationshipType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/ermodelrelationship/ERModelRelationshipType.java @@ -211,11 +211,7 @@ public static boolean canUpdateERModelRelation( new DisjunctivePrivilegeGroup( ImmutableList.of(editPrivilegesGroup, specificPrivilegeGroup)); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + context, resourceUrn.getEntityType(), resourceUrn.toString(), orPrivilegeGroups); } public static boolean canCreateERModelRelation( @@ -232,18 +228,10 @@ public static boolean canCreateERModelRelation( new DisjunctivePrivilegeGroup(ImmutableList.of(editPrivilegesGroup, createPrivilegesGroup)); boolean sourcePrivilege = AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - sourceUrn.getEntityType(), - sourceUrn.toString(), - orPrivilegeGroups); + context, sourceUrn.getEntityType(), sourceUrn.toString(), orPrivilegeGroups); boolean destinationPrivilege = AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - destinationUrn.getEntityType(), - destinationUrn.toString(), - orPrivilegeGroups); + context, destinationUrn.getEntityType(), destinationUrn.toString(), orPrivilegeGroups); return sourcePrivilege && destinationPrivilege; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java index 8eeda9331ad8ff..a6f29c1917397f 100644 --- 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/notebook/NotebookType.java @@ -226,11 +226,7 @@ private boolean isAuthorized( // Decide whether the current principal should be allowed to update the Dataset. final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - PoliciesConfig.NOTEBOOK_PRIVILEGES.getResourceType(), - urn, - orPrivilegeGroups); + context, PoliciesConfig.NOTEBOOK_PRIVILEGES.getResourceType(), urn, orPrivilegeGroups); } private DisjunctivePrivilegeGroup getAuthorizedPrivileges(final NotebookUpdateInput updateInput) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java index 3c07b242e9d813..9a5d0f1f41b8cc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/tag/TagType.java @@ -164,8 +164,7 @@ private boolean isAuthorized(@Nonnull TagUpdateInput update, @Nonnull QueryConte // Decide whether the current principal should be allowed to update the Dataset. final DisjunctivePrivilegeGroup orPrivilegeGroups = getAuthorizedPrivileges(update); return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), + context, PoliciesConfig.TAG_PRIVILEGES.getResourceType(), update.getUrn(), orPrivilegeGroups); diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 84e81e9096a3b9..9c4375e70d9baa 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -162,6 +162,11 @@ input SearchFlags { Whether to include restricted entities """ includeRestricted: Boolean + + """ + Fields to include for custom highlighting + """ + customHighlightingFields: [String!] } """ @@ -545,6 +550,21 @@ enum FilterOperator { Represent the relation less than or equal to, e.g.
ownerCount <= 3 """ LESS_THAN_OR_EQUAL_TO + + """ + Represent the relation: URN field matches any nested children in addition to the given URN + """ + DESCENDANTS_INCL + + """ + Represent the relation: URN field matches any nested parent in addition to the given URN + """ + ANCESTORS_INCL + + """ + Represent the relation: URN field matches any nested child or parent in addition to the given URN + """ + RELATED_INCL } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java index 837dec2f528ed3..76879addc5e6f3 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardStatsSummaryTest.java @@ -7,7 +7,6 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationResult; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.UrnUtils; import com.linkedin.dashboard.DashboardUsageStatistics; @@ -55,13 +54,10 @@ public void testGetSuccess() throws Exception { // Execute resolver DashboardStatsSummaryResolver resolver = new DashboardStatsSummaryResolver(mockClient); QueryContext mockContext = Mockito.mock(QueryContext.class); - Authorizer mockAuthorizor = mock(Authorizer.class); - when(mockAuthorizor.authorize(any())) - .thenAnswer( - args -> - new AuthorizationResult(args.getArgument(0), AuthorizationResult.Type.ALLOW, "")); - when(mockContext.getAuthorizer()).thenReturn(mockAuthorizor); - Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); + when(mockContext.getOperationContext().authorize(any(), any())) + .thenReturn(new AuthorizationResult(null, AuthorizationResult.Type.ALLOW, "")); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); Mockito.when(mockEnv.getSource()).thenReturn(TEST_SOURCE); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java index f8a7e4fc6a13c8..57dd5ebc86e86a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetStatsSummaryResolverTest.java @@ -4,7 +4,6 @@ import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationResult; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; @@ -66,14 +65,15 @@ public void testGetSuccess() throws Exception { DatasetStatsSummaryResolver resolver = new DatasetStatsSummaryResolver(mockClient); QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test"); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); + AuthorizationResult
mockAuthorizerResult = Mockito.mock(AuthorizationResult.class); Mockito.when(mockAuthorizerResult.getType()).thenReturn(AuthorizationResult.Type.ALLOW); - Mockito.when(mockAuthorizer.authorize(any())).thenReturn(mockAuthorizerResult); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); - Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + Mockito.when(mockContext.getOperationContext()) .thenReturn(Mockito.mock(OperationContext.class)); + Mockito.when(mockContext.getOperationContext().authorize(any(), any())) + .thenReturn(mockAuthorizerResult); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); Mockito.when(mockEnv.getSource()).thenReturn(TEST_SOURCE); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java index 448c3420625929..25d48ddec74069 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GlossaryUtilsTest.java @@ -2,13 +2,13 @@ import static com.linkedin.metadata.Constants.GLOSSARY_NODE_INFO_ASPECT_NAME; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; -import com.datahub.authentication.Authentication; -import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.EntitySpec; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.GlossaryNodeUrn; import com.linkedin.common.urn.Urn; @@ -22,27 +22,27 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.glossary.GlossaryNodeInfo; import com.linkedin.metadata.Constants; +import io.datahubproject.metadata.context.OperationContext; import java.util.HashMap; import java.util.Map; -import java.util.Optional; import org.mockito.Mockito; +import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class GlossaryUtilsTest { private final String userUrn = "urn:li:corpuser:authorized"; private final QueryContext mockContext = Mockito.mock(QueryContext.class); - private final Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); private final EntityClient mockClient = Mockito.mock(EntityClient.class); private final Urn parentNodeUrn = UrnUtils.getUrn("urn:li:glossaryNode:parent_node"); private final Urn parentNodeUrn1 = UrnUtils.getUrn("urn:li:glossaryNode:parent_node1"); private final Urn parentNodeUrn2 = UrnUtils.getUrn("urn:li:glossaryNode:parent_node2"); private final Urn parentNodeUrn3 = UrnUtils.getUrn("urn:li:glossaryNode:parent_node3"); + @BeforeMethod private void setUpTests() throws Exception { - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); Mockito.when(mockContext.getActorUrn()).thenReturn(userUrn); - Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); GlossaryNodeInfo parentNode1 = new GlossaryNodeInfo() @@ -84,25 +84,25 @@ private void 
setUpTests() throws Exception { Mockito.when( mockClient.getV2( any(), - Mockito.eq(Constants.GLOSSARY_NODE_ENTITY_NAME), - Mockito.eq(parentNodeUrn1), - Mockito.eq(ImmutableSet.of(GLOSSARY_NODE_INFO_ASPECT_NAME)))) + eq(Constants.GLOSSARY_NODE_ENTITY_NAME), + eq(parentNodeUrn1), + eq(ImmutableSet.of(GLOSSARY_NODE_INFO_ASPECT_NAME)))) .thenReturn(new EntityResponse().setAspects(new EnvelopedAspectMap(parentNode1Aspects))); Mockito.when( mockClient.getV2( any(), - Mockito.eq(Constants.GLOSSARY_NODE_ENTITY_NAME), - Mockito.eq(parentNodeUrn2), - Mockito.eq(ImmutableSet.of(GLOSSARY_NODE_INFO_ASPECT_NAME)))) + eq(Constants.GLOSSARY_NODE_ENTITY_NAME), + eq(parentNodeUrn2), + eq(ImmutableSet.of(GLOSSARY_NODE_INFO_ASPECT_NAME)))) .thenReturn(new EntityResponse().setAspects(new EnvelopedAspectMap(parentNode2Aspects))); Mockito.when( mockClient.getV2( any(), - Mockito.eq(Constants.GLOSSARY_NODE_ENTITY_NAME), - Mockito.eq(parentNodeUrn3), - Mockito.eq(ImmutableSet.of(GLOSSARY_NODE_INFO_ASPECT_NAME)))) + eq(Constants.GLOSSARY_NODE_ENTITY_NAME), + eq(parentNodeUrn3), + eq(ImmutableSet.of(GLOSSARY_NODE_INFO_ASPECT_NAME)))) .thenReturn(new EntityResponse().setAspects(new EnvelopedAspectMap(parentNode3Aspects))); final EntitySpec resourceSpec3 = @@ -120,19 +120,14 @@ private void setUpTests() throws Exception { private void mockAuthRequest( String privilege, AuthorizationResult.Type allowOrDeny, EntitySpec resourceSpec) { - final AuthorizationRequest authorizationRequest = - new AuthorizationRequest( - userUrn, - privilege, - resourceSpec != null ? Optional.of(resourceSpec) : Optional.empty()); AuthorizationResult result = Mockito.mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(allowOrDeny); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(authorizationRequest))).thenReturn(result); + when(mockContext.getOperationContext().authorize(eq(privilege), eq(resourceSpec))) + .thenReturn(result); } @Test public void testCanManageGlossariesAuthorized() throws Exception { - setUpTests(); mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.ALLOW, null); assertTrue(GlossaryUtils.canManageGlossaries(mockContext)); @@ -140,7 +135,6 @@ public void testCanManageGlossariesAuthorized() throws Exception { @Test public void testCanManageGlossariesUnauthorized() throws Exception { - setUpTests(); mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); assertFalse(GlossaryUtils.canManageGlossaries(mockContext)); @@ -148,7 +142,6 @@ public void testCanManageGlossariesUnauthorized() throws Exception { @Test public void testCanManageChildrenEntitiesWithManageGlossaries() throws Exception { - setUpTests(); // they have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.ALLOW, null); @@ -157,7 +150,6 @@ public void testCanManageChildrenEntitiesWithManageGlossaries() throws Exception @Test public void testCanManageChildrenEntitiesNoParentNode() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -166,7 +158,6 @@ public void testCanManageChildrenEntitiesNoParentNode() throws Exception { @Test public void testCanManageChildrenEntitiesAuthorized() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -179,7 +170,6 @@ public void testCanManageChildrenEntitiesAuthorized() throws Exception { 
@Test public void testCanManageChildrenEntitiesUnauthorized() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -193,7 +183,6 @@ public void testCanManageChildrenEntitiesUnauthorized() throws Exception { @Test public void testCanManageChildrenRecursivelyEntitiesAuthorized() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -214,7 +203,6 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorized() throws Exceptio @Test public void testCanManageChildrenRecursivelyEntitiesUnauthorized() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -235,7 +223,6 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorized() throws Except @Test public void testCanManageChildrenRecursivelyEntitiesAuthorizedLevel2() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -252,7 +239,6 @@ public void testCanManageChildrenRecursivelyEntitiesAuthorizedLevel2() throws Ex @Test public void testCanManageChildrenRecursivelyEntitiesUnauthorizedLevel2() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); @@ -269,7 +255,6 @@ public void testCanManageChildrenRecursivelyEntitiesUnauthorizedLevel2() throws @Test public void testCanManageChildrenRecursivelyEntitiesNoLevel2() throws Exception { - setUpTests(); // they do NOT have the MANAGE_GLOSSARIES platform privilege mockAuthRequest("MANAGE_GLOSSARIES", AuthorizationResult.Type.DENY, null); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java index e0555f5886b8bb..963bdf93bc9f1f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestTestUtils.java @@ -1,11 +1,13 @@ package com.linkedin.datahub.graphql.resolvers.ingest; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; -import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthorizationResult; -import com.datahub.plugins.auth.authorization.Authorizer; +import com.datahub.authorization.EntitySpec; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringMap; @@ -38,14 +40,9 @@ public static QueryContext getMockAllowContext() { QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test"); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - AuthorizationResult result = Mockito.mock(AuthorizationResult.class); - Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.ALLOW); - 
Mockito.when(mockAuthorizer.authorize(Mockito.any())).thenReturn(result); - - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); - Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); - Mockito.when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); + when(mockContext.getOperationContext().authorize(any(), nullable(EntitySpec.class))) + .thenReturn(new AuthorizationResult(null, AuthorizationResult.Type.ALLOW, "")); return mockContext; } @@ -53,13 +50,9 @@ public static QueryContext getMockDenyContext() { QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test"); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - AuthorizationResult result = Mockito.mock(AuthorizationResult.class); - Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.any())).thenReturn(result); - - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); - Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); + when(mockContext.getOperationContext().authorize(any(), nullable(EntitySpec.class))) + .thenReturn(new AuthorizationResult(null, AuthorizationResult.Type.DENY, "")); return mockContext; } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java index f3e27d91f39df0..ba7f80deec4b77 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionAuthUtilsTest.java @@ -1,13 +1,16 @@ package com.linkedin.datahub.graphql.resolvers.ingest; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.EntitySpec; -import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.metadata.Constants; +import io.datahubproject.metadata.context.OperationContext; import java.util.Optional; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -17,19 +20,18 @@ public class IngestionAuthUtilsTest { @Test public void testCanManageIngestionAuthorized() throws Exception { QueryContext mockContext = Mockito.mock(QueryContext.class); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - - AuthorizationRequest request = - new AuthorizationRequest( - "urn:li:corpuser:authorized", - "MANAGE_INGESTION", - Optional.of(new EntitySpec(Constants.INGESTION_SOURCE_ENTITY_NAME, ""))); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationResult result = Mockito.mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.ALLOW); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(request))).thenReturn(result); + Mockito.when( + mockContext + 
.getOperationContext() + .authorize( + eq("MANAGE_INGESTION"), + eq(new EntitySpec(Constants.INGESTION_SOURCE_ENTITY_NAME, "")))) + .thenReturn(result); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:authorized"); assertTrue(IngestionAuthUtils.canManageIngestion(mockContext)); @@ -38,19 +40,18 @@ public void testCanManageIngestionAuthorized() throws Exception { @Test public void testCanManageIngestionUnauthorized() throws Exception { QueryContext mockContext = Mockito.mock(QueryContext.class); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - - AuthorizationRequest request = - new AuthorizationRequest( - "urn:li:corpuser:unauthorized", - "MANAGE_INGESTION", - Optional.of(new EntitySpec(Constants.INGESTION_SOURCE_ENTITY_NAME, ""))); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationResult result = Mockito.mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(request))).thenReturn(result); + Mockito.when( + mockContext + .getOperationContext() + .authorize( + eq("MANAGE_INGESTION"), + eq(new EntitySpec(Constants.INGESTION_SOURCE_ENTITY_NAME, "")))) + .thenReturn(result); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:unauthorized"); assertFalse(IngestionAuthUtils.canManageIngestion(mockContext)); @@ -59,19 +60,17 @@ public void testCanManageIngestionUnauthorized() throws Exception { @Test public void testCanManageSecretsAuthorized() throws Exception { QueryContext mockContext = Mockito.mock(QueryContext.class); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - - AuthorizationRequest request = - new AuthorizationRequest( - "urn:li:corpuser:authorized", - "MANAGE_SECRETS", - Optional.of(new EntitySpec(Constants.SECRETS_ENTITY_NAME, ""))); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationResult result = Mockito.mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.ALLOW); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(request))).thenReturn(result); + Mockito.when( + mockContext + .getOperationContext() + .authorize( + eq("MANAGE_SECRETS"), eq(new EntitySpec(Constants.SECRETS_ENTITY_NAME, "")))) + .thenReturn(result); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:authorized"); assertTrue(IngestionAuthUtils.canManageSecrets(mockContext)); @@ -80,7 +79,7 @@ public void testCanManageSecretsAuthorized() throws Exception { @Test public void testCanManageSecretsUnauthorized() throws Exception { QueryContext mockContext = Mockito.mock(QueryContext.class); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationRequest request = new AuthorizationRequest( @@ -90,9 +89,13 @@ public void testCanManageSecretsUnauthorized() throws Exception { AuthorizationResult result = Mockito.mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(request))).thenReturn(result); + Mockito.when( + mockContext + .getOperationContext() + .authorize( + eq("MANAGE_SECRETS"), 
eq(new EntitySpec(Constants.SECRETS_ENTITY_NAME, "")))) + .thenReturn(result); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:unauthorized"); assertFalse(IngestionAuthUtils.canManageSecrets(mockContext)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourceResolverTest.java index cd3b5c9dce47e8..05428788dc3c92 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourceResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourceResolverTest.java @@ -2,7 +2,6 @@ import static com.linkedin.datahub.graphql.resolvers.ingest.IngestTestUtils.*; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.mock; import static org.testng.Assert.*; import com.google.common.collect.ImmutableMap; @@ -22,7 +21,6 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; -import io.datahubproject.metadata.context.OperationContext; import java.util.HashSet; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -85,7 +83,7 @@ public void testGetSuccess() throws Exception { // Execute resolver QueryContext mockContext = getMockAllowContext(); - Mockito.when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_INPUT); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java index 034a8215c4a8ca..5617321c98e84f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/CreateQueryResolverTest.java @@ -2,15 +2,15 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; -import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.EntitySpec; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.AuditStamp; @@ -38,8 +38,10 @@ import com.linkedin.query.QuerySubject; import com.linkedin.query.QuerySubjectArray; import com.linkedin.query.QuerySubjects; +import com.linkedin.util.Pair; import graphql.schema.DataFetchingEnvironment; -import java.util.Optional; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Map; import java.util.concurrent.CompletionException; import org.mockito.Mockito; import 
org.testng.annotations.Test; @@ -214,22 +216,7 @@ private QueryService initMockService() { private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn(TEST_ACTOR_URN.toString()); - - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - - AuthorizationRequest editQueriesRequest = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString()))); - - AuthorizationRequest editAllRequest = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString()))); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationResult editQueriesResult = Mockito.mock(AuthorizationResult.class); Mockito.when(editQueriesResult.getType()) @@ -237,8 +224,6 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editQueriesRequest))) - .thenReturn(editQueriesResult); AuthorizationResult editAllResult = Mockito.mock(AuthorizationResult.class); Mockito.when(editAllResult.getType()) @@ -246,9 +231,25 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editAllRequest))).thenReturn(editAllResult); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); + Map<Pair<String, EntitySpec>, AuthorizationResult> responses = + Map.of( + Pair.of( + PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), + new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())), + editQueriesResult, + Pair.of( + PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), + new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())), + editAllResult); + + when(mockContext.getOperationContext().authorize(any(), any())) + .thenAnswer( + args -> + responses.getOrDefault( + Pair.of(args.getArgument(0), args.getArgument(1)), + new AuthorizationResult(null, AuthorizationResult.Type.DENY, ""))); + Mockito.when(mockContext.getAuthentication()) .thenReturn(new Authentication(new Actor(ActorType.USER, TEST_ACTOR_URN.getId()), "creds")); return mockContext; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java index 491f06e800d709..2045e0ee52d683 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/DeleteQueryResolverTest.java @@ -2,15 +2,15 @@ import static com.linkedin.datahub.graphql.TestUtils.*; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; -import com.datahub.authorization.AuthorizationRequest;
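The getMockQueryContext refactor above (in CreateQueryResolverTest, repeated in the Delete and Update variants that follow) replaces per-request eq() stubs with a single thenAnswer backed by a map keyed on (privilege, resource), denying anything unmatched. The lookup logic in a dependency-free form, with a record standing in for Pair and plain strings for EntitySpec:

    import java.util.Map;

    final class AuthStubSketch {
      enum Result { ALLOW, DENY }

      /** Stand-in key for Pair<privilege, entitySpec>. */
      record Key(String privilege, String resource) {}

      // getOrDefault supplies the deny-by-default behavior the tests rely on:
      // only explicitly listed (privilege, resource) pairs are allowed.
      static Result authorize(Map<Key, Result> responses, String privilege, String resource) {
        return responses.getOrDefault(new Key(privilege, resource), Result.DENY);
      }

      public static void main(String[] args) {
        Map<Key, Result> responses =
            Map.of(new Key("EDIT_QUERIES", "urn:li:dataset:x"), Result.ALLOW); // illustrative urn
        System.out.println(authorize(responses, "EDIT_QUERIES", "urn:li:dataset:x")); // ALLOW
        System.out.println(authorize(responses, "EDIT_ENTITY", "urn:li:dataset:x")); // DENY
      }
    }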
import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.EntitySpec; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -20,8 +20,10 @@ import com.linkedin.query.QuerySubject; import com.linkedin.query.QuerySubjectArray; import com.linkedin.query.QuerySubjects; +import com.linkedin.util.Pair; import graphql.schema.DataFetchingEnvironment; -import java.util.Optional; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Map; import java.util.concurrent.CompletionException; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -115,24 +117,7 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()) .thenReturn(DeleteQueryResolverTest.TEST_ACTOR_URN.toString()); - - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - - AuthorizationRequest editQueriesRequest = - new AuthorizationRequest( - DeleteQueryResolverTest.TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), - Optional.of( - new EntitySpec( - DeleteQueryResolverTest.TEST_DATASET_URN.getEntityType(), - DeleteQueryResolverTest.TEST_DATASET_URN.toString()))); - - AuthorizationRequest editAllRequest = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString()))); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationResult editQueriesResult = Mockito.mock(AuthorizationResult.class); Mockito.when(editQueriesResult.getType()) @@ -140,8 +125,6 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editQueriesRequest))) - .thenReturn(editQueriesResult); AuthorizationResult editAllResult = Mockito.mock(AuthorizationResult.class); Mockito.when(editAllResult.getType()) @@ -149,9 +132,25 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? 
AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editAllRequest))).thenReturn(editAllResult); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); + Map<Pair<String, EntitySpec>, AuthorizationResult> responses = + Map.of( + Pair.of( + PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), + new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())), + editQueriesResult, + Pair.of( + PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), + new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())), + editAllResult); + + when(mockContext.getOperationContext().authorize(any(), any())) + .thenAnswer( + args -> + responses.getOrDefault( + Pair.of(args.getArgument(0), args.getArgument(1)), + new AuthorizationResult(null, AuthorizationResult.Type.DENY, ""))); + Mockito.when(mockContext.getAuthentication()) .thenReturn(new Authentication(new Actor(ActorType.USER, TEST_ACTOR_URN.getId()), "creds")); return mockContext; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java index ce21ed99595660..8b81523b58d105 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/UpdateQueryResolverTest.java @@ -2,15 +2,15 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.*; import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; -import com.datahub.authorization.AuthorizationRequest; import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.EntitySpec; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.AuditStamp; @@ -38,8 +38,10 @@ import com.linkedin.query.QuerySubject; import com.linkedin.query.QuerySubjectArray; import com.linkedin.query.QuerySubjects; +import com.linkedin.util.Pair; import graphql.schema.DataFetchingEnvironment; -import java.util.Optional; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Map; import java.util.concurrent.CompletionException; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -212,36 +214,7 @@ private QueryService initMockService() { private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn(TEST_ACTOR_URN.toString()); - - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - - AuthorizationRequest editQueriesRequest1 = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString()))); - - AuthorizationRequest editAllRequest1 = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString()))); - -
AuthorizationRequest editQueriesRequest2 = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString()))); - - AuthorizationRequest editAllRequest2 = - new AuthorizationRequest( - TEST_ACTOR_URN.toString(), - PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), - Optional.of( - new EntitySpec(TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString()))); + when(mockContext.getOperationContext()).thenReturn(mock(OperationContext.class)); AuthorizationResult editQueriesResult1 = Mockito.mock(AuthorizationResult.class); Mockito.when(editQueriesResult1.getType()) @@ -249,8 +222,6 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editQueriesRequest1))) - .thenReturn(editQueriesResult1); AuthorizationResult editAllResult1 = Mockito.mock(AuthorizationResult.class); Mockito.when(editAllResult1.getType()) @@ -258,7 +229,6 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editAllRequest1))).thenReturn(editAllResult1); AuthorizationResult editQueriesResult2 = Mockito.mock(AuthorizationResult.class); Mockito.when(editQueriesResult2.getType()) @@ -266,8 +236,6 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editQueriesRequest2))) - .thenReturn(editQueriesResult2); AuthorizationResult editAllResult2 = Mockito.mock(AuthorizationResult.class); Mockito.when(editAllResult2.getType()) @@ -275,9 +243,35 @@ private QueryContext getMockQueryContext(boolean allowEditEntityQueries) { allowEditEntityQueries ? 
AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY); - Mockito.when(mockAuthorizer.authorize(Mockito.eq(editAllRequest2))).thenReturn(editAllResult2); - Mockito.when(mockContext.getAuthorizer()).thenReturn(mockAuthorizer); + Map<Pair<String, EntitySpec>, AuthorizationResult> responses = + Map.of( + Pair.of( + PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), + new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())), + editQueriesResult1, + Pair.of( + PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), + new EntitySpec(TEST_DATASET_URN.getEntityType(), TEST_DATASET_URN.toString())), + editAllResult1, + Pair.of( + PoliciesConfig.EDIT_QUERIES_PRIVILEGE.getType(), + new EntitySpec( + TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())), + editQueriesResult2, + Pair.of( + PoliciesConfig.EDIT_ENTITY_PRIVILEGE.getType(), + new EntitySpec( + TEST_DATASET_URN_2.getEntityType(), TEST_DATASET_URN_2.toString())), + editAllResult2); + + when(mockContext.getOperationContext().authorize(any(), any())) + .thenAnswer( + args -> + responses.getOrDefault( + Pair.of(args.getArgument(0), args.getArgument(1)), + new AuthorizationResult(null, AuthorizationResult.Type.DENY, ""))); + Mockito.when(mockContext.getAuthentication()) .thenReturn(new Authentication(new Actor(ActorType.USER, TEST_ACTOR_URN.getId()), "creds")); return mockContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDataProcessInstancesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDataProcessInstancesConfig.java new file mode 100644 index 00000000000000..bc55ad38765ed5 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillDataProcessInstancesConfig.java @@ -0,0 +1,43 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.datahub.upgrade.system.dataprocessinstances.BackfillDataProcessInstances; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import io.datahubproject.metadata.context.OperationContext; +import org.opensearch.client.RestHighLevelClient; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class BackfillDataProcessInstancesConfig { + + @Bean + public NonBlockingSystemUpgrade backfillProcessInstancesHasRunEvents( + final OperationContext opContext, + EntityService<?> entityService, + ElasticSearchService elasticSearchService, + RestHighLevelClient restHighLevelClient, + @Value("${systemUpdate.processInstanceHasRunEvents.enabled}") final boolean enabled, + @Value("${systemUpdate.processInstanceHasRunEvents.reprocess.enabled}") boolean reprocessEnabled, + @Value("${systemUpdate.processInstanceHasRunEvents.batchSize}") final Integer batchSize, + @Value("${systemUpdate.processInstanceHasRunEvents.delayMs}") final Integer delayMs, + @Value("${systemUpdate.processInstanceHasRunEvents.totalDays}") Integer totalDays, + @Value("${systemUpdate.processInstanceHasRunEvents.windowDays}") Integer windowDays) { + return new BackfillDataProcessInstances( + opContext, + entityService, + elasticSearchService, + restHighLevelClient, + enabled, + reprocessEnabled, +
batchSize, + delayMs, + totalDays, + windowDays); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstances.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstances.java new file mode 100644 index 00000000000000..643a0ff5a4ce25 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstances.java @@ -0,0 +1,54 @@ +package com.linkedin.datahub.upgrade.system.dataprocessinstances; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import org.opensearch.client.RestHighLevelClient; + +public class BackfillDataProcessInstances implements NonBlockingSystemUpgrade { + + private final List<UpgradeStep> _steps; + + public BackfillDataProcessInstances( + OperationContext opContext, + EntityService<?> entityService, + ElasticSearchService elasticSearchService, + RestHighLevelClient restHighLevelClient, + boolean enabled, + boolean reprocessEnabled, + Integer batchSize, + Integer batchDelayMs, + Integer totalDays, + Integer windowDays) { + if (enabled) { + _steps = + ImmutableList.of( + new BackfillDataProcessInstancesHasRunEventsStep( + opContext, + entityService, + elasticSearchService, + restHighLevelClient, + reprocessEnabled, + batchSize, + batchDelayMs, + totalDays, + windowDays)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return "BackfillDataProcessInstances"; + } + + @Override + public List<UpgradeStep> steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java new file mode 100644 index 00000000000000..55cdcae931ab5b --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java @@ -0,0 +1,213 @@ +package com.linkedin.datahub.upgrade.system.dataprocessinstances; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.base.Throwables; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.UpgradeStepResult; +import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; +import com.linkedin.metadata.boot.BootstrapStep; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import com.linkedin.upgrade.DataHubUpgradeState; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.net.URISyntaxException; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.function.Function; +import lombok.extern.slf4j.Slf4j; +import org.codehaus.jackson.node.JsonNodeFactory;
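For orientation before the step body that follows: BackfillDataProcessInstancesHasRunEventsStep pages through the run-events index with a composite aggregation, resuming each request from the previous page's afterKey. A trimmed sketch of just that pagination loop (time windowing, document upserts, and delays omitted), reusing the OpenSearch client APIs the step itself imports:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import org.opensearch.action.search.SearchRequest;
    import org.opensearch.action.search.SearchResponse;
    import org.opensearch.client.RequestOptions;
    import org.opensearch.client.RestHighLevelClient;
    import org.opensearch.search.aggregations.AggregationBuilders;
    import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation;
    import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
    import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder;
    import org.opensearch.search.builder.SearchSourceBuilder;

    final class CompositePaginationSketch {
      /** Collects every distinct value of `field` in `index`, batchSize keys per request. */
      static List<String> distinctValues(
          RestHighLevelClient client, String index, String field, int batchSize) throws IOException {
        CompositeAggregationBuilder agg =
            AggregationBuilders.composite("aggs", List.of(new TermsValuesSourceBuilder(field).field(field)))
                .size(batchSize);
        List<String> values = new ArrayList<>();
        while (true) {
          SearchRequest request = new SearchRequest(index);
          request.source(new SearchSourceBuilder().size(0).aggregation(agg));
          SearchResponse response = client.search(request, RequestOptions.DEFAULT);
          CompositeAggregation page = response.getAggregations().get("aggs");
          page.getBuckets().forEach(b -> b.getKey().values().forEach(v -> values.add(String.valueOf(v))));
          if (page.afterKey() == null) {
            break; // no more pages
          }
          agg.aggregateAfter(page.afterKey()); // resume where the previous page ended
        }
        return values;
      }
    }

Composite aggregations keep memory bounded here: each request returns at most batchSize keys regardless of how many distinct urns the index holds, which is why the step prefers them over one giant terms aggregation.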
+import org.codehaus.jackson.node.ObjectNode; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.RequestOptions; +import org.opensearch.client.RestHighLevelClient; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregation; +import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder; +import org.opensearch.search.builder.SearchSourceBuilder; + +@Slf4j +public class BackfillDataProcessInstancesHasRunEventsStep implements UpgradeStep { + + private static final String UPGRADE_ID = "BackfillDataProcessInstancesHasRunEvents"; + private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID); + + private final OperationContext opContext; + private final EntityService<?> entityService; + private final ElasticSearchService elasticSearchService; + private final RestHighLevelClient restHighLevelClient; + + private final boolean reprocessEnabled; + private final Integer batchSize; + private final Integer batchDelayMs; + + private final Integer totalDays; + private final Integer windowDays; + + public BackfillDataProcessInstancesHasRunEventsStep( + OperationContext opContext, + EntityService<?> entityService, + ElasticSearchService elasticSearchService, + RestHighLevelClient restHighLevelClient, + boolean reprocessEnabled, + Integer batchSize, + Integer batchDelayMs, + Integer totalDays, + Integer windowDays) { + this.opContext = opContext; + this.entityService = entityService; + this.elasticSearchService = elasticSearchService; + this.restHighLevelClient = restHighLevelClient; + this.reprocessEnabled = reprocessEnabled; + this.batchSize = batchSize; + this.batchDelayMs = batchDelayMs; + this.totalDays = totalDays; + this.windowDays = windowDays; + } + + @SuppressWarnings("BusyWait") + @Override + public Function<UpgradeContext, UpgradeStepResult> executable() { + return (context) -> { + TermsValuesSourceBuilder termsValuesSourceBuilder = + new TermsValuesSourceBuilder("urn").field("urn"); + + ObjectNode json = JsonNodeFactory.instance.objectNode(); + json.put("hasRunEvents", true); + + IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); + + String runEventsIndexName = + indexConvention.getTimeseriesAspectIndexName( + DATA_PROCESS_INSTANCE_ENTITY_NAME, DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME); + + DataHubUpgradeState upgradeState = DataHubUpgradeState.SUCCEEDED; + + Instant now = Instant.now(); + Instant overallStart = now.minus(totalDays, ChronoUnit.DAYS); + for (int i = 0; ; i++) { + Instant windowEnd = now.minus(i * windowDays, ChronoUnit.DAYS); + if (!windowEnd.isAfter(overallStart)) { + break; + } + Instant windowStart = windowEnd.minus(windowDays, ChronoUnit.DAYS); + if (windowStart.isBefore(overallStart)) { + // last iteration, cap at overallStart + windowStart = overallStart; + } + + QueryBuilder queryBuilder = + QueryBuilders.boolQuery() + .must( + QueryBuilders.rangeQuery("@timestamp") + .gte(windowStart.toString()) + .lt(windowEnd.toString())); + + CompositeAggregationBuilder aggregationBuilder = + AggregationBuilders.composite("aggs", List.of(termsValuesSourceBuilder)) + .size(batchSize); + + while (true) { + SearchRequest searchRequest = new
SearchRequest(runEventsIndexName); searchRequest.source( new SearchSourceBuilder() .size(0) .aggregation(aggregationBuilder) .query(queryBuilder)); + + SearchResponse response; + + try { + response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); + } catch (IOException e) { + log.error(Throwables.getStackTraceAsString(e)); + log.error("Error querying index {}", runEventsIndexName); + upgradeState = DataHubUpgradeState.FAILED; + break; + } + List<Aggregation> aggregations = response.getAggregations().asList(); + if (aggregations.isEmpty()) { + break; + } + CompositeAggregation aggregation = (CompositeAggregation) aggregations.get(0); + Set<Urn> urns = new HashSet<>(); + for (CompositeAggregation.Bucket bucket : aggregation.getBuckets()) { + for (Object value : bucket.getKey().values()) { + try { + urns.add(Urn.createFromString(String.valueOf(value))); + } catch (URISyntaxException e) { + log.warn("Ignoring invalid urn {}", value); + } + } + } + if (!urns.isEmpty()) { + urns = entityService.exists(opContext, urns); + urns.forEach( + urn -> + elasticSearchService.upsertDocument( + opContext, + DATA_PROCESS_INSTANCE_ENTITY_NAME, + json.toString(), + indexConvention.getEntityDocumentId(urn))); + } + if (aggregation.afterKey() == null) { + break; + } + aggregationBuilder.aggregateAfter(aggregation.afterKey()); + if (batchDelayMs > 0) { + log.info("Sleeping for {} ms", batchDelayMs); + try { + Thread.sleep(batchDelayMs); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + } + BootstrapStep.setUpgradeResult(context.opContext(), UPGRADE_ID_URN, entityService); + return new DefaultUpgradeStepResult(id(), upgradeState); + }; + } + + @Override + public String id() { + return UPGRADE_ID; + } + + /** + * Returns whether the upgrade should proceed if the step fails after exceeding the maximum + * retries. + */ + @Override + public boolean isOptional() { + return true; + } + + /** Returns whether the upgrade should be skipped. */ + @Override + public boolean skip(UpgradeContext context) { + if (reprocessEnabled) { + return false; + } + + boolean previouslyRun = + entityService.exists( + context.opContext(), UPGRADE_ID_URN, DATA_HUB_UPGRADE_RESULT_ASPECT_NAME, true); + if (previouslyRun) { + log.info("{} was already run.
Skipping.", id()); + } + return previouslyRun; + } +} diff --git a/datahub-upgrade/src/main/resources/application.properties b/datahub-upgrade/src/main/resources/application.properties index b884c92f74bd48..847c264dfac38c 100644 --- a/datahub-upgrade/src/main/resources/application.properties +++ b/datahub-upgrade/src/main/resources/application.properties @@ -3,3 +3,4 @@ management.health.neo4j.enabled=false ingestion.enabled=false spring.main.allow-bean-definition-overriding=true entityClient.impl=restli +metadataChangeProposal.throttle.updateIntervalMs=0 \ No newline at end of file diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java index dc4c3073ee351c..8b6899b4c78866 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTest.java @@ -1,12 +1,18 @@ package com.linkedin.datahub.upgrade; -import static org.testng.AssertJUnit.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; import com.linkedin.datahub.upgrade.restoreindices.RestoreIndices; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; +import com.linkedin.metadata.dao.throttle.NoOpSensor; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import javax.inject.Named; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; @@ -28,6 +34,10 @@ public class UpgradeCliApplicationTest extends AbstractTestNGSpringContextTests @Autowired private ESIndexBuilder esIndexBuilder; + @Qualifier("kafkaThrottle") + @Autowired + private ThrottleSensor kafkaThrottle; + @Test public void testRestoreIndicesInit() { /* @@ -46,4 +56,10 @@ public void testBuildIndicesInit() { assertFalse( esIndexBuilder.getElasticSearchConfiguration().getBuildIndices().isAllowDocCountMismatch()); } + + @Test + public void testNoThrottle() { + assertEquals( + new NoOpSensor(), kafkaThrottle, "No kafka throttle controls expected in datahub-upgrade"); + } } diff --git a/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx b/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx index 646f47134938c4..b5629d806f7197 100644 --- a/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx +++ b/datahub-web-react/src/app/entity/shared/propagation/PropagationDetails.tsx @@ -69,7 +69,7 @@ export default function PropagationDetails({ sourceDetail }: Props) { Learn more diff --git a/datahub-web-react/src/app/settings/features/Features.tsx b/datahub-web-react/src/app/settings/features/Features.tsx index 1d0a0bb469cf86..7f39fcc6d72855 100644 --- a/datahub-web-react/src/app/settings/features/Features.tsx +++ b/datahub-web-react/src/app/settings/features/Features.tsx @@ -89,7 +89,8 @@ export const Features = () => { }, ], isNew: true, - learnMoreLink: 'https://datahubproject.io/docs/automations/docs-propagation', + learnMoreLink: + 
'https://datahubproject.io/docs/automations/docs-propagation?utm_source=datahub_core&utm_medium=docs&utm_campaign=features', }, ]; diff --git a/docker/monitoring/grafana/dashboards/datahub_dashboard.json b/docker/monitoring/grafana/dashboards/datahub_dashboard.json index 3b932756e5e117..352f84829b0923 100644 --- a/docker/monitoring/grafana/dashboards/datahub_dashboard.json +++ b/docker/monitoring/grafana/dashboards/datahub_dashboard.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 95, + "id": 56, "links": [], "liveNow": false, "panels": [ @@ -65,6 +65,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -78,6 +79,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -136,10 +138,12 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_get_Count{}[1m])/60", "interval": "", "legendFormat": "Get QPS", + "range": true, "refId": "A" }, { @@ -147,11 +151,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_get_failed_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_get_failed_Count{}[1m])/60", "hide": false, "interval": "", "legendFormat": "Get Failure", + "range": true, "refId": "B" }, { @@ -159,11 +165,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_batchGet_Count{}[1m])/60", "hide": false, "interval": "", "legendFormat": "BatchGet QPS", + "range": true, "refId": "C" }, { @@ -171,11 +179,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_failed_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_batchGet_failed_Count{}[1m])/60", "hide": false, "interval": "", "legendFormat": "BatchGet Failure", + "range": true, "refId": "D" } ], @@ -193,6 +203,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -206,6 +217,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -264,10 +276,12 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_Mean{}", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_get_Mean{}", "interval": "", "legendFormat": "Get Avg", + "range": true, "refId": "A" }, { @@ -275,11 +289,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_75thPercentile{}", + "expr": 
"metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_get_75thPercentile{}", "hide": false, "interval": "", "legendFormat": "Get P75", + "range": true, "refId": "B" }, { @@ -287,11 +303,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_get_95thPercentile{}", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_get_95thPercentile{}", "hide": false, "interval": "", "legendFormat": "Get P95", + "range": true, "refId": "C" }, { @@ -299,11 +317,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_Mean{}", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_batchGet_Mean{}", "hide": false, "interval": "", "legendFormat": "BatchGet Avg", + "range": true, "refId": "D" }, { @@ -311,11 +331,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_75thPercentile{}", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_batchGet_75thPercentile{}", "hide": false, "interval": "", "legendFormat": "BatchGet P75", + "range": true, "refId": "E" }, { @@ -323,11 +345,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_batchGet_95thPercentile{}", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityV2Resource_batchGet_95thPercentile{}", "hide": false, "interval": "", "legendFormat": "BatchGet P95", + "range": true, "refId": "F" } ], @@ -371,6 +395,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -384,6 +409,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -442,20 +468,9 @@ "type": "prometheus", "uid": "${datasource}" }, - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Ingest Count", - "refId": "E" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, + "editorMode": "code", "exemplar": false, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_AspectResource_ingestProposal_Count{}[1m])/60", "hide": false, "instant": false, "interval": "", @@ -467,24 +482,28 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_failed_Count[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_resources_entity_AspectResource_ingestProposal_success_Count{}[1m])/60", "hide": false, "interval": "", - "legendFormat": "Ingest Failure", - "refId": "C" + "legendFormat": "Ingest Count", + "range": true, + "refId": "E" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "builder", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_batchIngest_failed_Count[1m])/60", - "hide": false, + "expr": 
"increase(metrics_com_linkedin_metadata_resources_entity_AspectResource_ingestProposal_failed_Count{}[1m])/60", + "hide": true, "interval": "", - "legendFormat": "BatchIngest Failure", - "refId": "D" + "legendFormat": "Ingest Failure", + "range": true, + "refId": "C" } ], "title": "Ingest QPS", @@ -501,6 +520,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -514,6 +534,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -583,11 +604,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_75thPercentile{}", "hide": false, "interval": "", "legendFormat": "P75", + "range": true, "refId": "B" }, { @@ -595,11 +618,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_ingest_95thPercentile{}", + "expr": "metrics_com_linkedin_metadata_resources_entity_AspectResource_ingestProposal_95thPercentile{}", "hide": false, "interval": "", "legendFormat": "P95", + "range": true, "refId": "C" } ], @@ -617,6 +642,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -630,6 +656,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -688,11 +715,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_entity_EntityService_ingestAspectsToLocalDB_Mean{}", + "expr": "metrics_com_linkedin_metadata_entity_EntityServiceImpl_ingestAspectsToLocalDB_MeanRate{}", "hide": false, "interval": "", "legendFormat": "Ingest To DB", + "range": true, "refId": "B" }, { @@ -700,11 +729,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, "expr": "metrics_com_linkedin_metadata_entity_EntityService_produceMAE_Mean{}", - "hide": false, + "hide": true, "interval": "", "legendFormat": "Produce MAE", + "range": true, "refId": "C" } ], @@ -722,6 +753,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -735,6 +767,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -793,10 +826,12 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_maeProcess_Mean", + "expr": "metrics_postEntity_MeanRate", "interval": "", "legendFormat": "Avg", + "range": true, "refId": "A" }, { @@ -804,11 +839,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "builder", "exemplar": true, "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_75thPercentile", - "hide": false, + "hide": true, "interval": "", "legendFormat": "P75", + "range": true, "refId": "B" }, { @@ -816,11 +853,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, "expr": "metrics_com_linkedin_metadata_kafka_MetadataAuditEventsProcessor_maeProcess_95thPercentile", - "hide": false, + "hide": true, "interval": "", "legendFormat": "P95", + "range": true, 
"refId": "C" } ], @@ -838,6 +877,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -851,6 +891,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -958,6 +999,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -971,6 +1013,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1077,6 +1120,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1090,6 +1134,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1148,9 +1193,9 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_MetadataTestHook_latency_Mean", + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_UpdateIndicesHook_latency_Mean", "legendFormat": "Avg", "range": true, "refId": "A" @@ -1160,9 +1205,9 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_MetadataTestHook_latency_75thPercentile", + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_UpdateIndicesHook_latency_75thPercentile", "hide": false, "legendFormat": "P75", "range": true, @@ -1173,16 +1218,16 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_MetadataTestHook_latency_95thPercentile", + "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_UpdateIndicesHook_latency_95thPercentile", "hide": false, "legendFormat": "P95", "range": true, "refId": "C" } ], - "title": "Metadata Test Latency", + "title": "Metadata Update Latency", "type": "timeseries" }, { @@ -1196,6 +1241,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1209,6 +1255,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1248,7 +1295,7 @@ "x": 7, "y": 26 }, - "id": 51, + "id": 56, "options": { "legend": { "calcs": [], @@ -1267,9 +1314,10 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_NotificationGeneratorHook_latency_Mean", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_delete_Mean{}", + "interval": "", "legendFormat": "Avg", "range": true, "refId": "A" @@ -1279,10 +1327,24 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_delete_StdDev{}", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", "exemplar": true, - "expr": 
"metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_NotificationGeneratorHook_latency_75thPercentile", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_delete_75thPercentile{}", "hide": false, + "interval": "", "legendFormat": "P75", "range": true, "refId": "B" @@ -1292,16 +1354,17 @@ "type": "prometheus", "uid": "${datasource}" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_kafka_MetadataChangeLogProcessor_NotificationGeneratorHook_latency_95thPercentile", + "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_delete_95thPercentile{}", "hide": false, + "interval": "", "legendFormat": "P95", "range": true, "refId": "C" } ], - "title": "Notification Generator latency", + "title": "Delete Latency", "type": "timeseries" }, { @@ -1315,6 +1378,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1328,6 +1392,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1434,6 +1499,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1447,6 +1513,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1580,6 +1647,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1593,6 +1661,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1651,23 +1720,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_Count{}[1m])/60", "interval": "", "legendFormat": "QPS", + "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_search_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Failure", - "refId": "B" } ], "title": "Search QPS", @@ -1684,6 +1743,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1697,6 +1757,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1755,10 +1816,12 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_Mean{}", + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_Mean{}", "interval": "", "legendFormat": "Avg", + "range": true, "refId": "A" }, { @@ -1766,11 +1829,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_75thPercentile{}", + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_75thPercentile{}", "hide": false, "interval": "", "legendFormat": "P75", + 
"range": true, "refId": "B" }, { @@ -1778,11 +1843,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_entity_EntityResource_search_95thPercentile{}", + "expr": "metrics_com_linkedin_metadata_search_elasticsearch_query_ESSearchDAO_searchRequest_95thPercentile{}", "hide": false, "interval": "", "legendFormat": "P95", + "range": true, "refId": "C" } ], @@ -1800,6 +1867,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1813,6 +1881,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1943,6 +2012,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1956,6 +2026,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2014,23 +2085,17 @@ "type": "prometheus", "uid": "${datasource}" }, + "disableTextWrap": false, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_Count{}[1m])/60", + "expr": "increase(metrics_com_datahub_graphql_GraphQLController_browseV2_Count[1m]) / 60", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "", "legendFormat": "QPS", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_entity_EntityResource_browse_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Failure", - "refId": "B" + "range": true, + "refId": "A", + "useBackend": false } ], "title": "Browse QPS", @@ -2047,6 +2112,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2060,6 +2126,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2118,10 +2185,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_datahub_graphql_GraphQLController_browse_Mean{}", + "expr": "metrics_com_datahub_graphql_GraphQLController_browseV2_Mean{}", + "hide": false, "interval": "", "legendFormat": "Avg", + "range": true, "refId": "A" }, { @@ -2129,23 +2199,31 @@ "type": "prometheus", "uid": "${datasource}" }, + "disableTextWrap": false, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_datahub_graphql_GraphQLController_browse_75thPercentile{}", + "expr": "metrics_com_datahub_graphql_GraphQLController_browseV2_75thPercentile{}", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": true, "interval": "", "legendFormat": "P75", - "refId": "B" + "range": true, + "refId": "B", + "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_datahub_graphql_GraphQLController_browse_95thPercentile{}", + "expr": "metrics_com_datahub_graphql_GraphQLController_browseV2_95thPercentile{}", "hide": false, "interval": "", "legendFormat": "P95", + "range": true, "refId": "C" } ], @@ -2163,6 +2241,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": 
"", @@ -2176,6 +2255,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2306,6 +2386,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2319,6 +2400,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2377,47 +2459,14 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Count{}[1m])/60", + "expr": "increase(metrics_com_linkedin_metadata_graph_elastic_ESGraphQueryDAO_esQuery_Count{}[1m])/60", + "hide": false, "interval": "", "legendFormat": "Relationships QPS", + "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Lineage QPS", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Relationships Failure", - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "exemplar": true, - "expr": "increase(metrics_com_linkedin_metadata_resources_lineage_Lineage_get_failed_Count{}[1m])/60", - "hide": false, - "interval": "", - "legendFormat": "Lineage Failure", - "refId": "D" } ], "title": "Graph QPS", @@ -2434,6 +2483,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2447,6 +2497,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2505,10 +2556,12 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_Mean{}", + "expr": "metrics_com_linkedin_metadata_entity_validation_ValidationUtils_validateLineageResult_Mean{}", "interval": "", "legendFormat": "Avg", + "range": true, "refId": "A" }, { @@ -2516,11 +2569,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_75thPercentile{}", + "expr": "metrics_com_linkedin_metadata_entity_validation_ValidationUtils_validateLineageResult_75thPercentile{}", "hide": false, "interval": "", "legendFormat": "P75", + "range": true, "refId": "B" }, { @@ -2528,11 +2583,13 @@ "type": "prometheus", "uid": "${datasource}" }, + "editorMode": "code", "exemplar": true, - "expr": "metrics_com_linkedin_metadata_resources_lineage_Relationships_getLineage_95thPercentile{}", + "expr": "metrics_com_linkedin_metadata_entity_validation_ValidationUtils_validateLineageResult_95thPercentile{}", "hide": false, "interval": "", "legendFormat": "P95", + "range": true, "refId": "C" } ], @@ -2576,6 +2633,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2589,6 +2647,7 @@ "tooltip": false, "viz": false }, 
+ "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2668,6 +2727,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2681,6 +2741,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2760,6 +2821,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2773,6 +2835,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2876,6 +2939,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -2889,6 +2953,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2992,6 +3057,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3005,6 +3071,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3108,6 +3175,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3121,6 +3189,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3224,6 +3293,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -3237,6 +3307,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -3331,16 +3402,15 @@ } ], "refresh": "5s", - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 39, "tags": [], "templating": { "list": [ { "current": { - "selected": true, + "selected": false, "text": "Prometheus", - "value": "Prometheus" + "value": "prometheus" }, "hide": 0, "includeAll": false, @@ -3358,13 +3428,13 @@ ] }, "time": { - "from": "now-30m", + "from": "now-12h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "DataHub Dashboard", "uid": "x4fS54Vnk", - "version": 2, + "version": 6, "weekStart": "" -} +} \ No newline at end of file diff --git a/docker/profiles/docker-compose.actions.yml b/docker/profiles/docker-compose.actions.yml index c0a0fd59328715..87385801ede6b5 100644 --- a/docker/profiles/docker-compose.actions.yml +++ b/docker/profiles/docker-compose.actions.yml @@ -1,7 +1,7 @@ x-datahub-actions-service: &datahub-actions-service hostname: actions - image: ${DATAHUB_ACTIONS_IMAGE:-${DATAHUB_ACTIONS_REPO:-acryldata}/datahub-actions}:${ACTIONS_VERSION:-v0.0.14} + image: ${DATAHUB_ACTIONS_IMAGE:-${DATAHUB_ACTIONS_REPO:-acryldata}/datahub-actions}:${ACTIONS_VERSION:-v0.1.1} env_file: - datahub-actions/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} @@ -87,4 +87,4 @@ services: - debug-elasticsearch depends_on: datahub-gms-debug-elasticsearch: - condition: service_healthy \ No newline at end of file + condition: service_healthy diff --git a/docs-website/adoptionStoriesIndexes.json b/docs-website/adoptionStoriesIndexes.json index d54dd6bcfa4f0a..bb6e8d174e0ce8 100644 --- a/docs-website/adoptionStoriesIndexes.json +++ b/docs-website/adoptionStoriesIndexes.json @@ -22,6 +22,18 @@ 
"category": "Financial & Fintech", "description": "\"We found DataHub to provide excellent coverage for our needs. What we appreciate most about DataHub is its powerful API platform.\"

— Jean-Pierre Dijcks, Sr. Dir. Product Management at VISA

" }, + { + "name": "Notion", + "slug": "notion", + "imageUrl": "/img/logos/companies/notion.png", + "imageSize": "small", + "link": "https://blog.datahubproject.io/how-notion-uses-acryl-data-to-ensure-data-reliability-800427a9ba66", + "linkType": "blog", + "tagline": "How Notion Uses Acryl Data to Ensure Data Reliability", + "category": "B2B & B2C", + "platform": "cloud", + "description": "\"We rely on Acryl to gain insights and ensure our critical data is reliable. Acryl’s managed product takes DataHub to the next level through automation and emphasis on time-to-value.\"

— Ada Draginda, Senior Data Engineer at Notion

" + }, { "name": "Optum", "slug": "optum", @@ -99,6 +111,30 @@ "category": "Financial & Fintech", "description": "Discover how Checkout leverage DataHub for advanced data management and compliance, especially in managing sensitive data types." }, + { + "name": "MYOB", + "slug": "myob", + "imageUrl": "/img/logos/companies/myob.png", + "imageSize": "medium", + "link": "https://blog.datahubproject.io/how-myob-improved-data-reliability-for-dbt-and-snowflake-with-acryl-a1aa26285480", + "linkType": "blog", + "tagline": "How MYOB Improved Data Reliability for dbt and Snowflake", + "category": "Financial & Fintech", + "platform": "cloud", + "description": "\"Before bringing Acryl on board, MYOB’s data teams would see multiple breaking changes per week...Acryl has helped us significantly reduce the number of breaking changes, to the extent that they are no longer a burden on all teams.\"

— Asad Naveed, Engineering Manager at MYOB

" + }, + { + "name": "DPG Media", + "slug": "dpg-media", + "imageUrl": "/img/logos/companies/dpg-media.png", + "imageSize": "medium", + "link": "https://blog.datahubproject.io/how-acryl-data-helped-dpg-media-save-25-per-month-in-snowflake-costs-c29a1618a703", + "linkType": "blog", + "tagline": "How Acryl Data Helped DPG Media Save 25% Per Month in Snowflake Costs", + "category": "And More", + "platform": "cloud", + "description": "DPG Media used DataHub Cloud to identify and safely retire redundant assets from their data warehouse, reducing costs by 25%.

DataHub Cloud is used to bring federated governance to the data mesh architecture at DPG Media.
" + }, { "name": "MediaMarkt Saturn", "slug": "mediamarkt-saturn", diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 1a40c986b31671..9b1b89afb329a5 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -23,6 +23,9 @@ module.exports = { src: "/scripts/rb2b.js", async: true, defer: true, + }, + { + src: "https://app.revenuehero.io/scheduler.min.js" } ], noIndex: isSaas, @@ -79,7 +82,7 @@ module.exports = { { to: "/cloud", activeBasePath: "cloud", - label: "Cloud", + html: "Cloud", position: "right", }, { diff --git a/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss b/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss index 881e90a7d09763..ca5774658b2f40 100644 --- a/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss +++ b/docs-website/src/pages/adoption-stories/_components/LearnItemCard/styles.module.scss @@ -16,8 +16,9 @@ } .card_button { - padding: 1rem; + padding: 1.5rem; text-align: center; + margin-top: auto; } .card { @@ -53,6 +54,7 @@ .featureBody { padding: 0 2rem; + flex-grow: 1; } .card_image { diff --git a/docs-website/src/pages/adoption-stories/index.jsx b/docs-website/src/pages/adoption-stories/index.jsx index 27f4b876af20a6..2c5538b1f2f35f 100644 --- a/docs-website/src/pages/adoption-stories/index.jsx +++ b/docs-website/src/pages/adoption-stories/index.jsx @@ -3,18 +3,21 @@ import Layout from "@theme/Layout"; import BrowserOnly from "@docusaurus/BrowserOnly"; import LearnItemCard from "./_components/LearnItemCard"; import styles from "./styles.module.scss"; - +import clsx from "clsx"; import customerStoriesIndexes from "../../../adoptionStoriesIndexes.json"; function AdoptionStoriesListPageContent() { const companies = (customerStoriesIndexes?.companies || []).filter((company) => company.link); const [activeFilters, setActiveFilters] = useState([]); + const [platformFilter, setPlatformFilter] = useState(false); // New state for platform filter const categories = ["B2B & B2C", "E-Commerce", "Financial & Fintech", "And More"]; const selectedCardRef = useRef(null); - const filteredItems = activeFilters.length - ? companies.filter((company) => activeFilters.includes(company.category)) - : companies; + const filteredItems = companies.filter((company) => { + const categoryMatch = activeFilters.length ? activeFilters.includes(company.category) : true; + const platformMatch = platformFilter ? company.platform === "cloud" : true; + return categoryMatch && platformMatch; + }); const handleFilterToggle = (category) => { if (activeFilters.includes(category)) { @@ -24,6 +27,10 @@ function AdoptionStoriesListPageContent() { } }; + const handlePlatformFilterToggle = () => { + setPlatformFilter(!platformFilter); + }; + useEffect(() => { const selectedSlug = window.location.hash.substring(1); if (selectedCardRef.current) { @@ -45,7 +52,6 @@ function AdoptionStoriesListPageContent() {
- For: {categories.map((category) => (
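The `filteredItems` change in `adoption-stories/index.jsx` above combines the existing category filters with the new cloud-platform toggle. A minimal TypeScript sketch of that predicate, assuming a `Company` shape with the optional `category` and `platform` fields seen in `adoptionStoriesIndexes.json` (not the component's actual types):

```typescript
interface Company {
  name: string;
  category?: string;
  platform?: string; // entries tagged "cloud" are DataHub Cloud stories
}

// Mirrors the filter in index.jsx: a company passes when it matches an
// active category filter (if any are set) and, when the platform toggle
// is on, is a cloud story.
function filterCompanies(
  companies: Company[],
  activeFilters: string[],
  platformFilter: boolean,
): Company[] {
  return companies.filter((company) => {
    const categoryMatch = activeFilters.length > 0
      ? activeFilters.includes(company.category ?? "")
      : true;
    const platformMatch = platformFilter ? company.platform === "cloud" : true;
    return categoryMatch && platformMatch;
  });
}

// Example: with the platform toggle on, only the cloud story survives.
const sample: Company[] = [
  { name: "MYOB", category: "Financial & Fintech", platform: "cloud" },
  { name: "VISA", category: "Financial & Fintech" },
];
console.log(filterCompanies(sample, ["Financial & Fintech"], true)); // MYOB only
```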
diff --git a/docs-website/src/pages/adoption-stories/styles.module.scss b/docs-website/src/pages/adoption-stories/styles.module.scss index d08b48a011de07..514811b0cfa9a1 100644 --- a/docs-website/src/pages/adoption-stories/styles.module.scss +++ b/docs-website/src/pages/adoption-stories/styles.module.scss @@ -4,4 +4,4 @@ align-items: center; gap: 10px; flex-wrap: wrap; -} \ No newline at end of file +} diff --git a/docs-website/src/pages/cloud/CompanyLogos/customersData.json b/docs-website/src/pages/cloud/CompanyLogos/customersData.json index e8a7470eab4702..8255fa33b77a3c 100644 --- a/docs-website/src/pages/cloud/CompanyLogos/customersData.json +++ b/docs-website/src/pages/cloud/CompanyLogos/customersData.json @@ -69,7 +69,7 @@ "asset": { "_ref": "https://cdn.sanity.io/images/cqo9wkgf/production/b446f595b4b13a72ee82a285924715f950e012ca-540x270.png" }, - "alt": "DPG Megia" + "alt": "DPG Media" } }, { diff --git a/docs-website/src/pages/cloud/DemoForm/hubspotFormStyles.css b/docs-website/src/pages/cloud/DemoForm/hubspotFormStyles.css new file mode 100644 index 00000000000000..9d0488bf8064ba --- /dev/null +++ b/docs-website/src/pages/cloud/DemoForm/hubspotFormStyles.css @@ -0,0 +1,74 @@ +.hs-input { + width: 100% !important; +} + +.hs-form input, +.hs-form select, +.hs-form textarea { + border: .5px solid #DDD; + background-color: #FFF; + padding: .75rem 1rem; + margin: 0.4rem auto; + border-radius: 8px; + font-size: 16px; +} + +.hs-form ::placeholder { + font-family: Manrope; +} + +.hs-form input[type="submit"] { + background-color: #1990FF; + color: #fff; + border: none; + padding: 10px 20px; /* Custom padding */ + cursor: pointer; + border-radius: 8px; /* Rounded corners */ + font-size: 16px; + font-weight: 600; + width: 100px; +} + +.hs-form input[type="submit"]:hover { + background-color: #0056b3; /* Button hover color */ +} + +/* hide the label */ + +.hs-form label span { + display: none; +} + +/* error labels */ + +.hs-form .hs-error-msgs { + font-size: 15px; + color: red; + list-style-type: none; + padding: 0; + text-align: left; +} + +.hs-form .error { + border: red 1.5px solid !important; +} + +/* customize placeholder style */ + +.hs-form input::placeholder, +.hs-form textarea::placeholder, +.hs-form textarea { + font-size: 16px; + font-weight: 400; + font-family: sans-serif; + color: gray; +} + +.hs-form .hs_firstname { + padding-right: 0.5rem; +} + +.hs-form .hs_lastname { + padding-left: 0.5rem; +} + diff --git a/docs-website/src/pages/cloud/DemoForm/index.jsx b/docs-website/src/pages/cloud/DemoForm/index.jsx new file mode 100644 index 00000000000000..28777e722e962d --- /dev/null +++ b/docs-website/src/pages/cloud/DemoForm/index.jsx @@ -0,0 +1,94 @@ +import React, { useEffect } from 'react'; +import clsx from "clsx"; +import Link from "@docusaurus/Link"; +import styles from "./styles.module.scss"; +import ScrollingCustomers from '../CompanyLogos'; +import './hubspotFormStyles.css'; + +const DemoForm = ({ formId }) => { + useEffect(() => { + const formContainerId = `hubspotForm-${formId}`; + + const initializeHubspotForm = () => { + if (!document.querySelector(`#${formContainerId} .hs-form`)) { + window.hbspt.forms.create({ + region: "na1", + portalId: "14552909", + formId: "ed2447d6-e6f9-4771-8f77-825b114a9421", + target: `#${formContainerId}`, + }); + + setTimeout(() => { + const emailInput = document.querySelector(`#${formContainerId} .hs_email .input > input`); + const firstNameInput = document.querySelector(`#${formContainerId} .hs_firstname .input > input`); + const 
lastNameInput = document.querySelector(`#${formContainerId} .hs_lastname .input > input`); + const phoneInput = document.querySelector(`#${formContainerId} .hs_phone .input > input`); + const additionalInfoInput = document.querySelector(`#${formContainerId} .hs_additional_info .input > textarea`); + + if (emailInput) emailInput.placeholder = 'Company Email'; + if (firstNameInput) firstNameInput.placeholder = 'First Name'; + if (lastNameInput) lastNameInput.placeholder = 'Last Name'; + if (phoneInput) phoneInput.placeholder = 'Phone Number'; + if (additionalInfoInput) additionalInfoInput.placeholder = 'How can we help?'; + + const selectNoEElement = document.getElementById(`number_of_employees-ed2447d6-e6f9-4771-8f77-825b114a9421`); + if (selectNoEElement) { + const disabledOption = selectNoEElement.querySelector('option[disabled]'); + if (disabledOption) { + disabledOption.text = "Select Number of Employees"; + disabledOption.value = ""; + } + } + const selectfamiliarityElement = document.getElementById(`familiarity_with_acryl_datahub-ed2447d6-e6f9-4771-8f77-825b114a9421`); + if (selectfamiliarityElement) { + const disabledOption = selectfamiliarityElement.querySelector('option[disabled]'); + if (disabledOption) { + disabledOption.text = "How familiar are you with DataHub?"; + disabledOption.value = ""; + } + } + }, 1000); // Delay to ensure the form is fully loaded + + window.hero = new RevenueHero({ routerId: '982' }); + window.hero.schedule('hsForm_ed2447d6-e6f9-4771-8f77-825b114a9421'); + } + }; + + if (!window.hbspt) { + const script = document.createElement('script'); + script.src = "//js.hsforms.net/forms/embed/v2.js"; + script.async = true; + script.type = 'text/javascript'; + document.body.appendChild(script); + + script.onload = () => { + initializeHubspotForm(); + }; + } else { + initializeHubspotForm(); + } + + return () => { + const hubspotForm = document.querySelector(`#${formContainerId} .hs-form`); + if (hubspotForm) { + hubspotForm.remove(); + } + }; + }, [formId]); + + return ( +
+
+
+
Book a free Demo
+
+ Schedule a personalized demo and get a free trial. 
+
+
{/* Use unique ID */} +
+
+ ); +}; + +export default DemoForm; diff --git a/docs-website/src/pages/cloud/DemoForm/styles.module.scss b/docs-website/src/pages/cloud/DemoForm/styles.module.scss new file mode 100644 index 00000000000000..4157a228ae739e --- /dev/null +++ b/docs-website/src/pages/cloud/DemoForm/styles.module.scss @@ -0,0 +1,55 @@ +.col { + padding: 0 2rem; +} + +.formContainer { + padding: 2rem; + margin: 0 auto; + background-color:#F3F3F3; + border: 1px solid #DDD; + align-items: center; + max-width: 540px; + border-radius: 16px; + + .formContent { + width: 90%; + margin: 0 auto; + } + + .formHeader { + justify-content: space-between; + text-align: left; + + .formTitle { + font-size: 24px; + font-weight: 600; + color: #000; + line-height: 28px; + margin-bottom: 12px; + } + + .formSubtitle { + font-size: 14px; + color: #666; + line-height: 14px; + margin-bottom: 8px; + } + } + +} + +.bookButton { + display: none; +} + +@media screen and (max-width: 999px) { + .bookButton { + display: block; + } + .formContainer { + display: none; + } + .productTourButton { + text-align: center!important; + } +} diff --git a/docs-website/src/pages/cloud/Hero/index.js b/docs-website/src/pages/cloud/Hero/index.js new file mode 100644 index 00000000000000..ce5af5c8b1248f --- /dev/null +++ b/docs-website/src/pages/cloud/Hero/index.js @@ -0,0 +1,40 @@ +import React, { useEffect } from 'react'; +import clsx from "clsx"; +import Link from "@docusaurus/Link"; +import styles from "./styles.module.scss"; +import ScrollingCustomers from '../CompanyLogos'; +import DemoForm from '../DemoForm'; + +const Hero = () => { + return ( +
+
+
+
+
+

DataHub Cloud

+
+ Experience the premium version of DataHub +
+ with Data Observability and Data Governance built-in. +
+
+ + Book Demo + + + Live Product Tour → + + +
+
+ +
+
+
+
+
+ ); +}; + +export default Hero; diff --git a/docs-website/src/pages/cloud/Hero/styles.module.scss b/docs-website/src/pages/cloud/Hero/styles.module.scss new file mode 100644 index 00000000000000..5da028d0171557 --- /dev/null +++ b/docs-website/src/pages/cloud/Hero/styles.module.scss @@ -0,0 +1,74 @@ +.col { + padding: 0 2rem; +} + +.hero { + .button { + margin: 0rem 1rem 2rem 1rem; + } + + .hero__title { + font-size: 3rem; + text-align: left; + } + + .hero__secondtitle { + font-size: 2rem; + font-weight: 300; + margin-bottom: 2rem; + text-align: left; + } + + .hero__subtitle { + margin-bottom: 2rem; + font-size: 1.75rem; + line-height: 2.5rem; + margin-left: 0; + text-align: left; + } + + .buttonLightBlue { + color: #1990FF !important; + background: #EAF3FF; + border-color: #EAF3FF; + + &:hover { + background: #D6E7FF; + } + } + .productTourButton { + background-color: transparent; + border: 0; + color: #1990FF !important; + text-align: left; + } + +} + +.hero__cta { + margin: auto; + display: flex; + flex-direction: column; + justify-content: flex-start; +} + +.bookButton { + display: none; +} + +@media screen and (max-width: 999px) { + .bookButton { + display: block; + } + + .productTourButton { + text-align: center!important; + } +} + +@media screen and (min-width: 1000px){ + .productTourButton { + padding-left: 0!important; + margin-left: 0!important; + } +} \ No newline at end of file diff --git a/docs-website/src/pages/cloud/index.js b/docs-website/src/pages/cloud/index.js index 00437c8a7640a7..8769092bedf1e8 100644 --- a/docs-website/src/pages/cloud/index.js +++ b/docs-website/src/pages/cloud/index.js @@ -4,18 +4,16 @@ import Link from "@docusaurus/Link"; import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; import Enterprise from "./Enterprise"; import { Section } from "../_components/Section"; -import ScrollingCustomers from "./CompanyLogos"; import clsx from "clsx"; import styles from "./styles.module.scss"; import UnifiedTabs from "./UnifiedTabs"; import FeatureCards from "./FeatureCards"; - +import Hero from "./Hero"; +import DemoForm from "./DemoForm"; function Home() { const context = useDocusaurusContext(); const { siteConfig = {} } = context; - // const { colorMode } = useColorMode(); - if (siteConfig.customFields.isSaas) { window.location.replace("/docs"); @@ -26,32 +24,8 @@ function Home() { title={'DataHub Cloud - Unify Data Observability, Governance and Discovery'} description="DataHub cloud is Managed DataHub with Data Observability and Data Governance built-in." > -
-
-
-
-

Try DataHub Cloud

-
- Introducing DataHub as a Managed Service -
with Data Observability and Data Governance built-in.
- {/* */} -
- - Book Demo - - - Product Tour - -
-
-
-
- -
+ +
@@ -62,24 +36,22 @@ function Home() {
-
-
-

Get your free trial.

-
Data Discovery, Data Quality and Data Governance unified.
- - - Book Demo - - - Product Tour - +
+
+

Get your free trial.

+
Data Discovery, Data Quality and Data Governance unified.
+
+ + Book Demo + + + Live Product Tour → + +
- {/*
-
- An extension of the DataHub Core project.
- View Cloud Docs. - -
*/} +
+
+
diff --git a/docs-website/src/pages/cloud/styles.module.scss b/docs-website/src/pages/cloud/styles.module.scss index b805063750dd4b..66eafff4617db0 100644 --- a/docs-website/src/pages/cloud/styles.module.scss +++ b/docs-website/src/pages/cloud/styles.module.scss @@ -1,3 +1,8 @@ +.col { + padding: 0 2rem; +} + + .link { &:hover { text-decoration: none; @@ -9,44 +14,67 @@ background-color: #FAFAFA !important; } + .hero { - margin-top: 80px; - :global { - .button { - margin-right: 1rem; - } - } - .hero__title { - font-size: 4rem; - } - - .hero__secondtitle { - font-size: 2rem; - font-weight: 300; - margin-bottom: 2rem; - - } - - .hero__subtitle { - margin-bottom: 2rem; - font-size: 1.75rem; - line-height: 2.5rem; - } - - .buttonLightBlue { - color: #1990FF !important; - background: #EAF3FF; - border-color: #EAF3FF; - :hover { - background: #D6E7FF; - } - } - - .learnMore { - font-size: 1.25rem; - font-weight: 600; - margin-top: 0.5rem; - - } - } - + .button { + margin: 0rem 1rem 2rem 1rem; + } + + .hero__title { + font-size: 3rem; + text-align: left; + } + + .hero__secondtitle { + font-size: 2rem; + font-weight: 300; + margin-bottom: 2rem; + text-align: left; + } + + .hero__subtitle { + margin-bottom: 2rem; + font-size: 1.75rem; + line-height: 2.5rem; + margin-left: 0; + text-align: left; + } + + +.productTourButton { + background-color: transparent; + border: 0; + color: #1990FF !important; +} + + +} + +.hero__cta { + margin: auto; + display: flex; + flex-direction: column; + justify-content: flex-start; +} + +.bookButton { + display: none; +} + + +@media screen and (max-width: 999px) { + .bookButton, .productTourButton { + display: block; + } + + .productTourButton { + text-align: center!important; + } +} + +@media screen and (min-width: 1000px){ + .productTourButton { + padding-left: 0!important; + margin-left: 0!important; + } +} \ No newline at end of file diff --git a/docs-website/static/img/adoption-stories/adoption-stories-dpg-media.png b/docs-website/static/img/adoption-stories/adoption-stories-dpg-media.png new file mode 100644 index 00000000000000..43327794c247a4 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-dpg-media.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-myob.png b/docs-website/static/img/adoption-stories/adoption-stories-myob.png new file mode 100644 index 00000000000000..ad68b505706286 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-myob.png differ diff --git a/docs-website/static/img/adoption-stories/adoption-stories-notion.png b/docs-website/static/img/adoption-stories/adoption-stories-notion.png new file mode 100644 index 00000000000000..7bdbc8ab5fa5e3 Binary files /dev/null and b/docs-website/static/img/adoption-stories/adoption-stories-notion.png differ diff --git a/docs-website/static/img/logos/companies/dpg-media.png b/docs-website/static/img/logos/companies/dpg-media.png new file mode 100644 index 00000000000000..cc62a5d15f30aa Binary files /dev/null and b/docs-website/static/img/logos/companies/dpg-media.png differ diff --git a/docs-website/static/img/logos/companies/myob.png b/docs-website/static/img/logos/companies/myob.png new file mode 100644 index 00000000000000..6bb2a1dcb169c6 Binary files /dev/null and b/docs-website/static/img/logos/companies/myob.png differ diff --git a/docs-website/static/img/logos/companies/notion.png b/docs-website/static/img/logos/companies/notion.png new file mode 100644 index 00000000000000..8d4c391cc89c77 Binary 
files /dev/null and b/docs-website/static/img/logos/companies/notion.png differ diff --git a/docs/authentication/guides/sso/configure-oidc-react.md b/docs/authentication/guides/sso/configure-oidc-react.md index 3676bbdfcc9b8c..4dd882cb9a8642 100644 --- a/docs/authentication/guides/sso/configure-oidc-react.md +++ b/docs/authentication/guides/sso/configure-oidc-react.md @@ -79,7 +79,12 @@ At this point, your app registration should look like the following. Finally, cl :::note Optional Once registration is done, you will land on the app registration **Overview** tab. -On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if need be (if you want to support both local testing and Azure deployments). Finally, click **Save**. +On the left-side navigation bar, click on **Authentication** under **Manage** and add extra redirect URIs if need be (if you want to support both local testing and Azure deployments). + +For the logout URI: +- **Front-channel logout URL**: `https://your-datahub-domain.com/login` + +Finally, click **Save**.
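Both URIs in the Azure guide above derive from the DataHub deployment's base URL. A hypothetical TypeScript sketch of that derivation, using the guide's placeholder domain; `/callback/oidc` is the standard DataHub OIDC redirect path, and whatever you compute here must match what is pasted into the app registration:

```typescript
// Hypothetical helper: derive the Azure app registration values from a
// DataHub base URL (placeholder domain from the guide).
function azureRegistrationValues(baseUrl: string) {
  return {
    // Redirect URI Azure calls back after a successful login.
    redirectUri: `${baseUrl}/callback/oidc`,
    // Front-channel logout URL: Azure loads this when the user signs out
    // elsewhere in the same session, so DataHub can end its session too.
    frontChannelLogoutUrl: `${baseUrl}/login`,
  };
}

console.log(azureRegistrationValues("https://your-datahub-domain.com"));
```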

diff --git a/docs/authorization/policies.md b/docs/authorization/policies.md index 45d0b59e408337..5c99241f75190f 100644 --- a/docs/authorization/policies.md +++ b/docs/authorization/policies.md @@ -146,15 +146,15 @@ These privileges are for DataHub operators to access & manage the administrative #### Access & Credentials -| Platform Privileges | Description | -|--------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. | -| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. | -| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. | -| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | -| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. | -| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords | | -| Manage Connections | Allow actor to manage connections to external DataHub platforms. | +| Platform Privileges | Description | +|---------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Generate Personal Access Tokens | Allow actor to generate personal access tokens for use with DataHub APIs. | +| Manage Policies | Allow actor to create and remove access control policies. Be careful - Actors with this privilege are effectively super users. | +| Manage Secrets | Allow actor to create & remove Secrets stored inside DataHub. | +| Manage Users & Groups | Allow actor to create, remove, and update users and groups on DataHub. | +| Manage All Access Tokens | Allow actor to create, list and revoke access tokens on behalf of users in DataHub. Be careful - Actors with this privilege are effectively super users that can impersonate other users. | +| Manage User Credentials | Allow actor to manage credentials for native DataHub users, including inviting new users and resetting passwords | | +| Manage Connections | Allow actor to manage connections to external DataHub platforms. | #### Product Features @@ -191,15 +191,16 @@ These privileges are for DataHub operators to access & manage the administrative #### System Management -| Platform Privileges | Description | -|-----------------------------------------------|--------------------------------------------------------------------------| -| Restore Indices API[^1] | Allow actor to use the Restore Indices API. | | -| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. | -| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. | -| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. | -| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. | -| Apply Retention API[^1] | Allow actor to apply retention using the API. 
| -| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. | +| Platform Privileges | Description | +|-----------------------------------------------|------------------------------------------------------------------------| +| Restore Indices API[^1] | Allow actor to use the Restore Indices API. | | +| Get Timeseries index sizes API[^1] | Allow actor to use the get Timeseries indices size API. | +| Truncate timeseries aspect index size API[^1] | Allow actor to use the API to truncate a timeseries index. | +| Get ES task status API[^1] | Allow actor to use the get task status API for an ElasticSearch task. | +| Enable/Disable Writeability API[^1] | Allow actor to enable or disable GMS writeability for data migrations. | +| Apply Retention API[^1] | Allow actor to apply retention using the API. | +| Analytics API access[^1] | Allow actor to use API read access to raw analytics data. | +| Manage System Operations | Allow actor to manage system operation controls. | [^1]: Only active if REST_API_AUTHORIZATION_ENABLED is true [^2]: DataHub Cloud only diff --git a/docs/dataproducts.md b/docs/dataproducts.md index af30ff2a0aa099..f80a65d9a7bc4b 100644 --- a/docs/dataproducts.md +++ b/docs/dataproducts.md @@ -90,6 +90,8 @@ Here is an example of a Data Product named "Pet of the Week" which belongs to th When bare domain names like `Marketing` is used, `datahub` will first check if a domain like `urn:li:domain:Marketing` is provisioned, failing that; it will check for a provisioned domain that has the same name. If we are unable to resolve bare domain names to provisioned domains, then yaml-based ingestion will refuse to proceeed until the domain is provisioned on DataHub. +This applies to other fields as well, such as owners, ownership types, tags, and terms. + ::: You can also provide fully-qualified domain names (e.g. `urn:li:domain:dcadded3-2b70-4679-8b28-02ac9abc92eb`) to ensure that no ingestion-time domain resolution is needed. diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 21ed738e878f88..6429996c088b4a 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -14,21 +14,21 @@ DataHub works. | `UI_INGESTION_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | Enable UI based ingestion. | | `DATAHUB_ANALYTICS_ENABLED` | `true` | boolean | [`Frontend`, `GMS`] | Collect DataHub usage to populate the analytics dashboard. | | `BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE` | `true` | boolean | [`GMS`, `MCE Consumer`, `MAE Consumer`] | Do not wait for the `system-update` to complete before starting. This should typically only be disabled during development. | -| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED` | `false` | boolean | [`Frontend`, `GMS`] | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI. | +| `ER_MODEL_RELATIONSHIP_FEATURE_ENABLED` | `false` | boolean | [`Frontend`, `GMS`] | Enable ER Model Relation Feature that shows Relationships Tab within a Dataset UI. | ## Ingestion -| Variable | Default | Unit/Type | Components | Description | -|------------------------------------|---------|-----------|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ASYNC_INGEST_DEFAULT` | `false` | boolean | [`GMS`] | Asynchronously process ingestProposals by writing the ingestion MCP to Kafka. 
Typically enabled with standalone consumers. | -| `MCP_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MCE Consumer`. | -| `MCL_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | -| `PE_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | -| `ES_BULK_REQUESTS_LIMIT` | 1000 | docs | [`GMS`, `MAE Consumer`] | Number of bulk documents to index. `MAE Consumer` if standalone. | -| `ES_BULK_FLUSH_PERIOD` | 1 | seconds | [`GMS`, `MAE Consumer`] | How frequently indexed documents are made available for query. | -| `ALWAYS_EMIT_CHANGE_LOG` | `false` | boolean | [`GMS`] | Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. | | -| `GRAPH_SERVICE_DIFF_MODE_ENABLED` | `true` | boolean | [`GMS`] | Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading. | +| Variable | Default | Unit/Type | Components | Description | +|-----------------------------------|---------|-----------|-------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ASYNC_INGEST_DEFAULT` | `false` | boolean | [`GMS`] | Asynchronously process ingestProposals by writing the ingestion MCP to Kafka. Typically enabled with standalone consumers. | +| `MCP_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MCE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MCE Consumer`. | +| `MCL_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | +| `PE_CONSUMER_ENABLED` | `true` | boolean | [`GMS`, `MAE Consumer`] | When running in standalone mode, disabled on `GMS` and enabled on separate `MAE Consumer`. | +| `ES_BULK_REQUESTS_LIMIT` | 1000 | docs | [`GMS`, `MAE Consumer`] | Number of bulk documents to index. `MAE Consumer` if standalone. | +| `ES_BULK_FLUSH_PERIOD` | 1 | seconds | [`GMS`, `MAE Consumer`] | How frequently indexed documents are made available for query. | +| `ALWAYS_EMIT_CHANGE_LOG` | `false` | boolean | [`GMS`] | Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. | | +| `GRAPH_SERVICE_DIFF_MODE_ENABLED` | `true` | boolean | [`GMS`] | Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading. | ## Caching diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index dbf2d4a0c5169a..abb6bcd32a554f 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -19,17 +19,22 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ## Next ### Breaking Changes + - #9857 (#10773) `lower` method was removed from `get_db_name` of `SQLAlchemySource` class. This change will affect the urns of all related to `SQLAlchemySource` entities. 
- + Old `urn`, where `data_base_name` is `Some_Database`: + ``` - urn:li:dataJob:(urn:li:dataFlow:(mssql,demodata.Foo.stored_procedures,PROD),Proc.With.SpecialChar) ``` + New `urn`, where `data_base_name` is `Some_Database`: + ``` - urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar) ``` - Re-running with stateful ingestion should automatically clear up the entities with old URNS and add entities with new URNs, therefore not duplicating the containers or jobs. + + Re-running with stateful ingestion should automatically clear up the entities with old URNS and add entities with new URNs, therefore not duplicating the containers or jobs. - #11313 - `datahub get` will no longer return a key aspect for entities that don't exist. @@ -39,11 +44,11 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Other Notable Changes -## 0.14.0 +## 0.14.0.2 ### Breaking Changes -- Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this +- Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this behavior is required. - #10814 Data flow info and data job info aspect will produce an additional field that will require a corresponding upgrade of server. Otherwise server can reject the aspects. - #10868 - OpenAPI V3 - Creation of aspects will need to be wrapped within a `value` key and the API is now symmetric with respect to input and outputs. @@ -51,6 +56,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe Example Global Tags Aspect: Previous: + ```json { "tags": [ @@ -78,34 +84,38 @@ New (optional fields `systemMetadata` and `headers`): "headers": {} } ``` + - #10858 Profiling configuration for Glue source has been updated. -Previously, the configuration was: -```yaml -profiling: {} -``` + Previously, the configuration was: -Now, it needs to be: + ```yaml + profiling: {} + ``` -```yaml -profiling: - enabled: true -``` + Now, it needs to be: + + ```yaml + profiling: + enabled: true + ``` ### Potential Downtime ### Deprecations -- OpenAPI v1: OpenAPI v1 is collectively defined as all endpoints which are not prefixed with `/v2` or `/v3`. The v1 endpoints +- OpenAPI v1: OpenAPI v1 is collectively defined as all endpoints which are not prefixed with `/v2` or `/v3`. The v1 endpoints will be deprecated in no less than 6 months. Endpoints will be replaced with equivalents in the `/v2` or `/v3` APIs. No loss of functionality expected unless explicitly mentioned in Breaking Changes. ### Other Notable Changes + - #10498 - Tableau ingestion can now be configured to ingest multiple sites at once and add the sites as containers. The feature is currently only available for Tableau Server. - #10466 - Extends configuration in `~/.datahubenv` to match `DatahubClientConfig` object definition. See full configuration in https://datahubproject.io/docs/python-sdk/clients/. The CLI should now respect the updated configurations specified in `~/.datahubenv` across its functions and utilities. This means that for systems where ssl certification is disabled, setting `disable_ssl_verification: true` in `~./datahubenv` will apply to all CLI calls. - #11002 - We will not auto-generate a `~/.datahubenv` file. You must either run `datahub init` to create that file, or set environment variables so that the config is loaded. - #11023 - Added a new parameter to datahub's `put` cli command: `--run-id`. 
This parameter is useful to associate a given write to an ingestion process. A use-case can be mimick transformers when a transformer for aspect being written does not exist. - #11051 - Ingestion reports will now trim the summary text to a maximum of 800k characters to avoid generating `dataHubExecutionRequestResult` that are too large for GMS to handle. + ## 0.13.3 ### Breaking Changes diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java index cedaac25ffee9d..f6858e7da4ba63 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/GraphRetriever.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; import java.util.List; +import java.util.function.Function; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -40,4 +41,60 @@ RelatedEntitiesScrollResult scrollRelatedEntities( int count, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis); + + /** + * Consume graph edges + * + * @param consumer consumer function, return true to exit early + * @param sourceTypes + * @param sourceEntityFilter + * @param destinationTypes + * @param destinationEntityFilter + * @param relationshipTypes + * @param relationshipFilter + * @param sortCriteria + * @param count + * @param startTimeMillis + * @param endTimeMillis + */ + default void consumeRelatedEntities( + @Nonnull Function consumer, + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriteria, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + + String scrollId = null; + boolean exitCriteria = false; + + while (!exitCriteria) { + RelatedEntitiesScrollResult result = + scrollRelatedEntities( + sourceTypes, + sourceEntityFilter, + destinationTypes, + destinationEntityFilter, + relationshipTypes, + relationshipFilter, + sortCriteria, + scrollId, + count, + startTimeMillis, + endTimeMillis); + + exitCriteria = consumer.apply(result); + + if (result == null || result.getEntities().isEmpty() || result.getScrollId() == null) { + exitCriteria = true; + } else { + scrollId = result.getScrollId(); + } + } + } } diff --git a/lombok.config b/lombok.config index df71bb6a0fb878..7324b9265c5203 100644 --- a/lombok.config +++ b/lombok.config @@ -1,2 +1,3 @@ config.stopBubbling = true lombok.addLombokGeneratedAnnotation = true +lombok.copyableAnnotations += org.springframework.beans.factory.annotation.Qualifier diff --git a/metadata-auth/auth-api/build.gradle b/metadata-auth/auth-api/build.gradle index 7303b79b0c5f0a..acc1af7e2e3ad6 100644 --- a/metadata-auth/auth-api/build.gradle +++ b/metadata-auth/auth-api/build.gradle @@ -31,6 +31,7 @@ dependencies() { testImplementation externalDependency.testng testImplementation externalDependency.mockito + testImplementation project(path: ':metadata-operation-context') testImplementation 'uk.org.webcompere:system-stubs-testng:2.1.6' } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java index d5bf22cab60406..62d8206e42565c 100644 --- 
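As an aside on the new `consumeRelatedEntities` default method added to `GraphRetriever` above: a minimal usage sketch. The flattened diff drops the generic parameters, so this assumes the consumer is a `Function<RelatedEntitiesScrollResult, Boolean>` (matching the javadoc's "return true to exit early" contract) and that `RelatedEntitiesScrollResult` lives alongside `GraphRetriever`; the relationship name and page size below are illustrative only.

```java
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.query.filter.RelationshipFilter;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

class RelatedEntityCounter {
  /** Count related entities, stopping the scroll early once a cap is reached. */
  static int count(
      GraphRetriever graph,
      Filter sourceFilter,
      Filter destinationFilter,
      RelationshipFilter relationshipFilter,
      int cap) {
    AtomicInteger seen = new AtomicInteger();
    graph.consumeRelatedEntities(
        result -> {
          if (result != null) {
            seen.addAndGet(result.getEntities().size());
          }
          return seen.get() >= cap; // true -> exit early, stop scrolling
        },
        null,                       // sourceTypes: any
        sourceFilter,
        null,                       // destinationTypes: any
        destinationFilter,
        List.of("DownstreamOf"),    // relationshipTypes: illustrative value
        relationshipFilter,
        List.of(),                  // sortCriteria: none
        1000,                       // count: page size per scroll request
        null,                       // startTimeMillis: unbounded
        null);                      // endTimeMillis: unbounded
    return Math.min(seen.get(), cap);
  }
}
```

The design keeps scroll-id bookkeeping inside the default method, so callers only express per-page logic and an exit condition.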
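One more note, on the `lombok.config` change above: adding `@Qualifier` to `lombok.copyableAnnotations` makes Lombok copy the field-level annotation onto the constructor parameters it generates, which Spring needs to pick the right bean. A hedged sketch of the effect; the bean and qualifier names here are hypothetical, not taken from the codebase.

```java
import lombok.RequiredArgsConstructor;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Component;

/** Hypothetical bean type, for illustration only. */
interface SomeClient {}

@Component
@RequiredArgsConstructor
class ExampleService {
  // With the copyableAnnotations entry, Lombok's generated constructor is
  // equivalent to:
  //   ExampleService(@Qualifier("primaryClient") SomeClient client)
  // Without it, the @Qualifier stays on the field only, and Spring may
  // resolve the wrong SomeClient bean when several candidates exist.
  @Qualifier("primaryClient")
  private final SomeClient client;
}
```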
a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java @@ -25,8 +25,6 @@ import static com.linkedin.metadata.authorization.PoliciesConfig.API_ENTITY_PRIVILEGE_MAP; import static com.linkedin.metadata.authorization.PoliciesConfig.API_PRIVILEGE_MAP; -import com.datahub.authentication.Authentication; -import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -50,7 +48,6 @@ import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -95,8 +92,7 @@ public class AuthUtil { /** OpenAPI/Rest.li Methods */ public static List> isAPIAuthorized( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final EntityRegistry entityRegistry, @Nonnull final Collection mcps) { @@ -117,8 +113,7 @@ public static List> isAPIAuthorized( Map, Integer> authorizationResult = isAPIAuthorizedUrns( - authentication, - authorizer, + session, apiGroup, changeUrnMCPs.stream().map(Pair::getFirst).collect(Collectors.toSet())); @@ -133,8 +128,7 @@ public static List> isAPIAuthorized( } public static Map, Integer> isAPIAuthorizedUrns( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final Collection> changeTypeUrns) { @@ -150,8 +144,7 @@ public static Map, Integer> isAPIAuthorizedUrns( case RESTATE: case PATCH: if (!isAPIAuthorized( - authentication, - authorizer, + session, lookupAPIPrivilege(apiGroup, UPDATE, urn.getEntityType()), new EntitySpec(urn.getEntityType(), urn.toString()))) { return Pair.of(changeTypePair, HttpStatus.SC_FORBIDDEN); @@ -159,8 +152,7 @@ public static Map, Integer> isAPIAuthorizedUrns( break; case CREATE_ENTITY: if (!isAPIAuthorized( - authentication, - authorizer, + session, lookupAPIPrivilege(apiGroup, CREATE, urn.getEntityType()), new EntitySpec(urn.getEntityType(), urn.toString()))) { return Pair.of(changeTypePair, HttpStatus.SC_FORBIDDEN); @@ -168,8 +160,7 @@ public static Map, Integer> isAPIAuthorizedUrns( break; case DELETE: if (!isAPIAuthorized( - authentication, - authorizer, + session, lookupAPIPrivilege(apiGroup, DELETE, urn.getEntityType()), new EntitySpec(urn.getEntityType(), urn.toString()))) { return Pair.of(changeTypePair, HttpStatus.SC_FORBIDDEN); @@ -184,58 +175,45 @@ public static Map, Integer> isAPIAuthorizedUrns( } public static boolean isAPIAuthorizedResult( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, - @Nonnull final SearchResult result) { + @Nonnull final AuthorizationSession session, @Nonnull final SearchResult result) { return isAPIAuthorizedEntityUrns( - authentication, - authorizer, + session, READ, result.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList())); } public static boolean isAPIAuthorizedResult( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, - @Nonnull final ScrollResult result) { + @Nonnull final AuthorizationSession session, @Nonnull final ScrollResult result) { return isAPIAuthorizedEntityUrns( - authentication, - 
authorizer, + session, READ, result.getEntities().stream().map(SearchEntity::getEntity).collect(Collectors.toList())); } public static boolean isAPIAuthorizedResult( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, - @Nonnull final AutoCompleteResult result) { + @Nonnull final AuthorizationSession session, @Nonnull final AutoCompleteResult result) { return isAPIAuthorizedEntityUrns( - authentication, - authorizer, + session, READ, result.getEntities().stream().map(AutoCompleteEntity::getUrn).collect(Collectors.toList())); } public static boolean isAPIAuthorizedResult( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, - @Nonnull final BrowseResult result) { + @Nonnull final AuthorizationSession session, @Nonnull final BrowseResult result) { return isAPIAuthorizedEntityUrns( - authentication, - authorizer, + session, READ, result.getEntities().stream().map(BrowseResultEntity::getUrn).collect(Collectors.toList())); } public static boolean isAPIAuthorizedUrns( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection urns) { if (ApiGroup.ENTITY.equals(apiGroup)) { - return isAPIAuthorizedEntityUrns(authentication, authorizer, apiOperation, urns); + return isAPIAuthorizedEntityUrns(session, apiOperation, urns); } List resourceSpecs = @@ -244,15 +222,11 @@ public static boolean isAPIAuthorizedUrns( .collect(Collectors.toList()); return isAPIAuthorized( - authentication, - authorizer, - lookupAPIPrivilege(apiGroup, apiOperation, null), - resourceSpecs); + session, lookupAPIPrivilege(apiGroup, apiOperation, null), resourceSpecs); } public static boolean isAPIAuthorizedEntityUrns( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection urns) { @@ -265,43 +239,36 @@ public static boolean isAPIAuthorizedEntityUrns( .allMatch( entry -> isAPIAuthorized( - authentication, - authorizer, + session, lookupAPIPrivilege(ENTITY, apiOperation, entry.getKey()), entry.getValue())); } public static boolean isAPIAuthorizedEntityType( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiOperation apiOperation, @Nonnull final String entityType) { - return isAPIAuthorizedEntityType( - authentication, authorizer, ENTITY, apiOperation, List.of(entityType)); + return isAPIAuthorizedEntityType(session, ENTITY, apiOperation, List.of(entityType)); } public static boolean isAPIAuthorizedEntityType( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final ApiOperation apiOperation, @Nonnull final String entityType) { - return isAPIAuthorizedEntityType( - authentication, authorizer, apiGroup, apiOperation, List.of(entityType)); + return isAPIAuthorizedEntityType(session, apiGroup, apiOperation, List.of(entityType)); } public static boolean isAPIAuthorizedEntityType( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection entityTypes) { - return 
isAPIAuthorizedEntityType(authentication, authorizer, ENTITY, apiOperation, entityTypes); + return isAPIAuthorizedEntityType(session, ENTITY, apiOperation, entityTypes); } public static boolean isAPIAuthorizedEntityType( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection entityTypes) { @@ -311,60 +278,45 @@ public static boolean isAPIAuthorizedEntityType( .allMatch( entityType -> isAPIAuthorized( - authentication, - authorizer, + session, lookupAPIPrivilege(apiGroup, apiOperation, entityType), new EntitySpec(entityType, ""))); } public static boolean isAPIAuthorized( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final ApiOperation apiOperation) { return isAPIAuthorized( - authentication, - authorizer, - lookupAPIPrivilege(apiGroup, apiOperation, null), - (EntitySpec) null); + session, lookupAPIPrivilege(apiGroup, apiOperation, null), (EntitySpec) null); } public static boolean isAPIAuthorized( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final PoliciesConfig.Privilege privilege, @Nullable final EntitySpec resource) { - return isAPIAuthorized(authentication, authorizer, Disjunctive.disjoint(privilege), resource); + return isAPIAuthorized(session, Disjunctive.disjoint(privilege), resource); } public static boolean isAPIAuthorized( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final PoliciesConfig.Privilege privilege) { - return isAPIAuthorized( - authentication, authorizer, Disjunctive.disjoint(privilege), (EntitySpec) null); + return isAPIAuthorized(session, Disjunctive.disjoint(privilege), (EntitySpec) null); } private static boolean isAPIAuthorized( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final Disjunctive> privileges, @Nullable final EntitySpec resource) { - return isAPIAuthorized( - authentication, authorizer, privileges, resource != null ? List.of(resource) : List.of()); + return isAPIAuthorized(session, privileges, resource != null ? 
List.of(resource) : List.of()); } private static boolean isAPIAuthorized( - @Nonnull final Authentication authentication, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final Disjunctive> privileges, @Nonnull final Collection resources) { if (Boolean.parseBoolean(System.getenv(REST_API_AUTHORIZATION_ENABLED_ENV))) { - return isAuthorized( - authorizer, - authentication.getActor().toUrnStr(), - buildDisjunctivePrivilegeGroup(privileges), - resources); + return isAuthorized(session, buildDisjunctivePrivilegeGroup(privileges), resources); } else { return true; } @@ -372,29 +324,25 @@ private static boolean isAPIAuthorized( /** GraphQL Methods */ public static boolean canViewEntity( - @Nonnull final String actor, @Nonnull Authorizer authorizer, @Nonnull Urn urn) { - return canViewEntity(actor, authorizer, List.of(urn)); + @Nonnull final AuthorizationSession session, @Nonnull Urn urn) { + return canViewEntity(session, List.of(urn)); } public static boolean canViewEntity( - @Nonnull final String actor, - @Nonnull final Authorizer authorizer, - @Nonnull final Collection urns) { + @Nonnull final AuthorizationSession session, @Nonnull final Collection urns) { - return isAuthorizedEntityUrns(authorizer, actor, READ, urns); + return isAuthorizedEntityUrns(session, READ, urns); } public static boolean isAuthorized( - @Nonnull final String actor, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final ApiOperation apiOperation) { - return isAuthorized(authorizer, actor, lookupAPIPrivilege(apiGroup, apiOperation, null), null); + return isAuthorized(session, lookupAPIPrivilege(apiGroup, apiOperation, null), null); } public static boolean isAuthorizedEntityType( - @Nonnull final String actor, - @Nonnull final Authorizer authorizer, + @Nonnull final AuthorizationSession session, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection entityTypes) { @@ -403,23 +351,20 @@ public static boolean isAuthorizedEntityType( .allMatch( entityType -> isAuthorized( - authorizer, - actor, + session, lookupEntityAPIPrivilege(apiOperation, entityType), new EntitySpec(entityType, ""))); } public static boolean isAuthorizedEntityUrns( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection urns) { - return isAuthorizedUrns(authorizer, actor, ENTITY, apiOperation, urns); + return isAuthorizedUrns(session, ENTITY, apiOperation, urns); } public static boolean isAuthorizedUrns( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final ApiGroup apiGroup, @Nonnull final ApiOperation apiOperation, @Nonnull final Collection urns) { @@ -435,50 +380,41 @@ public static boolean isAuthorizedUrns( Disjunctive> privileges = lookupAPIPrivilege(apiGroup, apiOperation, entry.getKey()); return entry.getValue().stream() - .allMatch(entitySpec -> isAuthorized(authorizer, actor, privileges, entitySpec)); + .allMatch(entitySpec -> isAuthorized(session, privileges, entitySpec)); }); } public static boolean isAuthorized( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final PoliciesConfig.Privilege privilege) { return isAuthorized( - authorizer, - actor, + session, buildDisjunctivePrivilegeGroup(Disjunctive.disjoint(privilege)), 
(EntitySpec) null); } public static boolean isAuthorized( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final PoliciesConfig.Privilege privilege, @Nullable final EntitySpec entitySpec) { return isAuthorized( - authorizer, - actor, - buildDisjunctivePrivilegeGroup(Disjunctive.disjoint(privilege)), - entitySpec); + session, buildDisjunctivePrivilegeGroup(Disjunctive.disjoint(privilege)), entitySpec); } private static boolean isAuthorized( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final Disjunctive> privileges, @Nullable EntitySpec maybeResourceSpec) { - return isAuthorized( - authorizer, actor, buildDisjunctivePrivilegeGroup(privileges), maybeResourceSpec); + return isAuthorized(session, buildDisjunctivePrivilegeGroup(privileges), maybeResourceSpec); } public static boolean isAuthorized( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final DisjunctivePrivilegeGroup privilegeGroup, @Nullable final EntitySpec resourceSpec) { for (ConjunctivePrivilegeGroup conjunctive : privilegeGroup.getAuthorizedPrivilegeGroups()) { - if (isAuthorized(authorizer, actor, conjunctive, resourceSpec)) { + if (isAuthorized(session, conjunctive, resourceSpec)) { return true; } } @@ -487,8 +423,7 @@ public static boolean isAuthorized( } private static boolean isAuthorized( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final ConjunctivePrivilegeGroup requiredPrivileges, @Nullable final EntitySpec resourceSpec) { @@ -500,7 +435,7 @@ private static boolean isAuthorized( // Each privilege in a group _must_ all be true to permit the operation. for (final String privilege : requiredPrivileges.getRequiredPrivileges()) { // Create and evaluate an Authorization request. - if (isDenied(authorizer, actor, privilege, resourceSpec)) { + if (isDenied(session, privilege, resourceSpec)) { // Short circuit. return false; } @@ -509,17 +444,15 @@ private static boolean isAuthorized( } private static boolean isAuthorized( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final DisjunctivePrivilegeGroup privilegeGroup, @Nonnull final Collection resourceSpecs) { if (resourceSpecs.isEmpty()) { - return isAuthorized(authorizer, actor, privilegeGroup, (EntitySpec) null); + return isAuthorized(session, privilegeGroup, (EntitySpec) null); } - return resourceSpecs.stream() - .allMatch(spec -> isAuthorized(authorizer, actor, privilegeGroup, spec)); + return resourceSpecs.stream().allMatch(spec -> isAuthorized(session, privilegeGroup, spec)); } /** Common Methods */ @@ -618,14 +551,11 @@ static DisjunctivePrivilegeGroup buildDisjunctivePrivilegeGroup( } private static boolean isDenied( - @Nonnull final Authorizer authorizer, - @Nonnull final String actor, + @Nonnull final AuthorizationSession session, @Nonnull final String privilege, @Nullable final EntitySpec resourceSpec) { // Create and evaluate an Authorization request. 
- final AuthorizationRequest request = - new AuthorizationRequest(actor, privilege, Optional.ofNullable(resourceSpec)); - final AuthorizationResult result = authorizer.authorize(request); + final AuthorizationResult result = session.authorize(privilege, resourceSpec); return AuthorizationResult.Type.DENY.equals(result.getType()); } diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationSession.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationSession.java new file mode 100644 index 00000000000000..0ca972873e2f6c --- /dev/null +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthorizationSession.java @@ -0,0 +1,10 @@ +package com.datahub.authorization; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** Combines a common interface for actor and authorizer which is cached per session */ +public interface AuthorizationSession { + AuthorizationResult authorize( + @Nonnull final String privilege, @Nullable final EntitySpec resourceSpec); +} diff --git a/metadata-auth/auth-api/src/test/java/com/datahub/authorization/AuthUtilTest.java b/metadata-auth/auth-api/src/test/java/com/datahub/authorization/AuthUtilTest.java index 199b0faa933aeb..6619bcd0f47dcb 100644 --- a/metadata-auth/auth-api/src/test/java/com/datahub/authorization/AuthUtilTest.java +++ b/metadata-auth/auth-api/src/test/java/com/datahub/authorization/AuthUtilTest.java @@ -25,6 +25,7 @@ import com.linkedin.metadata.authorization.ApiOperation; import com.linkedin.metadata.authorization.Conjunctive; import com.linkedin.util.Pair; +import io.datahubproject.test.metadata.context.TestAuthSession; import java.util.List; import java.util.Map; import java.util.Set; @@ -97,16 +98,14 @@ public void testIsAPIAuthorizedUrns() { // User A (Entity 1 & 2 Edit, View only Entity 3) assertTrue( AuthUtil.isAPIAuthorizedEntityUrns( - TEST_AUTH_A, - mockAuthorizer, + TestAuthSession.from(TEST_AUTH_A, mockAuthorizer), READ, List.of(TEST_ENTITY_1, TEST_ENTITY_2, TEST_ENTITY_3)), "Expected read allowed for all entities"); assertEquals( AuthUtil.isAPIAuthorizedUrns( - TEST_AUTH_A, - mockAuthorizer, + TestAuthSession.from(TEST_AUTH_A, mockAuthorizer), ENTITY, List.of( Pair.of(ChangeType.UPSERT, TEST_ENTITY_1), @@ -120,8 +119,7 @@ public void testIsAPIAuthorizedUrns() { assertEquals( AuthUtil.isAPIAuthorizedUrns( - TEST_AUTH_A, - mockAuthorizer, + TestAuthSession.from(TEST_AUTH_A, mockAuthorizer), ENTITY, List.of( Pair.of(ChangeType.DELETE, TEST_ENTITY_1), @@ -136,20 +134,20 @@ public void testIsAPIAuthorizedUrns() { // User B Entity 2 Denied, Read access 1 & 3 assertFalse( AuthUtil.isAPIAuthorizedEntityUrns( - TEST_AUTH_B, - mockAuthorizer, + TestAuthSession.from(TEST_AUTH_B, mockAuthorizer), READ, List.of(TEST_ENTITY_1, TEST_ENTITY_2, TEST_ENTITY_3)), "Expected read denied for based on entity 2"); assertTrue( AuthUtil.isAPIAuthorizedEntityUrns( - TEST_AUTH_B, mockAuthorizer, READ, List.of(TEST_ENTITY_1, TEST_ENTITY_3)), + TestAuthSession.from(TEST_AUTH_B, mockAuthorizer), + READ, + List.of(TEST_ENTITY_1, TEST_ENTITY_3)), "Expected read allowed due to exclusion of entity 2"); assertEquals( AuthUtil.isAPIAuthorizedUrns( - TEST_AUTH_B, - mockAuthorizer, + TestAuthSession.from(TEST_AUTH_B, mockAuthorizer), ENTITY, List.of( Pair.of(ChangeType.UPSERT, TEST_ENTITY_1), @@ -163,8 +161,7 @@ public void testIsAPIAuthorizedUrns() { assertEquals( AuthUtil.isAPIAuthorizedUrns( - TEST_AUTH_B, - mockAuthorizer, + TestAuthSession.from(TEST_AUTH_B, mockAuthorizer), 
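To make the migration concrete: the new `AuthorizationSession` interface above has a single `authorize(privilege, resourceSpec)` method, so an existing `Authorizer` plus an actor urn can be adapted with one lambda, reusing exactly the request-building logic the old `isDenied` body performed. A minimal sketch, assuming only the types visible in this diff (`AuthorizationRequest` is used unqualified in `AuthUtil`, so it is assumed to live in `com.datahub.authorization`):

```java
import com.datahub.authorization.AuthorizationRequest;
import com.datahub.authorization.AuthorizationResult;
import com.datahub.authorization.AuthorizationSession;
import com.datahub.plugins.auth.authorization.Authorizer;
import java.util.Optional;

final class SimpleSessions {
  /** Adapt an (actor, authorizer) pair into the new session interface. */
  static AuthorizationSession of(Authorizer authorizer, String actorUrnStr) {
    // AuthorizationSession has a single abstract method, so a lambda suffices.
    return (privilege, resourceSpec) ->
        authorizer.authorize(
            new AuthorizationRequest(
                actorUrnStr, privilege, Optional.ofNullable(resourceSpec)));
  }
}
```

This appears to mirror what the `TestAuthSession.from(...)` helper does in the updated `AuthUtilTest`, and explains why every `(authentication, authorizer)` parameter pair in `AuthUtil` collapses into one `session` argument.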
ENTITY, List.of( Pair.of(ChangeType.DELETE, TEST_ENTITY_1), diff --git a/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java b/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensor.java similarity index 61% rename from metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java rename to metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensor.java index 8fbb34b1eacd6f..2adf2543aa2f77 100644 --- a/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/producer/KafkaProducerThrottle.java +++ b/metadata-dao-impl/kafka-producer/src/main/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensor.java @@ -1,20 +1,31 @@ -package com.datahub.metadata.dao.producer; +package com.datahub.metadata.dao.throttle; + +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_TIMESERIES_LAG; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_VERSIONED_LAG; import com.codahale.metrics.Gauge; import com.google.common.annotations.VisibleForTesting; import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleEvent; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.dao.throttle.ThrottleType; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; +import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; -import java.util.function.Consumer; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -27,23 +38,43 @@ import org.springframework.util.backoff.BackOffExecution; import org.springframework.util.backoff.ExponentialBackOff; +/** + * This class is designed to monitor MCL consumption by a specific consumer group and provide + * throttling hooks. + * + *
<p>
Initially this was designed for throttling the async mcp processor `mce-consumer`, however it + * also handles throttling synchronous requests via rest.li, graphql, and openapi for non-browser + * based requests. + */ @Slf4j @Builder(toBuilder = true) -public class KafkaProducerThrottle { +public class KafkaThrottleSensor implements ThrottleSensor { + private static final Set SUPPORTED_THROTTLE_TYPES = + Set.of(MCL_VERSIONED_LAG, MCL_TIMESERIES_LAG); @Nonnull private final EntityRegistry entityRegistry; @Nonnull private final Admin kafkaAdmin; @Nonnull private final MetadataChangeProposalConfig.ThrottlesConfig config; @Nonnull private final String mclConsumerGroupId; @Nonnull private final String versionedTopicName; @Nonnull private final String timeseriesTopicName; - @Nonnull private final Consumer pauseConsumer; + + /** A list of throttle event listeners to execute when throttling occurs and ceases */ + @Builder.Default @Nonnull + private final List> throttleCallbacks = + new ArrayList<>(); private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1); - private final Map medianLag = new ConcurrentHashMap<>(); - private final Map backoffMap = new ConcurrentHashMap<>(); + private final Map medianLag = new ConcurrentHashMap<>(); + private final Map backoffMap = new ConcurrentHashMap<>(); + + @Override + public KafkaThrottleSensor addCallback(Function callback) { + throttleCallbacks.add(callback); + return this; + } /** Update lag information at a given rate */ - public KafkaProducerThrottle start() { + public KafkaThrottleSensor start() { if ((config.getVersioned().isEnabled() || config.getTimeseries().isEnabled()) && config.getUpdateIntervalMs() > 0) { scheduler.scheduleAtFixedRate( @@ -79,13 +110,13 @@ public void stop() { * @return median lag per mcl topic */ @VisibleForTesting - public Map getLag() { + public Map getLag() { return medianLag.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } @VisibleForTesting - public boolean isThrottled(MclType mclType) { + public boolean isThrottled(ThrottleType mclType) { if (getThrottleConfig(mclType).isEnabled() && medianLag.containsKey(mclType)) { return medianLag.get(mclType) > getThrottleConfig(mclType).getThreshold(); } @@ -93,7 +124,7 @@ public boolean isThrottled(MclType mclType) { } @VisibleForTesting - public long computeNextBackOff(MclType mclType) { + public long computeNextBackOff(ThrottleType mclType) { if (isThrottled(mclType)) { BackOffExecution backOffExecution = backoffMap.computeIfAbsent( @@ -115,54 +146,61 @@ public long computeNextBackOff(MclType mclType) { @VisibleForTesting public void throttle() throws InterruptedException { - for (MclType mclType : MclType.values()) { - if (isThrottled(mclType)) { - long backoffWaitMs = computeNextBackOff(mclType); - - if (backoffWaitMs > 0) { - log.warn( - "Throttled producer Topic: {} Duration: {} ms MedianLag: {}", - getTopicName(mclType), - backoffWaitMs, - medianLag.get(mclType)); - MetricUtils.gauge( - this.getClass(), - String.format("%s_throttled", getTopicName(mclType)), - () -> (Gauge) () -> 1); - MetricUtils.counter( - this.getClass(), String.format("%s_throttledCount", getTopicName(mclType))) - .inc(); - - log.info("Pausing MCE consumer for {} ms.", backoffWaitMs); - pauseConsumer.accept(true); - Thread.sleep(backoffWaitMs); - log.info("Resuming MCE consumer."); - pauseConsumer.accept(false); - - // if throttled for one topic, skip remaining - return; - } else { - // no throttle or exceeded configuration limits - 
log.info("MCE consumer throttle exponential backoff reset."); - backoffMap.remove(mclType); - MetricUtils.gauge( - this.getClass(), - String.format("%s_throttled", getTopicName(mclType)), - () -> (Gauge) () -> 0); - } - } else { + + Map throttled = new LinkedHashMap<>(); + + for (ThrottleType mclType : SUPPORTED_THROTTLE_TYPES) { + long backoffWaitMs = computeNextBackOff(mclType); + + if (backoffWaitMs <= 0) { // not throttled, remove backoff tracking - log.info("MCE consumer throttle exponential backoff reset."); + log.info("Throttle exponential backoff reset."); backoffMap.remove(mclType); MetricUtils.gauge( this.getClass(), String.format("%s_throttled", getTopicName(mclType)), () -> (Gauge) () -> 0); + } else { + throttled.put(mclType, backoffWaitMs); + } + } + + // handle throttled + if (!throttled.isEmpty()) { + long maxBackoffWaitMs = throttled.values().stream().max(Comparator.naturalOrder()).get(); + log.warn( + "Throttled Topic: {} Duration: {} ms MedianLag: {}", + throttled.keySet().stream().map(this::getTopicName).collect(Collectors.toList()), + maxBackoffWaitMs, + throttled.keySet().stream().map(medianLag::get).collect(Collectors.toList())); + + throttled.keySet().stream() + .forEach( + mclType -> { + MetricUtils.gauge( + this.getClass(), + String.format("%s_throttled", getTopicName(mclType)), + () -> (Gauge) () -> 1); + MetricUtils.counter( + this.getClass(), String.format("%s_throttledCount", getTopicName(mclType))) + .inc(); + }); + + log.info("Throttling {} callbacks for {} ms.", throttleCallbacks.size(), maxBackoffWaitMs); + final ThrottleEvent throttleEvent = ThrottleEvent.throttle(throttled); + List throttleControls = + throttleCallbacks.stream().map(callback -> callback.apply(throttleEvent)).toList(); + + if (throttleControls.stream().anyMatch(ThrottleControl::hasCallback)) { + Thread.sleep(maxBackoffWaitMs); + log.info("Resuming {} callbacks after wait.", throttleControls.size()); + throttleControls.forEach( + control -> control.execute(ThrottleEvent.clearThrottle(throttleEvent))); } } } - private Map getMedianLag() { + private Map getMedianLag() { try { Map mclConsumerOffsets = kafkaAdmin @@ -183,11 +221,11 @@ private Map getMedianLag() { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); return Stream.of( - Pair.of(MclType.VERSIONED, versionedTopicName), - Pair.of(MclType.TIMESERIES, timeseriesTopicName)) + Pair.of(MCL_VERSIONED_LAG, versionedTopicName), + Pair.of(MCL_TIMESERIES_LAG, timeseriesTopicName)) .map( topic -> { - MclType mclType = topic.getFirst(); + ThrottleType mclType = topic.getFirst(); String topicName = topic.getSecond(); Map topicOffsets = @@ -212,22 +250,22 @@ private Map getMedianLag() { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } catch (ExecutionException | InterruptedException e) { log.error("Error fetching consumer group offsets.", e); - return Map.of(MclType.VERSIONED, 0L, MclType.TIMESERIES, 0L); + return Map.of(MCL_VERSIONED_LAG, 0L, MCL_TIMESERIES_LAG, 0L); } } - private MetadataChangeProposalConfig.ThrottleConfig getThrottleConfig(MclType mclType) { + private MetadataChangeProposalConfig.ThrottleConfig getThrottleConfig(ThrottleType mclType) { MetadataChangeProposalConfig.ThrottleConfig throttleConfig; switch (mclType) { - case VERSIONED -> throttleConfig = config.getVersioned(); - case TIMESERIES -> throttleConfig = config.getTimeseries(); + case MCL_VERSIONED_LAG -> throttleConfig = config.getVersioned(); + case MCL_TIMESERIES_LAG -> throttleConfig = config.getTimeseries(); default -> throw new 
IllegalStateException(); } return throttleConfig; } - private String getTopicName(MclType mclType) { - return MclType.TIMESERIES.equals(mclType) ? timeseriesTopicName : versionedTopicName; + private String getTopicName(ThrottleType mclType) { + return MCL_TIMESERIES_LAG.equals(mclType) ? timeseriesTopicName : versionedTopicName; } private static Double getMedian(Collection listValues) { @@ -238,9 +276,4 @@ private static Double getMedian(Collection listValues) { else median = values[values.length / 2]; return median; } - - public enum MclType { - TIMESERIES, - VERSIONED - } } diff --git a/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java b/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensorTest.java similarity index 80% rename from metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java rename to metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensorTest.java index ce6104ee2ca7dc..6f82ad86852992 100644 --- a/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/producer/KafkaProducerThrottleTest.java +++ b/metadata-dao-impl/kafka-producer/src/test/java/com/datahub/metadata/dao/throttle/KafkaThrottleSensorTest.java @@ -1,4 +1,4 @@ -package com.datahub.metadata.dao.producer; +package com.datahub.metadata.dao.throttle; import static org.mockito.ArgumentMatchers.anyMap; import static org.mockito.ArgumentMatchers.anyString; @@ -14,6 +14,8 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.config.MetadataChangeProposalConfig; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleType; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.Topics; import com.linkedin.util.Pair; @@ -34,7 +36,7 @@ import org.apache.kafka.common.TopicPartition; import org.testng.annotations.Test; -public class KafkaProducerThrottleTest { +public class KafkaThrottleSensorTest { private static final List STANDARD_TOPICS = List.of(Topics.METADATA_CHANGE_LOG_VERSIONED, Topics.METADATA_CHANGE_LOG_TIMESERIES); private static final String STANDARD_MCL_CONSUMER_GROUP_ID = "generic-mae-consumer-job-client"; @@ -54,16 +56,16 @@ public void testLagCalculation() throws ExecutionException, InterruptedException topicPart -> ((long) topicPart.partition() + 1) * 2, 3)); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(noSchedulerConfig().getThrottle()) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(mock(Consumer.class)) - .build(); + .build() + .addCallback((throttleEvent -> ThrottleControl.NONE)); // Refresh calculations test.refresh(); @@ -71,8 +73,8 @@ public void testLagCalculation() throws ExecutionException, InterruptedException assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); } @Test @@ -111,45 +113,52 @@ public void testThrottle() throws ExecutionException, InterruptedException { Consumer pauseFunction = mock(Consumer.class); - KafkaProducerThrottle test = - 
KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(noThrottleConfig) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(pauseFunction) - .build(); + .build() + .addCallback( + (throttleEvent -> { + pauseFunction.accept(throttleEvent.isThrottled()); + return ThrottleControl.builder() + .callback( + throttleResume -> pauseFunction.accept(throttleResume.isThrottled())) + .build(); + })); // Refresh calculations test.refresh(); assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); assertFalse( - test.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + test.isThrottled(ThrottleType.MCL_VERSIONED_LAG), "Expected not throttling, lag is below threshold"); - assertFalse(test.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES)); + assertFalse(test.isThrottled(ThrottleType.MCL_TIMESERIES_LAG)); test.throttle(); verifyNoInteractions(pauseFunction); reset(pauseFunction); - KafkaProducerThrottle test2 = test.toBuilder().config(throttleConfig).build(); + KafkaThrottleSensor test2 = test.toBuilder().config(throttleConfig).build(); // Refresh calculations test2.refresh(); assertEquals( test2.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); assertTrue( - test2.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + test2.isThrottled(ThrottleType.MCL_VERSIONED_LAG), "Expected throttling, lag is above threshold."); assertFalse( - test2.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES), + test2.isThrottled(ThrottleType.MCL_TIMESERIES_LAG), "Expected not throttling. Timeseries is disabled"); test2.throttle(); @@ -183,56 +192,48 @@ public void testBackOff() throws ExecutionException, InterruptedException { topicPart -> ((long) topicPart.partition() + 1) * 2, 3)); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(throttleConfig) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(mock(Consumer.class)) - .build(); + .build() + .addCallback((throttleEvent -> ThrottleControl.NONE)); // Refresh calculations test.refresh(); assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 2L, - KafkaProducerThrottle.MclType.TIMESERIES, 2L)); + ThrottleType.MCL_VERSIONED_LAG, 2L, + ThrottleType.MCL_TIMESERIES_LAG, 2L)); assertTrue( - test.isThrottled(KafkaProducerThrottle.MclType.VERSIONED), + test.isThrottled(ThrottleType.MCL_VERSIONED_LAG), "Expected throttling, lag is above threshold."); assertFalse( - test.isThrottled(KafkaProducerThrottle.MclType.TIMESERIES), + test.isThrottled(ThrottleType.MCL_TIMESERIES_LAG), "Expected no throttling. Timeseries is disabled"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.TIMESERIES), + test.computeNextBackOff(ThrottleType.MCL_TIMESERIES_LAG), 0L, "Expected no backoff. 
Timeseries is disabled."); + assertEquals(test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 1L, "Expected initial 1"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), 1L, "Expected initial 1"); + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 2L, "Expected second 2^1"); + assertEquals(test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 4L, "Expected third 2^2"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), - 2L, - "Expected second 2^1"); + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 8L, "Expected fourth 2^3"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), 4L, "Expected third 2^2"); - assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), - 8L, - "Expected fourth 2^3"); - assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), 8L, "Expected fifth max interval at 8"); assertEquals( - test.computeNextBackOff(KafkaProducerThrottle.MclType.VERSIONED), - -1L, - "Expected max attempts"); + test.computeNextBackOff(ThrottleType.MCL_VERSIONED_LAG), -1L, "Expected max attempts"); } @Test @@ -253,16 +254,16 @@ public void testScheduler() throws ExecutionException, InterruptedException { AdminClient mockAdmin = mockKafka(generateLag(STANDARD_TOPICS, topicPart -> 1L, topicPart -> 2L, 1)); - KafkaProducerThrottle test = - KafkaProducerThrottle.builder() + KafkaThrottleSensor test = + KafkaThrottleSensor.builder() .config(throttlesConfig) .kafkaAdmin(mockAdmin) .versionedTopicName(STANDARD_TOPICS.get(0)) .timeseriesTopicName(STANDARD_TOPICS.get(1)) .entityRegistry(mock(EntityRegistry.class)) .mclConsumerGroupId(STANDARD_MCL_CONSUMER_GROUP_ID) - .pauseConsumer(mock(Consumer.class)) - .build(); + .build() + .addCallback((throttleEvent -> ThrottleControl.NONE)); try { test.start(); @@ -270,8 +271,8 @@ public void testScheduler() throws ExecutionException, InterruptedException { assertEquals( test.getLag(), Map.of( - KafkaProducerThrottle.MclType.VERSIONED, 1L, - KafkaProducerThrottle.MclType.TIMESERIES, 1L), + ThrottleType.MCL_VERSIONED_LAG, 1L, + ThrottleType.MCL_TIMESERIES_LAG, 1L), "Expected lag updated"); } finally { test.stop(); diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index e874f70db02a3f..9506609a10044a 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -26,7 +26,7 @@ task environmentSetup(type: Exec) { outputs.file(sentinel_file) commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && set -x && " + - "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } @@ -34,12 +34,8 @@ task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingesti def sentinel_file = "${venv_name}/.build_install_package_sentinel" inputs.file file('setup.py') outputs.file(sentinel_file) - // Workaround for https://github.com/yaml/pyyaml/issues/601. - // See https://github.com/yaml/pyyaml/issues/601#issuecomment-1638509577. - // and https://github.com/datahub-project/datahub/pull/8435. 
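Stepping back to the `KafkaThrottleSensor` callback contract above, before the build-file diffs continue: a hedged sketch of a pause/resume hook, modeled on the pattern in `KafkaThrottleSensorTest`. The `pause(boolean)` method is an illustrative stand-in for a real consumer control, and the `addCallback` generic signature (`Function<ThrottleEvent, ThrottleControl>`) is assumed from the flattened diff.

```java
import com.datahub.metadata.dao.throttle.KafkaThrottleSensor;
import com.linkedin.metadata.dao.throttle.ThrottleControl;

class ThrottleWiring {
  /** Illustrative stand-in for pausing/resuming a real Kafka consumer. */
  static void pause(boolean paused) {
    // e.g. pause/resume the MCE consumer container in a real deployment
  }

  static KafkaThrottleSensor wire(KafkaThrottleSensor sensor) {
    return sensor.addCallback(
        event -> {
          pause(event.isThrottled()); // throttling began: pause consumption
          // Returning a control with a callback asks the sensor to sleep for
          // the max backoff and then invoke the callback with the cleared
          // event, at which point isThrottled() is false and we resume.
          return ThrottleControl.builder()
              .callback(resume -> pause(resume.isThrottled()))
              .build();
        });
  }
}
```

Returning `ThrottleControl.NONE` instead (as the lag-calculation test does) registers a listener with no resume hook, in which case the sensor skips the backoff sleep for that callback.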
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "pip install 'Cython<3.0' 'PyYAML<6' --no-build-isolation && " + "${pip_install_command} -e .[ignore${extra_pip_extras}] ${extra_pip_requirements} &&" + "touch ${sentinel_file}" } diff --git a/metadata-ingestion-modules/dagster-plugin/build.gradle b/metadata-ingestion-modules/dagster-plugin/build.gradle index 74ca7cedea3a52..0f11af9ca83d73 100644 --- a/metadata-ingestion-modules/dagster-plugin/build.gradle +++ b/metadata-ingestion-modules/dagster-plugin/build.gradle @@ -23,7 +23,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { outputs.file(sentinel_file) commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } @@ -77,30 +77,16 @@ task installDevTest(type: Exec, dependsOn: [installDev]) { "touch ${sentinel_file}" } -def testFile = hasProperty('testFile') ? testFile : 'unknown' -task testSingle(dependsOn: [installDevTest]) { - doLast { - if (testFile != 'unknown') { - exec { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest ${testFile}" - } - } else { - throw new GradleException("No file provided. Use -PtestFile=") - } - } -} - task testQuick(type: Exec, dependsOn: installDevTest) { // We can't enforce the coverage requirements if we run a subset of the tests. inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "tests/")) outputs.dir("${venv_name}") - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" } - task buildWheel(type: Exec, dependsOn: [environmentSetup]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' diff --git a/metadata-ingestion-modules/gx-plugin/build.gradle b/metadata-ingestion-modules/gx-plugin/build.gradle index f1adbc6676e5bc..2288ae6bd83961 100644 --- a/metadata-ingestion-modules/gx-plugin/build.gradle +++ b/metadata-ingestion-modules/gx-plugin/build.gradle @@ -23,7 +23,7 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { outputs.file(sentinel_file) commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + + "${venv_name}/bin/python -m pip install --upgrade uv && " + "touch ${sentinel_file}" } @@ -77,30 +77,16 @@ task installDevTest(type: Exec, dependsOn: [installDev]) { "touch ${sentinel_file}" } -def testFile = hasProperty('testFile') ? testFile : 'unknown' -task testSingle(dependsOn: [installDevTest]) { - doLast { - if (testFile != 'unknown') { - exec { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest ${testFile}" - } - } else { - throw new GradleException("No file provided. Use -PtestFile=") - } - } -} - task testQuick(type: Exec, dependsOn: installDevTest) { // We can't enforce the coverage requirements if we run a subset of the tests. 
inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "tests/")) outputs.dir("${venv_name}") - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" } - task buildWheel(type: Exec, dependsOn: [environmentSetup]) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'uv pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_INSTALL=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh' diff --git a/metadata-ingestion-modules/prefect-plugin/build.gradle b/metadata-ingestion-modules/prefect-plugin/build.gradle index b078b8d8de3b37..bc091c64933645 100644 --- a/metadata-ingestion-modules/prefect-plugin/build.gradle +++ b/metadata-ingestion-modules/prefect-plugin/build.gradle @@ -23,8 +23,8 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { outputs.file(sentinel_file) commandLine 'bash', '-c', "${python_executable} -m venv ${venv_name} && " + - "${venv_name}/bin/python -m pip install --upgrade pip uv wheel 'setuptools>=63.0.0' && " + - "touch ${sentinel_file}" + "${venv_name}/bin/python -m pip install --upgrade uv && " + + "touch ${sentinel_file}" } task installPackage(type: Exec, dependsOn: [environmentSetup, ':metadata-ingestion:codegen']) { @@ -75,33 +75,21 @@ task installDevTest(type: Exec, dependsOn: [installDev]) { "${pip_install_command} -e .[dev,integration-tests] && touch ${sentinel_file}" } -def testFile = hasProperty('testFile') ? testFile : 'unknown' -task testSingle(dependsOn: [installDevTest]) { - doLast { - if (testFile != 'unknown') { - exec { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest ${testFile}" - } - } else { - throw new GradleException("No file provided. Use -PtestFile=") - } - } -} - task testQuick(type: Exec, dependsOn: installDevTest) { // We can't enforce the coverage requirements if we run a subset of the tests. inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "tests/")) outputs.dir("${venv_name}") - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml -s" + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml -s" } task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) { - commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml" } diff --git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index 2333ddcee677bd..52a19777dd0337 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -273,16 +273,19 @@ source: # ... other configs ``` -

-### [Experimental] Reducing "composed of" sprawl with multiproject setups +If you have models that have tons of sources from other projects listed in the "Composed Of" section, it may also make sense to hide sources. -When many dbt projects use a single table as a source, the "Composed Of" relationships can become very large and difficult to navigate. -To address this, we are experimenting with an alternative approach to handling multiproject setups: not including sources. +### Reducing "composed of" sprawl by hiding sources + +When many dbt projects use a single table as a source, the "Composed Of" relationships can become very large and difficult to navigate, +and extra source nodes can clutter the lineage graph. + +Hiding sources is particularly useful for multi-project setups, but can help in single-project setups as well. The benefit is that your entire dbt estate becomes much easier to navigate, and the borders between projects become less noticeable. The downside is that we will not pick up any documentation or meta mappings applied to dbt sources. -To enable this, set a few additional flags in your dbt source config: +To enable this, set `entities_enabled.sources: No` and `skip_sources_in_lineage: true` in your dbt source config: ```yaml source: @@ -298,4 +301,4 @@ source: skip_sources_in_lineage: true ``` -
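As an aside, the same recipe can be driven programmatically. A minimal sketch using the ingestion `Pipeline` API, where the manifest/catalog paths, target platform, and sink address are all placeholders rather than values from this diff:

```python
from datahub.ingestion.run.pipeline import Pipeline

# Hide dbt sources entirely and skip them in lineage.
pipeline = Pipeline.create(
    {
        "source": {
            "type": "dbt",
            "config": {
                "manifest_path": "./target/manifest.json",
                "catalog_path": "./target/catalog.json",
                "target_platform": "snowflake",
                "entities_enabled": {"sources": "NO"},
                "skip_sources_in_lineage": True,
            },
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()
```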
+[Experimental] It's also possible to use `skip_sources_in_lineage: true` without disabling sources entirely. If you do this, sources will not participate in the lineage graph - they'll have upstreams but no downstreams. However, they will still contribute to docs, tags, etc to the warehouse entity. diff --git a/metadata-ingestion/examples/data_product/dataproduct.yaml b/metadata-ingestion/examples/data_product/dataproduct.yaml index baf5bb42020a45..c8158aae9d1e38 100644 --- a/metadata-ingestion/examples/data_product/dataproduct.yaml +++ b/metadata-ingestion/examples/data_product/dataproduct.yaml @@ -13,6 +13,8 @@ assets: owners: - id: urn:li:corpuser:jdoe type: BUSINESS_OWNER + - id: urn:li:corpuser:fbar + type: urn:li:ownershipType:architect # Maps to a custom ownership type # Tags associated with this Data Product tags: diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py index 67a5b3630a455f..9d2a65663ba37d 100644 --- a/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/assertion_circuit_breaker.py @@ -31,6 +31,7 @@ class AssertionCircuitBreaker(AbstractCircuitBreaker): The circuit breaker checks if there are passing assertion on the Dataset. """ + config: AssertionCircuitBreakerConfig def __init__(self, config: AssertionCircuitBreakerConfig): diff --git a/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py b/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py index f8554334281d85..a3c54046faf681 100644 --- a/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py +++ b/metadata-ingestion/src/datahub/api/circuit_breaker/circuit_breaker.py @@ -34,9 +34,11 @@ def __init__( # Select your transport with a defined url endpoint self.transport = RequestsHTTPTransport( url=datahub_host + "/api/graphql", - headers={"Authorization": "Bearer " + datahub_token} - if datahub_token is not None - else None, + headers=( + {"Authorization": "Bearer " + datahub_token} + if datahub_token is not None + else None + ), method="POST", timeout=timeout, ) diff --git a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py index e0ef85d5fd66c0..4a68fa6c66adad 100644 --- a/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py +++ b/metadata-ingestion/src/datahub/api/entities/datacontract/datacontract.py @@ -179,14 +179,16 @@ def generate_mcp( aspects=[ DataContractPropertiesClass( entity=self.entity, - schema=[SchemaContractClass(assertion=schema_assertion_urn)] - if schema_assertion_urn - else None, - freshness=[ - FreshnessContractClass(assertion=freshness_assertion_urn) - ] - if freshness_assertion_urn - else None, + schema=( + [SchemaContractClass(assertion=schema_assertion_urn)] + if schema_assertion_urn + else None + ), + freshness=( + [FreshnessContractClass(assertion=freshness_assertion_urn)] + if freshness_assertion_urn + else None + ), dataQuality=[ DataQualityContractClass(assertion=dq_assertion_urn) for dq_assertion_urn in dq_assertions @@ -195,9 +197,11 @@ def generate_mcp( # Also emit status. StatusClass(removed=False), # Emit the contract state as PENDING. 
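Most of the Python churn from here on is a mechanical reformat: conditional expressions used as arguments are now wrapped in their own parentheses so each branch sits on its own line (the style newer black releases emit). A self-contained illustration of the before/after pattern:

```python
from typing import Dict, Optional


def make_headers(token: Optional[str] = None) -> Optional[Dict[str, str]]:
    # Previously the conditional wrapped without enclosing parens; now the
    # whole expression is hugged by parentheses, as throughout this diff.
    return (
        {"Authorization": "Bearer " + token}
        if token is not None
        else None
    )


assert make_headers("abc") == {"Authorization": "Bearer abc"}
assert make_headers() is None
```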
- DataContractStatusClass(state=DataContractStateClass.PENDING) - if True - else None, + ( + DataContractStatusClass(state=DataContractStateClass.PENDING) + if True + else None + ), ], ) diff --git a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py index bf521ded5dbf3b..d406fa36e00db6 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py +++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py @@ -190,14 +190,16 @@ def end_event_mcp( timestampMillis=end_timestamp_millis, result=DataProcessInstanceRunResultClass( type=result, - nativeResultType=result_type - if result_type is not None - else self.orchestrator, + nativeResultType=( + result_type if result_type is not None else self.orchestrator + ), ), attempt=attempt, - durationMillis=(end_timestamp_millis - start_timestamp_millis) - if start_timestamp_millis - else None, + durationMillis=( + (end_timestamp_millis - start_timestamp_millis) + if start_timestamp_millis + else None + ), ), ) yield mcp @@ -258,9 +260,11 @@ def generate_mcp( aspect=DataProcessInstanceRelationships( upstreamInstances=[str(urn) for urn in self.upstream_urns], parentTemplate=str(self.template_urn) if self.template_urn else None, - parentInstance=str(self.parent_instance) - if self.parent_instance is not None - else None, + parentInstance=( + str(self.parent_instance) + if self.parent_instance is not None + else None + ), ), ) yield mcp diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 64b10d31487e00..8f58fa469a7d96 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -343,27 +343,31 @@ def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct: tags: Optional[GlobalTagsClass] = graph.get_aspect(id, GlobalTagsClass) return DataProduct( id=id, - display_name=data_product_properties.name - if data_product_properties - else None, + display_name=( + data_product_properties.name if data_product_properties else None + ), domain=domains.domains[0], - description=data_product_properties.description - if data_product_properties - else None, - assets=[e.destinationUrn for e in data_product_properties.assets or []] - if data_product_properties - else None, + description=( + data_product_properties.description if data_product_properties else None + ), + assets=( + [e.destinationUrn for e in data_product_properties.assets or []] + if data_product_properties + else None + ), owners=yaml_owners, - terms=[term.urn for term in glossary_terms.terms] - if glossary_terms - else None, + terms=( + [term.urn for term in glossary_terms.terms] if glossary_terms else None + ), tags=[tag.tag for tag in tags.tags] if tags else None, - properties=data_product_properties.customProperties - if data_product_properties - else None, - external_url=data_product_properties.externalUrl - if data_product_properties - else None, + properties=( + data_product_properties.customProperties + if data_product_properties + else None + ), + external_url=( + data_product_properties.externalUrl if data_product_properties else None + ), ) def _patch_ownership( diff --git a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py index 
f9a188c65feeff..315f2249d2e5cd 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py +++ b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py @@ -94,20 +94,24 @@ def from_schema_field( description=schema_field.description, label=schema_field.label, created=schema_field.created.__dict__ if schema_field.created else None, - lastModified=schema_field.lastModified.__dict__ - if schema_field.lastModified - else None, + lastModified=( + schema_field.lastModified.__dict__ + if schema_field.lastModified + else None + ), recursive=schema_field.recursive, - globalTags=schema_field.globalTags.__dict__ - if schema_field.globalTags - else None, - glossaryTerms=schema_field.glossaryTerms.__dict__ - if schema_field.glossaryTerms - else None, + globalTags=( + schema_field.globalTags.__dict__ if schema_field.globalTags else None + ), + glossaryTerms=( + schema_field.glossaryTerms.__dict__ + if schema_field.glossaryTerms + else None + ), isPartitioningKey=schema_field.isPartitioningKey, - jsonProps=json.loads(schema_field.jsonProps) - if schema_field.jsonProps - else None, + jsonProps=( + json.loads(schema_field.jsonProps) if schema_field.jsonProps else None + ), ) @validator("urn", pre=True, always=True) @@ -300,9 +304,11 @@ def generate_mcp( properties=[ StructuredPropertyValueAssignmentClass( propertyUrn=f"urn:li:structuredProperty:{prop_key}", - values=prop_value - if isinstance(prop_value, list) - else [prop_value], + values=( + prop_value + if isinstance(prop_value, list) + else [prop_value] + ), ) for prop_key, prop_value in field.structured_properties.items() ] @@ -359,9 +365,11 @@ def generate_mcp( properties=[ StructuredPropertyValueAssignmentClass( propertyUrn=f"urn:li:structuredProperty:{prop_key}", - values=prop_value - if isinstance(prop_value, list) - else [prop_value], + values=( + prop_value + if isinstance(prop_value, list) + else [prop_value] + ), ) for prop_key, prop_value in self.structured_properties.items() ] @@ -501,25 +509,29 @@ def from_datahub(cls, graph: DataHubGraph, urn: str) -> "Dataset": return Dataset( # type: ignore[call-arg] urn=urn, - description=dataset_properties.description - if dataset_properties and dataset_properties.description - else None, - name=dataset_properties.name - if dataset_properties and dataset_properties.name - else None, + description=( + dataset_properties.description + if dataset_properties and dataset_properties.description + else None + ), + name=( + dataset_properties.name + if dataset_properties and dataset_properties.name + else None + ), schema=Dataset._schema_from_schema_metadata(graph, urn), tags=[tag.tag for tag in tags.tags] if tags else None, - glossary_terms=[term.urn for term in glossary_terms.terms] - if glossary_terms - else None, + glossary_terms=( + [term.urn for term in glossary_terms.terms] if glossary_terms else None + ), owners=yaml_owners, - properties=dataset_properties.customProperties - if dataset_properties - else None, + properties=( + dataset_properties.customProperties if dataset_properties else None + ), subtypes=[subtype for subtype in subtypes.typeNames] if subtypes else None, - structured_properties=structured_properties_map - if structured_properties - else None, + structured_properties=( + structured_properties_map if structured_properties else None + ), ) def to_yaml( diff --git a/metadata-ingestion/src/datahub/api/entities/forms/forms.py b/metadata-ingestion/src/datahub/api/entities/forms/forms.py index 8fb7ea0bf11edd..9188ea33d6c684 100644 --- 
a/metadata-ingestion/src/datahub/api/entities/forms/forms.py +++ b/metadata-ingestion/src/datahub/api/entities/forms/forms.py @@ -197,11 +197,13 @@ def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]: title=prompt.title, description=prompt.description, type=prompt.type, - structuredPropertyParams=StructuredPropertyParamsClass( - urn=prompt.structured_property_urn - ) - if prompt.structured_property_urn - else None, + structuredPropertyParams=( + StructuredPropertyParamsClass( + urn=prompt.structured_property_urn + ) + if prompt.structured_property_urn + else None + ), required=prompt.required, ) ) @@ -339,9 +341,11 @@ def from_datahub(graph: DataHubGraph, urn: str) -> "Forms": title=prompt_raw.title, description=prompt_raw.description, type=prompt_raw.type, - structured_property_urn=prompt_raw.structuredPropertyParams.urn - if prompt_raw.structuredPropertyParams - else None, + structured_property_urn=( + prompt_raw.structuredPropertyParams.urn + if prompt_raw.structuredPropertyParams + else None + ), ) ) return Forms( diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index ed97948de9034c..44fd32d5a426b5 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -126,19 +126,23 @@ def create(file: str) -> None: ], cardinality=structuredproperty.cardinality, immutable=structuredproperty.immutable, - allowedValues=[ - PropertyValueClass( - value=v.value, description=v.description - ) - for v in structuredproperty.allowed_values - ] - if structuredproperty.allowed_values - else None, - typeQualifier={ - "allowedTypes": structuredproperty.type_qualifier.allowed_types - } - if structuredproperty.type_qualifier - else None, + allowedValues=( + [ + PropertyValueClass( + value=v.value, description=v.description + ) + for v in structuredproperty.allowed_values + ] + if structuredproperty.allowed_values + else None + ), + typeQualifier=( + { + "allowedTypes": structuredproperty.type_qualifier.allowed_types + } + if structuredproperty.type_qualifier + else None + ), ), ) emitter.emit_mcp(mcp) @@ -160,20 +164,22 @@ def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties": description=structured_property.description, entity_types=structured_property.entityTypes, cardinality=structured_property.cardinality, - allowed_values=[ - AllowedValue( - value=av.value, - description=av.description, - ) - for av in structured_property.allowedValues or [] - ] - if structured_property.allowedValues is not None - else None, - type_qualifier={ - "allowed_types": structured_property.typeQualifier.get("allowedTypes") - } - if structured_property.typeQualifier - else None, + allowed_values=( + [ + AllowedValue( + value=av.value, + description=av.description, + ) + for av in structured_property.allowedValues or [] + ] + if structured_property.allowedValues is not None + else None + ), + type_qualifier=( + {"allowed_types": structured_property.typeQualifier.get("allowedTypes")} + if structured_property.typeQualifier + else None + ), ) def to_yaml( diff --git a/metadata-ingestion/src/datahub/api/graphql/base.py b/metadata-ingestion/src/datahub/api/graphql/base.py index 3654bd38816996..c1ea6b71a6d145 100644 --- a/metadata-ingestion/src/datahub/api/graphql/base.py +++ 
b/metadata-ingestion/src/datahub/api/graphql/base.py @@ -23,9 +23,11 @@ def __init__( # Select your transport with a defined url endpoint self.transport = RequestsHTTPTransport( url=datahub_host + "/api/graphql", - headers={"Authorization": "Bearer " + datahub_token} - if datahub_token is not None - else None, + headers=( + {"Authorization": "Bearer " + datahub_token} + if datahub_token is not None + else None + ), method="POST", timeout=timeout, ) diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 9189a881f9ce72..971d4e6e72aa16 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -240,9 +240,11 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None: compose_files_for_stopping = ( quickstart_compose_file if quickstart_compose_file - else [pathlib.Path(default_quickstart_compose_file)] - if default_quickstart_compose_file - else None + else ( + [pathlib.Path(default_quickstart_compose_file)] + if default_quickstart_compose_file + else None + ) ) if compose_files_for_stopping: # docker-compose stop @@ -868,10 +870,10 @@ def download_compose_files( # also allow local files request_session = requests.Session() request_session.mount("file://", FileAdapter()) - with open( - quickstart_compose_file_name, "wb" - ) if quickstart_compose_file_name else tempfile.NamedTemporaryFile( - suffix=".yml", delete=False + with ( + open(quickstart_compose_file_name, "wb") + if quickstart_compose_file_name + else tempfile.NamedTemporaryFile(suffix=".yml", delete=False) ) as tmp_file: path = pathlib.Path(tmp_file.name) quickstart_compose_file_list.append(path) @@ -892,10 +894,10 @@ def download_compose_files( default_consumer_compose_file = ( Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml" ) - with open( - default_consumer_compose_file, "wb" - ) if default_consumer_compose_file else tempfile.NamedTemporaryFile( - suffix=".yml", delete=False + with ( + open(default_consumer_compose_file, "wb") + if default_consumer_compose_file + else tempfile.NamedTemporaryFile(suffix=".yml", delete=False) ) as tmp_file: path = pathlib.Path(tmp_file.name) quickstart_compose_file_list.append(path) @@ -914,10 +916,10 @@ def download_compose_files( default_kafka_compose_file = ( Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.kafka-setup.yml" ) - with open( - default_kafka_compose_file, "wb" - ) if default_kafka_compose_file else tempfile.NamedTemporaryFile( - suffix=".yml", delete=False + with ( + open(default_kafka_compose_file, "wb") + if default_kafka_compose_file + else tempfile.NamedTemporaryFile(suffix=".yml", delete=False) ) as tmp_file: path = pathlib.Path(tmp_file.name) quickstart_compose_file_list.append(path) diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py index 841c2f27528b72..957ee16245dd81 100644 --- a/metadata-ingestion/src/datahub/cli/lite_cli.py +++ b/metadata-ingestion/src/datahub/cli/lite_cli.py @@ -84,10 +84,14 @@ def shell_complete(self, ctx, param, incomplete): try: completions = lite.ls(path) return [ - CompletionItem(browseable.auto_complete.suggested_path, type="plain") - if browseable.auto_complete - else CompletionItem( - f"{incomplete}/{browseable.name}".replace("//", "/") + ( + CompletionItem( + browseable.auto_complete.suggested_path, type="plain" + ) + if browseable.auto_complete + else CompletionItem( + f"{incomplete}/{browseable.name}".replace("//", "/") + ) 
) for browseable in completions if not browseable.leaf @@ -240,12 +244,16 @@ def ls(path: Optional[str]) -> None: for browseable in [b for b in browseables if b.auto_complete is None]: click.secho( browseable.name, - fg="white" - if browseable.leaf - else "green" - if browseable.id.startswith("urn:") - and not browseable.id.startswith("urn:li:systemNode") - else "cyan", + fg=( + "white" + if browseable.leaf + else ( + "green" + if browseable.id.startswith("urn:") + and not browseable.id.startswith("urn:li:systemNode") + else "cyan" + ) + ), ) except PathNotFoundException: click.echo(f"Path not found: {path}") diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index a7578e39374ac5..c21361eb256c10 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -204,9 +204,9 @@ def gen_containers( externalUrl=external_url, qualifiedName=qualified_name, created=TimeStamp(time=created) if created is not None else None, - lastModified=TimeStamp(time=last_modified) - if last_modified is not None - else None, + lastModified=( + TimeStamp(time=last_modified) if last_modified is not None else None + ), ), ).as_workunit() @@ -220,9 +220,11 @@ def gen_containers( entityUrn=f"{container_urn}", aspect=DataPlatformInstance( platform=f"{make_data_platform_urn(container_key.platform)}", - instance=f"{make_dataplatform_instance_urn(container_key.platform, container_key.instance)}" - if container_key.instance - else None, + instance=( + f"{make_dataplatform_instance_urn(container_key.platform, container_key.instance)}" + if container_key.instance + else None + ), ), ).as_workunit() diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 234d7e5e255d74..c783d9a35814b3 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -1740,9 +1740,9 @@ def report_assertion_result( "type": type, "properties": properties, "externalUrl": external_url, - "error": {"type": error_type, "message": error_message} - if error_type - else None, + "error": ( + {"type": error_type, "message": error_message} if error_type else None + ), } res = self.execute_graphql( diff --git a/metadata-ingestion/src/datahub/ingestion/graph/filters.py b/metadata-ingestion/src/datahub/ingestion/graph/filters.py index edb45fa5c2dbc1..588090ec567277 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/filters.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/filters.py @@ -113,7 +113,7 @@ def _get_env_filters(env: str) -> List[SearchFilterRule]: { "field": "env", "value": env, - } + }, # Note that not all entity types have an env (e.g. dashboards / charts). # If the env filter is specified, these will be excluded. 
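To make that note concrete: when an env filter is supplied, entity types that carry no env are excluded from the results. A hedged example using `DataHubGraph.get_urns_by_filter`, with the server address as a placeholder:

```python
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

# Datasets have an env, so this behaves as expected; asking for dashboards
# or charts with an env filter would return nothing, per the note above.
for urn in graph.get_urns_by_filter(entity_types=["dataset"], env="PROD"):
    print(urn)
```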
] diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index 6d7105bd264416..7090dc5cb6e10b 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -9,6 +9,8 @@ from enum import auto from typing import List, Optional, Tuple, Union +import pydantic + from datahub.configuration.common import ( ConfigEnum, ConfigurationError, @@ -39,7 +41,7 @@ logger = logging.getLogger(__name__) -DEFAULT_REST_SINK_MAX_THREADS = int( +_DEFAULT_REST_SINK_MAX_THREADS = int( os.getenv("DATAHUB_REST_SINK_DEFAULT_MAX_THREADS", 15) ) @@ -49,16 +51,21 @@ class RestSinkMode(ConfigEnum): ASYNC = auto() # Uses the new ingestProposalBatch endpoint. Significantly more efficient than the other modes, - # but requires a server version that supports it. + # but requires a server version that supports it. Added in # https://github.com/datahub-project/datahub/pull/10706 ASYNC_BATCH = auto() +_DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as( + RestSinkMode, os.getenv("DATAHUB_REST_SINK_DEFAULT_MODE", RestSinkMode.ASYNC) +) + + class DatahubRestSinkConfig(DatahubClientConfig): - mode: RestSinkMode = RestSinkMode.ASYNC + mode: RestSinkMode = _DEFAULT_REST_SINK_MODE # These only apply in async modes. - max_threads: int = DEFAULT_REST_SINK_MAX_THREADS + max_threads: int = _DEFAULT_REST_SINK_MAX_THREADS max_pending_requests: int = 2000 # Only applies in async batch mode. diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 58fb1cb1eb2c28..3b9b5dbf63e184 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -1351,11 +1351,13 @@ def _get_delta_schema_metadata() -> Optional[SchemaMetadata]: def get_data_platform_instance() -> DataPlatformInstanceClass: return DataPlatformInstanceClass( platform=make_data_platform_urn(self.platform), - instance=make_dataplatform_instance_urn( - self.platform, self.source_config.platform_instance - ) - if self.source_config.platform_instance - else None, + instance=( + make_dataplatform_instance_urn( + self.platform, self.source_config.platform_instance + ) + if self.source_config.platform_instance + else None + ), ) @lru_cache(maxsize=None) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py index 23106ce7d2f868..a826f09b9a7c89 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py @@ -247,17 +247,21 @@ def get_workunits_internal( self.report.num_queries_by_project[project.id] += 1 queries.append(entry) self.report.num_total_queries = len(queries) + logger.info(f"Found {self.report.num_total_queries} total queries") with self.report.audit_log_preprocessing_timer: # Preprocessing stage that deduplicates the queries using query hash per usage bucket + # Note: FileBackedDict is an ordered dictionary, so the order of execution of + # queries is inherently maintained queries_deduped: FileBackedDict[Dict[int, ObservedQuery]] queries_deduped = self.deduplicate_queries(queries) self.report.num_unique_queries = len(queries_deduped) + logger.info(f"Found {self.report.num_unique_queries} unique queries") with 
self.report.audit_log_load_timer: i = 0 - for query_instances in queries_deduped.values(): - for _, query in query_instances.items(): + for _, query_instances in queries_deduped.items(): + for query in query_instances.values(): if i > 0 and i % 10000 == 0: logger.info(f"Added {i} query log entries to SQL aggregator") diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index fb22f0b6edde26..4bc120fbecf8f1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -72,6 +72,7 @@ class BIAssetSubTypes(StrEnum): # Mode MODE_REPORT = "Report" + MODE_DATASET = "Dataset" MODE_QUERY = "Query" MODE_CHART = "Chart" diff --git a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py index f40e6504f11885..09ce8b5b05203c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/confluent_schema_registry.py @@ -260,9 +260,9 @@ def _get_schema_and_fields( self.report.warning( title="Failed to get subject schema from schema registry", message=f"Failed to get {kafka_entity} {schema_type_str or ''} schema from schema registry", - context=f"{topic}: {topic_subject}" - if not is_subject - else topic_subject, + context=( + f"{topic}: {topic_subject}" if not is_subject else topic_subject + ), exc=e, ) else: @@ -320,9 +320,11 @@ def _get_schema_fields( fields = schema_util.avro_schema_to_mce_fields( avro_schema, is_key_schema=is_key_schema, - meta_mapping_processor=self.field_meta_processor - if self.source_config.enable_meta_mapping - else None, + meta_mapping_processor=( + self.field_meta_processor + if self.source_config.enable_meta_mapping + else None + ), schema_tags_field=self.source_config.schema_tags_field, tag_prefix=self.source_config.tag_prefix, ) @@ -334,9 +336,11 @@ def _get_schema_fields( base_name: str = topic.replace(".", "_") fields = protobuf_util.protobuf_schema_to_mce_fields( ProtobufSchema( - f"{base_name}-key.proto" - if is_key_schema - else f"{base_name}-value.proto", + ( + f"{base_name}-key.proto" + if is_key_schema + else f"{base_name}-value.proto" + ), schema.schema_str, ), imported_schemas, diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py index bc19940afdd1e3..10dd9e9e7e029a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py @@ -396,9 +396,11 @@ def get_partition_from_path(self, path: str) -> Optional[List[Tuple[str, str]]]: partition_keys.append( ( named_vars.named["partition_key"][key], - named_vars.named["partition_value"][key] - if "partition_value" in named_vars.named - else named_vars.named["partition"][key], + ( + named_vars.named["partition_value"][key] + if "partition_value" in named_vars.named + else named_vars.named["partition"][key] + ), ) ) return partition_keys diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 0672b9ce6f781c..1866599fa21c67 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -472,6 +472,7 @@ def _parse_into_dbt_node(self, node: Dict) -> DBTNode: upstream_nodes=upstream_nodes, materialization=materialization, catalog_type=catalog_type, + missing_from_catalog=False, # This doesn't really apply to dbt Cloud. meta=meta, query_tag={}, # TODO: Get this from the dbt API. tags=tags, diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 8d67551b9e1f2f..12812aad441f23 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -154,7 +154,7 @@ class DBTSourceReport(StaleEntityRemovalSourceReport): default_factory=LossyList ) - in_manifest_but_missing_catalog: LossyList[str] = field(default_factory=LossyList) + nodes_filtered: LossyList[str] = field(default_factory=LossyList) class EmitDirective(ConfigEnum): @@ -528,6 +528,7 @@ class DBTNode: materialization: Optional[str] # table, view, ephemeral, incremental, snapshot # see https://docs.getdbt.com/reference/artifacts/manifest-json catalog_type: Optional[str] + missing_from_catalog: bool # indicates if the node was missing from the catalog.json owner: Optional[str] @@ -853,6 +854,9 @@ def get_column_type( TypeClass = resolve_postgres_modified_type(column_type) elif dbt_adapter == "vertica": TypeClass = resolve_vertica_modified_type(column_type) + elif dbt_adapter == "snowflake": + # Snowflake types are uppercase, so we check that. + TypeClass = _field_type_mapping.get(column_type.upper()) # if still not found, report the warning if TypeClass is None: @@ -1034,6 +1038,7 @@ def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: key = node.dbt_name if not self.config.node_name_pattern.allowed(key): + self.report.nodes_filtered.append(key) continue nodes.append(node) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index e24c18147e4e61..1aad806e958f85 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -58,6 +58,12 @@ class DBTCoreConfig(DBTCommonConfig): "See https://docs.getdbt.com/reference/artifacts/run-results-json.", ) + only_include_if_in_catalog: bool = Field( + default=False, + description="[experimental] If true, only include nodes that are also present in the catalog file. " + "This is useful if you only want to include models that have been built by the associated run.", + ) + # Because we now also collect model performance metadata, the "test_results" field was renamed to "run_results". _convert_test_results_path = pydantic_renamed_field( "test_results_path", "run_results_paths", transform=lambda x: [x] if x else [] @@ -156,6 +162,7 @@ def extract_dbt_entities( manifest_adapter: str, use_identifiers: bool, tag_prefix: str, + only_include_if_in_catalog: bool, report: DBTSourceReport, ) -> List[DBTNode]: sources_by_id = {x["unique_id"]: x for x in sources_results} @@ -194,12 +201,22 @@ def extract_dbt_entities( # It's a source catalog_node = all_catalog_entities.get(key) + missing_from_catalog = catalog_node is None catalog_type = None if catalog_node is None: - if materialization not in {"test", "ephemeral"}: + if materialization in {"test", "ephemeral"}: # Test and ephemeral nodes will never show up in the catalog. 
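Collecting the new `only_include_if_in_catalog` semantics in one place: a node absent from catalog.json is dropped only when the flag is enabled, and test/ephemeral nodes never count as missing. A small sketch of that decision rule, using a hypothetical helper rather than the source's actual API:

```python
def keep_node(
    materialization: str, in_catalog: bool, only_include_if_in_catalog: bool
) -> bool:
    # Test and ephemeral nodes never appear in the catalog, so they are never
    # treated as missing; other nodes are dropped only when the flag is on.
    missing = not in_catalog and materialization not in {"test", "ephemeral"}
    return not (only_include_if_in_catalog and missing)


assert keep_node("model", in_catalog=True, only_include_if_in_catalog=True)
assert not keep_node("model", in_catalog=False, only_include_if_in_catalog=True)
assert keep_node("ephemeral", in_catalog=False, only_include_if_in_catalog=True)
assert keep_node("model", in_catalog=False, only_include_if_in_catalog=False)
```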
- report.in_manifest_but_missing_catalog.append(key) + missing_from_catalog = False + else: + if not only_include_if_in_catalog: + report.warning( + title="Node missing from catalog", + message="Found a node in the manifest file but not in the catalog. " + "This usually means the catalog file was not generated by `dbt docs generate` and so is incomplete. " + "Some metadata, such as column types and descriptions, will be impacted.", + context=key, + ) else: catalog_type = all_catalog_entities[key]["metadata"]["type"] @@ -264,6 +281,7 @@ def extract_dbt_entities( upstream_nodes=upstream_nodes, materialization=materialization, catalog_type=catalog_type, + missing_from_catalog=missing_from_catalog, meta=meta, query_tag=query_tag_props, tags=tags, @@ -291,14 +309,6 @@ def extract_dbt_entities( dbt_entities.append(dbtNode) - if report.in_manifest_but_missing_catalog: - # We still want this to show up as a warning, but don't want to spam the warnings section - # if there's a lot of them. - report.warning( - "in_manifest_but_missing_catalog", - f"Found {len(report.in_manifest_but_missing_catalog)} nodes in manifest but not in catalog. See in_manifest_but_missing_catalog for details.", - ) - return dbt_entities @@ -535,6 +545,7 @@ def loadManifestAndCatalog( manifest_adapter, self.config.use_identifiers, self.config.tag_prefix, + self.config.only_include_if_in_catalog, self.report, ) @@ -588,6 +599,23 @@ def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]: return all_nodes, additional_custom_props + def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: + nodes = super()._filter_nodes(all_nodes) + + if not self.config.only_include_if_in_catalog: + return nodes + + filtered_nodes = [] + for node in nodes: + if node.missing_from_catalog: + # TODO: We need to do some additional testing of this flag to validate that it doesn't + # drop important things entirely (e.g. sources). 
+ self.report.nodes_filtered.append(node.dbt_name) + else: + filtered_nodes.append(node) + + return filtered_nodes + def get_external_url(self, node: DBTNode) -> Optional[str]: if self.config.git_info and node.dbt_file_path: return self.config.git_info.get_url_for_file_path(node.dbt_file_path) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py index 5770ce712d6284..dc4e5d426fe42f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py @@ -176,20 +176,27 @@ def make_assertion_from_test( dataset=upstream_urn, scope=assertion_params.scope, operator=assertion_params.operator, - fields=[mce_builder.make_schema_field_urn(upstream_urn, column_name)] - if ( - assertion_params.scope == DatasetAssertionScopeClass.DATASET_COLUMN - and column_name - ) - else [], + fields=( + [mce_builder.make_schema_field_urn(upstream_urn, column_name)] + if ( + assertion_params.scope + == DatasetAssertionScopeClass.DATASET_COLUMN + and column_name + ) + else [] + ), nativeType=node.name, aggregation=assertion_params.aggregation, - parameters=assertion_params.parameters(kw_args) - if assertion_params.parameters - else None, - logic=assertion_params.logic_fn(kw_args) - if assertion_params.logic_fn - else None, + parameters=( + assertion_params.parameters(kw_args) + if assertion_params.parameters + else None + ), + logic=( + assertion_params.logic_fn(kw_args) + if assertion_params.logic_fn + else None + ), nativeParameters=_string_map(kw_args), ), ) @@ -244,10 +251,12 @@ def make_assertion_result_from_test( asserteeUrn=upstream_urn, runId=test_result.invocation_id, result=AssertionResultClass( - type=AssertionResultTypeClass.SUCCESS - if test_result.status == "pass" - or (not test_warnings_are_errors and test_result.status == "warn") - else AssertionResultTypeClass.FAILURE, + type=( + AssertionResultTypeClass.SUCCESS + if test_result.status == "pass" + or (not test_warnings_are_errors and test_result.status == "warn") + else AssertionResultTypeClass.FAILURE + ), nativeResults=test_result.native_results, ), status=AssertionRunStatusClass.COMPLETE, diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index 653b80c116adfd..a5a195e05a6354 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -333,7 +333,6 @@ def http_auth(self) -> Optional[Tuple[str, str]]: @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") class ElasticsearchSource(Source): - """ This plugin extracts the following: @@ -479,11 +478,15 @@ def _extract_mcps( entityUrn=dataset_urn, aspect=SubTypesClass( typeNames=[ - DatasetSubTypes.ELASTIC_INDEX_TEMPLATE - if not is_index - else DatasetSubTypes.ELASTIC_INDEX - if not data_stream - else DatasetSubTypes.ELASTIC_DATASTREAM + ( + DatasetSubTypes.ELASTIC_INDEX_TEMPLATE + if not is_index + else ( + DatasetSubTypes.ELASTIC_INDEX + if not data_stream + else DatasetSubTypes.ELASTIC_DATASTREAM + ) + ) ] ), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py index b459b47deb153a..704a6f20a5c19b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py @@ -111,9 +111,11 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None: for table_lineage in connector.table_lineage: input_dataset_urn = DatasetUrn.create_from_ids( platform_id=source_platform, - table_name=f"{source_database.lower()}.{table_lineage.source_table}" - if source_database - else table_lineage.source_table, + table_name=( + f"{source_database.lower()}.{table_lineage.source_table}" + if source_database + else table_lineage.source_table + ), env=source_platform_detail.env, platform_instance=source_platform_detail.platform_instance, ) @@ -132,23 +134,27 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None: fine_grained_lineage.append( FineGrainedLineage( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, - upstreams=[ - builder.make_schema_field_urn( - str(input_dataset_urn), - column_lineage.source_column, - ) - ] - if input_dataset_urn - else [], + upstreams=( + [ + builder.make_schema_field_urn( + str(input_dataset_urn), + column_lineage.source_column, + ) + ] + if input_dataset_urn + else [] + ), downstreamType=FineGrainedLineageDownstreamType.FIELD, - downstreams=[ - builder.make_schema_field_urn( - str(output_dataset_urn), - column_lineage.destination_column, - ) - ] - if output_dataset_urn - else [], + downstreams=( + [ + builder.make_schema_field_urn( + str(output_dataset_urn), + column_lineage.destination_column, + ) + ] + if output_dataset_urn + else [] + ), ) ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py index be051a74ed9c7f..18838af9bdf85f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/gcs/gcs_source.py @@ -118,9 +118,11 @@ def create_equivalent_s3_path_specs(self): s3_path_specs.append( PathSpec( include=path_spec.include.replace("gs://", "s3://"), - exclude=[exc.replace("gs://", "s3://") for exc in path_spec.exclude] - if path_spec.exclude - else None, + exclude=( + [exc.replace("gs://", "s3://") for exc in path_spec.exclude] + if path_spec.exclude + else None + ), file_types=path_spec.file_types, default_extension=path_spec.default_extension, table_name=path_spec.table_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 0c3dbc0eaadd88..d175fce04a52c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -305,10 +305,9 @@ def _is_single_row_query_method(query: Any) -> bool: if frame.name in SINGLE_ROW_QUERY_METHODS: return True if frame.name in CONSTANT_ROW_QUERY_METHODS: - # TODO: figure out how to handle these. - # A cross join will return (`constant` ** `queries`) rows rather - # than `constant` rows with `queries` columns. - # See https://stackoverflow.com/questions/35638753/create-query-to-join-2-tables-1-on-1-with-nothing-in-common. + # TODO: figure out how to handle these. A cross join will return (`constant` ** `queries`) rows rather + # than `constant` rows with `queries` columns. See + # https://stackoverflow.com/questions/35638753/create-query-to-join-2-tables-1-on-1-with-nothing-in-common. 
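The cross-join caveat in that comment is easy to check numerically: cross-joining `queries` subqueries that each return `constant` rows multiplies the row counts instead of widening a single row.

```python
# Illustrative numbers only: 4 subqueries, each returning 3 rows.
constant, queries = 3, 4

# Scalar (single-row) subqueries cross-join into a single wide row...
assert 1 ** queries == 1
# ...while constant-row subqueries cross-join into constant ** queries rows.
assert constant ** queries == 81
```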
return False if frame.name == COLUMN_MAP_QUERY_METHOD: @@ -429,9 +428,12 @@ def _get_column_cardinality( logger.debug( f"Caught exception while attempting to get column cardinality for column {column}. {e}" ) + self.report.report_warning( - "Profiling - Unable to get column cardinality", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Cardinality", + message="The cardinality for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) return @@ -484,14 +486,15 @@ def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None: self.dataset.engine.execute(get_estimate_script).scalar() ) else: - # If the configuration is not set to 'estimate only' mode, we directly obtain the row count from the dataset. - # However, if an offset or limit is set, we need to adjust how we calculate the row count. - # This is because applying a limit or offset could potentially skew the row count. - # For instance, if a limit is set and the actual row count exceeds this limit, - # the returned row count would incorrectly be the limit value. + # If the configuration is not set to 'estimate only' mode, we directly obtain the row count from the + # dataset. However, if an offset or limit is set, we need to adjust how we calculate the row count. This + # is because applying a limit or offset could potentially skew the row count. For instance, if a limit is + # set and the actual row count exceeds this limit, the returned row count would incorrectly be the limit + # value. # - # To address this, if a limit is set, we use the original table name when calculating the row count. - # This ensures that the row count is based on the original table, not on a view which have limit or offset applied. + # To address this, if a limit is set, we use the original table name when calculating the row count. This + # ensures that the row count is based on the original table, not on a view which have limit or offset + # applied. if (self.config.limit or self.config.offset) and not self.custom_sql: # We don't want limit and offset to get applied to the row count # This is kinda hacky way to do it, but every other way would require major refactoring @@ -513,9 +516,12 @@ def _get_dataset_column_min( logger.debug( f"Caught exception while attempting to get column min for column {column}. {e}" ) + self.report.report_warning( - "Profiling - Unable to get column min", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Min", + message="The min for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -530,9 +536,12 @@ def _get_dataset_column_max( logger.debug( f"Caught exception while attempting to get column max for column {column}. {e}" ) + self.report.report_warning( - "Profiling - Unable to get column max", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Max", + message="The max for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -547,9 +556,12 @@ def _get_dataset_column_mean( logger.debug( f"Caught exception while attempting to get column mean for column {column}. 
{e}" ) + self.report.report_warning( - "Profiling - Unable to get column mean", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Mean", + message="The mean for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -581,9 +593,12 @@ def _get_dataset_column_median( logger.debug( f"Caught exception while attempting to get column median for column {column}. {e}" ) + self.report.report_warning( - "Profiling - Unable to get column medians", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Medians", + message="The medians for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -599,8 +614,10 @@ def _get_dataset_column_stdev( f"Caught exception while attempting to get column stddev for column {column}. {e}" ) self.report.report_warning( - "Profiling - Unable to get column stddev", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Standard Deviation", + message="The standard deviation for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -638,9 +655,12 @@ def _get_dataset_column_quantiles( logger.debug( f"Caught exception while attempting to get column quantiles for column {column}. {e}" ) + self.report.report_warning( - "Profiling - Unable to get column quantiles", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Quantiles", + message="The quantiles for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -682,9 +702,12 @@ def _get_dataset_column_histogram( logger.debug( f"Caught exception while attempting to get column histogram for column {column}. {e}" ) + self.report.report_warning( - "Profiling - Unable to get column histogram", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Histogram", + message="The histogram for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) @_run_with_query_combiner @@ -714,9 +737,12 @@ def _get_dataset_column_sample_values( logger.debug( f"Caught exception while attempting to get sample values for column {column}. 
{e}" ) + self.report.report_warning( - "Profiling - Unable to get column sample values", - f"{self.dataset_name}.{column}", + title="Profiling: Unable to Calculate Sample Values", + message="The sample values for the column will not be accessible", + context=f"{self.dataset_name}.{column}", + exc=e, ) def generate_dataset_profile( # noqa: C901 (complexity) diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index 49b6422902299a..dda81b0e34a8d2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -664,9 +664,9 @@ def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass: full_name = f"{profile.firstName} {profile.lastName}" return CorpUserInfoClass( active=True, - displayName=profile.displayName - if profile.displayName is not None - else full_name, + displayName=( + profile.displayName if profile.displayName is not None else full_name + ), firstName=profile.firstName, lastName=profile.lastName, fullName=full_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 266f9f6db57620..0b201278142e3a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -402,11 +402,13 @@ def get_table_names(self) -> List[Tuple]: # List of Tuple containing (schema, table) tables: List[Tuple] = [ ( - unquote( - table_id.split(sep)[-2], leading_quote_char, trailing_quote_char - ) - if len(table_id.split(sep)) > 1 - else "", + ( + unquote( + table_id.split(sep)[-2], leading_quote_char, trailing_quote_char + ) + if len(table_id.split(sep)) > 1 + else "" + ), unquote( table_id.split(sep)[-1], leading_quote_char, trailing_quote_char ), @@ -593,9 +595,11 @@ def get_parser( source_platform="mongodb", database_name=connector_manifest.config.get("database"), topic_prefix=connector_manifest.config.get("topic_prefix"), - transforms=connector_manifest.config["transforms"].split(",") - if "transforms" in connector_manifest.config - else [], + transforms=( + connector_manifest.config["transforms"].split(",") + if "transforms" in connector_manifest.config + else [] + ), ) return parser diff --git a/metadata-ingestion/src/datahub/ingestion/source/ldap.py b/metadata-ingestion/src/datahub/ingestion/source/ldap.py index 9c7fba68f263bc..236e91a86700c3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ldap.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ldap.py @@ -1,4 +1,5 @@ """LDAP Source""" + import contextlib import dataclasses from typing import Any, Dict, Iterable, List, Optional diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py index 7ed46c8f7084cf..72898921c3683e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py @@ -68,7 +68,9 @@ def to_write_query(self) -> WriteQuery: model=cast(str, self.model.value), # the cast is jut to silent the lint view=cast(str, self.explore.value), fields=[cast(str, field.value) for field in self.fields], - filters={filter_.value: self.filters[filter_] for filter_ in self.filters} - if self.filters is not None - else {}, + filters=( + 
{filter_.value: self.filters[filter_] for filter_ in self.filters} + if self.filters is not None + else {} + ), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 2d5250cfb74fa5..71d497c56f13e8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -1323,7 +1323,7 @@ def process_dashboard( dashboard_object.folder.is_personal or dashboard_object.folder.is_personal_descendant ): - self.reporter.report_warning( + self.reporter.info( title="Dropped Dashboard", message="Dropped due to being a personal folder", context=f"Dashboard ID: {dashboard_id}", diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py index c97025d75229b1..93af0effa9f1f4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py @@ -74,16 +74,20 @@ def from_dashboard(cls, dashboard: Dashboard) -> "LookerDashboardForUsage": id=dashboard.id, view_count=dashboard.view_count, favorite_count=dashboard.favorite_count, - last_viewed_at=round(dashboard.last_viewed_at.timestamp() * 1000) - if dashboard.last_viewed_at - else None, - looks=[ - LookerChartForUsage.from_chart(e.look) - for e in dashboard.dashboard_elements - if e.look is not None - ] - if dashboard.dashboard_elements - else [], + last_viewed_at=( + round(dashboard.last_viewed_at.timestamp() * 1000) + if dashboard.last_viewed_at + else None + ), + looks=( + [ + LookerChartForUsage.from_chart(e.look) + for e in dashboard.dashboard_elements + if e.look is not None + ] + if dashboard.dashboard_elements + else [] + ), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/str_functions.py b/metadata-ingestion/src/datahub/ingestion/source/looker/str_functions.py index 5426d2b8ab9521..6f5c1248e3ef7a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/str_functions.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/str_functions.py @@ -1,6 +1,7 @@ """ Here write down functions which are operating on string. 
Like replacing some character and so on """ + import re diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index 0917a9e9faafee..de1022b5482cef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -328,9 +328,11 @@ def create_fields(self) -> List[ViewField]: ViewField( name=cll.downstream.column, label="", - type=cll.downstream.native_column_type - if cll.downstream.native_column_type is not None - else "unknown", + type=( + cll.downstream.native_column_type + if cll.downstream.native_column_type is not None + else "unknown" + ), description="", field_type=ViewFieldType.UNKNOWN, upstream_fields=_drop_hive_dot_from_upstream(cll.upstreams), diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 47475c5825a493..73427d9084dd3c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -106,7 +106,7 @@ infer_output_schema, ) from datahub.utilities import config_clean -from datahub.utilities.lossy_collections import LossyDict, LossyList +from datahub.utilities.lossy_collections import LossyList logger: logging.Logger = logging.getLogger(__name__) @@ -199,10 +199,6 @@ class ModeSourceReport(StaleEntityRemovalSourceReport): num_query_template_render_failures: int = 0 num_query_template_render_success: int = 0 - dropped_imported_datasets: LossyDict[str, LossyList[str]] = dataclasses.field( - default_factory=LossyDict - ) - def report_dropped_space(self, ent_name: str) -> None: self.filtered_spaces.append(ent_name) @@ -429,10 +425,25 @@ def construct_dashboard( # Last refreshed ts. 
last_refreshed_ts = self._parse_last_run_at(report_info) + # Datasets + datasets = [] + for imported_dataset_name in report_info.get("imported_datasets", {}): + mode_dataset = self._get_request_json( + f"{self.workspace_uri}/reports/{imported_dataset_name.get('token')}" + ) + dataset_urn = builder.make_dataset_urn_with_platform_instance( + self.platform, + str(mode_dataset.get("id")), + platform_instance=None, + env=self.config.env, + ) + datasets.append(dataset_urn) + dashboard_info_class = DashboardInfoClass( description=description if description else "", title=title if title else "", charts=self._get_chart_urns(report_token), + datasets=datasets if datasets else None, lastModified=last_modified, lastRefreshed=last_refreshed_ts, dashboardUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}", @@ -725,6 +736,10 @@ def _get_platform_and_dbname( data_source.get("adapter", ""), data_source.get("name", "") ) database = data_source.get("database", "") + # This is hacky but on bigquery we want to change the database if its default + # For lineage we need project_id.db.table + if platform == "bigquery" and database == "default": + database = data_source.get("host", "") return platform, database else: self.report.report_warning( @@ -900,24 +915,36 @@ def normalize_mode_query(self, query: str) -> str: return rendered_query - def construct_query_from_api_data( + def construct_query_or_dataset( self, report_token: str, query_data: dict, space_token: str, report_info: dict, + is_mode_dataset: bool, ) -> Iterable[MetadataWorkUnit]: - query_urn = self.get_dataset_urn_from_query(query_data) + query_urn = ( + self.get_dataset_urn_from_query(query_data) + if not is_mode_dataset + else self.get_dataset_urn_from_query(report_info) + ) + query_token = query_data.get("token") + externalUrl = ( + f"{self.config.connect_uri}/{self.config.workspace}/datasets/{report_token}" + if is_mode_dataset + else f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}/details/queries/{query_token}" + ) + dataset_props = DatasetPropertiesClass( - name=query_data.get("name"), + name=report_info.get("name") if is_mode_dataset else query_data.get("name"), description=f"""### Source Code ``` sql {query_data.get("raw_query")} ``` """, - externalUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}/details/queries/{query_token}", + externalUrl=externalUrl, customProperties=self.get_custom_props_from_dict( query_data, [ @@ -939,7 +966,22 @@ def construct_query_from_api_data( ).as_workunit() ) - subtypes = SubTypesClass(typeNames=([BIAssetSubTypes.MODE_QUERY])) + if is_mode_dataset: + space_container_key = self.gen_space_key(space_token) + yield from add_dataset_to_container( + container_key=space_container_key, + dataset_urn=query_urn, + ) + + subtypes = SubTypesClass( + typeNames=( + [ + BIAssetSubTypes.MODE_DATASET + if is_mode_dataset + else BIAssetSubTypes.MODE_QUERY + ] + ) + ) yield ( MetadataChangeProposalWrapper( entityUrn=query_urn, @@ -950,7 +992,9 @@ def construct_query_from_api_data( yield MetadataChangeProposalWrapper( entityUrn=query_urn, aspect=BrowsePathsV2Class( - path=self._browse_path_query(space_token, report_info) + path=self._browse_path_dashboard(space_token) + if is_mode_dataset + else self._browse_path_query(space_token, report_info) ), ).as_workunit() @@ -958,7 +1002,6 @@ def construct_query_from_api_data( upstream_warehouse_platform, upstream_warehouse_db_name, ) = self._get_platform_and_dbname(query_data.get("data_source_id")) - if 
upstream_warehouse_platform is None: # this means we can't infer the platform return @@ -1022,7 +1065,7 @@ def construct_query_from_api_data( schema_fields = infer_output_schema(parsed_query_object) if schema_fields: schema_metadata = SchemaMetadataClass( - schemaName="mode_query", + schemaName="mode_dataset" if is_mode_dataset else "mode_query", platform=f"urn:li:dataPlatform:{self.platform}", version=0, fields=schema_fields, @@ -1040,7 +1083,7 @@ def construct_query_from_api_data( ) yield from self.get_upstream_lineage_for_parsed_sql( - query_data, parsed_query_object + query_urn, query_data, parsed_query_object ) operation = OperationClass( @@ -1089,10 +1132,9 @@ def construct_query_from_api_data( ).as_workunit() def get_upstream_lineage_for_parsed_sql( - self, query_data: dict, parsed_query_object: SqlParsingResult + self, query_urn: str, query_data: dict, parsed_query_object: SqlParsingResult ) -> List[MetadataWorkUnit]: wu = [] - query_urn = self.get_dataset_urn_from_query(query_data) if parsed_query_object is None: logger.info( @@ -1350,6 +1392,24 @@ def _get_reports(self, space_token: str) -> List[dict]: ) return reports + @lru_cache(maxsize=None) + def _get_datasets(self, space_token: str) -> List[dict]: + """ + Retrieves datasets for a given space token. + """ + datasets = [] + try: + url = f"{self.workspace_uri}/spaces/{space_token}/datasets" + datasets_json = self._get_request_json(url) + datasets = datasets_json.get("_embedded", {}).get("reports", []) + except HTTPError as http_error: + self.report.report_failure( + title="Failed to Retrieve Datasets for Space", + message=f"Unable to retrieve datasets for space token {space_token}.", + context=f"Error: {str(http_error)}", + ) + return datasets + @lru_cache(maxsize=None) def _get_queries(self, report_token: str) -> list: queries = [] @@ -1523,24 +1583,14 @@ def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]: for report in reports: report_token = report.get("token", "") - if report.get("imported_datasets"): - # The connector doesn't support imported datasets yet. - # For now, we just keep this in the report to track what we're missing. - imported_datasets = [ - imported_dataset.get("name") or str(imported_dataset) - for imported_dataset in report["imported_datasets"] - ] - self.report.dropped_imported_datasets.setdefault( - report_token, LossyList() - ).extend(imported_datasets) - queries = self._get_queries(report_token) for query in queries: - query_mcps = self.construct_query_from_api_data( + query_mcps = self.construct_query_or_dataset( report_token, query, space_token=space_token, report_info=report, + is_mode_dataset=False, ) chart_fields: Dict[str, SchemaFieldClass] = {} for wu in query_mcps: @@ -1566,6 +1616,27 @@ def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]: query_name=query["name"], ) + def emit_dataset_mces(self): + """ + Emits MetadataChangeEvents (MCEs) for datasets within each space. 
+ """ + for space_token, _ in self.space_tokens.items(): + datasets = self._get_datasets(space_token) + + for report in datasets: + report_token = report.get("token", "") + queries = self._get_queries(report_token) + for query in queries: + query_mcps = self.construct_query_or_dataset( + report_token, + query, + space_token=space_token, + report_info=report, + is_mode_dataset=True, + ) + for wu in query_mcps: + yield wu + @classmethod def create(cls, config_dict: dict, ctx: PipelineContext) -> "ModeSource": config: ModeConfig = ModeConfig.parse_obj(config_dict) @@ -1581,6 +1652,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield from self.emit_dashboard_mces() + yield from self.emit_dataset_mces() yield from self.emit_chart_mces() def get_report(self) -> SourceReport: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 6e8d939325d5b0..5106b9817d3517 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -281,9 +281,11 @@ def new_powerbi_dataset(workspace_id: str, raw_instance: dict) -> PowerBIDataset id=raw_instance["id"], name=raw_instance.get("name"), description=raw_instance.get("description", ""), - webUrl="{}/details".format(raw_instance.get("webUrl")) - if raw_instance.get("webUrl") is not None - else None, + webUrl=( + "{}/details".format(raw_instance.get("webUrl")) + if raw_instance.get("webUrl") is not None + else None + ), workspace_id=workspace_id, parameters={}, tables=[], diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index d6c7076d49507e..8854f9ff48348d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -315,11 +315,11 @@ def custom_properties( "createdDate": str(report.created_date), "modifiedBy": report.modified_by or "", "modifiedDate": str(report.modified_date) or str(report.created_date), - "dataSource": str( - [report.connection_string for report in _report.data_sources] - ) - if _report.data_sources - else "", + "dataSource": ( + str([report.connection_string for report in _report.data_sources]) + if _report.data_sources + else "" + ), } # DashboardInfo mcp diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py index b6c48dd3c488ec..8d6eaa4bf10474 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py @@ -195,9 +195,11 @@ def _gen_space_workunit(self, space: Space) -> Iterable[MetadataWorkUnit]: description=space.description, sub_types=[BIContainerSubTypes.QLIK_SPACE], extra_properties={Constant.TYPE: str(space.type)}, - owner_urn=builder.make_user_urn(owner_username) - if self.config.ingest_owner and owner_username - else None, + owner_urn=( + builder.make_user_urn(owner_username) + if self.config.ingest_owner and owner_username + else None + ), 
external_url=f"https://{self.config.tenant_hostname}/catalog?space_filter={space.id}", created=int(space.createdAt.timestamp() * 1000), last_modified=int(space.updatedAt.timestamp() * 1000), @@ -458,9 +460,11 @@ def _gen_app_workunit(self, app: App) -> Iterable[MetadataWorkUnit]: sub_types=[BIContainerSubTypes.QLIK_APP], parent_container_key=self._gen_space_key(app.spaceId), extra_properties={Constant.QRI: app.qri, Constant.USAGE: app.qUsage}, - owner_urn=builder.make_user_urn(owner_username) - if self.config.ingest_owner and owner_username - else None, + owner_urn=( + builder.make_user_urn(owner_username) + if self.config.ingest_owner and owner_username + else None + ), external_url=f"https://{self.config.tenant_hostname}/sense/app/{app.id}/overview", created=int(app.createdAt.timestamp() * 1000), last_modified=int(app.updatedAt.timestamp() * 1000), @@ -500,9 +504,11 @@ def _gen_schema_fields( schema_field = SchemaField( fieldPath=field.name, type=SchemaFieldDataTypeClass( - type=FIELD_TYPE_MAPPING.get(field.dataType, NullType)() - if field.dataType - else NullType() + type=( + FIELD_TYPE_MAPPING.get(field.dataType, NullType)() + if field.dataType + else NullType() + ) ), nativeDataType=field.dataType if field.dataType else "", nullable=field.nullable, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py index a37aec675cdfae..e0bf8b23dd0f7d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py @@ -272,18 +272,22 @@ def _gen_access_events_from_history_query( userid=row[field_names.index("userid")], username=row[field_names.index("username")], query=row[field_names.index("query")], - querytxt=row[field_names.index("querytxt")].strip() - if row[field_names.index("querytxt")] - else None, + querytxt=( + row[field_names.index("querytxt")].strip() + if row[field_names.index("querytxt")] + else None + ), tbl=row[field_names.index("tbl")], database=row[field_names.index("database")], schema=row[field_names.index("schema")], table=row[field_names.index("table")], starttime=row[field_names.index("starttime")], endtime=row[field_names.index("endtime")], - operation_type=row[field_names.index("operation_type")] - if "operation_type" in field_names - else None, + operation_type=( + row[field_names.index("operation_type")] + if "operation_type" in field_names + else None + ), ) except pydantic.error_wrappers.ValidationError as e: logging.warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index ef5ed3c6304c92..e8c70260ebc7ce 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -464,9 +464,11 @@ def add_partition_columns_to_schema( for partition_key in partition_keys: fields.append( SchemaField( - fieldPath=f"{partition_key[0]}" - if not is_fieldpath_v2 - else f"[version=2.0].[type=string].{partition_key[0]}", + fieldPath=( + f"{partition_key[0]}" + if not is_fieldpath_v2 + else f"[version=2.0].[type=string].{partition_key[0]}" + ), nativeDataType="string", type=SchemaFieldDataTypeClass(StringTypeClass()), isPartitioningKey=True, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py index 88cb1f821ff0d6..8309c469f67c5f 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py @@ -333,15 +333,19 @@ def get_resource_workunits( lastModified=ChangeAuditStampsClass( created=AuditStampClass( time=round(resource.created_time.timestamp() * 1000), - actor=make_user_urn(resource.created_by) - if resource.created_by - else "urn:li:corpuser:unknown", + actor=( + make_user_urn(resource.created_by) + if resource.created_by + else "urn:li:corpuser:unknown" + ), ), lastModified=AuditStampClass( time=round(resource.modified_time.timestamp() * 1000), - actor=make_user_urn(resource.modified_by) - if resource.modified_by - else "urn:li:corpuser:unknown", + actor=( + make_user_urn(resource.modified_by) + if resource.modified_by + else "urn:li:corpuser:unknown" + ), ), ), customProperties={ diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index 5db5e543510db9..dd4b65a2cbdf29 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -175,9 +175,11 @@ def _gen_workspace_workunit( container_key=self._gen_workspace_key(workspace.workspaceId), name=workspace.name, sub_types=[BIContainerSubTypes.SIGMA_WORKSPACE], - owner_urn=builder.make_user_urn(owner_username) - if self.config.ingest_owner and owner_username - else None, + owner_urn=( + builder.make_user_urn(owner_username) + if self.config.ingest_owner and owner_username + else None + ), created=int(workspace.createdAt.timestamp() * 1000), last_modified=int(workspace.updatedAt.timestamp() * 1000), ) @@ -534,16 +536,20 @@ def _gen_workbook_workunit(self, workbook: Workbook) -> Iterable[MetadataWorkUni container_key=workbook_key, name=workbook.name, sub_types=[BIContainerSubTypes.SIGMA_WORKBOOK], - parent_container_key=self._gen_workspace_key(workbook.workspaceId) - if workbook.workspaceId - else None, + parent_container_key=( + self._gen_workspace_key(workbook.workspaceId) + if workbook.workspaceId + else None + ), extra_properties={ "path": workbook.path, "latestVersion": str(workbook.latestVersion), }, - owner_urn=builder.make_user_urn(owner_username) - if self.config.ingest_owner and owner_username - else None, + owner_urn=( + builder.make_user_urn(owner_username) + if self.config.ingest_owner and owner_username + else None + ), external_url=workbook.url, tags=[workbook.badge] if workbook.badge else None, created=int(workbook.createdAt.timestamp() * 1000), diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py index e981ed3e2e6650..d39e95a884dbc2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py @@ -237,9 +237,11 @@ def get_connect_args(self) -> dict: p_key = serialization.load_pem_private_key( pkey_bytes, - password=self.private_key_password.get_secret_value().encode() - if self.private_key_password is not None - else None, + password=( + self.private_key_password.get_secret_value().encode() + if self.private_key_password is not None + else None + ), backend=default_backend(), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index b2c40f914bddc6..aeb21e88d04437 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -229,9 +229,11 @@ def _get_all_table_comments_and_properties(self, connection, **kw): for table in result: all_table_comments[(table.database, table.table_name)] = { "text": table.comment, - "properties": {k: str(v) for k, v in json.loads(table.properties).items()} - if table.properties - else {}, + "properties": ( + {k: str(v) for k, v in json.loads(table.properties).items()} + if table.properties + else {} + ), } return all_table_comments diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py index 8b517747307f84..21e7fad3343314 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py @@ -164,9 +164,11 @@ def urn(self) -> str: flow_id=self.entity.flow.formatted_name, job_id=self.entity.formatted_name, cluster=self.entity.flow.cluster, - platform_instance=self.entity.flow.platform_instance - if self.entity.flow.platform_instance - else None, + platform_instance=( + self.entity.flow.platform_instance + if self.entity.flow.platform_instance + else None + ), ) def add_property( @@ -223,9 +225,9 @@ def urn(self) -> str: orchestrator=self.entity.orchestrator, flow_id=self.entity.formatted_name, cluster=self.entity.cluster, - platform_instance=self.entity.platform_instance - if self.entity.platform_instance - else None, + platform_instance=( + self.entity.platform_instance if self.entity.platform_instance else None + ), ) @property diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 9ce50e21608843..238fd88f1c9509 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -1033,11 +1033,13 @@ def get_schema_fields_for_column( field = SchemaField( fieldPath=column["name"], type=get_column_type(self.report, dataset_name, column["type"]), - nativeDataType=full_type - if full_type is not None - else get_native_data_type_for_sqlalchemy_type( - column["type"], - inspector=inspector, + nativeDataType=( + full_type + if full_type is not None + else get_native_data_type_for_sqlalchemy_type( + column["type"], + inspector=inspector, + ) ), description=column.get("comment", None), nullable=column["nullable"], diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/analyze_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/unity/analyze_profiler.py index 4c8b22f2399b26..995690be790c4f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/analyze_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/analyze_profiler.py @@ -82,13 +82,15 @@ def gen_dataset_profile_workunit( rowCount=row_count, columnCount=table_profile.num_columns, sizeInBytes=table_profile.total_size, - fieldProfiles=[ - self._gen_dataset_field_profile(row_count, column_profile) - for column_profile in table_profile.column_profiles - if column_profile # Drop column profiles with no data - ] - if self.config.include_columns - else None, + fieldProfiles=( + [ + self._gen_dataset_field_profile(row_count, column_profile) + for column_profile in table_profile.column_profiles + if column_profile # Drop column profiles with no data + ] + if 
self.config.include_columns + else None + ), ) return MetadataChangeProposalWrapper( entityUrn=self.dataset_urn_builder(ref), diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py index c66189d99f738f..f84f6c1b0c08d6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/proxy_types.py @@ -151,9 +151,11 @@ class TableReference: @classmethod def create(cls, table: "Table") -> "TableReference": return cls( - table.schema.catalog.metastore.id - if table.schema.catalog.metastore - else None, + ( + table.schema.catalog.metastore.id + if table.schema.catalog.metastore + else None + ), table.schema.catalog.name, table.schema.name, table.name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py index f07e7a92d87626..5eec2ca587ead2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py @@ -128,9 +128,9 @@ def _generate_operation_workunit( operation_aspect = OperationClass( timestampMillis=int(time.time() * 1000), lastUpdatedTimestamp=int(query.end_time.timestamp() * 1000), - actor=self.user_urn_builder(query.user_name) - if query.user_name - else None, + actor=( + self.user_urn_builder(query.user_name) if query.user_name else None + ), operationType=OPERATION_STATEMENT_TYPES[query.statement_type], affectedDatasets=[ self.table_urn_builder(table) for table in table_info.source_tables diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py index c474e423030e05..4045917eb830e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_dataproduct.py @@ -144,11 +144,11 @@ class PatternAddDatasetDataProduct(AddDatasetDataProduct): def __init__(self, config: PatternDatasetDataProductConfig, ctx: PipelineContext): dataset_to_data_product = config.dataset_to_data_product_urns_pattern generic_config = AddDatasetDataProductConfig( - get_data_product_to_add=lambda dataset_urn: dataset_to_data_product.value( - dataset_urn - )[0] - if dataset_to_data_product.value(dataset_urn) - else None, + get_data_product_to_add=lambda dataset_urn: ( + dataset_to_data_product.value(dataset_urn)[0] + if dataset_to_data_product.value(dataset_urn) + else None + ), is_container=config.is_container, ) super().__init__(generic_config, ctx) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py index 047252a5eeff03..a7e92d4bd7edbd 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_schema_terms.py @@ -82,10 +82,12 @@ def extend_field( new_glossary_term = GlossaryTermsClass( terms=[], - auditStamp=schema_field.glossaryTerms.auditStamp - if schema_field.glossaryTerms is not None - else AuditStampClass( - time=builder.get_sys_time(), actor="urn:li:corpUser:restEmitter" + auditStamp=( + schema_field.glossaryTerms.auditStamp + if schema_field.glossaryTerms is not None + else AuditStampClass( + time=builder.get_sys_time(), 
actor="urn:li:corpUser:restEmitter" + ) ), ) new_glossary_term.terms.extend(unique_gloseary_terms) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py index f21e3ec3193492..3daf52e32ed4bb 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_terms.py @@ -73,10 +73,12 @@ def transform_aspect( ) out_glossary_terms: GlossaryTermsClass = GlossaryTermsClass( terms=[], - auditStamp=in_glossary_terms.auditStamp - if in_glossary_terms is not None - else AuditStampClass( - time=builder.get_sys_time(), actor="urn:li:corpUser:restEmitter" + auditStamp=( + in_glossary_terms.auditStamp + if in_glossary_terms is not None + else AuditStampClass( + time=builder.get_sys_time(), actor="urn:li:corpUser:restEmitter" + ) ), ) # Check if user want to keep existing terms diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py index fb776ca8d23281..0a59380531ad36 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py @@ -257,10 +257,12 @@ def transform( transformed_aspect = self.transform_aspect( entity_urn=urn, aspect_name=self.aspect_name(), - aspect=last_seen_mcp.aspect - if last_seen_mcp - and last_seen_mcp.aspectName == self.aspect_name() - else None, + aspect=( + last_seen_mcp.aspect + if last_seen_mcp + and last_seen_mcp.aspectName == self.aspect_name() + else None + ), ) if transformed_aspect: structured_urn = Urn.from_string(urn) @@ -269,9 +271,11 @@ def transform( MetadataChangeProposalWrapper( entityUrn=urn, entityType=structured_urn.get_type(), - systemMetadata=last_seen_mcp.systemMetadata - if last_seen_mcp - else last_seen_mce_system_metadata, + systemMetadata=( + last_seen_mcp.systemMetadata + if last_seen_mcp + else last_seen_mce_system_metadata + ), aspectName=self.aspect_name(), aspect=transformed_aspect, ) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 27311ff998cbf9..245a3aa3d9db15 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -90,9 +90,11 @@ def convert_tag_as_per_mapping(self, tag: str) -> str: tag = tag[:index] + new_char + tag[index + len(old_char) :] # Adjust indices for overlapping replacements indices = [ - each + (len(new_char) - len(old_char)) - if each > index - else each + ( + each + (len(new_char) - len(old_char)) + if each > index + else each + ) for each in indices ] indices.append(index) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py index 4dc5f12005e499..3c0bc00633e7cb 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/generic_aspect_transformer.py @@ -38,7 +38,8 @@ def transform_generic_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[GenericAspectClass] ) -> Optional[GenericAspectClass]: """Implement this method to 
transform the single custom aspect for an entity. - The purpose of this abstract method is to reinforce the use of GenericAspectClass.""" + The purpose of this abstract method is to reinforce the use of GenericAspectClass. + """ pass def _transform_or_record_mcpc( @@ -114,9 +115,11 @@ def transform( changeType="UPSERT", aspectName=self.aspect_name(), aspect=transformed_aspect, - systemMetadata=last_seen_mcp.systemMetadata - if last_seen_mcp - else last_seen_mce_system_metadata, + systemMetadata=( + last_seen_mcp.systemMetadata + if last_seen_mcp + else last_seen_mce_system_metadata + ), ), metadata=record_metadata, ) diff --git a/metadata-ingestion/src/datahub/upgrade/upgrade.py b/metadata-ingestion/src/datahub/upgrade/upgrade.py index 9e2639abdca415..d940dfd78a82ed 100644 --- a/metadata-ingestion/src/datahub/upgrade/upgrade.py +++ b/metadata-ingestion/src/datahub/upgrade/upgrade.py @@ -196,11 +196,11 @@ async def retrieve_version_stats( current=VersionStats( version=current_server_version, release_date=current_server_release_date ), - latest=VersionStats( - version=last_server_version, release_date=last_server_date - ) - if last_server_version - else None, + latest=( + VersionStats(version=last_server_version, release_date=last_server_date) + if last_server_version + else None + ), current_server_type=current_server_type, ) diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index 96d3732b8fb497..e866a372fbbaf2 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -183,6 +183,12 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]): """A dict-like object that stores its data in a temporary SQLite database. This is useful for storing large amounts of data that don't fit in memory. + + Like a standard Python dict / OrderedDict, it maintains insertion order. + + It maintains a small in-memory cache to avoid having to serialize/deserialize + data from the database too often. This is an implementation detail that isn't + exposed to the user. """ # Use a predefined connection, able to be shared across multiple FileBacked* objects @@ -212,8 +218,9 @@ def __post_init__(self) -> None: self.cache_eviction_batch_size > 0 ), "cache_eviction_batch_size must be positive" - assert "key" not in self.extra_columns, '"key" is a reserved column name' - assert "value" not in self.extra_columns, '"value" is a reserved column name' + for reserved_column in ("key", "value", "rowid"): + if reserved_column in self.extra_columns: + raise ValueError(f'"{reserved_column}" is a reserved column name') if self.shared_connection: self._conn = self.shared_connection @@ -227,10 +234,13 @@ def __post_init__(self) -> None: self._active_object_cache = collections.OrderedDict() # Create the table. + # We could use the built-in sqlite `rowid` column, but that can get changed + # if a VACUUM is performed and would break our ordering guarantees. 
if_not_exists = "IF NOT EXISTS" if self._conn.allow_table_name_reuse else "" self._conn.execute( f"""CREATE TABLE {if_not_exists} {self.tablename} ( - key TEXT PRIMARY KEY, + rowid INTEGER PRIMARY KEY AUTOINCREMENT, + key TEXT UNIQUE, value BLOB {''.join(f', {column_name} BLOB' for column_name in self.extra_columns.keys())} )""" ) @@ -280,13 +290,20 @@ def _prune_cache(self, num_items_to_prune: int) -> None: items_to_write.append(tuple(values)) if items_to_write: + # Tricky: By using an INSERT INTO ... ON CONFLICT (key) structure, we can + # ensure that the rowid remains the same if a value is updated but is + # autoincremented when rows are inserted. self._conn.executemany( - f"""INSERT OR REPLACE INTO {self.tablename} ( + f"""INSERT INTO {self.tablename} ( key, value {''.join(f', {column_name}' for column_name in self.extra_columns.keys())} ) - VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))})""", + VALUES ({', '.join(['?'] *(2 + len(self.extra_columns)))}) + ON CONFLICT (key) DO UPDATE SET + value = excluded.value + {''.join(f', {column_name} = excluded.{column_name}' for column_name in self.extra_columns.keys())} + """, items_to_write, ) @@ -356,14 +373,15 @@ def mark_dirty(self, key: str) -> None: self._active_object_cache[key] = self._active_object_cache[key][0], True def __iter__(self) -> Iterator[str]: - # Cache should be small, so safe set cast to avoid mutation during iteration - cache_keys = set(self._active_object_cache.keys()) - yield from cache_keys + self.flush() - cursor = self._conn.execute(f"SELECT key FROM {self.tablename}") + # Our active object cache should now be empty, so it's fine to + # just pull from the DB. + cursor = self._conn.execute( + f"SELECT key FROM {self.tablename} ORDER BY rowid ASC" + ) for row in cursor: - if row[0] not in cache_keys: - yield row[0] + yield row[0] def items_snapshot( self, cond_sql: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 6cc7d9c50b775d..4ea42d568da635 100644 --- a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -292,9 +292,11 @@ def convert_to_aspects(self, operation_map: Dict[str, list]) -> Dict[str, Any]: owner=x.get("urn"), type=x.get("category"), typeUrn=x.get("categoryUrn"), - source=OwnershipSourceClass(type=self.owner_source_type) - if self.owner_source_type - else None, + source=( + OwnershipSourceClass(type=self.owner_source_type) + if self.owner_source_type + else None + ), ) for x in sorted( operation_map[Constants.ADD_OWNER_OPERATION], diff --git a/metadata-ingestion/src/datahub/utilities/ratelimiter.py b/metadata-ingestion/src/datahub/utilities/ratelimiter.py index 3d47d25e14c492..c32041a63417c0 100644 --- a/metadata-ingestion/src/datahub/utilities/ratelimiter.py +++ b/metadata-ingestion/src/datahub/utilities/ratelimiter.py @@ -7,7 +7,6 @@ # Modified version of https://github.com/RazerM/ratelimiter/blob/master/ratelimiter/_sync.py class RateLimiter(AbstractContextManager): - """Provides rate limiting for an operation with a configurable number of requests for a time period.
""" diff --git a/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py b/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py index d0e38de661dd18..5a8802c7a0a49c 100644 --- a/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py +++ b/metadata-ingestion/src/datahub/utilities/sql_lineage_parser_impl.py @@ -114,9 +114,11 @@ def get_tables(self) -> List[str]: table_normalized = re.sub( r"^.", "", - str(table) - if not self._use_raw_names - else f"{table.schema.raw_name}.{table.raw_name}", + ( + str(table) + if not self._use_raw_names + else f"{table.schema.raw_name}.{table.raw_name}" + ), ) result.append(str(table_normalized)) diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json index 2fa9f4ee86a860..a6a685672bda00 100644 --- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json +++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json @@ -132,8 +132,8 @@ "json": { "timestampMillis": 1638860400000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "viewsCount": 6 } @@ -173,7 +173,9 @@ "charts": [ "urn:li:chart:(mode,f622b9ee725b)" ], - "datasets": [], + "datasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)" + ], "lastModified": { "created": { "time": 1639169724316, @@ -243,6 +245,89 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "updated_at": "2024-09-02T07:40:44.046Z", + "last_run_id": "3535709679", + "data_source_id": "44763", + "report_imports_count": "2" + }, + "externalUrl": "https://app.mode.com/acryl/datasets/24f66e1701b6", + "name": "Dataset 1", + "description": "### Source Code\n``` sql\n-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELETED,\n\t\t_FIVETRAN_SYNCED\n FROM DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY LIMIT 100;\n\n-- Returns first 100 rows from ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER\n SELECT \n\t\tCOMMUNICATION_ACCOUNT_ID,\n\t\tID,\n\t\tMMS_CAPABLE,\n\t\tPHONE_NUMBER,\n\t\tSMS_CAPABLE,\n\t\tSTATUS,\n\t\tSTATUS_TLM,\n\t\tTLM,\n\t\tVOICE_CAPABLE,\n\t\tWHEN_CREATED\n FROM ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER LIMIT 100;\n \n \n```\n ", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:800cfcb4cec6ad587cafde11a0b0bb4a" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Dataset" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)", + "changeType": 
"UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "acryl" + }, + { + "id": "urn:li:container:800cfcb4cec6ad587cafde11a0b0bb4a", + "urn": "urn:li:container:800cfcb4cec6ad587cafde11a0b0bb4a" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD)", @@ -643,8 +728,8 @@ "json": { "timestampMillis": 1638860400000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "operationType": "UPDATE", "lastUpdatedTimestamp": 1639177973273 @@ -721,9 +806,9 @@ "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", "schemaField": { - "fieldPath": "payment_date", + "fieldPath": "amount", "nullable": false, "type": { "type": { @@ -743,9 +828,9 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", "schemaField": { - "fieldPath": "amount", + "fieldPath": "payment_date", "nullable": false, "type": { "type": { @@ -943,6 +1028,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,5450544,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "query", "entityUrn": "urn:li:query:10149707.34499.1897576958", diff --git a/metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json b/metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json new file mode 100644 index 00000000000000..4e9cb911ab565d --- /dev/null +++ b/metadata-ingestion/tests/integration/mode/setup/dataset_24f66e1701b6.json @@ -0,0 +1,149 @@ +{ + "token": "24f66e1701b6", + "id": 5450544, + "name": "Dataset 1", + "description": "", + "created_at": "2024-09-02T07:38:43.722Z", + "updated_at": "2024-09-02T07:40:44.026Z", + "published_at": null, + "edited_at": "2024-09-02T07:40:32.668Z", + "type": "DatasetReport", + "last_successful_sync_at": null, + "last_saved_at": "2024-09-02T07:40:32.679Z", + "archived": false, + "space_token": "75737b70402e", + "account_id": 751252, + "account_username": "acryltest", + "public": false, + "manual_run_disabled": false, + "drill_anywhere_enabled": false, + "run_privately": true, + "drilldowns_enabled": false, + "expected_runtime": 0.763795, + "last_successfully_run_at": "2024-09-02T07:40:44.009Z", + "last_run_at": "2024-09-02T07:40:43.185Z", + "last_successful_run_token": "29e56ca29a45", + "query_count": 1, + "max_query_count": 160, + "runs_count": 3, + "schedules_count": 0, + "query_preview": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELE", + "view_count": 6, + "thoughtspot_published_at": null, + "_links": { + "self": { + "href": "/api/acryltest/reports/24f66e1701b6" + }, + "web": { + "href": "https://app.mode.com/acryltest/datasets/24f66e1701b6" + }, + "web_edit": { + 
"href": "/editor/acryltest/datasets/24f66e1701b6" + }, + "account": { + "href": "/api/acryltest" + }, + "report_run": { + "templated": true, + "href": "/api/acryltest/reports/24f66e1701b6/runs/{id}?embed[result]=1" + }, + "space": { + "href": "/api/acryltest/collections/75737b70402e" + }, + "space_links": { + "href": "/api/acryltest/reports/24f66e1701b6/space_links" + }, + "queries": { + "href": "/api/acryltest/reports/24f66e1701b6/queries" + }, + "report_runs": { + "href": "/api/acryltest/reports/24f66e1701b6/runs" + }, + "report_pins": { + "href": "/api/acryltest/reports/24f66e1701b6/pins" + }, + "report_schedules": { + "href": "/api/acryltest/reports/24f66e1701b6/schedules" + }, + "dataset_dependencies": { + "href": "/api/acryltest/datasets/24f66e1701b6/reports" + }, + "last_run": { + "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45" + }, + "last_successful_run": { + "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45" + }, + "perspective_email_subscription_memberships": { + "href": "/api/acryltest/reports/24f66e1701b6/perspective_email_report_subscription_memberships" + }, + "creator": { + "href": "/api/modeuser" + }, + "report_index_web": { + "href": "/acryltest/spaces/75737b70402e" + } + }, + "_forms": { + "edit": { + "method": "patch", + "action": "/api/acryltest/reports/24f66e1701b6", + "input": { + "report": { + "name": { + "type": "text", + "value": "Dataset_2" + }, + "description": { + "type": "text", + "value": "" + }, + "account_id": { + "type": "text", + "value": 751252 + }, + "space_token": { + "type": "text", + "value": "75737b70402e" + } + } + } + }, + "destroy": { + "method": "delete", + "action": "/api/acryltest/reports/24f66e1701b6" + }, + "archive": { + "method": "patch", + "action": "/api/acryltest/reports/24f66e1701b6/archive" + }, + "unarchive": { + "method": "patch", + "action": "/api/acryltest/reports/24f66e1701b6/unarchive" + }, + "update_settings": { + "method": "patch", + "action": "/api/acryltest/reports/24f66e1701b6/update_settings", + "input": { + "report": { + "manual_run_disabled": { + "type": "select", + "options": [ + true, + false + ], + "value": false + }, + "drill_anywhere_enabled": { + "type": "select", + "options": [ + true, + false + ], + "value": false + } + } + } + } + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json b/metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json new file mode 100644 index 00000000000000..ba3be157786e6f --- /dev/null +++ b/metadata-ingestion/tests/integration/mode/setup/dataset_queries_24f66e1701b6.json @@ -0,0 +1,64 @@ +{ + "_links": { + "self": { + "href": "/api/acryl/reports/24f66e1701b6/queries" + } + }, + "_embedded": { + "queries": [ + { + "id": 19780522, + "token": "9b2f34343531", + "raw_query": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELETED,\n\t\t_FIVETRAN_SYNCED\n FROM DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY LIMIT 100;\n\n-- Returns first 100 rows from ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER\n SELECT \n\t\tCOMMUNICATION_ACCOUNT_ID,\n\t\tID,\n\t\tMMS_CAPABLE,\n\t\tPHONE_NUMBER,\n\t\tSMS_CAPABLE,\n\t\tSTATUS,\n\t\tSTATUS_TLM,\n\t\tTLM,\n\t\tVOICE_CAPABLE,\n\t\tWHEN_CREATED\n FROM ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER LIMIT 100;\n \n ", + "created_at": "2024-09-02T07:38:43.755Z", + "updated_at": "2024-09-02T07:40:44.046Z", + "name": "Query 1", + "last_run_id": 3535709679, + 
"data_source_id": 44763, + "explorations_count": 0, + "report_imports_count": 2, + "dbt_metric_id": null, + "_links": { + "self": { + "href": "/api/acryl/reports/24f66e1701b6/queries/9b2f34343531" + }, + "report": { + "href": "/api/acryl/reports/24f66e1701b6" + }, + "report_runs": { + "href": "/api/acryl/reports/24f66e1701b6/runs" + }, + "query_runs": { + "href": "/api/acryl/reports/24f66e1701b6/queries/9b2f34343531/runs" + }, + "creator": { + "href": "/api/modeuser" + } + }, + "_forms": { + "edit": { + "method": "patch", + "action": "/api/acryl/reports/24f66e1701b6/queries/9b2f34343531", + "content_type": "application/json", + "input": { + "query": { + "raw_query": { + "type": "text", + "value": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELETED,\n\t\t_FIVETRAN_SYNCED\n FROM DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY LIMIT 100;\n\n-- Returns first 100 rows from ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER\n SELECT \n\t\tCOMMUNICATION_ACCOUNT_ID,\n\t\tID,\n\t\tMMS_CAPABLE,\n\t\tPHONE_NUMBER,\n\t\tSMS_CAPABLE,\n\t\tSTATUS,\n\t\tSTATUS_TLM,\n\t\tTLM,\n\t\tVOICE_CAPABLE,\n\t\tWHEN_CREATED\n FROM ETHAN_TEST_DB.PUBLIC.ACCOUNT_PHONE_NUMBER LIMIT 100;\n \n " + }, + "name": { + "type": "text", + "value": "Query 1" + }, + "data_source_id": { + "type": "text", + "value": 44763 + } + } + } + } + } + } + ] + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json b/metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json new file mode 100644 index 00000000000000..4ca48a84e9110f --- /dev/null +++ b/metadata-ingestion/tests/integration/mode/setup/datasets_157933cc1168.json @@ -0,0 +1,10 @@ +{ + "_links": { + "self": { + "href": "/api/acryltest/collections/157933cc1168/reports" + } + }, + "_embedded": { + "reports": [] + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json b/metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json new file mode 100644 index 00000000000000..ffb1bbf521db76 --- /dev/null +++ b/metadata-ingestion/tests/integration/mode/setup/datasets_75737b70402e.json @@ -0,0 +1,149 @@ +{ + "_links": { + "self": { + "href": "/api/acryltest/collections/75737b70402e/reports" + } + }, + "_embedded": { + "reports": [ + { + "account_id": 751252, + "account_username": "acryltest", + "collection_name": "AcrylTest", + "collection_token": "75737b70402e", + "created_at": "2024-09-02T07:38:43.722Z", + "description": "", + "drilldowns_enabled": false, + "edited_at": "2024-09-02T07:40:32.668Z", + "id": 5450544, + "is_sample": false, + "last_run_at": "2024-09-02T07:40:43.185Z", + "last_saved_at": "2024-09-02T07:40:32.679Z", + "last_successful_run_token": "29e56ca29a45", + "last_successful_sync_at": null, + "last_successfully_run_at": "2024-09-02T07:40:44.009Z", + "manual_run_disabled": false, + "max_query_count": 1, + "name": "Dataset 1", + "public": false, + "query_count": 1, + "query_preview": "-- Returns first 100 rows from DATAHUB_COMMUNITY.POSTGRES_PUBLIC.COMPANY\n SELECT \n\t\tAGE,\n\t\tID,\n\t\tNAME,\n\t\t_FIVETRAN_DELE", + "run_privately": true, + "runs_count": 3, + "schedules_count": 0, + "space_token": "75737b70402e", + "switch_view_token": "f213a1bb8f8a", + "token": "24f66e1701b6", + "type": "DatasetReport", + "updated_at": "2024-09-02T07:40:44.026Z", + "view_count": 6, + "thoughtspot_published_at": null, + "_links": { + "account": { + "href": 
"/api/acryltest" + }, + "creator": { + "href": "/api/modeuser" + }, + "dataset_dependencies": { + "href": "/api/acryltest/datasets/24f66e1701b6/reports" + }, + "last_run": { + "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45" + }, + "last_successful_run": { + "href": "/api/acryltest/reports/24f66e1701b6/runs/29e56ca29a45" + }, + "queries": { + "href": "/api/acryltest/reports/24f66e1701b6/queries" + }, + "report_index_web": { + "href": "/acryltest/spaces/75737b70402e" + }, + "report_pins": { + "href": "/api/acryltest/reports/24f66e1701b6/pins" + }, + "report_run": { + "templated": true, + "href": "/api/acryltest/reports/24f66e1701b6/runs/{id}?embed[result]=1" + }, + "report_runs": { + "href": "/api/acryltest/reports/24f66e1701b6/runs" + }, + "report_schedules": { + "href": "/api/acryltest/reports/24f66e1701b6/schedules" + }, + "self": { + "href": "/api/acryltest/reports/24f66e1701b6" + }, + "space": { + "href": "/api/acryltest/collections/75737b70402e" + }, + "space_links": { + "href": "/api/acryltest/reports/24f66e1701b6/space_links" + }, + "web": { + "href": "https://app.mode.com/acryltest/datasets/24f66e1701b6" + }, + "web_edit": { + "href": "/editor/acryltest/datasets/24f66e1701b6" + } + }, + "_forms": { + "destroy": { + "method": "delete", + "action": "/api/acryltest/reports/24f66e1701b6" + }, + "edit": { + "method": "patch", + "action": "/api/acryltest/reports/24f66e1701b6", + "input": { + "report": { + "name": { + "type": "text", + "value": "Dataset_2" + }, + "description": { + "type": "text", + "value": "" + }, + "account_id": { + "type": "text", + "value": 751252 + }, + "space_token": { + "type": "text", + "value": "75737b70402e" + } + } + } + }, + "update_settings": { + "method": "patch", + "action": "/api/acryltest/reports/24f66e1701b6/update_settings", + "input": { + "report": { + "manual_run_disabled": { + "type": "select", + "options": [ + true, + false + ], + "value": false + }, + "drill_anywhere_enabled": { + "type": "select", + "options": [ + true, + false + ], + "value": false + } + } + } + } + } + } + ] + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json b/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json index 9718967e5e463f..956093a95d8492 100644 --- a/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json +++ b/metadata-ingestion/tests/integration/mode/setup/reports_75737b70402e.json @@ -221,7 +221,34 @@ } } } - } + }, + "imported_datasets": [ + { + "name": "Dataset 1", + "token": "24f66e1701b6", + "_links": { + "report": { + "href": "/api/acryltest/reports/94750a190dc8" + }, + "source_dataset": { + "href": "/api/acryltest/reports/24f66e1701b6" + } + }, + "_forms": { + "refresh": { + "method": "post", + "action": "/api/acryltest/reports/94750a190dc8/runs", + "input": { + "dataset_tokens": [ + { + "token": "24f66e1701b6" + } + ] + } + } + } + } + ] }] } } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/test_mode.py b/metadata-ingestion/tests/integration/mode/test_mode.py index 0346767b05d253..7ea6597460de20 100644 --- a/metadata-ingestion/tests/integration/mode/test_mode.py +++ b/metadata-ingestion/tests/integration/mode/test_mode.py @@ -22,6 +22,10 @@ "https://app.mode.com/api/acryl/reports/9d2da37fa91e/queries/6e26a9f3d4e2/charts": "charts.json", "https://app.mode.com/api/acryl/data_sources": "data_sources.json", "https://app.mode.com/api/acryl/definitions": "definitions.json", + 
"https://app.mode.com/api/acryl/spaces/157933cc1168/datasets": "datasets_157933cc1168.json", + "https://app.mode.com/api/acryl/spaces/75737b70402e/datasets": "datasets_75737b70402e.json", + "https://app.mode.com/api/acryl/reports/24f66e1701b6": "dataset_24f66e1701b6.json", + "https://app.mode.com/api/acryl/reports/24f66e1701b6/queries": "dataset_queries_24f66e1701b6.json", } RESPONSE_ERROR_LIST = ["https://app.mode.com/api/acryl/spaces/75737b70402e/reports"] diff --git a/metadata-ingestion/tests/integration/s3/test_s3.py b/metadata-ingestion/tests/integration/s3/test_s3.py index b45f1f78fc55a8..54156610c68720 100644 --- a/metadata-ingestion/tests/integration/s3/test_s3.py +++ b/metadata-ingestion/tests/integration/s3/test_s3.py @@ -112,9 +112,9 @@ def s3_populate(pytestconfig, s3_resource, s3_client, bucket_names): bkt.upload_file( full_path, rel_path, # Set content type for `no_extension/small` file to text/csv - ExtraArgs={"ContentType": "text/csv"} - if "." not in rel_path - else {}, + ExtraArgs=( + {"ContentType": "text/csv"} if "." not in rel_path else {} + ), ) s3_client.put_object_tagging( Bucket=bucket_name, diff --git a/metadata-ingestion/tests/performance/bigquery/bigquery_events.py b/metadata-ingestion/tests/performance/bigquery/bigquery_events.py index 0e0bfe78c260fa..bf3d566da8d278 100644 --- a/metadata-ingestion/tests/performance/bigquery/bigquery_events.py +++ b/metadata-ingestion/tests/performance/bigquery/bigquery_events.py @@ -47,9 +47,11 @@ def generate_events( for query in queries: project = ( # Most queries are run in the project of the tables they access table_to_project[ - query.object_modified.name - if query.object_modified - else query.fields_accessed[0].table.name + ( + query.object_modified.name + if query.object_modified + else query.fields_accessed[0].table.name + ) ] if random.random() >= proabability_of_project_mismatch else random.choice(projects) @@ -71,9 +73,11 @@ def generate_events( query=query.text, statementType=random.choice(OPERATION_TYPE_MAP[query.type]), project_id=project, - destinationTable=ref_from_table(query.object_modified, table_to_project) - if query.object_modified - else None, + destinationTable=( + ref_from_table(query.object_modified, table_to_project) + if query.object_modified + else None + ), referencedTables=list( dict.fromkeys( # Preserve order ref_from_table(field.table, table_to_project) @@ -90,9 +94,11 @@ def generate_events( ) ), referencedViews=referencedViews, - payload=dataclasses.asdict(query) - if config.debug_include_full_payloads - else None, + payload=( + dataclasses.asdict(query) + if config.debug_include_full_payloads + else None + ), query_on_view=True if referencedViews else False, ) ) @@ -118,9 +124,11 @@ def generate_events( resource=ref, fieldsRead=list(columns), readReason=random.choice(READ_REASONS), - payload=dataclasses.asdict(query) - if config.debug_include_full_payloads - else None, + payload=( + dataclasses.asdict(query) + if config.debug_include_full_payloads + else None + ), ) ) diff --git a/metadata-ingestion/tests/performance/data_generation.py b/metadata-ingestion/tests/performance/data_generation.py index 9b80d6260d4082..fcff13edf59363 100644 --- a/metadata-ingestion/tests/performance/data_generation.py +++ b/metadata-ingestion/tests/performance/data_generation.py @@ -7,6 +7,7 @@ This is a work in progress, built piecemeal as needed. 
""" + import random from abc import ABCMeta, abstractmethod from collections import OrderedDict diff --git a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py index ee1caf6783ec12..cb3a1c165acdd4 100644 --- a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py +++ b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py @@ -120,9 +120,9 @@ def tables(self, schema: Schema) -> Iterable[Table]: updated_at=None, updated_by=None, table_id="", - view_definition=table.definition - if isinstance(table, data_model.View) - else None, + view_definition=( + table.definition if isinstance(table, data_model.View) else None + ), properties={}, ) diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py index 2d43b24e10763c..cafca521ae0148 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -37,20 +37,22 @@ def make_lineage_aspect( ) for upstream_urn in upstreams ], - fineGrainedLineages=[ - models.FineGrainedLineageClass( - upstreamType=models.FineGrainedLineageUpstreamTypeClass.FIELD_SET, - downstreamType=models.FineGrainedLineageDownstreamTypeClass.FIELD, - upstreams=[ - make_schema_field_urn(upstream_urn, col) - for upstream_urn in upstreams - ], - downstreams=[make_schema_field_urn(dataset_urn, col)], - ) - for col in columns - ] - if include_cll - else None, + fineGrainedLineages=( + [ + models.FineGrainedLineageClass( + upstreamType=models.FineGrainedLineageUpstreamTypeClass.FIELD_SET, + downstreamType=models.FineGrainedLineageDownstreamTypeClass.FIELD, + upstreams=[ + make_schema_field_urn(upstream_urn, col) + for upstream_urn in upstreams + ], + downstreams=[make_schema_field_urn(dataset_urn, col)], + ) + for col in columns + ] + if include_cll + else None + ), ) diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_lineage_via_temp_table_disordered_add.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_lineage_via_temp_table_disordered_add.json similarity index 100% rename from metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_lineage_via_temp_table_disordered_add.json rename to metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_lineage_via_temp_table_disordered_add.json diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py index c730b4ee35e552..0d21936a74d072 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_aggregator.py @@ -579,7 +579,9 @@ def test_create_table_query_mcps(pytestconfig: pytest.Config) -> None: @freeze_time(FROZEN_TIME) -def test_lineage_via_temp_table_disordered_add(pytestconfig: pytest.Config) -> None: +def test_table_lineage_via_temp_table_disordered_add( + pytestconfig: pytest.Config, +) -> None: aggregator = SqlParsingAggregator( platform="redshift", generate_lineage=True, @@ -607,7 +609,8 @@ def test_lineage_via_temp_table_disordered_add(pytestconfig: pytest.Config) -> N mce_helpers.check_goldens_stream( pytestconfig, outputs=mcps, - golden_path=RESOURCE_DIR / "test_lineage_via_temp_table_disordered_add.json", + golden_path=RESOURCE_DIR + 
/ "test_table_lineage_via_temp_table_disordered_add.json", ) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index b8a1222125d103..506bfd9c12674a 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -163,12 +163,14 @@ def create_and_run_test_pipeline( "tests.unit.test_source.FakeSource.get_workunits" ) as mock_getworkunits: mock_getworkunits.return_value = [ - workunit.MetadataWorkUnit( - id=f"test-workunit-mce-{e.proposedSnapshot.urn}", mce=e - ) - if isinstance(e, MetadataChangeEventClass) - else workunit.MetadataWorkUnit( - id=f"test-workunit-mcp-{e.entityUrn}-{e.aspectName}", mcp=e + ( + workunit.MetadataWorkUnit( + id=f"test-workunit-mce-{e.proposedSnapshot.urn}", mce=e + ) + if isinstance(e, MetadataChangeEventClass) + else workunit.MetadataWorkUnit( + id=f"test-workunit-mcp-{e.entityUrn}-{e.aspectName}", mcp=e + ) ) for e in events ] diff --git a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py index 06d1cfc7d154d4..f4062f9a911453 100644 --- a/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py +++ b/metadata-ingestion/tests/unit/utilities/test_file_backed_collections.py @@ -186,6 +186,42 @@ def test_file_dict_stores_counter() -> None: assert in_memory_counters[i].most_common(2) == cache[str(i)].most_common(2) +def test_file_dict_ordering() -> None: + """ + We require that FileBackedDict maintains insertion order, similar to Python's + built-in dict. This test makes one of each and validates that they behave the same. + """ + + cache = FileBackedDict[int]( + serializer=str, + deserializer=int, + cache_max_size=1, + ) + data = {} + + num_items = 14 + + for i in range(num_items): + cache[str(i)] = i + data[str(i)] = i + + assert list(cache.items()) == list(data.items()) + + # Try some deletes. + for i in range(3, num_items, 3): + del cache[str(i)] + del data[str(i)] + + assert list(cache.items()) == list(data.items()) + + # And some updates + inserts. 
+ for i in range(2, num_items, 2): + cache[str(i)] = i * 10 + data[str(i)] = i * 10 + + assert list(cache.items()) == list(data.items()) + + @dataclass class Pair: x: int diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 9f5fc109eea7f6..7e72767c08b79c 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -27,6 +27,8 @@ dependencies { implementation externalDependency.guava implementation externalDependency.reflections + // https://mvnrepository.com/artifact/nl.basjes.parse.useragent/yauaa + implementation 'nl.basjes.parse.useragent:yauaa:7.27.0' api(externalDependency.dgraph4j) { exclude group: 'com.google.guava', module: 'guava' diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 3ec090a3db3a45..1fba8426317209 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -181,7 +181,7 @@ public AspectsBatchImplBuilder mcps( mcp, auditStamp, retrieverContext.getAspectRetriever()); } } catch (IllegalArgumentException e) { - log.error("Invalid proposal, skipping and proceeding with batch: " + mcp, e); + log.error("Invalid proposal, skipping and proceeding with batch: {}", mcp, e); return null; } }) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java new file mode 100644 index 00000000000000..542eb5f3869c01 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottle.java @@ -0,0 +1,82 @@ +package com.linkedin.metadata.dao.throttle; + +import static com.linkedin.metadata.dao.throttle.ThrottleType.MANUAL; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_TIMESERIES_LAG; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_VERSIONED_LAG; + +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import java.util.Comparator; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import nl.basjes.parse.useragent.UserAgent; +import nl.basjes.parse.useragent.UserAgentAnalyzer; + +public class APIThrottle { + private static final Set<String> AGENT_EXEMPTIONS = Set.of("Browser"); + private static final UserAgentAnalyzer UAA = + UserAgentAnalyzer.newBuilder() + .hideMatcherLoadStats() + .withField(UserAgent.AGENT_CLASS) + .withCache(1000) + .build(); + + private APIThrottle() {} + + /** + * This method is expected to be called on sync ingest requests for both timeseries and versioned + * aspects. + * + *
<p>
1. Async requests are never expected to be throttled here. 2. UI requests are not expected + * to be throttled, so we'll try to detect browser vs non-browser activity. 3. Throttling + * exceptions are expected to be caught by the API implementation and converted to a 429 http + * status code + * + * @param opContext the operation context + * @param throttleEvents the throttle state + * @param isTimeseries whether the operation is for timeseries or not (throttled separately) + */ + public static void evaluate( + @Nonnull OperationContext opContext, + @Nullable Set<ThrottleEvent> throttleEvents, + boolean isTimeseries) { + + Set<Long> eventMatchMaxWaitMs = eventMatchMaxWaitMs(throttleEvents, isTimeseries); + + if (!eventMatchMaxWaitMs.isEmpty() && !isExempt(opContext.getRequestContext())) { + throw new APIThrottleException( + eventMatchMaxWaitMs.stream().max(Comparator.naturalOrder()).orElse(-1L), + "Throttled due to " + throttleEvents); + } + } + + private static boolean isExempt(@Nullable RequestContext requestContext) { + // Exclude internal calls + if (requestContext == null + || requestContext.getUserAgent() == null + || requestContext.getUserAgent().isEmpty()) { + return true; + } + + UserAgent ua = UAA.parse(requestContext.getUserAgent()); + return AGENT_EXEMPTIONS.contains(ua.get(UserAgent.AGENT_CLASS).getValue()); + } + + private static Set<Long> eventMatchMaxWaitMs( + @Nullable Set<ThrottleEvent> throttleEvents, boolean isTimeseries) { + if (throttleEvents == null) { + return Set.of(); + } + + return throttleEvents.stream() + .map( + e -> + e.getActiveThrottleMaxWaitMs( + Set.of(MANUAL, isTimeseries ? MCL_TIMESERIES_LAG : MCL_VERSIONED_LAG))) + .filter(Objects::nonNull) + .collect(Collectors.toSet()); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java new file mode 100644 index 00000000000000..6f1a5fcd1af220 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/APIThrottleException.java @@ -0,0 +1,20 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.concurrent.TimeUnit; + +public class APIThrottleException extends RuntimeException { + private final long durationMs; + + public APIThrottleException(long durationMs, String message) { + super(message); + this.durationMs = durationMs; + } + + public long getDurationMs() { + return durationMs; + } + + public long getDurationSeconds() { + return TimeUnit.MILLISECONDS.toSeconds(durationMs); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java new file mode 100644 index 00000000000000..29692ff86d805f --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/NoOpSensor.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.function.Function; +import lombok.EqualsAndHashCode; + +@EqualsAndHashCode +public class NoOpSensor implements ThrottleSensor { + @Override + public ThrottleSensor addCallback(Function<ThrottleEvent, ThrottleControl> callback) { + return this; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java new file mode 100644 index 00000000000000..b08c43078e79ba --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleControl.java @@ -0,0 +1,31 @@ +package 
com.linkedin.metadata.dao.throttle; + +import java.util.function.Consumer; +import javax.annotation.Nullable; +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Getter; +import lombok.Value; +import lombok.experimental.Accessors; + +@Value +@Accessors(fluent = true) +@Builder +public class ThrottleControl { + public static ThrottleControl NONE = ThrottleControl.builder().build(); + + // call this after pause/sleep + @Getter(AccessLevel.NONE) + @Nullable + Consumer<ThrottleEvent> callback; + + public boolean hasCallback() { + return callback != null; + } + + public void execute(ThrottleEvent throttleEvent) { + if (callback != null) { + callback.accept(throttleEvent); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java new file mode 100644 index 00000000000000..d382c87d6b546a --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleEvent.java @@ -0,0 +1,96 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.Comparator; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Value; +import lombok.experimental.Accessors; + +@Value +@Accessors(fluent = true) +@Builder +public class ThrottleEvent { + public static ThrottleEvent throttle(Map<ThrottleType, Long> backoffWaitMs) { + return ThrottleEvent.builder() + .backoffWaitMs(backoffWaitMs) + .throttled( + backoffWaitMs.entrySet().stream() + .filter(entry -> entry.getValue() > 0) + .map(entry -> Map.entry(entry.getKey(), true)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .build(); + } + + public static ThrottleEvent clearThrottle(ThrottleEvent throttleEvent) { + return clearThrottle(throttleEvent.getActiveThrottles()); + } + + public static ThrottleEvent clearThrottle(Set<ThrottleType> clear) { + return ThrottleEvent.builder() + .throttled( + clear.stream() + .map(t -> Map.entry(t, false)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .build(); + } + + Map<ThrottleType, Boolean> throttled; + Map<ThrottleType, Long> backoffWaitMs; + + public Set<ThrottleType> getActiveThrottles() { + return streamTypes().filter(this::isThrottled).collect(Collectors.toSet()); + } + + /** + * Return the suggested wait time in milliseconds given an optional set of filter types. 
+ * + * @param filterTypes empty for no filters + * @return suggested wait time in milliseconds, negative if no suggestion is possible, null if no + * wait + */ + @Nullable + public Long getActiveThrottleMaxWaitMs(Set<ThrottleType> filterTypes) { + Set<ThrottleType> activeThrottles = + getActiveThrottles().stream() + .filter(a -> filterTypes.isEmpty() || filterTypes.contains(a)) + .collect(Collectors.toSet()); + + if (activeThrottles.isEmpty()) { + return null; + } + + if (!activeThrottles.contains(ThrottleType.MANUAL) && backoffWaitMs != null) { + return getActiveThrottles().stream() + .map(t -> backoffWaitMs.getOrDefault(t, -1L)) + .max(Comparator.naturalOrder()) + .orElse(-1L); + } + + return -1L; + } + + public Set<ThrottleType> getDisabledThrottles() { + return streamTypes().filter(t -> !isThrottled(t)).collect(Collectors.toSet()); + } + + public boolean isThrottled() { + return (throttled != null && throttled.values().stream().anyMatch(b -> b)) + || (backoffWaitMs != null && backoffWaitMs.values().stream().anyMatch(wait -> wait > 0)); + } + + private boolean isThrottled(ThrottleType throttleType) { + return (throttled != null && throttled.getOrDefault(throttleType, false)) + || (backoffWaitMs != null && backoffWaitMs.getOrDefault(throttleType, 0L) > 0); + } + + private Stream<ThrottleType> streamTypes() { + return Stream.concat( + throttled != null ? throttled.keySet().stream() : Stream.empty(), + backoffWaitMs != null ? backoffWaitMs.keySet().stream() : Stream.empty()) + .distinct(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java new file mode 100644 index 00000000000000..d92defe5edbcb7 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleSensor.java @@ -0,0 +1,7 @@ +package com.linkedin.metadata.dao.throttle; + +import java.util.function.Function; + +public interface ThrottleSensor { + ThrottleSensor addCallback(Function<ThrottleEvent, ThrottleControl> callback); +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java new file mode 100644 index 00000000000000..ac6d13a58cd079 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/dao/throttle/ThrottleType.java @@ -0,0 +1,7 @@ +package com.linkedin.metadata.dao.throttle; + +public enum ThrottleType { + MCL_TIMESERIES_LAG, + MCL_VERSIONED_LAG, + MANUAL +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index 401d40ec177cee..3f0545b6f94a85 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -6,13 +6,11 @@ import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.utils.metrics.MetricUtils; -import io.ebean.Transaction; import java.sql.Timestamp; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Function; -import java.util.function.Supplier; import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -27,10 +25,10 @@ * aspect is set to 0 for efficient retrieval. In most cases only the latest state of an aspect will * be fetched. See {@link EntityServiceImpl} for more details. * - *
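A minimal usage sketch of the throttle primitives above, illustrative rather than part of the change: it assumes a servlet-style endpoint (HttpServletResponse and the 30s backoff value are stand-ins for whatever the real API layer uses) and shows the contract the APIThrottle javadoc describes, with the throttle exception converted to HTTP 429 plus a Retry-After hint.

```java
import com.linkedin.metadata.dao.throttle.APIThrottle;
import com.linkedin.metadata.dao.throttle.APIThrottleException;
import com.linkedin.metadata.dao.throttle.ThrottleEvent;
import com.linkedin.metadata.dao.throttle.ThrottleType;
import io.datahubproject.metadata.context.OperationContext;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.HttpServletResponse;

class ApiThrottleUsageSketch {
  /** Sync ingest entry point: throttle check first, 429 on rejection. */
  static void syncIngest(OperationContext opContext, HttpServletResponse response)
      throws IOException {
    // Hypothetical state: MCL consumption of versioned aspects is ~30s behind.
    ThrottleEvent lag = ThrottleEvent.throttle(Map.of(ThrottleType.MCL_VERSIONED_LAG, 30_000L));
    try {
      // isTimeseries=false: only MANUAL and MCL_VERSIONED_LAG throttles are consulted.
      APIThrottle.evaluate(opContext, Set.of(lag), false);
      // ... proceed with the synchronous write ...
    } catch (APIThrottleException e) {
      // Browsers are exempt via the user-agent check; everyone else gets a 429.
      response.setHeader("Retry-After", String.valueOf(e.getDurationSeconds()));
      response.sendError(429, e.getMessage());
    }
  }
}
```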
<p>
TODO: This interface exposes {@link #runInTransactionWithRetry(Supplier, int)} because {@link - * EntityServiceImpl} concerns itself with batching multiple commands into a single transaction. It - * exposes storage concerns somewhat and it'd be worth looking into ways to move this responsibility - * inside {@link AspectDao} implementations. + *
<p>
TODO: This interface exposes {@link #runInTransactionWithRetry(Function, int)} + * (TransactionContext)} because {@link EntityServiceImpl} concerns itself with batching multiple + * commands into a single transaction. It exposes storage concerns somewhat and it'd be worth + * looking into ways to move this responsibility inside {@link AspectDao} implementations. */ public interface AspectDao { String ASPECT_WRITE_COUNT_METRIC_NAME = "aspectWriteCount"; @@ -77,7 +75,7 @@ Map> getLatestAspects( Map> urnAspects, boolean forUpdate); void saveAspect( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final String urn, @Nonnull final String aspectName, @Nonnull final String aspectMetadata, @@ -89,10 +87,12 @@ void saveAspect( final boolean insert); void saveAspect( - @Nullable Transaction tx, @Nonnull final EntityAspect aspect, final boolean insert); + @Nullable TransactionContext txContext, + @Nonnull final EntityAspect aspect, + final boolean insert); long saveLatestAspect( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final String urn, @Nonnull final String aspectName, @Nullable final String oldAspectMetadata, @@ -107,7 +107,7 @@ long saveLatestAspect( @Nullable final String newSystemMetadata, final Long nextVersion); - void deleteAspect(@Nullable Transaction tx, @Nonnull final EntityAspect aspect); + void deleteAspect(@Nullable TransactionContext txContext, @Nonnull final EntityAspect aspect); @Nonnull ListResult listUrns( @@ -125,7 +125,7 @@ ListResult listUrns( @Nonnull Stream streamAspects(String entityName, String aspectName); - int deleteUrn(@Nullable Transaction tx, @Nonnull final String urn); + int deleteUrn(@Nullable TransactionContext txContext, @Nonnull final String urn); @Nonnull ListResult listLatestAspectMetadata( @@ -159,11 +159,11 @@ default Map getNextVersions( @Nonnull T runInTransactionWithRetry( - @Nonnull final Function block, final int maxTransactionRetry); + @Nonnull final Function block, final int maxTransactionRetry); @Nonnull default List runInTransactionWithRetry( - @Nonnull final Function block, + @Nonnull final Function block, AspectsBatch batch, final int maxTransactionRetry) { return List.of(runInTransactionWithRetry(block, maxTransactionRetry)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index fd6ad57c0adf52..69135a8a64805d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -6,6 +6,7 @@ import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; import static com.linkedin.metadata.Constants.UI_SOURCE; +import static com.linkedin.metadata.entity.TransactionContext.DEFAULT_MAX_TRANSACTION_RETRY; import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; import static com.linkedin.metadata.utils.PegasusUtils.getDataTemplateClassFromSchema; import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; @@ -50,6 +51,10 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; +import com.linkedin.metadata.dao.throttle.APIThrottle; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import 
com.linkedin.metadata.dao.throttle.ThrottleEvent; +import com.linkedin.metadata.dao.throttle.ThrottleType; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -79,7 +84,6 @@ import com.linkedin.r2.RemoteInvocationException; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.ebean.Transaction; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; @@ -96,6 +100,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -146,8 +151,6 @@ public class EntityServiceImpl implements EntityService { * As described above, the latest version of an aspect should always take the value 0, with * monotonically increasing version incrementing as usual once the latest version is replaced. */ - private static final int DEFAULT_MAX_TRANSACTION_RETRY = 3; - protected final AspectDao aspectDao; @VisibleForTesting @Getter private final EventProducer producer; @@ -161,6 +164,9 @@ public class EntityServiceImpl implements EntityService { private final Integer ebeanMaxTransactionRetry; private final boolean enableBrowseV2; + @Getter + private final Map, ThrottleEvent> throttleEvents = new ConcurrentHashMap<>(); + public EntityServiceImpl( @Nonnull final AspectDao aspectDao, @Nonnull final EventProducer producer, @@ -196,6 +202,17 @@ public void setUpdateIndicesService(@Nullable SearchIndicesService updateIndices this.updateIndicesService = updateIndicesService; } + public ThrottleControl handleThrottleEvent(ThrottleEvent throttleEvent) { + final Set activeEvents = throttleEvent.getActiveThrottles(); + // store throttle event + throttleEvents.put(activeEvents, throttleEvent); + + return ThrottleControl.builder() + // clear throttle event + .callback(clearThrottle -> throttleEvents.remove(clearThrottle.getDisabledThrottles())) + .build(); + } + @Override public RecordTemplate getLatestAspect( @Nonnull OperationContext opContext, @Nonnull Urn urn, @Nonnull String aspectName) { @@ -771,6 +788,9 @@ public List ingestAspects( return Collections.emptyList(); } + // Handle throttling + APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), false); + List ingestResults = ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); @@ -837,7 +857,7 @@ private List ingestAspectsToLocalDB( return aspectDao .runInTransactionWithRetry( - (tx) -> { + (txContext) -> { // Generate default aspects within the transaction (they are re-calculated on retry) AspectsBatch batchWithDefaults = DefaultAspectsUtil.withAdditionalChanges( @@ -852,7 +872,8 @@ private List ingestAspectsToLocalDB( aspectDao.getLatestAspects(urnAspects, true)); // read #2 (potentially) final Map> nextVersions = - EntityUtils.calculateNextVersions(aspectDao, latestAspects, urnAspects); + EntityUtils.calculateNextVersions( + txContext, aspectDao, latestAspects, urnAspects); // 1. Convert patches to full upserts // 2. 
Run any entity/aspect level hooks @@ -872,7 +893,7 @@ private List ingestAspectsToLocalDB( Map> newNextVersions = EntityUtils.calculateNextVersions( - aspectDao, updatedLatestAspects, updatedItems.getFirst()); + txContext, aspectDao, updatedLatestAspects, updatedItems.getFirst()); // merge updatedNextVersions = AspectsBatch.merge(nextVersions, newNextVersions); } else { @@ -939,7 +960,7 @@ private List ingestAspectsToLocalDB( if (overwrite || latest == null) { result = ingestAspectToLocalDB( - tx, + txContext, item.getUrn(), item.getAspectName(), item.getRecordTemplate(), @@ -973,8 +994,8 @@ private List ingestAspectsToLocalDB( .collect(Collectors.toList()); // commit upserts prior to retention or kafka send, if supported by impl - if (tx != null) { - tx.commitAndContinue(); + if (txContext != null) { + txContext.commitAndContinue(); } long took = ingestToLocalDBTimer.stop(); log.info( @@ -1020,6 +1041,7 @@ private List ingestAspectsToLocalDB( inputBatch, DEFAULT_MAX_TRANSACTION_RETRY) .stream() + .filter(Objects::nonNull) .flatMap(List::stream) .collect(Collectors.toList()); } @@ -1184,6 +1206,9 @@ private Stream ingestTimeseriesProposal( } if (!async) { + // Handle throttling + APIThrottle.evaluate(opContext, new HashSet<>(throttleEvents.values()), true); + // Create default non-timeseries aspects for timeseries aspects List timeseriesKeyAspects = aspectsBatch.getMCPItems().stream() @@ -2209,7 +2234,7 @@ private RollbackResult deleteAspectWithoutMCL( final RollbackResult result = aspectDao.runInTransactionWithRetry( - (tx) -> { + (txContext) -> { Integer additionalRowsDeleted = 0; // 1. Fetch the latest existing version of the aspect. @@ -2282,7 +2307,7 @@ private RollbackResult deleteAspectWithoutMCL( } // 5. Apply deletes and fix up latest row - aspectsToDelete.forEach(aspect -> aspectDao.deleteAspect(tx, aspect)); + aspectsToDelete.forEach(aspect -> aspectDao.deleteAspect(txContext, aspect)); if (survivingAspect != null) { // if there was a surviving aspect, copy its information into the latest row @@ -2300,16 +2325,16 @@ private RollbackResult deleteAspectWithoutMCL( latest .getEntityAspect() .setCreatedFor(survivingAspect.getEntityAspect().getCreatedFor()); - aspectDao.saveAspect(tx, latest.getEntityAspect(), false); + aspectDao.saveAspect(txContext, latest.getEntityAspect(), false); // metrics aspectDao.incrementWriteMetrics( aspectName, 1, latest.getMetadataRaw().getBytes(StandardCharsets.UTF_8).length); - aspectDao.deleteAspect(tx, survivingAspect.getEntityAspect()); + aspectDao.deleteAspect(txContext, survivingAspect.getEntityAspect()); } else { if (isKeyAspect) { if (hardDelete) { // If this is the key aspect, delete the entity entirely. - additionalRowsDeleted = aspectDao.deleteUrn(tx, urn); + additionalRowsDeleted = aspectDao.deleteUrn(txContext, urn); } else if (deleteItem.getEntitySpec().hasAspect(Constants.STATUS_ASPECT_NAME)) { // soft delete by setting status.removed=true (if applicable) final Status statusAspect = new Status(); @@ -2326,7 +2351,7 @@ private RollbackResult deleteAspectWithoutMCL( } } else { // Else, only delete the specific aspect. 
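For context on handleThrottleEvent above, a sketch of the intended wiring, assuming only types introduced in this change: a sensor such as a Kafka lag monitor raises a ThrottleEvent, the service records it in its throttleEvents map, and the returned ThrottleControl carries the callback that clears the entry once the sensor observes recovery.

```java
import com.linkedin.metadata.dao.throttle.ThrottleControl;
import com.linkedin.metadata.dao.throttle.ThrottleEvent;
import com.linkedin.metadata.dao.throttle.ThrottleType;
import com.linkedin.metadata.entity.EntityServiceImpl;
import java.util.Map;

class ThrottleWiringSketch {
  /** Sensor side: report lag and keep the control for later clearing. */
  static ThrottleControl onLagDetected(EntityServiceImpl entityService) {
    ThrottleEvent raised =
        ThrottleEvent.throttle(Map.of(ThrottleType.MCL_TIMESERIES_LAG, 10_000L));
    // The service stores the active throttle and returns the clearing hook.
    return entityService.handleThrottleEvent(raised);
  }

  /** Sensor side: lag recovered, flip the same types off through the callback. */
  static void onLagRecovered(ThrottleControl control, ThrottleEvent raised) {
    if (control.hasCallback()) {
      control.execute(ThrottleEvent.clearThrottle(raised));
    }
  }
}
```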
- aspectDao.deleteAspect(tx, latest.getEntityAspect()); + aspectDao.deleteAspect(txContext, latest.getEntityAspect()); } } @@ -2466,7 +2491,7 @@ private Map getEnvelopedAspects( @Nonnull private UpdateAspectResult ingestAspectToLocalDB( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull final RecordTemplate newValue, @@ -2495,7 +2520,7 @@ private UpdateAspectResult ingestAspectToLocalDB( latest.getEntityAspect().setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata)); log.info("Ingesting aspect with name {}, urn {}", aspectName, urn); - aspectDao.saveAspect(tx, latest.getEntityAspect(), false); + aspectDao.saveAspect(txContext, latest.getEntityAspect(), false); // metrics aspectDao.incrementWriteMetrics( @@ -2518,7 +2543,7 @@ private UpdateAspectResult ingestAspectToLocalDB( String newValueStr = EntityApiUtils.toJsonAspect(newValue); long versionOfOld = aspectDao.saveLatestAspect( - tx, + txContext, urn.toString(), aspectName, latest == null ? null : EntityApiUtils.toJsonAspect(oldValue), diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index 7842365ce429be..3c4109970e9d0b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -285,38 +285,51 @@ public static List toSystemAspects( * Use the precalculated next version from system metadata if it exists, otherwise lookup the next * version the normal way from the database * + * @param txContext * @param aspectDao database access * @param latestAspects aspect version 0 with system metadata * @param urnAspects urn/aspects which we need next version information for * @return map of the urn/aspect to the next aspect version */ public static Map> calculateNextVersions( + TransactionContext txContext, AspectDao aspectDao, Map> latestAspects, Map> urnAspects) { - Map> precalculatedVersions = - latestAspects.entrySet().stream() - .map( - entry -> - Map.entry( - entry.getKey(), convertSystemAspectToNextVersionMap(entry.getValue()))) - .filter(entry -> !entry.getValue().isEmpty()) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - - Map> missingAspectVersions = - urnAspects.entrySet().stream() - .flatMap( - entry -> - entry.getValue().stream() - .map(aspectName -> Pair.of(entry.getKey(), aspectName))) - .filter( - urnAspectName -> - !precalculatedVersions - .getOrDefault(urnAspectName.getKey(), Map.of()) - .containsKey(urnAspectName.getValue())) - .collect( - Collectors.groupingBy( - Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toSet()))); + + final Map> precalculatedVersions; + final Map> missingAspectVersions; + if (txContext.getFailedAttempts() > 2 && txContext.lastExceptionIsDuplicateKey()) { + log.warn( + "Multiple exceptions detected, last exception detected as DuplicateKey, fallback to database max(version)+1"); + precalculatedVersions = Map.of(); + missingAspectVersions = urnAspects; + } else { + precalculatedVersions = + latestAspects.entrySet().stream() + .map( + entry -> + Map.entry( + entry.getKey(), convertSystemAspectToNextVersionMap(entry.getValue()))) + .filter(entry -> !entry.getValue().isEmpty()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + missingAspectVersions = + urnAspects.entrySet().stream() + .flatMap( + entry -> + entry.getValue().stream() + 
.map(aspectName -> Pair.of(entry.getKey(), aspectName))) + .filter( + urnAspectName -> + !precalculatedVersions + .getOrDefault(urnAspectName.getKey(), Map.of()) + .containsKey(urnAspectName.getValue())) + .collect( + Collectors.groupingBy( + Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toSet()))); + } + Map<String, Map<String, Long>> databaseVersions = missingAspectVersions.isEmpty() ? Map.of() diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/TransactionContext.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/TransactionContext.java new file mode 100644 index 00000000000000..69f2f1c8981c03 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/TransactionContext.java @@ -0,0 +1,69 @@ +package com.linkedin.metadata.entity; + +import io.ebean.DuplicateKeyException; +import io.ebean.Transaction; +import java.util.ArrayList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NonNull; +import lombok.experimental.Accessors; +import org.springframework.lang.Nullable; + +/** Wraps a transaction with additional information about the exceptions encountered during retry. */ +@Data +@AllArgsConstructor +@Accessors(fluent = true) +public class TransactionContext { + public static final int DEFAULT_MAX_TRANSACTION_RETRY = 3; + + public static TransactionContext empty() { + return empty(DEFAULT_MAX_TRANSACTION_RETRY); + } + + public static TransactionContext empty(@Nullable Integer maxRetries) { + return empty(null, maxRetries == null ? DEFAULT_MAX_TRANSACTION_RETRY : maxRetries); + } + + public static TransactionContext empty(Transaction tx, int maxRetries) { + return new TransactionContext(tx, maxRetries, new ArrayList<>()); + } + + @Nullable private Transaction tx; + private int maxRetries; + @NonNull private List<RuntimeException> exceptions; + + public TransactionContext success() { + exceptions.clear(); + return this; + } + + public TransactionContext addException(RuntimeException e) { + exceptions.add(e); + return this; + } + + public int getFailedAttempts() { + return exceptions.size(); + } + + @Nullable + public RuntimeException lastException() { + return exceptions.isEmpty() ? 
null : exceptions.get(exceptions.size() - 1); + } + + public boolean lastExceptionIsDuplicateKey() { + return lastException() instanceof DuplicateKeyException; + } + + public boolean shouldAttemptRetry() { + return exceptions.size() <= maxRetries; + } + + public void commitAndContinue() { + if (tx != null) { + tx.commitAndContinue(); + } + success(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index 51f898d3122af3..9e7387947a9547 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -29,13 +29,13 @@ import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.entity.TransactionContext; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.query.ExtraInfo; import com.linkedin.metadata.query.ExtraInfoArray; import com.linkedin.metadata.query.ListResultMetadata; -import io.ebean.Transaction; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; @@ -187,7 +187,7 @@ private Map<String, Long> getMaxVersions( @Override public void saveAspect( - @Nullable Transaction tx, @Nonnull EntityAspect aspect, final boolean insert) { + @Nullable TransactionContext txContext, @Nonnull EntityAspect aspect, final boolean insert) { validateConnection(); SimpleStatement statement = generateSaveStatement(aspect, insert); _cqlSession.execute(statement); @@ -287,23 +287,21 @@ public ListResult<String> listAspectMetadata( @Override @Nonnull public <T> T runInTransactionWithRetry( - @Nonnull final Function<Transaction, T> block, final int maxTransactionRetry) { + @Nonnull final Function<TransactionContext, T> block, final int maxTransactionRetry) { validateConnection(); - int retryCount = 0; - Exception lastException; - + TransactionContext txContext = TransactionContext.empty(maxTransactionRetry); do { try { // TODO: Try to bend this code to make use of Cassandra batches. 
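TransactionContext consolidates the retryCount/lastException bookkeeping the DAOs previously hand-rolled; the Cassandra loop here is one caller, and EntityUtils.calculateNextVersions consults getFailedAttempts() and lastExceptionIsDuplicateKey() to decide on the max(version)+1 fallback. A condensed sketch of the loop shape, with a plain IllegalStateException standing in for the internal RetryLimitReached:

```java
import com.linkedin.metadata.entity.TransactionContext;
import java.util.function.Function;

class RetryLoopSketch {
  /** Run a block with retry bookkeeping, in the style of the DAO implementations. */
  static <T> T runWithRetry(Function<TransactionContext, T> block, int maxRetries) {
    TransactionContext txContext = TransactionContext.empty(maxRetries);
    do {
      try {
        return block.apply(txContext); // success() inside the block clears history
      } catch (RuntimeException e) {
        txContext.addException(e); // failures accumulate for DuplicateKey detection
      }
    } while (txContext.shouldAttemptRetry());
    throw new IllegalStateException(
        "Failed after " + maxRetries + " retries", txContext.lastException());
  }
}
```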
This method is called from // single-urn operations, so perf should not suffer much - return block.apply(null); + return block.apply(txContext); } catch (DriverException exception) { - lastException = exception; + txContext.addException(exception); } - } while (++retryCount <= maxTransactionRetry); + } while (txContext.shouldAttemptRetry()); throw new RetryLimitReached( - "Failed to add after " + maxTransactionRetry + " retries", lastException); + "Failed to add after " + maxTransactionRetry + " retries", txContext.lastException()); } private ListResult toListResult( @@ -368,7 +366,8 @@ private static AuditStamp toAuditStamp(@Nonnull final EntityAspect aspect) { } @Override - public void deleteAspect(@Nullable Transaction tx, @Nonnull final EntityAspect aspect) { + public void deleteAspect( + @Nullable TransactionContext txContext, @Nonnull final EntityAspect aspect) { validateConnection(); SimpleStatement ss = deleteFrom(CassandraAspect.TABLE_NAME) @@ -385,7 +384,7 @@ public void deleteAspect(@Nullable Transaction tx, @Nonnull final EntityAspect a } @Override - public int deleteUrn(@Nullable Transaction tx, @Nonnull final String urn) { + public int deleteUrn(@Nullable TransactionContext txContext, @Nonnull final String urn) { validateConnection(); SimpleStatement ss = deleteFrom(CassandraAspect.TABLE_NAME) @@ -569,7 +568,7 @@ public Map> getNextVersions(Map> u @Override public long saveLatestAspect( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final String urn, @Nonnull final String aspectName, @Nullable final String oldAspectMetadata, @@ -675,7 +674,7 @@ public void setWritable(boolean canWrite) { @Override public void saveAspect( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final String urn, @Nonnull final String aspectName, @Nonnull final String aspectMetadata, @@ -698,7 +697,7 @@ public void saveAspect( actor, impersonator); - saveAspect(tx, aspect, insert); + saveAspect(txContext, aspect, insert); // metrics incrementWriteMetrics(aspectName, 1, aspectMetadata.getBytes(StandardCharsets.UTF_8).length); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 93c06b9236d501..4304be1aa2a00a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.entity.TransactionContext; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.models.AspectSpec; @@ -143,7 +144,7 @@ private boolean validateConnection() { @Override public long saveLatestAspect( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final String urn, @Nonnull final String aspectName, @Nullable final String oldAspectMetadata, @@ -167,7 +168,7 @@ public long saveLatestAspect( if (oldAspectMetadata != null && oldTime != null) { largestVersion = nextVersion; saveAspect( - tx, + txContext, urn, aspectName, oldAspectMetadata, @@ -181,7 +182,7 @@ public long saveLatestAspect( // Save newValue as the latest version (v0) saveAspect( - tx, + txContext, urn, aspectName, newAspectMetadata, 
@@ -197,7 +198,7 @@ public long saveLatestAspect( @Override public void saveAspect( - @Nullable Transaction tx, + @Nullable TransactionContext txContext, @Nonnull final String urn, @Nonnull final String aspectName, @Nonnull final String aspectMetadata, @@ -220,23 +221,27 @@ public void saveAspect( aspect.setCreatedFor(impersonator); } - saveEbeanAspect(tx, aspect, insert); + saveEbeanAspect(txContext, aspect, insert); } @Override public void saveAspect( - @Nullable Transaction tx, @Nonnull final EntityAspect aspect, final boolean insert) { + @Nullable TransactionContext txContext, + @Nonnull final EntityAspect aspect, + final boolean insert) { EbeanAspectV2 ebeanAspect = EbeanAspectV2.fromEntityAspect(aspect); - saveEbeanAspect(tx, ebeanAspect, insert); + saveEbeanAspect(txContext, ebeanAspect, insert); } private void saveEbeanAspect( - @Nullable Transaction tx, @Nonnull final EbeanAspectV2 ebeanAspect, final boolean insert) { + @Nullable TransactionContext txContext, + @Nonnull final EbeanAspectV2 ebeanAspect, + final boolean insert) { validateConnection(); if (insert) { - _server.insert(ebeanAspect, tx); + _server.insert(ebeanAspect, txContext.tx()); } else { - _server.update(ebeanAspect, tx); + _server.update(ebeanAspect, txContext.tx()); } } @@ -304,20 +309,21 @@ public EntityAspect getAspect(@Nonnull final EntityAspectIdentifier key) { } @Override - public void deleteAspect(@Nullable Transaction tx, @Nonnull final EntityAspect aspect) { + public void deleteAspect( + @Nullable TransactionContext txContext, @Nonnull final EntityAspect aspect) { validateConnection(); EbeanAspectV2 ebeanAspect = EbeanAspectV2.fromEntityAspect(aspect); - _server.delete(ebeanAspect, tx); + _server.delete(ebeanAspect, txContext.tx()); } @Override - public int deleteUrn(@Nullable Transaction tx, @Nonnull final String urn) { + public int deleteUrn(@Nullable TransactionContext txContext, @Nonnull final String urn) { validateConnection(); return _server .createQuery(EbeanAspectV2.class) .where() .eq(EbeanAspectV2.URN_COLUMN, urn) - .delete(tx); + .delete(txContext.tx()); } @Override @@ -658,14 +664,14 @@ public ListResult listLatestAspectMetadata( @Override @Nonnull public T runInTransactionWithRetry( - @Nonnull final Function block, final int maxTransactionRetry) { + @Nonnull final Function block, final int maxTransactionRetry) { return runInTransactionWithRetry(block, null, maxTransactionRetry).get(0); } @Override @Nonnull public List runInTransactionWithRetry( - @Nonnull final Function block, + @Nonnull final Function block, @Nullable AspectsBatch batch, final int maxTransactionRetry) { @@ -720,13 +726,12 @@ public List runInTransactionWithRetry( @Nonnull public T runInTransactionWithRetryUnlocked( - @Nonnull final Function block, + @Nonnull final Function block, @Nullable AspectsBatch batch, final int maxTransactionRetry) { validateConnection(); - int retryCount = 0; - Exception lastException = null; + TransactionContext transactionContext = TransactionContext.empty(maxTransactionRetry); T result = null; do { @@ -734,9 +739,8 @@ public T runInTransactionWithRetryUnlocked( _server.beginTransaction( TxScope.requiresNew().setIsolation(TxIsolation.REPEATABLE_READ))) { transaction.setBatchMode(true); - result = block.apply(transaction); + result = block.apply(transactionContext.tx(transaction)); transaction.commit(); - lastException = null; break; } catch (PersistenceException exception) { if (exception instanceof DuplicateKeyException) { @@ -749,20 +753,21 @@ public T runInTransactionWithRetryUnlocked( log.warn( 
"Skipping DuplicateKeyException retry since aspect is the key aspect. {}", batch.getUrnAspectsMap().keySet()); - continue; + break; } } MetricUtils.counter(MetricRegistry.name(this.getClass(), "txFailed")).inc(); log.warn("Retryable PersistenceException: {}", exception.getMessage()); - lastException = exception; + transactionContext.addException(exception); } - } while (++retryCount <= maxTransactionRetry); + } while (transactionContext.shouldAttemptRetry()); - if (lastException != null) { + if (transactionContext.lastException() != null) { MetricUtils.counter(MetricRegistry.name(this.getClass(), "txFailedAfterRetries")).inc(); throw new RetryLimitReached( - "Failed to add after " + maxTransactionRetry + " retries", lastException); + "Failed to add after " + maxTransactionRetry + " retries", + transactionContext.lastException()); } return result; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 84fcc2c0a0f911..bae1d6ce92ece7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -711,10 +711,7 @@ private LineageSearchEntity buildLineageSearchEntity( .getOperationContextConfig() .getViewAuthorizationConfiguration() .isEnabled()) { - return canViewEntity( - opContext.getSessionAuthentication().getActor().toUrnStr(), - opContext.getAuthorizerContext().getAuthorizer(), - urn); + return canViewEntity(opContext, urn); } return true; })) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 9905d5e7790ade..e66b12db891df8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -257,7 +257,9 @@ public BrowseResult browse( entityName, path, filters, from, size)); return esBrowseDAO.browse( opContext.withSearchFlags( - flags -> applyDefaultSearchFlags(flags, null, DEFAULT_SERVICE_SEARCH_FLAGS)), + flags -> + applyDefaultSearchFlags(flags, null, DEFAULT_SERVICE_SEARCH_FLAGS) + .setFulltext(true)), entityName, path, filters, @@ -278,7 +280,9 @@ public BrowseResultV2 browseV2( return esBrowseDAO.browseV2( opContext.withSearchFlags( - flags -> applyDefaultSearchFlags(flags, null, DEFAULT_SERVICE_SEARCH_FLAGS)), + flags -> + applyDefaultSearchFlags(flags, null, DEFAULT_SERVICE_SEARCH_FLAGS) + .setFulltext(true)), entityName, path, filter, @@ -300,7 +304,9 @@ public BrowseResultV2 browseV2( return esBrowseDAO.browseV2( opContext.withSearchFlags( - flags -> applyDefaultSearchFlags(flags, input, DEFAULT_SERVICE_SEARCH_FLAGS)), + flags -> + applyDefaultSearchFlags(flags, input, DEFAULT_SERVICE_SEARCH_FLAGS) + .setFulltext(true)), entityNames, path, filter, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index b55418d12c7c29..9d4980f6f37a29 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -23,6 +23,7 @@ import 
com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.metadata.search.utils.SearchUtils; @@ -69,6 +70,7 @@ public class ESBrowseDAO { private final RestHighLevelClient client; @Nonnull private final SearchConfiguration searchConfiguration; @Nullable private final CustomSearchConfiguration customSearchConfiguration; + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; private static final String BROWSE_PATH = "browsePaths"; private static final String BROWSE_PATH_DEPTH = "browsePaths.length"; @@ -85,7 +87,7 @@ public class ESBrowseDAO { private static final SearchFlags DEFAULT_BROWSE_SEARCH_FLAGS = new SearchFlags() - .setFulltext(false) + .setFulltext(true) .setSkipHighlighting(true) .setGetSuggestions(false) .setIncludeSoftDeleted(false) @@ -607,7 +609,8 @@ private QueryBuilder buildQueryStringV2( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); QueryBuilder query = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain) .getQuery( finalOpContext, input, @@ -623,7 +626,7 @@ private QueryBuilder buildQueryStringV2( queryBuilder.filter( SearchRequestHandler.getFilterQuery( - finalOpContext, filter, entitySpec.getSearchableFieldTypes())); + finalOpContext, filter, entitySpec.getSearchableFieldTypes(), queryFilterRewriteChain)); return queryBuilder; } @@ -643,7 +646,11 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); QueryBuilder query = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .getQuery( finalOpContext, input, @@ -669,7 +676,8 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( return set1; })); queryBuilder.filter( - SearchRequestHandler.getFilterQuery(finalOpContext, filter, searchableFields)); + SearchRequestHandler.getFilterQuery( + finalOpContext, filter, searchableFields, queryFilterRewriteChain)); return queryBuilder; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index cb342794aff585..d6329ba75d4282 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.search.FilterValueArray; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import 
com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; @@ -76,6 +77,7 @@ public class ESSearchDAO { private final String elasticSearchImplementation; @Nonnull private final SearchConfiguration searchConfiguration; @Nullable private final CustomSearchConfiguration customSearchConfiguration; + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; public long docCount(@Nonnull OperationContext opContext, @Nonnull String entityName) { return docCount(opContext, entityName, null); @@ -88,7 +90,10 @@ public long docCount( new CountRequest(opContext.getSearchContext().getIndexConvention().getIndexName(entitySpec)) .query( SearchRequestHandler.getFilterQuery( - opContext, filter, entitySpec.getSearchableFieldTypes())); + opContext, + filter, + entitySpec.getSearchableFieldTypes(), + queryFilterRewriteChain)); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "docCount").time()) { return client.count(countRequest, RequestOptions.DEFAULT).getCount(); } catch (IOException e) { @@ -115,7 +120,10 @@ private SearchResult executeAndExtract( return transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpec, searchConfiguration, customSearchConfiguration) + entitySpec, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .extractResult(opContext, searchResponse, filter, from, size)); } catch (Exception e) { log.error("Search query failed", e); @@ -212,7 +220,10 @@ private ScrollResult executeAndExtract( return transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpecs, searchConfiguration, customSearchConfiguration) + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .extractScrollResult( opContext, searchResponse, filter, keepAlive, size, supportsPointInTime())); } catch (Exception e) { @@ -255,7 +266,11 @@ public SearchResult search( Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // Step 1: construct the query final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .getSearchRequest( opContext, finalInput, transformedFilters, sortCriteria, from, size, facets); searchRequest.indices( @@ -288,7 +303,8 @@ public SearchResult filter( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(filters, indexConvention); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain) .getFilterRequest(opContext, transformedFilters, sortCriteria, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); @@ -321,7 +337,8 @@ public AutoCompleteResult autoComplete( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = - AutocompleteRequestHandler.getBuilder(entitySpec, customSearchConfiguration); + AutocompleteRequestHandler.getBuilder( + entitySpec, customSearchConfiguration, 
queryFilterRewriteChain); SearchRequest req = builder.getSearchRequest( opContext, @@ -366,7 +383,11 @@ public Map aggregateByValue( } IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + SearchRequestHandler.getBuilder( + entitySpecs, + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain) .getAggregationRequest( opContext, field, @@ -481,7 +502,7 @@ private SearchRequest getScrollRequest( } return SearchRequestHandler.getBuilder( - entitySpecs, searchConfiguration, customSearchConfiguration) + entitySpecs, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain) .getSearchRequest( opContext, finalInput, postFilters, sortCriteria, sort, pitId, keepAlive, size, facets); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java new file mode 100644 index 00000000000000..800d59bacc1d8d --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -0,0 +1,239 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.utils.QueryUtils; +import io.datahubproject.metadata.context.OperationContext; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermsQueryBuilder; + +@Slf4j +public abstract class BaseQueryFilterRewriter implements QueryFilterRewriter { + + protected T expandUrnsByGraph( + @Nonnull OperationContext opContext, + T queryBuilder, + List relationshipTypes, + RelationshipDirection relationshipDirection, + int pageSize, + int limit) { + + if (matchTermsQueryFieldName(queryBuilder, getRewriterFieldNames())) { + return (T) + expandTerms( + opContext, + (TermsQueryBuilder) queryBuilder, + relationshipTypes, + relationshipDirection, + pageSize, + limit); + } else if (queryBuilder instanceof BoolQueryBuilder) { + return (T) + handleNestedFilters( + opContext, + (BoolQueryBuilder) queryBuilder, + relationshipTypes, + relationshipDirection, + pageSize, + limit); + } + return queryBuilder; + } + + /** + * The assumption here is that the input query builder is part of the `filter` of a parent query + * builder + * + * @param boolQueryBuilder bool query builder that is part of a filter + * @return terms query builders needing exp + */ + private 
BoolQueryBuilder handleNestedFilters( + OperationContext opContext, + BoolQueryBuilder boolQueryBuilder, + List relationshipTypes, + RelationshipDirection relationshipDirection, + int pageSize, + int limit) { + + List filterQueryBuilders = + boolQueryBuilder.filter().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + List shouldQueryBuilders = + boolQueryBuilder.should().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + List mustQueryBuilders = + boolQueryBuilder.must().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + List mustNotQueryBuilders = + boolQueryBuilder.mustNot().stream() + .map( + qb -> + expandUrnsByGraph( + opContext, qb, relationshipTypes, relationshipDirection, pageSize, limit)) + .collect(Collectors.toList()); + + BoolQueryBuilder expandedQueryBuilder = QueryBuilders.boolQuery(); + filterQueryBuilders.forEach(expandedQueryBuilder::filter); + shouldQueryBuilders.forEach(expandedQueryBuilder::should); + mustQueryBuilders.forEach(expandedQueryBuilder::must); + mustNotQueryBuilders.forEach(expandedQueryBuilder::mustNot); + expandedQueryBuilder.queryName(boolQueryBuilder.queryName()); + expandedQueryBuilder.adjustPureNegative(boolQueryBuilder.adjustPureNegative()); + expandedQueryBuilder.minimumShouldMatch(boolQueryBuilder.minimumShouldMatch()); + expandedQueryBuilder.boost(boolQueryBuilder.boost()); + + return expandedQueryBuilder; + } + + /** + * Expand URNs by graph walk + * + * @param opContext context + * @param termsQueryBuilder initial terms query builder + * @param relationshipTypes relationship to walk + * @param relationshipDirection direction to walk + * @param pageSize pagination size + * @param limit max results + * @return updated query builder with expanded terms + */ + private static QueryBuilder expandTerms( + OperationContext opContext, + TermsQueryBuilder termsQueryBuilder, + List relationshipTypes, + RelationshipDirection relationshipDirection, + int pageSize, + int limit) { + Set queryUrns = + termsQueryBuilder.values().stream() + .map(urnObj -> UrnUtils.getUrn(urnObj.toString())) + .collect(Collectors.toSet()); + Set expandedUrns = new HashSet<>(queryUrns); + + if (!queryUrns.isEmpty()) { + + scrollGraph( + opContext.getRetrieverContext().get().getGraphRetriever(), + queryUrns, + relationshipTypes, + relationshipDirection, + expandedUrns, + pageSize, + limit); + + return expandTermsQueryUrnValues(termsQueryBuilder, expandedUrns); + } + + return termsQueryBuilder; + } + + private static boolean matchTermsQueryFieldName( + QueryBuilder queryBuilder, Set fieldNames) { + if (queryBuilder instanceof TermsQueryBuilder) { + return fieldNames.stream() + .anyMatch(fieldName -> fieldName.equals(((TermsQueryBuilder) queryBuilder).fieldName())); + } + return false; + } + + private static TermsQueryBuilder expandTermsQueryUrnValues( + TermsQueryBuilder termsQueryBuilder, Set values) { + return QueryBuilders.termsQuery( + termsQueryBuilder.fieldName(), values.stream().map(Urn::toString).sorted().toArray()) + .queryName(termsQueryBuilder.queryName()) + .boost(termsQueryBuilder.boost()); + } + + private static void scrollGraph( + @Nonnull GraphRetriever graphRetriever, + @Nonnull Set queryUrns, + List relationshipTypes, + RelationshipDirection 
relationshipDirection, + @Nonnull Set visitedUrns, + int pageSize, + int limit) { + + List entityTypes = + queryUrns.stream().map(Urn::getEntityType).distinct().collect(Collectors.toList()); + List queryUrnStrs = queryUrns.stream().map(Urn::toString).collect(Collectors.toList()); + + Set nextUrns = new HashSet<>(); + + Supplier earlyExitCriteria = + () -> (queryUrns.size() + visitedUrns.size() + nextUrns.size()) >= limit; + + Function consumer = + result -> { + if (result != null) { + // track next hop urns + nextUrns.addAll( + result.getEntities().stream() + .map(e -> UrnUtils.getUrn(e.asRelatedEntity().getUrn())) + .filter(urn -> !visitedUrns.contains(urn)) + .collect(Collectors.toSet())); + } + + // exit early if we have enough + return earlyExitCriteria.get(); + }; + + graphRetriever.consumeRelatedEntities( + consumer, + entityTypes, + QueryUtils.newDisjunctiveFilter(newCriterion("urn", queryUrnStrs)), + entityTypes, + EMPTY_FILTER, + relationshipTypes, + newRelationshipFilter(EMPTY_FILTER, relationshipDirection), + Edge.EDGE_SORT_CRITERION, + pageSize, + null, + null); + + // mark visited + visitedUrns.addAll(queryUrns); + + if (earlyExitCriteria.get()) { + visitedUrns.addAll(nextUrns); + } else if (!nextUrns.isEmpty()) { + // next hop + scrollGraph( + graphRetriever, + nextUrns, + relationshipTypes, + relationshipDirection, + visitedUrns, + pageSize, + limit); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java new file mode 100644 index 00000000000000..ca2b67ad32f643 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/ContainerExpansionRewriter.java @@ -0,0 +1,85 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.AUTOCOMPLETE; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.FULLTEXT_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.STRUCTURED_SEARCH; + +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import org.opensearch.index.query.QueryBuilder; + +@Builder +public class ContainerExpansionRewriter extends BaseQueryFilterRewriter { + + @Getter + private final Set rewriterSearchTypes = + Set.of(AUTOCOMPLETE, FULLTEXT_SEARCH, STRUCTURED_SEARCH); + + @Builder.Default private Condition defaultCondition = Condition.DESCENDANTS_INCL; + + @Nonnull private final QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration config; + + @Nonnull + @Override + public Set getRewriterFieldNames() { + return Set.of("container.keyword"); + } + + @Override + public T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery) { + + if (filterQuery != null && isQueryTimeEnabled(rewriterContext)) { + switch (rewriterContext.getCondition() == null + ? 
defaultCondition + : rewriterContext.getCondition()) { + case DESCENDANTS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + case ANCESTORS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + default: + // UNDIRECTED doesn't work at the graph service layer + // RelationshipDirection.UNDIRECTED; + T descendantQuery = + expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + return expandUrnsByGraph( + opContext, + descendantQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + } + } + + return filterQuery; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java new file mode 100644 index 00000000000000..fbe8337d6e5998 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/DomainExpansionRewriter.java @@ -0,0 +1,85 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.AUTOCOMPLETE; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.FULLTEXT_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.STRUCTURED_SEARCH; + +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import org.opensearch.index.query.QueryBuilder; + +@Builder +public class DomainExpansionRewriter extends BaseQueryFilterRewriter { + + @Getter + private final Set<QueryFilterRewriterSearchType> rewriterSearchTypes = + Set.of(AUTOCOMPLETE, FULLTEXT_SEARCH, STRUCTURED_SEARCH); + + @Builder.Default private Condition defaultCondition = Condition.DESCENDANTS_INCL; + + @Nonnull private final QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration config; + + @Nonnull + @Override + public Set<String> getRewriterFieldNames() { + return Set.of("domains.keyword"); + } + + @Override + public <T extends QueryBuilder> T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery) { + + if (filterQuery != null && isQueryTimeEnabled(rewriterContext)) { + switch (rewriterContext.getCondition() == null + ?
defaultCondition + : rewriterContext.getCondition()) { + case DESCENDANTS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + case ANCESTORS_INCL: + return expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + default: + // UNDIRECTED doesn't work at the graph service layer + // RelationshipDirection.UNDIRECTED; + T descendantQuery = + expandUrnsByGraph( + opContext, + filterQuery, + List.of("IsPartOf"), + RelationshipDirection.INCOMING, + config.getPageSize(), + config.getLimit()); + return expandUrnsByGraph( + opContext, + descendantQuery, + List.of("IsPartOf"), + RelationshipDirection.OUTGOING, + config.getPageSize(), + config.getLimit()); + } + } + + return filterQuery; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java new file mode 100644 index 00000000000000..48fc5c0625e338 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriteChain.java @@ -0,0 +1,32 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import io.datahubproject.metadata.context.OperationContext; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.opensearch.index.query.QueryBuilder; + +public class QueryFilterRewriteChain { + public static final QueryFilterRewriteChain EMPTY = new QueryFilterRewriteChain(List.of()); + private final List<QueryFilterRewriter> filterRewriters; + + public static QueryFilterRewriteChain of(@Nonnull QueryFilterRewriter...
filters) { + return new QueryFilterRewriteChain(Arrays.stream(filters).collect(Collectors.toList())); + } + + public QueryFilterRewriteChain(List<QueryFilterRewriter> filterRewriters) { + this.filterRewriters = filterRewriters; + } + + public <T extends QueryBuilder> T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery) { + for (QueryFilterRewriter queryFilterRewriter : filterRewriters) { + filterQuery = queryFilterRewriter.rewrite(opContext, rewriterContext, filterQuery); + } + return filterQuery; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java new file mode 100644 index 00000000000000..95d4cb2887624b --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriter.java @@ -0,0 +1,35 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import com.linkedin.metadata.query.SearchFlags; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Set; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.opensearch.index.query.QueryBuilder; + +public interface QueryFilterRewriter { + + <T extends QueryBuilder> T rewrite( + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriterContext rewriterContext, + @Nullable T filterQuery); + + @Nonnull + Set<String> getRewriterFieldNames(); + + @Nonnull + Set<QueryFilterRewriterSearchType> getRewriterSearchTypes(); + + default boolean isQueryTimeEnabled( + @Nonnull QueryFilterRewriterContext queryFilterRewriterContext) { + return isQueryTimeEnabled( + queryFilterRewriterContext.getSearchType(), queryFilterRewriterContext.getSearchFlags()); + } + + default boolean isQueryTimeEnabled( + @Nonnull QueryFilterRewriterSearchType rewriteSearchType, @Nullable SearchFlags searchFlags) { + // enabled when the search type matches and flags are absent or do not disable rewriting + return getRewriterSearchTypes().contains(rewriteSearchType) + && (searchFlags == null + || searchFlags.isRewriteQuery() == null + || Boolean.TRUE.equals(searchFlags.isRewriteQuery())); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java new file mode 100644 index 00000000000000..274b97f01b29fc --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterContext.java @@ -0,0 +1,50 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.AUTOCOMPLETE; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.FULLTEXT_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.STRUCTURED_SEARCH; +import static com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType.TIMESERIES; + +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Condition; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import org.opensearch.index.query.QueryBuilder; + +@Builder +@Getter +public class QueryFilterRewriterContext { + @Nonnull private final QueryFilterRewriteChain
queryFilterRewriteChain; + @Nonnull private final QueryFilterRewriterSearchType searchType; + @Nullable private final Condition condition; + @Nullable private final SearchFlags searchFlags; + + public <T extends QueryBuilder> T rewrite( + @Nonnull OperationContext opContext, @Nullable T filterQuery) { + return queryFilterRewriteChain.rewrite(opContext, this, filterQuery); + } + + public static class QueryFilterRewriterContextBuilder { + // hide Lombok's no-arg build(); callers must use build(boolean isTimeseries) + private QueryFilterRewriterContext build() { + return null; + } + + public QueryFilterRewriterContext build(boolean isTimeseries) { + if (this.searchType == null) { + if (isTimeseries) { + this.searchType = TIMESERIES; + } else if (this.searchFlags != null) { + this.searchType = this.searchFlags.isFulltext() ? FULLTEXT_SEARCH : STRUCTURED_SEARCH; + } else { + this.searchType = AUTOCOMPLETE; + } + } + + return new QueryFilterRewriterContext( + this.queryFilterRewriteChain, this.searchType, this.condition, this.searchFlags); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java new file mode 100644 index 00000000000000..5cef4c8371d8da --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/QueryFilterRewriterSearchType.java @@ -0,0 +1,9 @@ +package com.linkedin.metadata.search.elasticsearch.query.filter; + +public enum QueryFilterRewriterSearchType { + STRUCTURED_SEARCH, + FULLTEXT_SEARCH, + AUTOCOMPLETE, + TIMESERIES, + PREDICATE +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 8ee9587ca2ae40..24598d7fdaadb8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.query.AutoCompleteEntityArray; import com.linkedin.metadata.query.AutoCompleteResult; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.utils.ESUtils; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; @@ -54,10 +55,12 @@ public class AutocompleteRequestHandler { private final CustomizedQueryHandler customizedQueryHandler; private final EntitySpec entitySpec; + private final QueryFilterRewriteChain queryFilterRewriteChain; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.entitySpec = entitySpec; List<SearchableFieldSpec> fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -83,13 +86,18 @@ public AutocompleteRequestHandler( set1.addAll(set2); return set1; })); + this.queryFilterRewriteChain = queryFilterRewriteChain; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable
CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( - entitySpec, k -> new AutocompleteRequestHandler(entitySpec, customSearchConfiguration)); + entitySpec, + k -> + new AutocompleteRequestHandler( + entitySpec, customSearchConfiguration, queryFilterRewriteChain)); } public SearchRequest getSearchRequest( @@ -113,7 +121,7 @@ public SearchRequest getSearchRequest( // Initial query with input filters BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery( - filter, false, searchableFieldTypes, opContext.getAspectRetriever()); + filter, false, searchableFieldTypes, opContext, queryFilterRewriteChain); baseQuery.filter(filterQuery); // Add autocomplete query @@ -218,10 +226,12 @@ private static BoolQueryBuilder defaultQuery( // Get HighlightBuilder to highlight the matched field private HighlightBuilder getHighlights(@Nullable String field) { - HighlightBuilder highlightBuilder = new HighlightBuilder(); - // Don't set tags to get the original field value - highlightBuilder.preTags(""); - highlightBuilder.postTags(""); + HighlightBuilder highlightBuilder = + new HighlightBuilder() + // Don't set tags to get the original field value + .preTags("") + .postTags("") + .numOfFragments(1); // Check for each field name and any subfields getAutocompleteFields(field) .forEach( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 6e4210de6ef80a..91cfeaf43a4cb4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.search.SearchResultMetadata; import com.linkedin.metadata.search.SearchSuggestion; import com.linkedin.metadata.search.SearchSuggestionArray; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.features.Features; import com.linkedin.metadata.search.utils.ESAccessControlUtil; import com.linkedin.metadata.search.utils.ESUtils; @@ -49,6 +50,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections.CollectionUtils; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.unit.TimeValue; @@ -77,17 +79,21 @@ public class SearchRequestHandler { private final AggregationQueryBuilder aggregationQueryBuilder; private final Map> searchableFieldTypes; + private final QueryFilterRewriteChain queryFilterRewriteChain; + private SearchRequestHandler( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { - this(ImmutableList.of(entitySpec), configs, customSearchConfiguration); + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + this(ImmutableList.of(entitySpec), configs, customSearchConfiguration, queryFilterRewriteChain); } private SearchRequestHandler( @Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + 
@Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.entitySpecs = entitySpecs; Map> entitySearchAnnotations = getSearchableAnnotations(); @@ -111,24 +117,31 @@ private SearchRequestHandler( set1.addAll(set2); return set1; })); + this.queryFilterRewriteChain = queryFilterRewriteChain; } public static SearchRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.of(entitySpec), - k -> new SearchRequestHandler(entitySpec, configs, customSearchConfiguration)); + k -> + new SearchRequestHandler( + entitySpec, configs, customSearchConfiguration, queryFilterRewriteChain)); } public static SearchRequestHandler getBuilder( @Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration) { + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.copyOf(entitySpecs), - k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); + k -> + new SearchRequestHandler( + entitySpecs, configs, customSearchConfiguration, queryFilterRewriteChain)); } private Map> getSearchableAnnotations() { @@ -155,16 +168,17 @@ private Set getDefaultQueryFieldNames(List annotat public BoolQueryBuilder getFilterQuery( @Nonnull OperationContext opContext, @Nullable Filter filter) { - return getFilterQuery(opContext, filter, searchableFieldTypes); + return getFilterQuery(opContext, filter, searchableFieldTypes, queryFilterRewriteChain); } public static BoolQueryBuilder getFilterQuery( @Nonnull OperationContext opContext, @Nullable Filter filter, - Map> searchableFieldTypes) { + Map> searchableFieldTypes, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery( - filter, false, searchableFieldTypes, opContext.getAspectRetriever()); + filter, false, searchableFieldTypes, opContext, queryFilterRewriteChain); return applyDefaultSearchFilters(opContext, filter, filterQuery); } @@ -211,8 +225,14 @@ public SearchRequest getSearchRequest( .forEach(searchSourceBuilder::aggregation); } if (Boolean.FALSE.equals(searchFlags.isSkipHighlighting())) { - searchSourceBuilder.highlighter(highlights); + if (CollectionUtils.isNotEmpty(searchFlags.getCustomHighlightingFields())) { + searchSourceBuilder.highlighter( + getValidatedHighlighter(searchFlags.getCustomHighlightingFields())); + } else { + searchSourceBuilder.highlighter(highlights); + } } + ESUtils.buildSortOrder(searchSourceBuilder, sortCriteria, entitySpecs); if (Boolean.TRUE.equals(searchFlags.isGetSuggestions())) { @@ -346,11 +366,12 @@ public QueryBuilder getQuery( @VisibleForTesting public HighlightBuilder getHighlights() { - HighlightBuilder highlightBuilder = new HighlightBuilder(); - - // Don't set tags to get the original field value - highlightBuilder.preTags(""); - highlightBuilder.postTags(""); + HighlightBuilder highlightBuilder = + new HighlightBuilder() + // Don't set tags to get the original field value + .preTags("") + .postTags("") + .numOfFragments(1); // Check for each field name and any subfields 
defaultQueryFieldNames.stream() @@ -556,4 +577,16 @@ private List extractSearchSuggestions(@Nonnull SearchResponse } return searchSuggestions; } + + private HighlightBuilder getValidatedHighlighter(Collection fieldsToHighlight) { + HighlightBuilder highlightBuilder = new HighlightBuilder(); + highlightBuilder.preTags(""); + highlightBuilder.postTags(""); + fieldsToHighlight.stream() + .filter(defaultQueryFieldNames::contains) + .flatMap(fieldName -> Stream.of(fieldName, fieldName + ".*")) + .distinct() + .forEach(highlightBuilder::field); + return highlightBuilder; + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index dd36f0a9456a74..bff6ebdd317c44 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -287,8 +287,10 @@ public void setSearchableValue( .forEach( fieldValue -> { String[] keyValues = fieldValue.toString().split("="); - String key = keyValues[0]; - String value = keyValues[1]; + String key = keyValues[0], value = ""; + if (keyValues.length > 1) { + value = keyValues[1]; + } dictDoc.put(key, value); }); searchDocument.set(fieldName, dictDoc); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESAccessControlUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESAccessControlUtil.java index 6f5dcee07a5aae..d1895de3055489 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESAccessControlUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESAccessControlUtil.java @@ -2,9 +2,7 @@ import static com.datahub.authorization.AuthUtil.VIEW_RESTRICTED_ENTITY_TYPES; -import com.datahub.authentication.Authentication; import com.datahub.authorization.AuthUtil; -import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -40,8 +38,6 @@ public static Collection restrictSearchResult( final EntityRegistry entityRegistry = Objects.requireNonNull(opContext.getEntityRegistry()); final RestrictedService restrictedService = Objects.requireNonNull(opContext.getServicesRegistryContext()).getRestrictedService(); - final Authentication auth = opContext.getSessionActorContext().getAuthentication(); - final Authorizer authorizer = opContext.getAuthorizerContext().getAuthorizer(); if (opContext.getSearchContext().isRestrictedSearch()) { for (SearchEntity searchEntity : searchEntities) { @@ -50,8 +46,7 @@ public static Collection restrictSearchResult( entityRegistry.getEntitySpec(entityType); if (VIEW_RESTRICTED_ENTITY_TYPES.contains(entityType) - && !AuthUtil.canViewEntity( - auth.getActor().toUrnStr(), authorizer, searchEntity.getEntity())) { + && !AuthUtil.canViewEntity(opContext, searchEntity.getEntity())) { // Not authorized && restricted response requested if (opContext.getSearchContext().isRestrictedSearch()) { @@ -72,9 +67,7 @@ public static Collection restrictSearchResult( public static boolean restrictUrn(@Nonnull OperationContext opContext, @Nonnull Urn urn) { if (opContext.getOperationContextConfig().getViewAuthorizationConfiguration().isEnabled() && !opContext.isSystemAuth()) { - final Authentication auth = 
opContext.getSessionActorContext().getAuthentication(); - final Authorizer authorizer = opContext.getAuthorizerContext().getAuthorizer(); - return !AuthUtil.canViewEntity(auth.getActor().toUrnStr(), authorizer, urn); + return !AuthUtil.canViewEntity(opContext, urn); } return false; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 6c9b339af42c7b..f9ca0760aaf665 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,9 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; +import static com.linkedin.metadata.query.filter.Condition.ANCESTORS_INCL; +import static com.linkedin.metadata.query.filter.Condition.DESCENDANTS_INCL; +import static com.linkedin.metadata.query.filter.Condition.RELATED_INCL; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SUBFIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; @@ -19,6 +22,8 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; import io.datahubproject.metadata.context.OperationContext; import java.util.Arrays; import java.util.Collections; @@ -140,13 +145,14 @@ public static BoolQueryBuilder buildFilterQuery( @Nullable Filter filter, boolean isTimeseries, final Map> searchableFieldTypes, - @Nullable AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; } - StructuredPropertyUtils.validateFilter(filter, aspectRetriever); + StructuredPropertyUtils.validateFilter(filter, opContext.getAspectRetriever()); if (filter.getOr() != null) { // If caller is using the new Filters API, build boolean query from that. 
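With the ESUtils change above, filter-query construction now receives the full OperationContext plus a QueryFilterRewriteChain instead of a bare AspectRetriever. A minimal sketch of the migrated call shape, assuming the wiring shown elsewhere in this diff (variable names are illustrative):

    // Sketch only: opContext supplies the AspectRetriever internally, and the chain
    // applies rewriters (e.g. DomainExpansionRewriter) to eligible filter conditions.
    BoolQueryBuilder filterQuery =
        ESUtils.buildFilterQuery(
            filter,                   // @Nullable Filter from the request
            false,                    // isTimeseries
            searchableFieldTypes,     // field name -> searchable field types
            opContext,                // replaces the @Nullable AspectRetriever argument
            queryFilterRewriteChain); // QueryFilterRewriteChain.EMPTY disables rewriting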
@@ -156,7 +162,11 @@ public static BoolQueryBuilder buildFilterQuery( or -> finalQueryBuilder.should( ESUtils.buildConjunctiveFilterQuery( - or, isTimeseries, searchableFieldTypes, aspectRetriever))); + or, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain))); // The default is not always 1 (ensure consistent default) finalQueryBuilder.minimumShouldMatch(1); } else if (filter.getCriteria() != null) { @@ -172,7 +182,11 @@ public static BoolQueryBuilder buildFilterQuery( || criterion.getCondition() == Condition.IS_NULL) { andQueryBuilder.must( getQueryBuilderFromCriterion( - criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain)); } }); finalQueryBuilder.should(andQueryBuilder); @@ -187,7 +201,8 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( @Nonnull ConjunctiveCriterion conjunctiveCriterion, boolean isTimeseries, Map> searchableFieldTypes, - @Nullable AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder(); conjunctiveCriterion .getAnd() @@ -200,11 +215,19 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( // `filter` instead of `must` (enables caching and bypasses scoring) andQueryBuilder.filter( getQueryBuilderFromCriterion( - criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain)); } else { andQueryBuilder.mustNot( getQueryBuilderFromCriterion( - criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain)); } } }); @@ -243,8 +266,9 @@ public static QueryBuilder getQueryBuilderFromCriterion( @Nonnull final Criterion criterion, boolean isTimeseries, final Map> searchableFieldTypes, - @Nullable AspectRetriever aspectRetriever) { - final String fieldName = toParentField(criterion.getField(), aspectRetriever); + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + final String fieldName = toParentField(criterion.getField(), opContext.getAspectRetriever()); /* * Check the field-name for a "sibling" field, or one which should ALWAYS @@ -259,11 +283,21 @@ public static QueryBuilder getQueryBuilderFromCriterion( if (maybeFieldToExpand.isPresent()) { return getQueryBuilderFromCriterionForFieldToExpand( - maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes, aspectRetriever); + maybeFieldToExpand.get(), + criterion, + isTimeseries, + searchableFieldTypes, + opContext, + queryFilterRewriteChain); } return getQueryBuilderFromCriterionForSingleField( - criterion, isTimeseries, searchableFieldTypes, criterion.getField(), aspectRetriever); + criterion, + isTimeseries, + searchableFieldTypes, + criterion.getField(), + opContext, + queryFilterRewriteChain); } public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { @@ -501,7 +535,8 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull final Criterion criterion, final boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final BoolQueryBuilder orQueryBuilder = new 
BoolQueryBuilder(); for (String field : fields) { Criterion criterionToQuery = new Criterion(); @@ -513,10 +548,16 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( if (criterion.hasValue()) { criterionToQuery.setValue(criterion.getValue()); } - criterionToQuery.setField(toKeywordField(field, isTimeseries, aspectRetriever)); + criterionToQuery.setField( + toKeywordField(field, isTimeseries, opContext.getAspectRetriever())); orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( - criterionToQuery, isTimeseries, searchableFieldTypes, null, aspectRetriever) + criterionToQuery, + isTimeseries, + searchableFieldTypes, + null, + opContext, + queryFilterRewriteChain) .queryName(field)); } return orQueryBuilder; @@ -528,8 +569,10 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( boolean isTimeseries, final Map> searchableFieldTypes, @Nullable String queryName, - @Nonnull AspectRetriever aspectRetriever) { + @Nonnull OperationContext opContext, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final Condition condition = criterion.getCondition(); + final AspectRetriever aspectRetriever = opContext.getAspectRetriever(); final String fieldName = toParentField(criterion.getField(), aspectRetriever); if (condition == Condition.IS_NULL) { @@ -563,6 +606,18 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( } else if (condition == Condition.END_WITH) { return buildEndsWithConditionFromCriterion( fieldName, criterion, queryName, isTimeseries, aspectRetriever); + } else if (Set.of(ANCESTORS_INCL, DESCENDANTS_INCL, RELATED_INCL).contains(condition)) { + + return QueryFilterRewriterContext.builder() + .queryFilterRewriteChain(queryFilterRewriteChain) + .condition(condition) + .searchFlags(opContext.getSearchContext().getSearchFlags()) + .build(isTimeseries) + .rewrite( + opContext, + buildEqualsConditionFromCriterion( + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever)) + .queryName(queryName != null ? queryName : fieldName); } } throw new UnsupportedOperationException("Unsupported condition: " + condition); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/DatasetSchemaFieldChangeEvent.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/DatasetSchemaFieldChangeEvent.java index 84308d9b2311fe..6c549e254934cb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/DatasetSchemaFieldChangeEvent.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/DatasetSchemaFieldChangeEvent.java @@ -27,16 +27,24 @@ public DatasetSchemaFieldChangeEvent( String description, String fieldPath, Urn fieldUrn, - boolean nullable) { + boolean nullable, + SchemaFieldModificationCategory modificationCategory) { super( entityUrn, category, operation, modifier, ImmutableMap.of( - "fieldPath", fieldPath, - "fieldUrn", fieldUrn.toString(), - "nullable", nullable), + "fieldPath", + fieldPath, + "fieldUrn", + fieldUrn.toString(), + "nullable", + nullable, + "modificationCategory", + modificationCategory != null + ? 
modificationCategory.toString() + : SchemaFieldModificationCategory.OTHER.toString()), auditStamp, semVerChange, description); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/SchemaFieldModificationCategory.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/SchemaFieldModificationCategory.java new file mode 100644 index 00000000000000..67c0cdb70a0de3 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/dataset/SchemaFieldModificationCategory.java @@ -0,0 +1,13 @@ +package com.linkedin.metadata.timeline.data.dataset; + +/* + * Enum to allow us to distinguish between the different schema field modifications when creating entity change events. + */ +public enum SchemaFieldModificationCategory { + // when a schema field is renamed + RENAME, + // when a schema field has a type change + TYPE_CHANGE, + // a default option when no other modification category has been given + OTHER, +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java index 0ab55893d48d2a..c0cec830faf2b9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java @@ -15,6 +15,7 @@ import com.linkedin.metadata.timeline.data.ChangeTransaction; import com.linkedin.metadata.timeline.data.SemanticChangeType; import com.linkedin.metadata.timeline.data.dataset.DatasetSchemaFieldChangeEvent; +import com.linkedin.metadata.timeline.data.dataset.SchemaFieldModificationCategory; import com.linkedin.schema.SchemaField; import com.linkedin.schema.SchemaFieldArray; import com.linkedin.schema.SchemaMetadata; @@ -246,6 +247,7 @@ private static void processFieldPathDataTypeChange( .fieldPath(curBaseField.getFieldPath()) .fieldUrn(getSchemaFieldUrn(datasetUrn, curBaseField)) .nullable(curBaseField.isNullable()) + .modificationCategory(SchemaFieldModificationCategory.TYPE_CHANGE) .auditStamp(auditStamp) .build()); } @@ -483,6 +485,7 @@ private static ChangeEvent generateRenameEvent( .fieldPath(curBaseField.getFieldPath()) .fieldUrn(getSchemaFieldUrn(datasetUrn, curBaseField)) .nullable(curBaseField.isNullable()) + .modificationCategory(SchemaFieldModificationCategory.RENAME) .auditStamp(auditStamp) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 9b4d373d25d8fb..cb364f41aa218f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.utils.ESUtils; 
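To make the defaulting rule in the DatasetSchemaFieldChangeEvent change above concrete: when a generator supplies no modificationCategory, the event parameters report OTHER. A small standalone sketch of that ternary, using only names from this diff:

    // Mirrors the parameter-map defaulting in DatasetSchemaFieldChangeEvent's constructor
    SchemaFieldModificationCategory category = null; // builder left the category unset
    String reported =
        category != null
            ? category.toString()
            : SchemaFieldModificationCategory.OTHER.toString();
    // reported == "OTHER"; SchemaMetadataChangeEventGenerator now passes RENAME or
    // TYPE_CHANGE explicitly for the renames and type changes it can classify.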
@@ -100,18 +101,21 @@ public class ElasticSearchTimeseriesAspectService private final TimeseriesAspectIndexBuilders indexBuilders; private final RestHighLevelClient searchClient; private final ESAggregatedStatsDAO esAggregatedStatsDAO; + private final QueryFilterRewriteChain queryFilterRewriteChain; public ElasticSearchTimeseriesAspectService( @Nonnull RestHighLevelClient searchClient, @Nonnull TimeseriesAspectIndexBuilders indexBuilders, @Nonnull ESBulkProcessor bulkProcessor, - int numRetries) { + int numRetries, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.indexBuilders = indexBuilders; this.searchClient = searchClient; this.bulkProcessor = bulkProcessor; this.numRetries = numRetries; + this.queryFilterRewriteChain = queryFilterRewriteChain; - esAggregatedStatsDAO = new ESAggregatedStatsDAO(searchClient); + esAggregatedStatsDAO = new ESAggregatedStatsDAO(searchClient, queryFilterRewriteChain); } private static EnvelopedAspect parseDocument(@Nonnull SearchHit doc) { @@ -298,7 +302,8 @@ public long countByFilter( .getEntityRegistry() .getEntitySpec(entityName) .getSearchableFieldTypes(), - opContext.getAspectRetriever())); + opContext, + queryFilterRewriteChain)); CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); @@ -328,7 +333,7 @@ public List getAspectValues( QueryBuilders.boolQuery() .must( ESUtils.buildFilterQuery( - filter, true, searchableFieldTypes, opContext.getAspectRetriever())); + filter, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -340,7 +345,7 @@ public List getAspectValues( .setValue(startTimeMillis.toString()); filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( - startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -350,7 +355,7 @@ public List getAspectValues( .setValue(endTimeMillis.toString()); filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( - endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQueryBuilder); @@ -435,7 +440,8 @@ public DeleteAspectValuesResult deleteAspectValues( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); final Optional result = bulkProcessor @@ -471,7 +477,8 @@ public String deleteAspectValuesAsync( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); final int batchSize = options.getBatchSize() > 0 ? 
options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 @@ -505,7 +512,8 @@ public String reindexAsync( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch (Exception e) { @@ -563,7 +571,7 @@ public TimeseriesScrollResult scrollAspects( QueryBuilders.boolQuery() .filter( ESUtils.buildFilterQuery( - filter, true, searchableFieldTypes, opContext.getAspectRetriever())); + filter, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -573,7 +581,7 @@ public TimeseriesScrollResult scrollAspects( .setValue(startTimeMillis.toString()); filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( - startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -583,7 +591,7 @@ public TimeseriesScrollResult scrollAspects( .setValue(endTimeMillis.toString()); filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( - endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); + endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } SearchResponse response = diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 1bf96841e5fe13..0ad8bd6f4bc544 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; import com.linkedin.metadata.models.TimeseriesFieldSpec; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.metadata.timeseries.elastic.indexbuilder.MappingsBuilder; import com.linkedin.timeseries.AggregationSpec; @@ -61,9 +62,13 @@ public class ESAggregatedStatsDAO { ES_AGGREGATION_PREFIX + ES_MAX_AGGREGATION_PREFIX + ES_FIELD_TIMESTAMP; private static final int MAX_TERM_BUCKETS = 24 * 60; // minutes in a day. 
private final RestHighLevelClient searchClient; + @Nonnull private final QueryFilterRewriteChain queryFilterRewriteChain; - public ESAggregatedStatsDAO(@Nonnull RestHighLevelClient searchClient) { + public ESAggregatedStatsDAO( + @Nonnull RestHighLevelClient searchClient, + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { this.searchClient = searchClient; + this.queryFilterRewriteChain = queryFilterRewriteChain; } private static String toEsAggName(final String aggName) { @@ -375,7 +380,8 @@ public GenericTable getAggregatedStats( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getAspectRetriever()); + opContext, + queryFilterRewriteChain); AspectSpec aspectSpec = getTimeseriesAspectSpec(opContext, entityName, aspectName); // Build and attach the grouping aggregations diff --git a/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java b/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java index 9ee4a16a3ab991..062041253aa1fe 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java @@ -65,6 +65,8 @@ public static TestEntityInfo getTestEntityInfo(Urn urn) { "value1", "key2", "value2", + "key3", + "", "shortValue", "123", "longValue", diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java new file mode 100644 index 00000000000000..c86d80be2d7fd2 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/dao/throttle/APIThrottleTest.java @@ -0,0 +1,162 @@ +package com.linkedin.metadata.dao.throttle; + +import static com.linkedin.metadata.dao.throttle.ThrottleType.MANUAL; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_TIMESERIES_LAG; +import static com.linkedin.metadata.dao.throttle.ThrottleType.MCL_VERSIONED_LAG; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class APIThrottleTest { + private static final ThrottleEvent MANUAL_THROTTLED_EVENT = + ThrottleEvent.builder().throttled(Map.of(MANUAL, true)).build(); + private static final ThrottleEvent MCL_TIMESERIES_THROTTLED_EVENT = + ThrottleEvent.builder().throttled(Map.of(MCL_TIMESERIES_LAG, true)).build(); + private static final ThrottleEvent MCL_VERSIONED_THROTTLED_EVENT = + ThrottleEvent.builder().throttled(Map.of(MCL_VERSIONED_LAG, true)).build(); + private static final ThrottleEvent ALL_MCL_THROTTLED_EVENT = + ThrottleEvent.builder() + .throttled(Map.of(MCL_TIMESERIES_LAG, true, MCL_VERSIONED_LAG, true)) + .build(); + private static final ThrottleEvent ALL_THROTTLED_EVENT = + ThrottleEvent.builder() + .throttled(Map.of(MANUAL, true, MCL_TIMESERIES_LAG, true, MCL_VERSIONED_LAG, true)) + .build(); + public static final Set ALL_EVENTS = + Set.of( + MANUAL_THROTTLED_EVENT, + MCL_TIMESERIES_THROTTLED_EVENT, + MCL_VERSIONED_THROTTLED_EVENT, + ALL_MCL_THROTTLED_EVENT, + ALL_THROTTLED_EVENT); + + private OperationContext opContext; + private RequestContext mockRequestContext; + + @BeforeMethod + 
public void init() { + mockRequestContext = mock(RequestContext.class); + opContext = TestOperationContexts.userContextNoSearchAuthorization(mockRequestContext); + } + + @Test + public void testExemptions() { + List exemptions = + List.of( + "", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:129.0) Gecko/20100101 Firefox/129.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15"); + + for (ThrottleEvent event : ALL_EVENTS) { + when(mockRequestContext.getUserAgent()).thenReturn(null); + try { + APIThrottle.evaluate(opContext, Set.of(event), false); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + try { + APIThrottle.evaluate(opContext, Set.of(event), true); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + + // Browser tests + for (String ua : exemptions) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + } catch (Exception ex) { + Assert.fail("Exception was thrown and NOT expected! " + event); + } + } + } + } + + @Test + public void testThrottleException() { + List applicable = + List.of( + "python-requests/2.28.2", + "Apache-HttpClient/4.5.5 (Java/1.8.0_162)", + "okhttp/4.9.3.7", + "Go-http-client/1.1"); + + for (ThrottleEvent event : ALL_EVENTS) { + for (String ua : applicable) { + // timeseries lag present + if (event.getActiveThrottles().contains(MCL_TIMESERIES_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + } + if (!event.getActiveThrottles().contains(MCL_TIMESERIES_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + } catch (Exception ex) { + Assert.fail(String.format("Exception was thrown and NOT expected! %s %s", ua, event)); + } + } + + // versioned lag present + if (event.getActiveThrottles().contains(MCL_VERSIONED_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + } + if (!event.getActiveThrottles().contains(MCL_VERSIONED_LAG) + && !event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + } catch (Exception ex) { + Assert.fail(String.format("Exception was thrown and NOT expected! 
%s %s", ua, event)); + } + } + + // manual throttle active + if (event.getActiveThrottles().contains(MANUAL)) { + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), true); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + try { + when(mockRequestContext.getUserAgent()).thenReturn(ua); + APIThrottle.evaluate(opContext, Set.of(event), false); + Assert.fail(String.format("Exception WAS expected! %s %s", ua, event)); + } catch (Exception ignored) { + } + } + } + } + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index b9f5984e576678..e8d3c654f6f639 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -1,14 +1,19 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static org.mockito.Mockito.mock; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertTrue; import com.linkedin.common.AuditStamp; +import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.DataTemplateUtil; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.EnvelopedAspect; import com.linkedin.identity.CorpUserInfo; import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.Constants; @@ -36,6 +41,8 @@ import io.ebean.TxScope; import io.ebean.annotation.TxIsolation; import java.net.URISyntaxException; +import java.sql.Timestamp; +import java.time.Instant; import java.util.Collection; import java.util.List; import java.util.Map; @@ -292,6 +299,139 @@ public void testNestedTransactions() throws AssertionError { System.out.println("done"); } + @Test + public void testSystemMetadataDuplicateKey() throws Exception { + Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:duplicateKeyTest"); + SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata(); + ChangeItemImpl item = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(STATUS_ASPECT_NAME) + .recordTemplate(new Status().setRemoved(true)) + .systemMetadata(systemMetadata) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)); + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(item)) + .build(), + false, + true); + + // List aspects urns + EnvelopedAspect envelopedAspect = + _entityServiceImpl.getLatestEnvelopedAspect( + opContext, CORP_USER_ENTITY_NAME, entityUrn, STATUS_ASPECT_NAME); + + assertNotNull(envelopedAspect); + assertEquals(envelopedAspect.getVersion(), 0L, "Expected version 0"); + assertEquals( + envelopedAspect.getSystemMetadata().getVersion(), + "1", + "Expected version 0 with systemMeta version 1"); + + // Corrupt the version 0 systemMeta + try (Transaction transaction = + ((EbeanAspectDao) _entityServiceImpl.aspectDao) + .getServer() + .beginTransaction(TxScope.requiresNew().setIsolation(TxIsolation.REPEATABLE_READ))) { + TransactionContext transactionContext = 
TransactionContext.empty(transaction, 3); + _entityServiceImpl.aspectDao.saveAspect( + transactionContext, + entityUrn.toString(), + STATUS_ASPECT_NAME, + new Status().setRemoved(false).toString(), + entityUrn.toString(), + null, + Timestamp.from(Instant.now()), + systemMetadata.toString(), + 1, + true); + transaction.commit(); + } + + // Run another update + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items( + List.of( + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(STATUS_ASPECT_NAME) + .recordTemplate(new Status().setRemoved(false)) + .systemMetadata(systemMetadata) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)))) + .build(), + false, + true); + EnvelopedAspect envelopedAspect2 = + _entityServiceImpl.getLatestEnvelopedAspect( + opContext, CORP_USER_ENTITY_NAME, entityUrn, STATUS_ASPECT_NAME); + + assertNotNull(envelopedAspect2); + assertEquals(envelopedAspect2.getVersion(), 0L, "Expected version 0"); + assertEquals( + envelopedAspect2.getSystemMetadata().getVersion(), + "3", + "Expected version 0 with systemMeta version 3 accounting for the collision"); + } + + @Test + public void testBatchDuplicate() throws Exception { + Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:batchDuplicateTest"); + SystemMetadata systemMetadata = AspectGenerationUtils.createSystemMetadata(); + ChangeItemImpl item1 = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(STATUS_ASPECT_NAME) + .recordTemplate(new Status().setRemoved(true)) + .systemMetadata(systemMetadata.copy()) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)); + ChangeItemImpl item2 = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(STATUS_ASPECT_NAME) + .recordTemplate(new Status().setRemoved(false)) + .systemMetadata(systemMetadata.copy()) + .auditStamp(TEST_AUDIT_STAMP) + .build(TestOperationContexts.emptyAspectRetriever(null)); + _entityServiceImpl.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(item1, item2)) + .build(), + false, + true); + + // List aspects urns + ListUrnsResult batch = _entityServiceImpl.listUrns(opContext, entityUrn.getEntityType(), 0, 2); + + assertEquals(batch.getStart().intValue(), 0); + assertEquals(batch.getCount().intValue(), 1); + assertEquals(batch.getTotal().intValue(), 1); + assertEquals(batch.getEntities().size(), 1); + assertEquals(entityUrn.toString(), batch.getEntities().get(0).toString()); + + EnvelopedAspect envelopedAspect = + _entityServiceImpl.getLatestEnvelopedAspect( + opContext, CORP_USER_ENTITY_NAME, entityUrn, STATUS_ASPECT_NAME); + assertEquals( + envelopedAspect.getSystemMetadata().getVersion(), + "2", + "Expected version 2 accounting for duplicates"); + assertEquals( + envelopedAspect.getValue().toString(), + "{removed=false}", + "Expected 2nd item to be the latest"); + } + @Test public void dataGeneratorThreadingTest() { DataGenerator dataGenerator = new DataGenerator(opContext, _entityServiceImpl); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java index 1adb5d1ab3952c..f37f99c7bea603 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java @@ -57,7
+57,8 @@ public void testExtractor() { ImmutableList.of("key1=value1", "key2=value2", "shortValue=123", "longValue=0123456789")); assertEquals( result.get(nameToSpec.get("esObjectField")), - ImmutableList.of("key1=value1", "key2=value2", "shortValue=123", "longValue=0123456789")); + ImmutableList.of( + "key1=value1", "key2=value2", "shortValue=123", "key3=", "longValue=0123456789")); } @Test @@ -99,9 +100,6 @@ public void testExtractorMaxValueLength() { result.get(nameToSpec.get("customProperties")), ImmutableList.of(), "Expected no matching values because of value limit of 1"); - assertEquals( - result.get(nameToSpec.get("esObjectField")), - ImmutableList.of(), - "Expected no matching values because of value limit of 1"); + assertEquals(result.get(nameToSpec.get("esObjectField")), ImmutableList.of("key3=")); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index eee0e0d0f2ec6f..3cb7e8bd3fb1b1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -56,6 +56,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SimpleRanker; @@ -206,9 +207,14 @@ private ElasticSearchService buildEntitySearchService() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = - new ESBrowseDAO(searchClientSpy, getSearchConfiguration(), getCustomSearchConfiguration()); + new ESBrowseDAO( + searchClientSpy, + getSearchConfiguration(), + getCustomSearchConfiguration(), + QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(searchClientSpy, getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index 1cd0c9550a0fc3..45bc8548706bbe 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -30,6 +30,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SimpleRanker; @@ -131,10 +132,14 @@ private ElasticSearchService buildEntitySearchService() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - 
getSearchClient(), getSearchConfiguration(), getCustomSearchConfiguration()); + getSearchClient(), + getSearchConfiguration(), + getCustomSearchConfiguration(), + QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(getSearchClient(), getBulkProcessor(), 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index b04c7d2bc60b92..7b6fcd46333d2d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -11,6 +11,7 @@ import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.browse.BrowseResult; +import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; @@ -20,6 +21,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; @@ -37,6 +39,8 @@ public abstract class TestEntityTestBase extends AbstractTestNGSpringContextTests { + private static final String BROWSE_V2_DELIMITER = "␟"; + @Nonnull protected abstract RestHighLevelClient getSearchClient(); @@ -94,10 +98,14 @@ private ElasticSearchService buildService() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ESBrowseDAO browseDAO = new ESBrowseDAO( - getSearchClient(), getSearchConfiguration(), getCustomSearchConfiguration()); + getSearchClient(), + getSearchConfiguration(), + getCustomSearchConfiguration(), + QueryFilterRewriteChain.EMPTY); ESWriteDAO writeDAO = new ESWriteDAO(getSearchClient(), getBulkProcessor(), 1); ElasticSearchService searchService = new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); @@ -125,6 +133,17 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); + BrowseResultV2 browseResultV2 = + elasticSearchService.browseV2( + opContext.withSearchFlags(flags -> flags.setFulltext(false)), + ENTITY_NAME, + "", + null, + "*", + 0, + 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 0); + assertEquals( elasticSearchService.docCount( opContext.withSearchFlags(flags -> flags.setFulltext(false)), ENTITY_NAME), @@ -146,6 +165,10 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); document.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride")); document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document.set( + "browsePathV2", + JsonNodeFactory.instance.textNode( + BROWSE_V2_DELIMITER + "a" + 
BROWSE_V2_DELIMITER + "b" + BROWSE_V2_DELIMITER + "c")); document.set("foreignKey", JsonNodeFactory.instance.textNode("urn:li:tag:Node.Value")); elasticSearchService.upsertDocument( opContext, ENTITY_NAME, document.toString(), urn.toString()); @@ -183,6 +206,17 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); assertEquals(browseResult.getGroups().get(0).getName(), "a"); + browseResultV2 = + elasticSearchService.browseV2( + opContext.withSearchFlags(flags -> flags.setFulltext(false)), + ENTITY_NAME, + "", + null, + "*", + 0, + 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResultV2.getGroups().get(0).getName(), "a"); browseResult = elasticSearchService.browse( opContext.withSearchFlags(flags -> flags.setFulltext(false)), @@ -193,6 +227,17 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); assertEquals(browseResult.getGroups().get(0).getName(), "b"); + browseResultV2 = + elasticSearchService.browseV2( + opContext.withSearchFlags(flags -> flags.setFulltext(false)), + ENTITY_NAME, + BROWSE_V2_DELIMITER + "a", + null, + "*", + 0, + 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResultV2.getGroups().get(0).getName(), "b"); assertEquals( elasticSearchService.docCount( opContext.withSearchFlags(flags -> flags.setFulltext(false)), ENTITY_NAME), @@ -212,6 +257,9 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { document2.set("keyPart1", JsonNodeFactory.instance.textNode("random")); document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("textFieldOverride2")); document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); + document2.set( + "browsePathV2", + JsonNodeFactory.instance.textNode(BROWSE_V2_DELIMITER + "b" + BROWSE_V2_DELIMITER + "c")); elasticSearchService.upsertDocument( opContext, ENTITY_NAME, document2.toString(), urn2.toString()); syncAfterWrite(getBulkProcessor()); @@ -238,6 +286,18 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 2); assertEquals(browseResult.getGroups().get(0).getName(), "a"); assertEquals(browseResult.getGroups().get(1).getName(), "b"); + browseResultV2 = + elasticSearchService.browseV2( + opContext.withSearchFlags(flags -> flags.setFulltext(false)), + ENTITY_NAME, + "", + null, + "*", + 0, + 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 2); + assertEquals(browseResultV2.getGroups().get(0).getName(), "a"); + assertEquals(browseResultV2.getGroups().get(1).getName(), "b"); browseResult = elasticSearchService.browse( opContext.withSearchFlags(flags -> flags.setFulltext(false)), @@ -248,6 +308,17 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); assertEquals(browseResult.getGroups().get(0).getName(), "b"); + browseResultV2 = + elasticSearchService.browseV2( + opContext.withSearchFlags(flags -> flags.setFulltext(false)), + ENTITY_NAME, + BROWSE_V2_DELIMITER + "a", + null, + "*", + 0, + 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResultV2.getGroups().get(0).getName(), "b"); 
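+          // Hedged note (added for clarity, not asserted by the test itself): browsePathV2 values
+          // are delimited by the ␟ (unit separator) character, so browsing under the prefix ␟a is
+          // expected to surface the next segment, "b", mirroring the legacy /a/b/c slash paths.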
assertEquals( elasticSearchService.docCount( opContext.withSearchFlags(flags -> flags.setFulltext(false)), ENTITY_NAME), @@ -283,6 +354,16 @@ public void testElasticSearchServiceStructuredQuery() throws Exception { 0, 10); assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); + browseResultV2 = + elasticSearchService.browseV2( + opContext.withSearchFlags(flags -> flags.setFulltext(false)), + ENTITY_NAME, + "", + null, + "*", + 0, + 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 0); assertEquals( elasticSearchService.docCount( opContext.withSearchFlags(flags -> flags.setFulltext(false)), ENTITY_NAME), @@ -412,4 +493,125 @@ public void testElasticSearchServiceFulltext() throws Exception { .size(), 0); } + + @Test + public void testElasticSearchServiceDefaults() throws Exception { + SearchResult searchResult = + elasticSearchService.search(opContext, List.of(ENTITY_NAME), "test", null, null, 0, 10); + assertEquals(searchResult.getNumEntities().intValue(), 0); + BrowseResult browseResult = + elasticSearchService.browse(opContext, ENTITY_NAME, "", null, 0, 10); + assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); + BrowseResultV2 browseResultV2 = + elasticSearchService.browseV2(opContext, ENTITY_NAME, "", null, "*", 0, 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 0); + + assertEquals(elasticSearchService.docCount(opContext, ENTITY_NAME), 0); + assertEquals( + elasticSearchService + .aggregateByValue(opContext, ImmutableList.of(ENTITY_NAME), "textField", null, 10) + .size(), + 0); + + Urn urn = new TestEntityUrn("test", "urn1", "VALUE_1"); + ObjectNode document = JsonNodeFactory.instance.objectNode(); + document.set("urn", JsonNodeFactory.instance.textNode(urn.toString())); + document.set("keyPart1", JsonNodeFactory.instance.textNode("test")); + document.set("textFieldOverride", JsonNodeFactory.instance.textNode("user_id")); + document.set("browsePaths", JsonNodeFactory.instance.textNode("/a/b/c")); + document.set( + "browsePathV2", + JsonNodeFactory.instance.textNode( + BROWSE_V2_DELIMITER + "a" + BROWSE_V2_DELIMITER + "b" + BROWSE_V2_DELIMITER + "c")); + document.set("foreignKey", JsonNodeFactory.instance.textNode("urn:li:tag:Node.Value")); + elasticSearchService.upsertDocument( + opContext, ENTITY_NAME, document.toString(), urn.toString()); + syncAfterWrite(getBulkProcessor()); + + searchResult = + elasticSearchService.search(opContext, List.of(ENTITY_NAME), "test", null, null, 0, 10); + assertEquals(searchResult.getNumEntities().intValue(), 1); + assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + searchResult = + elasticSearchService.search( + opContext, List.of(ENTITY_NAME), "foreignKey:Node", null, null, 0, 10); + assertEquals(searchResult.getNumEntities().intValue(), 1); + assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + browseResult = elasticSearchService.browse(opContext, ENTITY_NAME, "", null, 0, 10); + assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResult.getGroups().get(0).getName(), "a"); + browseResultV2 = elasticSearchService.browseV2(opContext, ENTITY_NAME, "", null, "*", 0, 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResultV2.getGroups().get(0).getName(), "a"); + browseResult = elasticSearchService.browse(opContext, ENTITY_NAME, "/a", null, 0, 10); + 
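+    // Hedged note: unlike testElasticSearchServiceStructuredQuery above, this variant passes no
+    // explicit SearchFlags, so the service-level defaults are exercised end to end for search,
+    // browse, browseV2, docCount, and aggregateByValue.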
assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResult.getGroups().get(0).getName(), "b"); + browseResultV2 = + elasticSearchService.browseV2( + opContext, ENTITY_NAME, BROWSE_V2_DELIMITER + "a", null, "*", 0, 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResultV2.getGroups().get(0).getName(), "b"); + assertEquals(elasticSearchService.docCount(opContext, ENTITY_NAME), 1); + assertEquals( + elasticSearchService.aggregateByValue( + opContext, ImmutableList.of(ENTITY_NAME), "textFieldOverride", null, 10), + ImmutableMap.of("user_id", 1L)); + + Urn urn2 = new TestEntityUrn("test2", "urn2", "VALUE_2"); + ObjectNode document2 = JsonNodeFactory.instance.objectNode(); + document2.set("urn", JsonNodeFactory.instance.textNode(urn2.toString())); + document2.set("keyPart1", JsonNodeFactory.instance.textNode("random")); + document2.set("textFieldOverride", JsonNodeFactory.instance.textNode("user id")); + document2.set("browsePaths", JsonNodeFactory.instance.textNode("/b/c")); + document2.set( + "browsePathV2", + JsonNodeFactory.instance.textNode(BROWSE_V2_DELIMITER + "b" + BROWSE_V2_DELIMITER + "c")); + elasticSearchService.upsertDocument( + opContext, ENTITY_NAME, document2.toString(), urn2.toString()); + syncAfterWrite(getBulkProcessor()); + + searchResult = + elasticSearchService.search(opContext, List.of(ENTITY_NAME), "user_id", null, null, 0, 10); + assertEquals(searchResult.getNumEntities().intValue(), 2); + assertEquals(searchResult.getEntities().get(0).getEntity(), urn); + browseResult = elasticSearchService.browse(opContext, ENTITY_NAME, "", null, 0, 10); + assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 2); + assertEquals(browseResult.getGroups().get(0).getName(), "a"); + assertEquals(browseResult.getGroups().get(1).getName(), "b"); + browseResultV2 = + elasticSearchService.browseV2(opContext, ENTITY_NAME, "", null, "user_id", 0, 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 2); + assertEquals(browseResultV2.getGroups().get(0).getName(), "a"); + assertEquals(browseResultV2.getGroups().get(1).getName(), "b"); + browseResult = elasticSearchService.browse(opContext, ENTITY_NAME, "/a", null, 0, 10); + assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResult.getGroups().get(0).getName(), "b"); + browseResultV2 = + elasticSearchService.browseV2( + opContext, ENTITY_NAME, BROWSE_V2_DELIMITER + "a", null, "user_id", 0, 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 1); + assertEquals(browseResultV2.getGroups().get(0).getName(), "b"); + assertEquals(elasticSearchService.docCount(opContext, ENTITY_NAME), 2); + assertEquals( + elasticSearchService.aggregateByValue( + opContext, ImmutableList.of(ENTITY_NAME), "textFieldOverride", null, 10), + ImmutableMap.of("user_id", 1L, "user id", 1L)); + + elasticSearchService.deleteDocument(opContext, ENTITY_NAME, urn.toString()); + elasticSearchService.deleteDocument(opContext, ENTITY_NAME, urn2.toString()); + syncAfterWrite(getBulkProcessor()); + searchResult = + elasticSearchService.search(opContext, List.of(ENTITY_NAME), "*", null, null, 0, 10); + assertEquals(searchResult.getNumEntities().intValue(), 0); + browseResult = elasticSearchService.browse(opContext, ENTITY_NAME, "", null, 0, 10); + assertEquals(browseResult.getMetadata().getTotalNumEntities().longValue(), 0); + browseResultV2 = 
elasticSearchService.browseV2(opContext, ENTITY_NAME, "", null, "*", 0, 10); + assertEquals(browseResultV2.getMetadata().getTotalNumEntities().longValue(), 0); + assertEquals(elasticSearchService.docCount(opContext, ENTITY_NAME), 0); + assertEquals( + elasticSearchService + .aggregateByValue(opContext, ImmutableList.of(ENTITY_NAME), "textField", null, 10) + .size(), + 0); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 75da2bc62aaad1..ea9658e9c585eb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -15,7 +15,7 @@ import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder; -import com.linkedin.metadata.search.elasticsearch.query.request.TestSearchFieldConfig; +import com.linkedin.metadata.search.query.request.TestSearchFieldConfig; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.util.Pair; import java.io.Serializable; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index b71e4ddc54a782..9c3d515f9322fb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; @@ -50,7 +51,12 @@ public void setup() throws RemoteInvocationException, URISyntaxException { .prefix("es_browse_dao_test") .hashIdAlgo("MD5") .build())); - browseDAO = new ESBrowseDAO(mockClient, searchConfiguration, customSearchConfiguration); + browseDAO = + new ESBrowseDAO( + mockClient, + searchConfiguration, + customSearchConfiguration, + QueryFilterRewriteChain.EMPTY); } public static Urn makeUrn(Object id) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java index 4b9e5da82bcd02..e0258f05933399 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -27,6 +27,7 @@ import com.linkedin.metadata.search.SearchResultMetadata; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.opensearch.SearchDAOOpenSearchTest; import com.linkedin.metadata.utils.SearchUtil; import io.datahubproject.metadata.context.OperationContext; @@ -233,7 +234,8 @@ 
public void testTransformIndexIntoEntityNameSingle() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); // Empty aggregations final SearchResultMetadata searchResultMetadata = new SearchResultMetadata().setAggregations(new AggregationMetadataArray()); @@ -323,7 +325,8 @@ public void testTransformIndexIntoEntityNameNested() { false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); // One nested facet Map entityTypeMap = Map.of( @@ -461,7 +464,8 @@ public void testExplain() { ? ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH : ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, getSearchConfiguration(), - null); + null, + QueryFilterRewriteChain.EMPTY); ExplainResponse explainResponse = searchDAO.explain( getOperationContext() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java new file mode 100644 index 00000000000000..2c49567d49ea79 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java @@ -0,0 +1,365 @@ +package com.linkedin.metadata.search.query.filter; + +import static com.linkedin.metadata.Constants.CONTAINER_ENTITY_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.elasticsearch.query.filter.ContainerExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermsQueryBuilder; +import org.testng.annotations.BeforeMethod; +import 
org.testng.annotations.Test; + +public class ContainerExpansionRewriterTest { + private static final String FIELD_NAME = "container.keyword"; + private final String grandParentUrn = "urn:li:container:grand"; + private final String parentUrn = "urn:li:container:foo"; + private final String parentUrn2 = "urn:li:container:foo2"; + private final String childUrn = "urn:li:container:bar"; + private final String childUrn2 = "urn:li:container:bar2"; + + private OperationContext opContext; + private GraphRetriever mockGraphRetriever; + + @BeforeMethod + public void init() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + + mockGraphRetriever = spy(GraphRetriever.class); + RetrieverContext mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + + opContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> entityRegistry, + () -> + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(mockGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) + .build(), + null, + null); + } + + @Test + public void testTermsQueryRewrite() { + ContainerExpansionRewriter test = + ContainerExpansionRewriter.builder() + .config(QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration.DEFAULT) + .build(); + + TermsQueryBuilder notTheFieldQuery = QueryBuilders.termsQuery("notTheField", childUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + notTheFieldQuery), + notTheFieldQuery, + "Expected no rewrite due to non-applicable field"); + + TermsQueryBuilder disabledRewriteQuery = QueryBuilders.termsQuery(FIELD_NAME, childUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .searchFlags(new SearchFlags().setRewriteQuery(false)) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + disabledRewriteQuery), + disabledRewriteQuery, + "Expected no rewrite due to disabled rewrite searchFlags"); + + // Setup nested + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, childUrn); + TermsQueryBuilder expectedRewrite = QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + 
.searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testTermsQueryRewritePagination() { + ContainerExpansionRewriter test = + ContainerExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested + // Page 1 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); + + // Page 2 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn2, RelationshipDirection.OUTGOING, null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn, + grandParentUrn, + RelationshipDirection.OUTGOING, + null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn2, + grandParentUrn, + RelationshipDirection.OUTGOING, + null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, childUrn); + TermsQueryBuilder expectedRewrite = + QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn, parentUrn2, grandParentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testNestedBoolQueryRewrite() { + 
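+    // Hedged summary: the rewriter should recurse through filter/should/must/mustNot clauses,
+    // expanding only the container.keyword terms queries with the discovered ancestors while
+    // passing unrelated clauses, such as the existsQuery on "someField", through unchanged.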
ContainerExpansionRewriter test = + ContainerExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested container + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq(List.of(CONTAINER_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.OUTGOING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.OUTGOING, null)))); + + BoolQueryBuilder testQuery = QueryBuilders.boolQuery(); + testQuery.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.filter(QueryBuilders.existsQuery("someField")); + testQuery.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.should(QueryBuilders.existsQuery("someField")); + testQuery.must( + QueryBuilders.boolQuery() + .must(QueryBuilders.boolQuery().must(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.must(QueryBuilders.existsQuery("someField")); + testQuery.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery().mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn)))); + testQuery.mustNot(QueryBuilders.existsQuery("someField")); + + BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery(); + expectedRewrite.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery() + .filter(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.filter(QueryBuilders.existsQuery("someField")); + expectedRewrite.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery() + .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.should(QueryBuilders.existsQuery("someField")); + expectedRewrite.must( + QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.must(QueryBuilders.existsQuery("someField")); + expectedRewrite.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.mustNot(QueryBuilders.existsQuery("someField")); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.ANCESTORS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite of nested filters and pass through for non-container fields"); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java new file mode 100644 index 00000000000000..8ee7dd3718ca96 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java @@ -0,0 +1,370 @@ +package com.linkedin.metadata.search.query.filter; + +import static 
com.linkedin.metadata.Constants.DOMAIN_ENTITY_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; +import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.config.search.QueryFilterRewriterConfiguration; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.search.elasticsearch.query.filter.DomainExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterSearchType; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.List; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.TermsQueryBuilder; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DomainExpansionRewriterTest { + private static final String FIELD_NAME = "domains.keyword"; + private final String grandParentUrn = "urn:li:domain:grand"; + private final String parentUrn = "urn:li:domain:foo"; + private final String parentUrn2 = "urn:li:domain:foo2"; + private final String childUrn = "urn:li:domain:bar"; + private final String childUrn2 = "urn:li:domain:bar2"; + + private OperationContext opContext; + private GraphRetriever mockGraphRetriever; + + @BeforeMethod + public void init() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + + mockGraphRetriever = spy(GraphRetriever.class); + RetrieverContext mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + + opContext = + TestOperationContexts.systemContext( + null, + null, + null, + () -> entityRegistry, + () -> + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .graphRetriever(mockGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) + 
.build(), + null, + null); + } + + @Test + public void testTermsQueryRewrite() { + DomainExpansionRewriter test = + DomainExpansionRewriter.builder() + .config(QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration.DEFAULT) + .build(); + + TermsQueryBuilder notTheFieldQuery = QueryBuilders.termsQuery("notTheField", parentUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + notTheFieldQuery), + notTheFieldQuery, + "Expected no rewrite due to non-applicable field"); + + TermsQueryBuilder disabledRewriteQuery = QueryBuilders.termsQuery(FIELD_NAME, parentUrn); + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .searchFlags(new SearchFlags().setRewriteQuery(false)) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + disabledRewriteQuery), + disabledRewriteQuery, + "Expected no rewrite due to disabled rewrite searchFlags"); + + // Setup nested + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, parentUrn); + TermsQueryBuilder expectedRewrite = QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testTermsQueryRewritePagination() { + DomainExpansionRewriter test = + DomainExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested + // Page 1 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(grandParentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn, + grandParentUrn, + RelationshipDirection.INCOMING, + null)))); + + // Page 2 + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(grandParentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, 
RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", + parentUrn2, + grandParentUrn, + RelationshipDirection.INCOMING, + null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + "page2", + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); + + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq( + QueryUtils.newDisjunctiveFilter( + newCriterion("urn", List.of(parentUrn2, parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + eq("page2"), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 2, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn2, parentUrn2, RelationshipDirection.INCOMING, null)))); + + TermsQueryBuilder testQuery = QueryBuilders.termsQuery(FIELD_NAME, grandParentUrn); + TermsQueryBuilder expectedRewrite = + QueryBuilders.termsQuery( + FIELD_NAME, childUrn, childUrn2, parentUrn, parentUrn2, grandParentUrn); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite"); + } + + @Test + public void testNestedBoolQueryRewrite() { + DomainExpansionRewriter test = + DomainExpansionRewriter.builder() + .config( + new QueryFilterRewriterConfiguration.ExpansionRewriterConfiguration(true, 1, 100)) + .build(); + + // Setup nested + when(mockGraphRetriever.scrollRelatedEntities( + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(parentUrn)))), + eq(List.of(DOMAIN_ENTITY_NAME)), + eq(EMPTY_FILTER), + eq(List.of("IsPartOf")), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Edge.EDGE_SORT_CRITERION), + nullable(String.class), + anyInt(), + isNull(), + isNull())) + .thenReturn( + new RelatedEntitiesScrollResult( + 1, + 1, + null, + List.of( + new RelatedEntities( + "IsPartOf", childUrn, parentUrn, RelationshipDirection.INCOMING, null)))); + + BoolQueryBuilder testQuery = QueryBuilders.boolQuery(); + testQuery.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery().filter(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.filter(QueryBuilders.boolQuery().filter(QueryBuilders.existsQuery("someField"))); + testQuery.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery().should(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.should(QueryBuilders.boolQuery().should(QueryBuilders.existsQuery("someField"))); + testQuery.must( + QueryBuilders.boolQuery() + 
.must(QueryBuilders.boolQuery().must(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.must(QueryBuilders.boolQuery().must(QueryBuilders.existsQuery("someField"))); + testQuery.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termsQuery(FIELD_NAME, parentUrn)))); + testQuery.mustNot(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("someField"))); + + BoolQueryBuilder expectedRewrite = QueryBuilders.boolQuery(); + expectedRewrite.filter( + QueryBuilders.boolQuery() + .filter( + QueryBuilders.boolQuery() + .filter(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.filter( + QueryBuilders.boolQuery().filter(QueryBuilders.existsQuery("someField"))); + expectedRewrite.should( + QueryBuilders.boolQuery() + .should( + QueryBuilders.boolQuery() + .should(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.should( + QueryBuilders.boolQuery().should(QueryBuilders.existsQuery("someField"))); + expectedRewrite.must( + QueryBuilders.boolQuery() + .must( + QueryBuilders.boolQuery() + .must(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.must(QueryBuilders.boolQuery().must(QueryBuilders.existsQuery("someField"))); + expectedRewrite.mustNot( + QueryBuilders.boolQuery() + .mustNot( + QueryBuilders.boolQuery() + .mustNot(QueryBuilders.termsQuery(FIELD_NAME, childUrn, parentUrn)))); + expectedRewrite.mustNot( + QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery("someField"))); + + assertEquals( + test.rewrite( + opContext, + QueryFilterRewriterContext.builder() + .condition(Condition.DESCENDANTS_INCL) + .searchType(QueryFilterRewriterSearchType.FULLTEXT_SEARCH) + .queryFilterRewriteChain(mock(QueryFilterRewriteChain.class)) + .build(false), + testQuery), + expectedRewrite, + "Expected rewrite of nested filters and pass-through of other fields."); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 9376552f7abc55..572d79ebf2f0ce 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; @@ -33,7 +34,9 @@ public class AutocompleteRequestHandlerTest { private AutocompleteRequestHandler handler = AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), CustomSearchConfiguration.builder().build()); + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder().build(), + QueryFilterRewriteChain.EMPTY); private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); @@ -170,7 +173,8 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build())
.build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -195,7 +199,8 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -237,7 +242,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -275,7 +281,8 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -337,7 +344,8 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -389,7 +397,8 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build()); + .build(), + QueryFilterRewriteChain.EMPTY); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1cd9a274463d30..7da0a14f212799 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; @@ -17,6 +18,7 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.search.config.SearchCommonTestConfiguration; @@ -55,6 +57,14 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { private OperationContext operationContext; public static SearchConfiguration testQueryConfig; + public static List validHighlightingFields = List.of("urn", "foreignKey"); + public static StringArray customHighlightFields = + new StringArray( + List.of( + validHighlightingFields.get(0), + validHighlightingFields.get(1), + "notExistingField", + "")); static { testQueryConfig = new SearchConfiguration(); @@ -86,7 +96,8 @@ public class SearchRequestHandlerTest 
extends AbstractTestNGSpringContextTests { public void testDatasetFieldsAndHighlights() { EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler datasetHandler = - SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null); + SearchRequestHandler.getBuilder( + entitySpec, testQueryConfig, null, QueryFilterRewriteChain.EMPTY); /* Ensure efficient query performance, we do not expect upstream/downstream/fineGrained lineage @@ -102,10 +113,41 @@ public void testDatasetFieldsAndHighlights() { "unexpected lineage fields in highlights: " + highlightFields); } + @Test + public void testCustomHighlights() { + EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); + SearchRequestHandler requestHandler = + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + testQueryConfig, + null, + mock(QueryFilterRewriteChain.class)); + SearchRequest searchRequest = + requestHandler.getSearchRequest( + operationContext.withSearchFlags( + flags -> + flags.setFulltext(false).setCustomHighlightingFields(customHighlightFields)), + "testQuery", + null, + null, + 0, + 10, + null); + SearchSourceBuilder sourceBuilder = searchRequest.source(); + assertNotNull(sourceBuilder.highlighter()); + assertEquals(4, sourceBuilder.highlighter().fields().size()); + assertTrue( + sourceBuilder.highlighter().fields().stream() + .map(HighlightBuilder.Field::name) + .toList() + .containsAll(validHighlightingFields)); + } + @Test public void testSearchRequestHandlerHighlightingTurnedOff() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -145,7 +187,8 @@ public void testSearchRequestHandlerHighlightingTurnedOff() { @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -208,7 +251,8 @@ public void testSearchRequestHandler() { @Test public void testAggregationsInSearch() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); final String nestedAggString = String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR); SearchRequest searchRequest = @@ -276,7 +320,8 @@ public void testAggregationsInSearch() { public void testFilteredSearch() { final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); final BoolQueryBuilder testQuery = constructFilterQuery(requestHandler, false); @@ -632,7 +677,8 @@ public void testBrowsePathQueryFilter() { SearchRequestHandler.getFilterQuery( operationContext.withSearchFlags(flags -> flags.setFulltext(false)), filter, - new HashMap<>()); + new 
HashMap<>(), + QueryFilterRewriteChain.EMPTY); assertEquals(test.should().size(), 1); @@ -655,7 +701,8 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); + SearchRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), testQueryConfig, null, QueryFilterRewriteChain.EMPTY); return (BoolQueryBuilder) requestHandler diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/TestSearchFieldConfig.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/TestSearchFieldConfig.java similarity index 94% rename from metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/TestSearchFieldConfig.java rename to metadata-io/src/test/java/com/linkedin/metadata/search/query/request/TestSearchFieldConfig.java index 062298796dd7c7..61dcc5a9b49753 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/TestSearchFieldConfig.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/TestSearchFieldConfig.java @@ -1,8 +1,9 @@ -package com.linkedin.metadata.search.elasticsearch.query.request; +package com.linkedin.metadata.search.query.request; import com.linkedin.metadata.models.SearchableRefFieldSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig; import java.util.Optional; import java.util.Set; import org.junit.jupiter.api.Assertions; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index def14f9be7054a..48b1fb99d4e6d8 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -26,7 +26,7 @@ import com.linkedin.metadata.models.SearchableRefFieldSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.search.elasticsearch.query.request.TestSearchFieldConfig; +import com.linkedin.metadata.search.query.request.TestSearchFieldConfig; import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RetrieverContext; @@ -85,6 +85,7 @@ public void testTransform() throws IOException { assertEquals(parsedJson.get("feature2").asInt(), 1); JsonNode browsePathV2 = (JsonNode) parsedJson.get("browsePathV2"); assertEquals(browsePathV2.asText(), "␟levelOne␟levelTwo"); + assertEquals(parsedJson.get("esObjectField").get("key3").asText(), ""); } @Test @@ -125,7 +126,8 @@ public void testTransformMaxFieldValue() throws IOException { assertEquals( parsedJson.get("customProperties"), JsonNodeFactory.instance.arrayNode().add("shortValue=123")); - assertEquals(parsedJson.get("esObjectField"), JsonNodeFactory.instance.arrayNode().add("123")); + assertEquals( + parsedJson.get("esObjectField"), JsonNodeFactory.instance.arrayNode().add("123").add("")); searchDocumentTransformer = new 
SearchDocumentTransformer(1000, 1000, 20); snapshot = TestEntityUtil.getSnapshot(); @@ -149,6 +151,7 @@ public void testTransformMaxFieldValue() throws IOException { .add("value1") .add("value2") .add("123") + .add("") .add("0123456789")); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index bbc494159a498f..94241ec5e89b01 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -15,8 +15,10 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.r2.RemoteInvocationException; import com.linkedin.structured.StructuredPropertyDefinition; +import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.util.HashMap; @@ -87,7 +89,11 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"terms\" : {\n" @@ -108,7 +114,11 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"terms\" : {\n" @@ -130,7 +140,11 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"terms\" : {\n" @@ -152,7 +166,11 @@ public void testGetQueryBuilderFromCriterionContain() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); String expected = "{\n" @@ -175,7 +193,11 @@ public void testGetQueryBuilderFromCriterionContain() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); expected = "{\n" @@ -218,7 +240,11 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); String expected = "{\n" @@ -241,7 +267,11 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), 
mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); expected = "{\n" @@ -281,7 +311,11 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); String expected = "{\n" @@ -303,7 +337,11 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { result = ESUtils.getQueryBuilderFromCriterion( - multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + multiValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + mock(QueryFilterRewriteChain.class)); expected = "{\n" @@ -343,7 +381,11 @@ public void testGetQueryBuilderFromCriterionExists() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -368,7 +410,11 @@ public void testGetQueryBuilderFromCriterionExists() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -395,7 +441,11 @@ public void testGetQueryBuilderFromCriterionIsNull() { QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -420,7 +470,11 @@ public void testGetQueryBuilderFromCriterionIsNull() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -453,7 +507,11 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { // Ensure that the query is expanded! QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); + singleValueCriterion, + false, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -493,7 +551,11 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { // Ensure that the query is expanded without keyword. 
result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); + timeseriesField, + true, + new HashMap<>(), + mock(OperationContext.class), + QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -535,9 +597,11 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); + OperationContext opContext = mock(OperationContext.class); + when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetriever); + singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"terms\" : {\n" @@ -560,9 +624,15 @@ public void testGetQueryBuilderFromStructPropEqualsValueV1() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); + OperationContext opContextV1 = mock(OperationContext.class); + when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); + singleValueCriterion, + false, + new HashMap<>(), + opContextV1, + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"terms\" : {\n" @@ -581,9 +651,11 @@ public void testGetQueryBuilderFromStructPropExists() { final Criterion singleValueCriterion = new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + OperationContext opContext = mock(OperationContext.class); + when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetriever); + singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -608,7 +680,7 @@ public void testGetQueryBuilderFromStructPropExists() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), aspectRetriever); + timeseriesField, true, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" @@ -633,9 +705,15 @@ public void testGetQueryBuilderFromStructPropExistsV1() { final Criterion singleValueCriterion = new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + OperationContext opContextV1 = mock(OperationContext.class); + when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( - singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); + singleValueCriterion, + false, + new HashMap<>(), + opContextV1, + QueryFilterRewriteChain.EMPTY); String expected = "{\n" + " \"bool\" : {\n" @@ -660,7 +738,7 @@ public void testGetQueryBuilderFromStructPropExistsV1() { result = ESUtils.getQueryBuilderFromCriterion( - timeseriesField, true, new HashMap<>(), aspectRetrieverV1); + timeseriesField, true, new HashMap<>(), opContextV1, QueryFilterRewriteChain.EMPTY); expected = "{\n" + " \"bool\" : {\n" diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java index ef7b818c593010..f90f23cfe51789 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGeneratorTest.java @@ -5,6 +5,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.timeline.data.ChangeEvent; +import com.linkedin.metadata.timeline.data.dataset.SchemaFieldModificationCategory; import com.linkedin.mxe.SystemMetadata; import com.linkedin.restli.internal.server.util.DataMapUtils; import com.linkedin.schema.SchemaField; @@ -42,6 +43,17 @@ private static void compareDescriptions( assertEquals(expectedDescriptions, actualDescriptions); } + private static void compareModificationCategories( + Set<String> expectedCategories, List<ChangeEvent> actual) { + Set<String> actualModificationCategories = new HashSet<>(); + actual.forEach( + changeEvent -> { + actualModificationCategories.add( + changeEvent.getParameters().get("modificationCategory").toString()); + }); + assertEquals(expectedCategories, actualModificationCategories); + } + private static Aspect<SchemaMetadata> getSchemaMetadata(List<SchemaField> schemaFieldList) { return new Aspect<>( new SchemaMetadata().setFields(new SchemaFieldArray(schemaFieldList)), @@ -70,7 +82,8 @@ public void testNativeSchemaBackwardIncompatibleChange() throws Exception { Set.of( "A backwards incompatible change due to native datatype of the field 'ID' changed from 'NUMBER(16,1)' to 'NUMBER(10,1)'."), actual); - + compareModificationCategories( + Set.of(SchemaFieldModificationCategory.TYPE_CHANGE.toString()), actual); List<ChangeEvent> actual2 = test.getChangeEvents(urn, entity, aspect, to, from, auditStamp); // Test single field going from NUMBER(10,1) -> NUMBER(16,1) assertEquals(1, actual2.size()); @@ -78,6 +91,8 @@ Set.of( "A backwards incompatible change due to native datatype of the field 'ID' changed from 'NUMBER(10,1)' to 'NUMBER(16,1)'."), actual2); + compareModificationCategories( + Set.of(SchemaFieldModificationCategory.TYPE_CHANGE.toString()), actual2); } @Test @@ -104,6 +119,11 @@ public void testNativeSchemaFieldAddition() throws Exception { "A backwards incompatible change due to native datatype of the field 'ID' changed from 'NUMBER(16,1)' to 'NUMBER(10,1)'.", "A forwards & backwards compatible change due to the newly added field 'aa'."), actual); + compareModificationCategories( + Set.of( + SchemaFieldModificationCategory.TYPE_CHANGE.toString(), + SchemaFieldModificationCategory.OTHER.toString()), + actual); } @Test @@ -127,6 +147,8 @@ public void testSchemaFieldRename() throws Exception { "A forwards & backwards compatible change due to renaming of the field 'ID to ID2'."), actual); assertEquals(1, actual.size()); + compareModificationCategories( + Set.of(SchemaFieldModificationCategory.RENAME.toString()), actual); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 6c650e725fd5cf..15597132289b2b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -37,6 +37,7 @@ import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import
com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; @@ -148,7 +149,8 @@ private ElasticSearchTimeseriesAspectService buildService() { opContext.getEntityRegistry(), opContext.getSearchContext().getIndexConvention()), getBulkProcessor(), - 1); + 1, + QueryFilterRewriteChain.EMPTY); } /* diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java index b77902d34b2d74..db9d8b450ef7a6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java @@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; @@ -39,7 +40,11 @@ public class TimeseriesAspectServiceUnitTest { private final RestClient _restClient = mock(RestClient.class); private final TimeseriesAspectService _timeseriesAspectService = new ElasticSearchTimeseriesAspectService( - _searchClient, _timeseriesAspectIndexBuilders, _bulkProcessor, 0); + _searchClient, + _timeseriesAspectIndexBuilders, + _bulkProcessor, + 0, + QueryFilterRewriteChain.EMPTY); private final OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization(_indexConvention); diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 20f6084f953299..781201f3478f98 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -28,6 +28,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SearchRanker; @@ -74,6 +75,8 @@ public class SampleDataFixtureConfiguration { @Autowired private CustomSearchConfiguration _customSearchConfiguration; + @Autowired private QueryFilterRewriteChain queryFilterRewriteChain; + @Bean(name = "sampleDataPrefix") protected String sampleDataPrefix() { return "smpldat"; @@ -197,9 +200,14 @@ protected ElasticSearchService entitySearchServiceHelper(EntityIndexBuilders ind false, 
ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, _searchConfiguration, - customSearchConfiguration); + customSearchConfiguration, + queryFilterRewriteChain); ESBrowseDAO browseDAO = - new ESBrowseDAO(_searchClient, _searchConfiguration, _customSearchConfiguration); + new ESBrowseDAO( + _searchClient, + _searchConfiguration, + _customSearchConfiguration, + queryFilterRewriteChain); ESWriteDAO writeDAO = new ESWriteDAO(_searchClient, _bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 71ccaa1d325010..918463ec59b363 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -30,6 +30,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import com.linkedin.metadata.search.ranker.SearchRanker; @@ -132,16 +133,19 @@ protected EntityIndexBuilders entityIndexBuilders( @Bean(name = "searchLineageEntitySearchService") protected ElasticSearchService entitySearchService( - @Qualifier("searchLineageEntityIndexBuilders") EntityIndexBuilders indexBuilders) { + @Qualifier("searchLineageEntityIndexBuilders") EntityIndexBuilders indexBuilders, + final QueryFilterRewriteChain queryFilterRewriteChain) { ESSearchDAO searchDAO = new ESSearchDAO( searchClient, false, ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH, searchConfiguration, - null); + null, + queryFilterRewriteChain); ESBrowseDAO browseDAO = - new ESBrowseDAO(searchClient, searchConfiguration, customSearchConfiguration); + new ESBrowseDAO( + searchClient, searchConfiguration, customSearchConfiguration, queryFilterRewriteChain); ESWriteDAO writeDAO = new ESWriteDAO(searchClient, bulkProcessor, 1); return new ElasticSearchService(indexBuilders, searchDAO, browseDAO, writeDAO); diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java index 20fb8c38325043..547ab1d746dbe7 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchCommonTestConfiguration.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.WordGramConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import org.springframework.boot.test.context.TestConfiguration; @@ -55,4 +56,9 @@ public CustomSearchConfiguration customSearchConfiguration() throws Exception { 
public OperationContext queryOperationContext() { return TestOperationContexts.systemContextNoSearchAuthorization(); } + + @Bean + public QueryFilterRewriteChain queryFilterRewriteChain() { + return QueryFilterRewriteChain.EMPTY; + } } diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index 68768051eccad0..b34bb5bd0e0a81 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -40,7 +40,8 @@ "com.linkedin.metadata.dao.producer", "com.linkedin.gms.factory.change", "com.datahub.event.hook", - "com.linkedin.gms.factory.notifications" + "com.linkedin.gms.factory.notifications", + "com.linkedin.gms.factory.search.filter" }) public class MCLSpringCommonTestConfiguration { diff --git a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java index 60d9c7496dfcb5..2f3f35697e476c 100644 --- a/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java +++ b/metadata-jobs/mce-consumer/src/main/java/com/linkedin/metadata/kafka/MetadataChangeProposalsProcessor.java @@ -4,8 +4,11 @@ import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityclient.RestliEntityClientFactory; import com.linkedin.metadata.EventUtils; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.kafka.config.MetadataChangeProposalProcessorCondition; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.mxe.FailedMetadataChangeProposal; @@ -13,7 +16,9 @@ import com.linkedin.mxe.Topics; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; +import java.util.Optional; import javax.annotation.Nonnull; +import javax.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.avro.generic.GenericRecord; @@ -22,11 +27,14 @@ import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerRecord; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Conditional; import org.springframework.context.annotation.Import; import org.springframework.kafka.annotation.EnableKafka; import org.springframework.kafka.annotation.KafkaListener; +import org.springframework.kafka.config.KafkaListenerEndpointRegistry; +import org.springframework.kafka.listener.MessageListenerContainer; import org.springframework.stereotype.Component; @Slf4j @@ -36,11 +44,19 @@ @EnableKafka @RequiredArgsConstructor public class MetadataChangeProposalsProcessor { + private static final String CONSUMER_GROUP_ID_VALUE = + 
"${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}"; private final OperationContext systemOperationContext; private final SystemEntityClient entityClient; private final Producer kafkaProducer; + @Qualifier("kafkaThrottle") + private final ThrottleSensor kafkaThrottle; + + private final KafkaListenerEndpointRegistry registry; + private final ConfigurationProvider provider; + private final Histogram kafkaLagStats = MetricUtils.get().histogram(MetricRegistry.name(this.getClass(), "kafkaLag")); @@ -50,8 +66,47 @@ public class MetadataChangeProposalsProcessor { + "}") private String fmcpTopicName; + @Value(CONSUMER_GROUP_ID_VALUE) + private String mceConsumerGroupId; + + @PostConstruct + public void registerConsumerThrottle() { + if (kafkaThrottle != null + && provider + .getMetadataChangeProposal() + .getThrottle() + .getComponents() + .getMceConsumer() + .isEnabled()) { + log.info("MCE Consumer Throttle Enabled"); + kafkaThrottle.addCallback( + (throttleEvent) -> { + Optional container = + Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); + if (container.isEmpty()) { + log.warn( + "Expected container was missing: {} throttle is not possible.", + mceConsumerGroupId); + } else { + if (throttleEvent.isThrottled()) { + container.ifPresent(MessageListenerContainer::pause); + return ThrottleControl.builder() + // resume consumer after sleep + .callback( + (resumeEvent) -> container.ifPresent(MessageListenerContainer::resume)) + .build(); + } + } + + return ThrottleControl.NONE; + }); + } else { + log.info("MCE Consumer Throttle Disabled"); + } + } + @KafkaListener( - id = "${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}", + id = CONSUMER_GROUP_ID_VALUE, topics = "${METADATA_CHANGE_PROPOSAL_TOPIC_NAME:" + Topics.METADATA_CHANGE_PROPOSAL + "}", containerFactory = "kafkaEventConsumer") public void consume(final ConsumerRecord consumerRecord) { diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceRunEvent.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceRunEvent.pdl index d9850c82442bf6..c18a4168a2a76a 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceRunEvent.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceRunEvent.pdl @@ -15,6 +15,9 @@ import com.linkedin.common.Urn record DataProcessInstanceRunEvent includes TimeseriesAspectBase, ExternalReference { @TimeseriesField = {} + @Searchable = { + "hasValuesFieldName": "hasRunEvents" + } status: enum DataProcessRunStatus { /** * The status where the Data processing run is in. 
diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl index 355a8bb7a5cb34..a3d2067ae5db25 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/SearchFlags.pdl @@ -48,4 +48,14 @@ record SearchFlags { * include restricted entities in results (default is to filter) */ includeRestricted:optional boolean = false + + /** + * Include mentioned fields inside elastic highlighting query + */ + customHighlightingFields:optional array[string] + + /** + * invoke query rewrite chain for filters based on configured rewriters + */ + rewriteQuery: optional boolean = true } diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl index 0578fd6e7c5e78..a79055ea3db547 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/query/filter/Condition.pdl @@ -59,4 +59,19 @@ enum Condition { * Represent the relation: String field starts with value, e.g. name starts with PageView */ START_WITH + + /** + * Represent the relation: URN field matches any nested child in addition to the given URN + */ + DESCENDANTS_INCL + + /** + * Represent the relation: URN field matches any nested parent in addition to the given URN + */ + ANCESTORS_INCL + + /** + * Represent the relation: URN field matches any nested child or parent in addition to the given URN + */ + RELATED_INCL } \ No newline at end of file diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/AuthorizationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/AuthorizationContext.java new file mode 100644 index 00000000000000..1390fd53a2b930 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/AuthorizationContext.java @@ -0,0 +1,62 @@ +package io.datahubproject.metadata.context; + +import com.datahub.authorization.AuthorizationRequest; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.EntitySpec; +import com.datahub.plugins.auth.authorization.Authorizer; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; + +@Getter +@Builder +public class AuthorizationContext implements ContextInterface { + + public static final AuthorizationContext EMPTY = + AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build(); + + @Nonnull private final Authorizer authorizer; + + @Builder.Default + private final ConcurrentHashMap<AuthorizationRequest, AuthorizationResult> + sessionAuthorizationCache = new ConcurrentHashMap<>(); + + /** + * Run authorization through the actor's session cache + * + * @param actorContext the actor context + * @param privilege privilege + * @param resourceSpec resource to access + * @return authorization result + */ + public AuthorizationResult authorize( + @Nonnull ActorContext actorContext, + @Nonnull final String privilege, + @Nullable final EntitySpec resourceSpec) { + final AuthorizationRequest request = + new AuthorizationRequest( + actorContext.getActorUrn().toString(), privilege, Optional.ofNullable(resourceSpec)); + // Graphql CompletableFutures cause a recursive
exception, we avoid computeIfAbsent and do work + // outside a blocking function + AuthorizationResult result = sessionAuthorizationCache.get(request); + if (result == null) { + result = authorizer.authorize(request); + sessionAuthorizationCache.putIfAbsent(request, result); + } + return result; + } + + /** + * No need to consider the authorizer in the cache context since it is ultimately determined by + * the underlying search context and actor context + * + * @return + */ + @Override + public Optional getCacheKeyComponent() { + return Optional.empty(); + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/AuthorizerContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/AuthorizerContext.java deleted file mode 100644 index fdd84f6d64557d..00000000000000 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/AuthorizerContext.java +++ /dev/null @@ -1,28 +0,0 @@ -package io.datahubproject.metadata.context; - -import com.datahub.plugins.auth.authorization.Authorizer; -import java.util.Optional; -import javax.annotation.Nonnull; -import lombok.Builder; -import lombok.Getter; - -@Builder -@Getter -public class AuthorizerContext implements ContextInterface { - - public static final AuthorizerContext EMPTY = - AuthorizerContext.builder().authorizer(Authorizer.EMPTY).build(); - - @Nonnull private final Authorizer authorizer; - - /** - * No need to consider the authorizer in the cache context since it is ultimately determined by - * the underlying search context - * - * @return - */ - @Override - public Optional getCacheKeyComponent() { - return Optional.empty(); - } -} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 9928318268a3ea..be5ac921fcb2d9 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -1,6 +1,9 @@ package io.datahubproject.metadata.context; import com.datahub.authentication.Authentication; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.AuthorizationSession; +import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; @@ -36,7 +39,7 @@ */ @Builder(toBuilder = true) @Getter -public class OperationContext { +public class OperationContext implements AuthorizationSession { /** * This should be the primary entry point when a request is made to Rest.li, OpenAPI, Graphql or @@ -66,10 +69,8 @@ public static OperationContext asSession( systemOperationContext.getOperationContextConfig().toBuilder() .allowSystemAuthentication(allowSystemAuthentication) .build()) - .authorizerContext(AuthorizerContext.builder().authorizer(authorizer).build()) + .authorizationContext(AuthorizationContext.builder().authorizer(authorizer).build()) .requestContext(requestContext) - // Initialize view authorization for user viewable urn tracking - .viewAuthorizationContext(ViewAuthorizationContext.builder().build()) .build(sessionAuthentication); } @@ -157,7 +158,7 @@ public static OperationContext asSystem( .entityRegistryContext(EntityRegistryContext.builder().build(entityRegistry)) 
.servicesRegistryContext(servicesRegistryContext) // Authorizer.EMPTY doesn't actually apply to system auth - .authorizerContext(AuthorizerContext.builder().authorizer(Authorizer.EMPTY).build()) + .authorizationContext(AuthorizationContext.builder().authorizer(Authorizer.EMPTY).build()) .retrieverContext(retrieverContext) .objectMapperContext(objectMapperContext) .build(systemAuthentication); @@ -167,11 +168,10 @@ public static OperationContext asSystem( @Nonnull private final ActorContext sessionActorContext; @Nullable private final ActorContext systemActorContext; @Nonnull private final SearchContext searchContext; - @Nonnull private final AuthorizerContext authorizerContext; + @Nonnull private final AuthorizationContext authorizationContext; @Nonnull private final EntityRegistryContext entityRegistryContext; @Nullable private final ServicesRegistryContext servicesRegistryContext; @Nullable private final RequestContext requestContext; - @Nullable private final ViewAuthorizationContext viewAuthorizationContext; @Nullable private final RetrieverContext retrieverContext; @Nonnull private final ObjectMapperContext objectMapperContext; @@ -237,7 +237,7 @@ public ActorContext getActorContext() { * @return */ public Collection getActorPeers() { - return authorizerContext.getAuthorizer().getActorPeers(sessionActorContext.getActorUrn()); + return authorizationContext.getAuthorizer().getActorPeers(sessionActorContext.getActorUrn()); } /** @@ -278,10 +278,6 @@ public AuditStamp getAuditStamp() { return getAuditStamp(null); } - public Optional getViewAuthorizationContext() { - return Optional.ofNullable(viewAuthorizationContext); - } - public Optional getRetrieverContext() { return Optional.ofNullable(retrieverContext); } @@ -295,6 +291,19 @@ public Optional getAspectRetrieverOpt() { return getRetrieverContext().map(RetrieverContext::getAspectRetriever); } + /** + * Provides a cached authorizer interface in the context of the session user + * + * @param privilege the requested privilege + * @param resourceSpec the optional resource that is the target of the privilege + * @return authorization result + */ + @Override + public AuthorizationResult authorize( + @Nonnull String privilege, @Nullable EntitySpec resourceSpec) { + return authorizationContext.authorize(getSessionActorContext(), privilege, resourceSpec); + } + /** * Return a unique id for this context. Typically useful for building cache keys. We combine the * different context components to create a single string representation of the hashcode across @@ -309,7 +318,7 @@ public String getGlobalContextId() { return String.valueOf( ImmutableSet.builder() .add(getOperationContextConfig()) - .add(getAuthorizerContext()) + .add(getAuthorizationContext()) .add(getSessionActorContext()) .add(getSearchContext()) .add( @@ -321,10 +330,6 @@ public String getGlobalContextId() { ? EmptyContext.EMPTY : getServicesRegistryContext()) .add(getRequestContext() == null ? EmptyContext.EMPTY : getRequestContext()) - .add( - getViewAuthorizationContext().isPresent() - ? getViewAuthorizationContext().get() - : EmptyContext.EMPTY) .add( getRetrieverContext().isPresent() ? 
getRetrieverContext().get() @@ -411,8 +416,8 @@ public OperationContext build(@Nonnull Authentication sessionAuthentication) { .getAuthentication() .getActor() .equals(sessionAuthentication.getActor())) - .policyInfoSet(this.authorizerContext.getAuthorizer().getActorPolicies(actorUrn)) - .groupMembership(this.authorizerContext.getAuthorizer().getActorGroups(actorUrn)) + .policyInfoSet(this.authorizationContext.getAuthorizer().getActorPolicies(actorUrn)) + .groupMembership(this.authorizationContext.getAuthorizer().getActorGroups(actorUrn)) .build(); return build(sessionActor); } @@ -424,11 +429,10 @@ public OperationContext build(@Nonnull ActorContext sessionActor) { sessionActor, this.systemActorContext, Objects.requireNonNull(this.searchContext), - Objects.requireNonNull(this.authorizerContext), + Objects.requireNonNull(this.authorizationContext), this.entityRegistryContext, this.servicesRegistryContext, this.requestContext, - this.viewAuthorizationContext, this.retrieverContext, this.objectMapperContext != null ? this.objectMapperContext diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java index 1eee0498f112a6..779c418a56142f 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RequestContext.java @@ -76,6 +76,11 @@ public RequestContext buildGraphql( return build(); } + public RequestContext buildRestli( + @Nonnull String actorUrn, @Nullable ResourceContext resourceContext, String action) { + return buildRestli(actorUrn, resourceContext, action, (String) null); + } + public RequestContext buildRestli( @Nonnull String actorUrn, @Nullable ResourceContext resourceContext, diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ViewAuthorizationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ViewAuthorizationContext.java deleted file mode 100644 index 5204d7bf5f98f4..00000000000000 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ViewAuthorizationContext.java +++ /dev/null @@ -1,38 +0,0 @@ -package io.datahubproject.metadata.context; - -import com.linkedin.common.urn.Urn; -import java.util.Collection; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import javax.annotation.Nonnull; -import lombok.Builder; -import lombok.Getter; - -@Getter -@Builder -public class ViewAuthorizationContext implements ContextInterface { - - /** - * Graphql has a lot of redundant `canView` authorization checks, to reduce the repeated checks - * for view authorization, maintain a list of urns that have already been identified as viewable - * for the request. 
- */ - @Nonnull @Builder.Default private Set<Urn> viewableUrns = ConcurrentHashMap.newKeySet(); - - public boolean canView(@Nonnull Collection<Urn> urns) { - if (urns.isEmpty()) { - return false; - } - return viewableUrns.containsAll(urns); - } - - public void addViewableUrns(@Nonnull Collection<Urn> urns) { - viewableUrns.addAll(urns); - } - - @Override - public Optional getCacheKeyComponent() { - return Optional.empty(); - } -} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestAuthSession.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestAuthSession.java new file mode 100644 index 00000000000000..d8be3e95efbc4b --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestAuthSession.java @@ -0,0 +1,44 @@ +package io.datahubproject.test.metadata.context; + +import com.datahub.authentication.Authentication; +import com.datahub.authorization.AuthorizationRequest; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.AuthorizationSession; +import com.datahub.authorization.EntitySpec; +import com.datahub.plugins.auth.authorization.Authorizer; +import java.util.Optional; +import java.util.function.BiFunction; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class TestAuthSession implements AuthorizationSession { + public static AuthorizationSession ALLOW_ALL = + from((priv, resourceSpec) -> new AuthorizationResult(null, AuthorizationResult.Type.ALLOW, "")); + + public static AuthorizationSession from(Authentication auth, Authorizer authorizer) { + return from( + (privilege, resourceSpec) -> { + final AuthorizationRequest request = + new AuthorizationRequest( + auth.getActor().toUrnStr(), privilege, Optional.ofNullable(resourceSpec)); + return authorizer.authorize(request); + }); + } + + public static AuthorizationSession from( + BiFunction<String, EntitySpec, AuthorizationResult> authFunction) { + return new TestAuthSession(authFunction); + } + + private final BiFunction<String, EntitySpec, AuthorizationResult> authFunction; + + public TestAuthSession(BiFunction<String, EntitySpec, AuthorizationResult> authFunction) { + this.authFunction = authFunction; + } + + @Override + public AuthorizationResult authorize( + @Nonnull String privilege, @Nullable EntitySpec resourceSpec) { + return authFunction.apply(privilege, resourceSpec); + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 76f58fb4751085..cdcbb540eeda43 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -259,6 +259,12 @@ public static OperationContext userContextNoSearchAuthorization( .asSession(RequestContext.TEST, authorizer, sessionAuthorization); } + public static OperationContext userContextNoSearchAuthorization( + @Nonnull RequestContext requestContext) { + return systemContextNoSearchAuthorization(defaultEntityRegistry()) + .asSession(requestContext, Authorizer.EMPTY, TEST_USER_AUTH); + } + @Builder public static class EmptyAspectRetriever implements AspectRetriever { private final Supplier<EntityRegistry> entityRegistrySupplier; diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java index 28cc9304bf913f..cc96429c65e76b 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java @@ -4,6 +4,7 @@ import com.linkedin.metadata.config.cache.CacheConfiguration; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.metadata.config.search.SearchServiceConfiguration; import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import lombok.Data; @@ -37,6 +38,9 @@ public class DataHubAppConfiguration { /** ElasticSearch configurations */ private ElasticSearchConfiguration elasticSearch; + /* Search Service configurations */ + private SearchServiceConfiguration searchService; + /** System Update configurations */ private SystemUpdateConfiguration systemUpdate; diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java index f988758beee363..4e8c18912c40ea 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/MetadataChangeProposalConfig.java @@ -14,10 +14,30 @@ public class MetadataChangeProposalConfig { @Accessors(chain = true) public static class ThrottlesConfig { Integer updateIntervalMs; + ComponentsThrottleConfig components; ThrottleConfig versioned; ThrottleConfig timeseries; } + @Data + @Accessors(chain = true) + public static class ComponentsThrottleConfig { + MceConsumerThrottleConfig mceConsumer; + ApiRequestsThrottleConfig apiRequests; + } + + @Data + @Accessors(chain = true) + public static class MceConsumerThrottleConfig { + boolean enabled; + } + + @Data + @Accessors(chain = true) + public static class ApiRequestsThrottleConfig { + boolean enabled; + } + @Data @Accessors(chain = true) public static class ThrottleConfig { diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java new file mode 100644 index 00000000000000..f8b62d3c4bb7fb --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/QueryFilterRewriterConfiguration.java @@ -0,0 +1,24 @@ +package com.linkedin.metadata.config.search; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +public class QueryFilterRewriterConfiguration { + + private ExpansionRewriterConfiguration containerExpansion; + private ExpansionRewriterConfiguration domainExpansion; + + @NoArgsConstructor + @AllArgsConstructor + @Data + public static class ExpansionRewriterConfiguration { + public static final ExpansionRewriterConfiguration DEFAULT = + new ExpansionRewriterConfiguration(false, 100, 100); + + boolean enabled; + private int pageSize; + private int limit; + } +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java 
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java new file mode 100644 index 00000000000000..6c6d397b3fec10 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchServiceConfiguration.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.config.search; + +import lombok.Data; + +@Data +public class SearchServiceConfiguration { + private QueryFilterRewriterConfiguration queryFilterRewriter; +} diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 0977c64d0e8609..45a98b472b0aee 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -108,6 +108,15 @@ searchService: cache: hazelcast: serviceName: ${SEARCH_SERVICE_HAZELCAST_SERVICE_NAME:hazelcast-service} + queryFilterRewriter: + containerExpansion: + enabled: ${SEARCH_SERVICE_FILTER_CONTAINER_EXPANSION_ENABLED:true} + pageSize: ${SEARCH_SERVICE_FILTER_CONTAINER_EXPANSION_PAGE_SIZE:100} + limit: ${SEARCH_SERVICE_FILTER_CONTAINER_EXPANSION_LIMIT:100} + domainExpansion: + enabled: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_ENABLED:true} + pageSize: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_PAGE_SIZE:100} + limit: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_LIMIT:100} configEntityRegistry: path: ${ENTITY_REGISTRY_CONFIG_PATH:../../metadata-models/src/main/resources/entity-registry.yml} @@ -387,7 +396,15 @@ systemUpdate: batchSize: ${SYSTEM_UPDATE_SCHEMA_FIELDS_DOC_IDS_BATCH_SIZE:500} delayMs: ${SYSTEM_UPDATE_SCHEMA_FIELDS_DOC_IDS_DELAY_MS:5000} limit: ${SYSTEM_UPDATE_SCHEMA_FIELDS_DOC_IDS_LIMIT:0} - + processInstanceHasRunEvents: + enabled: ${SYSTEM_UPDATE_PROCESS_INSTANCE_HAS_RUN_EVENTS_ENABLED:true} + batchSize: ${SYSTEM_UPDATE_PROCESS_INSTANCE_HAS_RUN_EVENTS_BATCH_SIZE:100} + delayMs: ${SYSTEM_UPDATE_PROCESS_INSTANCE_HAS_RUN_EVENTS_DELAY_MS:1000} + totalDays: ${SYSTEM_UPDATE_PROCESS_INSTANCE_HAS_RUN_EVENTS_TOTAL_DAYS:90} + windowDays: ${SYSTEM_UPDATE_PROCESS_INSTANCE_HAS_RUN_EVENTS_WINDOW_DAYS:1} + reprocess: + enabled: ${SYSTEM_UPDATE_PROCESS_INSTANCE_HAS_RUN_EVENTS_REPROCESS:false} + structuredProperties: enabled: ${ENABLE_STRUCTURED_PROPERTIES_HOOK:true} # applies structured properties mappings writeEnabled: ${ENABLE_STRUCTURED_PROPERTIES_WRITE:true} # write structured property values @@ -477,6 +494,9 @@ cache: status: 20 corpUserCredentials: 20 corpUserSettings: 20 + roleMembership: 20 + groupMembership: 20 + nativeGroupMembership: 20 structuredProperty: status: 300 # 5 min propertyDefinition: 300 # 5 min @@ -523,18 +543,27 @@ metadataChangeProposal: throttle: updateIntervalMs: ${MCP_THROTTLE_UPDATE_INTERVAL_MS:60000} - # Versioned MCL topic + # What component is throttled + components: + mceConsumer: + enabled: ${MCP_MCE_CONSUMER_THROTTLE_ENABLED:false} + apiRequests: + enabled: ${MCP_API_REQUESTS_THROTTLE_ENABLED:false} + + # How is it throttled + # Versioned MCL topic settings versioned: - # Whether to throttle MCP processing based on MCL backlog + # Whether to monitor MCL versioned backlog enabled: ${MCP_VERSIONED_THROTTLE_ENABLED:false} threshold: ${MCP_VERSIONED_THRESHOLD:4000} # throttle threshold maxAttempts: ${MCP_VERSIONED_MAX_ATTEMPTS:1000} initialIntervalMs: ${MCP_VERSIONED_INITIAL_INTERVAL_MS:100} multiplier: ${MCP_VERSIONED_MULTIPLIER:10} maxIntervalMs: ${MCP_VERSIONED_MAX_INTERVAL_MS:30000} - # Timeseries MCL topic + + # 
Timeseries MCL topic settings timeseries: - # Whether to throttle MCP processing based on MCL backlog + # Whether to monitor MCL timeseries backlog enabled: ${MCP_TIMESERIES_THROTTLE_ENABLED:false} threshold: ${MCP_TIMESERIES_THRESHOLD:4000} # throttle threshold maxAttempts: ${MCP_TIMESERIES_MAX_ATTEMPTS:1000} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java index 185e1e3ae624c4..383716a80cc60a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/common/CacheConfig.java @@ -4,12 +4,18 @@ import com.hazelcast.config.Config; import com.hazelcast.config.EvictionConfig; import com.hazelcast.config.EvictionPolicy; +import com.hazelcast.config.InMemoryFormat; import com.hazelcast.config.MapConfig; import com.hazelcast.config.MaxSizePolicy; +import com.hazelcast.config.MergePolicyConfig; +import com.hazelcast.config.ReplicatedMapConfig; import com.hazelcast.core.Hazelcast; import com.hazelcast.core.HazelcastInstance; +import com.hazelcast.spi.merge.LatestUpdateMergePolicy; import com.hazelcast.spring.cache.HazelcastCacheManager; +import java.util.List; import java.util.concurrent.TimeUnit; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.cache.CacheManager; @@ -19,6 +25,7 @@ @Configuration public class CacheConfig { + public static final String THROTTLE_MAP = "distributedThrottle"; @Value("${cache.primary.ttlSeconds:600}") private int cacheTtlSeconds; @@ -45,23 +52,15 @@ private Caffeine<Object, Object> caffeineCacheBuilder() { .recordStats(); } - @Bean + @Bean("hazelcastInstance") @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") - public CacheManager hazelcastCacheManager() { + public HazelcastInstance hazelcastInstance( + List<MapConfig> hazelcastMapConfigs, + List<ReplicatedMapConfig> hazelcastReplicatedMapConfigs) { Config config = new Config(); - // TODO: This setting is equivalent to expireAfterAccess, refreshes timer after a get, put, - // containsKey etc. - // is this behavior what we actually desire? Should we change it now?
- MapConfig mapConfig = new MapConfig().setMaxIdleSeconds(cacheTtlSeconds); - EvictionConfig evictionConfig = - new EvictionConfig() - .setMaxSizePolicy(MaxSizePolicy.PER_NODE) - .setSize(cacheMaxSize) - .setEvictionPolicy(EvictionPolicy.LFU); - mapConfig.setEvictionConfig(evictionConfig); - mapConfig.setName("default"); - config.addMapConfig(mapConfig); + hazelcastMapConfigs.forEach(config::addMapConfig); + hazelcastReplicatedMapConfigs.forEach(config::addReplicatedMapConfig); // Force classloader to load from application code config.setClassLoader(this.getClass().getClassLoader()); @@ -74,8 +73,44 @@ public CacheManager hazelcastCacheManager() { .setEnabled(true) .setProperty("service-dns", hazelcastServiceName); - HazelcastInstance hazelcastInstance = Hazelcast.newHazelcastInstance(config); + return Hazelcast.newHazelcastInstance(config); + } + @Bean + @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") + public CacheManager hazelcastCacheManager( + @Qualifier("hazelcastInstance") final HazelcastInstance hazelcastInstance) { return new HazelcastCacheManager(hazelcastInstance); } + + @Bean + @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") + public MapConfig defaultMapConfig() { + // TODO: This setting is equivalent to expireAfterAccess, refreshes timer after a get, put, + // containsKey etc. + // is this behavior what we actually desire? Should we change it now? + MapConfig mapConfig = new MapConfig().setMaxIdleSeconds(cacheTtlSeconds); + + EvictionConfig evictionConfig = + new EvictionConfig() + .setMaxSizePolicy(MaxSizePolicy.PER_NODE) + .setSize(cacheMaxSize) + .setEvictionPolicy(EvictionPolicy.LFU); + mapConfig.setEvictionConfig(evictionConfig); + mapConfig.setName("default"); + return mapConfig; + } + + @Bean + @ConditionalOnProperty(name = "searchService.cacheImplementation", havingValue = "hazelcast") + public ReplicatedMapConfig distributedThrottleMapConfig() { + ReplicatedMapConfig mapConfig = new ReplicatedMapConfig(); + mapConfig + .setName(THROTTLE_MAP) + .setInMemoryFormat(InMemoryFormat.OBJECT) + .setMergePolicyConfig( + new MergePolicyConfig().setPolicy(LatestUpdateMergePolicy.class.getName())); + + return mapConfig; + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index 51eea1578596bb..aa29908e415074 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -3,17 +3,21 @@ import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.dao.producer.KafkaEventProducer; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import java.util.List; import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import 
org.springframework.context.annotation.DependsOn; +@Slf4j @Configuration public class EntityServiceFactory { @@ -26,17 +30,35 @@ public class EntityServiceFactory { protected EntityService createInstance( @Qualifier("kafkaEventProducer") final KafkaEventProducer eventProducer, @Qualifier("entityAspectDao") final AspectDao aspectDao, - final ConfigurationProvider configurationProvider, - @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2) { + @Qualifier("configurationProvider") ConfigurationProvider configurationProvider, + @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2, + final List<ThrottleSensor> throttleSensors) { FeatureFlags featureFlags = configurationProvider.getFeatureFlags(); - return new EntityServiceImpl( - aspectDao, - eventProducer, - featureFlags.isAlwaysEmitChangeLog(), - featureFlags.getPreProcessHooks(), - _ebeanMaxTransactionRetry, - enableBrowsePathV2); + EntityServiceImpl entityService = + new EntityServiceImpl( + aspectDao, + eventProducer, + featureFlags.isAlwaysEmitChangeLog(), + featureFlags.getPreProcessHooks(), + _ebeanMaxTransactionRetry, + enableBrowsePathV2); + + if (throttleSensors != null + && !throttleSensors.isEmpty() + && configurationProvider + .getMetadataChangeProposal() + .getThrottle() + .getComponents() + .getApiRequests() + .isEnabled()) { + log.info("API Requests Throttle Enabled"); + throttleSensors.forEach(sensor -> sensor.addCallback(entityService::handleThrottleEvent)); + } else { + log.info("API Requests Throttle Disabled"); + } + + return entityService; + } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java new file mode 100644 index 00000000000000..72505beb5b40e1 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/throttle/ManualThrottleSensor.java @@ -0,0 +1,117 @@ +package com.linkedin.gms.factory.entity.throttle; + +import static com.linkedin.gms.factory.common.CacheConfig.THROTTLE_MAP; + +import com.hazelcast.core.EntryEvent; +import com.hazelcast.core.EntryListener; +import com.hazelcast.core.HazelcastInstance; +import com.hazelcast.map.MapEvent; +import com.hazelcast.replicatedmap.ReplicatedMap; +import com.linkedin.metadata.dao.throttle.ThrottleControl; +import com.linkedin.metadata.dao.throttle.ThrottleEvent; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; +import com.linkedin.metadata.dao.throttle.ThrottleType; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import org.springframework.stereotype.Component; + +/** + * Uses the distributed cache to propagate a manual throttle event when GMS is run in a distributed + * mode.
+ */ +@Component +public class ManualThrottleSensor implements ThrottleSensor { + private static final ThrottleEvent ENABLE = + ThrottleEvent.builder().throttled(Map.of(ThrottleType.MANUAL, true)).build(); + private static final ThrottleEvent DISABLE = + ThrottleEvent.builder().throttled(Map.of(ThrottleType.MANUAL, false)).build(); + + /** A list of throttle event listeners to execute when throttling occurs and ceases */ + private final List<Function<ThrottleEvent, ThrottleControl>> throttleCallbacks = + new ArrayList<>(); + + private final Set<ThrottleControl> registeredThrottles = new HashSet<>(); + + @Nullable private final ReplicatedMap<String, String> throttleState; + + public ManualThrottleSensor(@Nullable final HazelcastInstance hazelcastInstance) { + if (hazelcastInstance != null) { + throttleState = hazelcastInstance.getReplicatedMap(THROTTLE_MAP); + throttleState.addEntryListener( + ManualThrottleTypeListener.builder().manualThrottleSensor(this).build()); + } else { + throttleState = null; + } + } + + @Override + public ManualThrottleSensor addCallback(Function<ThrottleEvent, ThrottleControl> callback) { + throttleCallbacks.add(callback); + return this; + } + + public void setThrottle(boolean enabled) { + if (throttleState == null) { + // set local only + setLocalThrottle(enabled); + } else { + // set shared location for distribution + throttleState.put(ThrottleType.MANUAL.toString(), enabled ? "true" : "false"); + } + } + + private void setLocalThrottle(boolean enabled) { + synchronized (this) { + registeredThrottles.forEach(listener -> listener.execute(DISABLE)); + registeredThrottles.clear(); + + if (enabled) { + registeredThrottles.addAll( + throttleCallbacks.stream() + .map(listener -> listener.apply(ENABLE)) + .collect(Collectors.toSet())); + } + } + } + + @Builder + private record ManualThrottleTypeListener(@Nonnull ManualThrottleSensor manualThrottleSensor) + implements EntryListener<String, String> { + @Override + public void entryAdded(EntryEvent<String, String> event) { + if (ThrottleType.MANUAL.equals(ThrottleType.valueOf(event.getKey()))) { + manualThrottleSensor.setLocalThrottle(Boolean.parseBoolean(event.getValue())); + } + } + + @Override + public void entryUpdated(EntryEvent<String, String> event) { + if (ThrottleType.MANUAL.equals(ThrottleType.valueOf(event.getKey()))) { + manualThrottleSensor.setLocalThrottle(Boolean.parseBoolean(event.getValue())); + } + } + + @Override + public void entryRemoved(EntryEvent<String, String> event) {} + + @Override + public void entryEvicted(EntryEvent<String, String> entryEvent) {} + + @Override + public void entryExpired(EntryEvent<String, String> entryEvent) {} + + @Override + public void mapCleared(MapEvent mapEvent) {} + + @Override + public void mapEvicted(MapEvent mapEvent) {} + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java similarity index 59% rename from metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java rename to metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java index 1eaff82fd517f0..e2cdca8a065c03 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaProducerThrottleFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/throttle/KafkaThrottleFactory.java @@ -1,15 +1,16 @@ package com.linkedin.gms.factory.kafka.throttle; -import com.datahub.metadata.dao.producer.KafkaProducerThrottle; +import
com.datahub.metadata.dao.throttle.KafkaThrottleSensor; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.config.MetadataChangeProposalConfig; import com.linkedin.metadata.config.kafka.KafkaConfiguration; +import com.linkedin.metadata.dao.throttle.NoOpSensor; +import com.linkedin.metadata.dao.throttle.ThrottleSensor; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.Topics; import java.util.Arrays; import java.util.HashMap; import java.util.Map; -import java.util.Optional; import lombok.extern.slf4j.Slf4j; import org.apache.kafka.clients.admin.AdminClient; import org.apache.kafka.clients.admin.AdminClientConfig; @@ -19,19 +20,14 @@ import org.springframework.boot.autoconfigure.kafka.KafkaProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.kafka.config.KafkaListenerEndpointRegistry; -import org.springframework.kafka.listener.MessageListenerContainer; @Slf4j @Configuration -public class KafkaProducerThrottleFactory { +public class KafkaThrottleFactory { @Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}") private String maeConsumerGroupId; - @Value("${METADATA_CHANGE_PROPOSAL_KAFKA_CONSUMER_GROUP_ID:generic-mce-consumer-job-client}") - private String mceConsumerGroupId; - @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}") private String versionedTopicName; @@ -39,41 +35,28 @@ public class KafkaProducerThrottleFactory { "${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}") private String timeseriesTopicName; - @Bean - public KafkaProducerThrottle kafkaProducerThrottle( + @Bean("kafkaThrottle") + public ThrottleSensor kafkaThrottle( @Qualifier("configurationProvider") ConfigurationProvider provider, final KafkaProperties kafkaProperties, - final EntityRegistry entityRegistry, - final KafkaListenerEndpointRegistry registry) { + final EntityRegistry entityRegistry) { KafkaConfiguration kafkaConfiguration = provider.getKafka(); MetadataChangeProposalConfig mcpConfig = provider.getMetadataChangeProposal(); - return KafkaProducerThrottle.builder() - .entityRegistry(entityRegistry) - .kafkaAdmin(kafkaAdmin(kafkaConfiguration, kafkaProperties)) - .config(mcpConfig.getThrottle()) - .mclConsumerGroupId(maeConsumerGroupId) - .timeseriesTopicName(timeseriesTopicName) - .versionedTopicName(versionedTopicName) - .pauseConsumer( - (pause) -> { - Optional<MessageListenerContainer> container = - Optional.ofNullable(registry.getListenerContainer(mceConsumerGroupId)); - if (container.isEmpty()) { - log.warn( - "Expected container was missing: {} throttling is not possible.", - mceConsumerGroupId); - } else { - if (pause) { - container.ifPresent(MessageListenerContainer::pause); - } else { - container.ifPresent(MessageListenerContainer::resume); - } - } - }) - .build() - .start(); + if (mcpConfig.getThrottle().getUpdateIntervalMs() > 0) { + return KafkaThrottleSensor.builder() + .entityRegistry(entityRegistry) + .kafkaAdmin(kafkaAdmin(kafkaConfiguration, kafkaProperties)) + .config(mcpConfig.getThrottle()) + .mclConsumerGroupId(maeConsumerGroupId) + .timeseriesTopicName(timeseriesTopicName) + .versionedTopicName(versionedTopicName) + .build() + .start(); + } else { + return new NoOpSensor(); + } } private static AdminClient kafkaAdmin( diff --git
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index 7a40c474ace747..d921e20f722ccf 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -15,6 +15,7 @@ import com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.ESBrowseDAO; import com.linkedin.metadata.search.elasticsearch.query.ESSearchDAO; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO; import java.io.IOException; import javax.annotation.Nonnull; @@ -55,7 +56,9 @@ public class ElasticSearchServiceFactory { @Bean(name = "elasticSearchService") @Nonnull - protected ElasticSearchService getInstance(final ConfigurationProvider configurationProvider) + protected ElasticSearchService getInstance( + final ConfigurationProvider configurationProvider, + final QueryFilterRewriteChain queryFilterRewriteChain) throws IOException { log.info("Search configuration: {}", configurationProvider.getElasticSearch().getSearch()); @@ -73,12 +76,16 @@ protected ElasticSearchService getInstance(final ConfigurationProvider configura configurationProvider.getFeatureFlags().isPointInTimeCreationEnabled(), elasticSearchConfiguration.getImplementation(), searchConfiguration, - customSearchConfiguration); + customSearchConfiguration, + queryFilterRewriteChain); return new ElasticSearchService( entityIndexBuilders, esSearchDAO, new ESBrowseDAO( - components.getSearchClient(), searchConfiguration, customSearchConfiguration), + components.getSearchClient(), + searchConfiguration, + customSearchConfiguration, + queryFilterRewriteChain), new ESWriteDAO( components.getSearchClient(), components.getBulkProcessor(), diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java new file mode 100644 index 00000000000000..2ddb0e6af92552 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/filter/QueryFilterRewriterChainFactory.java @@ -0,0 +1,51 @@ +package com.linkedin.gms.factory.search.filter; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.search.elasticsearch.query.filter.ContainerExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.DomainExpansionRewriter; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriter; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class QueryFilterRewriterChainFactory { + + @Bean + @ConditionalOnProperty( + name = "searchService.queryFilterRewriter.containerExpansion.enabled", + havingValue = "true") + public QueryFilterRewriter 
containerExpansionRewriter( + final ConfigurationProvider configurationProvider) { + return ContainerExpansionRewriter.builder() + .config( + configurationProvider + .getSearchService() + .getQueryFilterRewriter() + .getContainerExpansion()) + .build(); + } + + @Bean + @ConditionalOnProperty( + name = "searchService.queryFilterRewriter.domainExpansion.enabled", + havingValue = "true") + public QueryFilterRewriter domainExpansionRewriter( + final ConfigurationProvider configurationProvider) { + return DomainExpansionRewriter.builder() + .config( + configurationProvider.getSearchService().getQueryFilterRewriter().getDomainExpansion()) + .build(); + } + + @SuppressWarnings("OptionalUsedAsFieldOrParameterType") + @Bean + public QueryFilterRewriteChain queryFilterRewriteChain( + Optional<List<QueryFilterRewriter>> queryFilterRewriters) { + return new QueryFilterRewriteChain(queryFilterRewriters.orElse(Collections.emptyList())); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java index c68db9c3d3e5e5..e26de0e7301951 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java @@ -3,6 +3,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.timeseries.elastic.ElasticSearchTimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.indexbuilder.TimeseriesAspectIndexBuilders; import javax.annotation.Nonnull; @@ -25,12 +26,14 @@ public class ElasticSearchTimeseriesAspectServiceFactory { @Bean(name = "elasticSearchTimeseriesAspectService") @Nonnull - protected ElasticSearchTimeseriesAspectService getInstance() { + protected ElasticSearchTimeseriesAspectService getInstance( + final QueryFilterRewriteChain queryFilterRewriteChain) { return new ElasticSearchTimeseriesAspectService( components.getSearchClient(), new TimeseriesAspectIndexBuilders( components.getIndexBuilder(), entityRegistry, components.getIndexConvention()), components.getBulkProcessor(), - components.getNumRetries()); + components.getNumRetries(), + queryFilterRewriteChain); } } diff --git a/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java b/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java index dc6d4a33f936ea..8a52c545dc80c2 100644 --- a/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java +++ b/metadata-service/openapi-analytics-servlet/src/main/java/io/datahubproject/openapi/delegates/DatahubUsageEventsImpl.java @@ -15,6 +15,7 @@ import jakarta.servlet.http.HttpServletRequest; import java.util.List; import java.util.Objects; +import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.ResponseEntity; @@ -35,7 +36,6
@@ public class DatahubUsageEventsImpl implements DatahubUsageEventsApiDelegate { @Override public ResponseEntity raw(String body) { Authentication authentication = AuthenticationContext.getAuthentication(); - checkAnalyticsAuthorized(authentication); OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -44,15 +44,16 @@ public ResponseEntity raw(String body) { _authorizationChain, authentication, true); + checkAnalyticsAuthorized(opContext); return ResponseEntity.of( _searchService.raw(opContext, DATAHUB_USAGE_INDEX, body).map(Objects::toString)); } - private void checkAnalyticsAuthorized(Authentication authentication) { - if (!AuthUtil.isAPIAuthorized(authentication, _authorizationChain, ANALYTICS, READ)) { + private void checkAnalyticsAuthorized(@Nonnull OperationContext opContext) { + if (!AuthUtil.isAPIAuthorized(opContext, ANALYTICS, READ)) { throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to get analytics."); + opContext.getActorContext().getActorUrn() + " is unauthorized to get analytics."); } } } diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java index c5f87c3f1dced4..84f783ea53e36e 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java @@ -178,11 +178,6 @@ public ResponseEntity head(String urn) { try { Urn entityUrn = Urn.createFromString(urn); final Authentication auth = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - auth, _authorizationChain, EXISTS, List.of(entityUrn))) { - throw new UnauthorizedException( - auth.getActor().toUrnStr() + " is unauthorized to check existence of entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -193,6 +188,11 @@ public ResponseEntity head(String urn) { auth, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, EXISTS, List.of(entityUrn))) { + throw new UnauthorizedException( + auth.getActor().toUrnStr() + " is unauthorized to check existence of entities."); + } + if (_entityService.exists(opContext, entityUrn, true)) { return new ResponseEntity<>(HttpStatus.NO_CONTENT); } else { @@ -242,11 +242,6 @@ public ResponseEntity headAspect(String urn, String aspect) { Urn entityUrn = Urn.createFromString(urn); final Authentication auth = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - auth, _authorizationChain, EXISTS, List.of(entityUrn))) { - throw new UnauthorizedException( - auth.getActor().toUrnStr() + " is unauthorized to check existence of entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -257,6 +252,11 @@ public ResponseEntity headAspect(String urn, String aspect) { auth, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, EXISTS, List.of(entityUrn))) { + throw new UnauthorizedException( + auth.getActor().toUrnStr() + " is unauthorized to check existence of entities."); + } + if (_entityService.exists(opContext, entityUrn, aspect, true)) { return new ResponseEntity<>(HttpStatus.NO_CONTENT); } else { @@ -612,13 +612,6 @@ public ResponseEntity scroll( 
OpenApiEntitiesUtil.responseClassToEntitySpec(_entityRegistry, _respClazz); Authentication authentication = AuthenticationContext.getAuthentication(); - - if (!AuthUtil.isAPIAuthorizedEntityType( - authentication, _authorizationChain, READ, entitySpec.getName())) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to search entities."); - } - OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -629,6 +622,11 @@ public ResponseEntity scroll( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityType(opContext, READ, entitySpec.getName())) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to search entities."); + } + List sortCriteria = Optional.ofNullable(sort).orElse(Collections.singletonList("urn")).stream() .map( @@ -653,7 +651,7 @@ public ResponseEntity scroll( null, count); - if (!AuthUtil.isAPIAuthorizedResult(authentication, _authorizationChain, result)) { + if (!AuthUtil.isAPIAuthorizedResult(opContext, result)) { throw new UnauthorizedException( authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java index 0e9fcbe15b525b..dc4726900a1c31 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java @@ -1,11 +1,13 @@ package io.datahubproject.openapi; +import com.linkedin.metadata.dao.throttle.APIThrottleException; import io.datahubproject.openapi.exception.InvalidUrnException; import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.ConversionNotSupportedException; import org.springframework.core.Ordered; import org.springframework.core.convert.ConversionFailedException; +import org.springframework.http.HttpHeaders; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ControllerAdvice; @@ -30,4 +32,17 @@ public ResponseEntity handleConflict(RuntimeException ex) { public static ResponseEntity<Map<String, String>> handleUrnException(InvalidUrnException e) { return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.BAD_REQUEST); } + + @ExceptionHandler(APIThrottleException.class) + public static ResponseEntity<Map<String, String>> handleThrottleException( + APIThrottleException e) { + + HttpHeaders headers = new HttpHeaders(); + if (e.getDurationMs() >= 0) { + headers.add(HttpHeaders.RETRY_AFTER, String.valueOf(e.getDurationSeconds())); + } + + return new ResponseEntity<>( + Map.of("error", e.getMessage()), headers, HttpStatus.TOO_MANY_REQUESTS); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 8d89417b292155..b7f52e61e92442 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -181,11 +181,6 @@ public ResponseEntity getEntities(
EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityType(authentication, authorizationChain, READ, entityName)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -196,6 +191,11 @@ public ResponseEntity getEntities( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityType(opContext, READ, entityName)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + List sortCriteria; if (!CollectionUtils.isEmpty(sortFields)) { sortCriteria = new ArrayList<>(); @@ -220,7 +220,7 @@ public ResponseEntity getEntities( null, count); - if (!AuthUtil.isAPIAuthorizedResult(authentication, authorizationChain, result)) { + if (!AuthUtil.isAPIAuthorizedResult(opContext, result)) { throw new UnauthorizedException( authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); } @@ -250,11 +250,6 @@ public ResponseEntity getEntity( Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, READ, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -265,6 +260,11 @@ public ResponseEntity getEntity( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, READ, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + return buildEntityList( opContext, List.of(urn), @@ -290,11 +290,6 @@ public ResponseEntity headEntity( Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, EXISTS, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -305,6 +300,11 @@ public ResponseEntity headEntity( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, EXISTS, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); + } + return exists(opContext, urn, null, includeSoftDelete) ? 
ResponseEntity.noContent().build() : ResponseEntity.notFound().build(); @@ -327,11 +327,6 @@ public ResponseEntity getAspect( Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, READ, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -342,6 +337,11 @@ public ResponseEntity getAspect( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, READ, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + final List resultList; if (version == 0) { resultList = buildEntityList(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata); @@ -380,11 +380,6 @@ public ResponseEntity headAspect( Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, EXISTS, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -395,6 +390,11 @@ public ResponseEntity headAspect( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, EXISTS, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); + } + return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName(), includeSoftDelete) ? 
ResponseEntity.noContent().build() : ResponseEntity.notFound().build(); @@ -406,17 +406,13 @@ public void deleteEntity( HttpServletRequest request, @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn) + @PathVariable("entityUrn") String entityUrn, + @RequestParam(value = "aspects", required = false, defaultValue = "") Set<String> aspects, + @RequestParam(value = "clear", required = false, defaultValue = "false") boolean clear) throws InvalidUrnException { - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, DELETE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -427,7 +423,31 @@ public void deleteEntity( authentication, true); - entityService.deleteUrn(opContext, urn); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, DELETE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); + } + + EntitySpec entitySpec = entityRegistry.getEntitySpec(urn.getEntityType()); + + if (clear) { + // remove all aspects, preserve entity by retaining key aspect + aspects = + entitySpec.getAspectSpecs().stream() + .map(AspectSpec::getName) + .filter(name -> !name.equals(entitySpec.getKeyAspectName())) + .collect(Collectors.toSet()); + } + + if (aspects == null || aspects.isEmpty() || aspects.contains(entitySpec.getKeyAspectName())) { + entityService.deleteUrn(opContext, urn); + } else { + aspects.stream() + .map(aspectName -> lookupAspectSpec(urn, aspectName).getName()) + .forEach( + aspectName -> + entityService.deleteAspect(opContext, entityUrn, aspectName, Map.of(), true)); + } } @Tag(name = "Generic Entities") @@ -443,13 +463,6 @@ public ResponseEntity<List<E>> createEntity( throws InvalidUrnException, JsonProcessingException { Authentication authentication = AuthenticationContext.getAuthentication(); - - if (!AuthUtil.isAPIAuthorizedEntityType( - authentication, authorizationChain, CREATE, entityName)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); - } - OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -460,6 +473,11 @@ public ResponseEntity<List<E>> createEntity( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityType(opContext, CREATE, entityName)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); + } + AspectsBatch batch = toMCPBatch(opContext, jsonEntityList, authentication.getActor()); Set<IngestResult> results = entityService.ingestProposal(opContext, batch, async); @@ -482,11 +500,6 @@ public void deleteAspect( Urn urn = validatedUrn(entityUrn); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, DELETE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -497,6 +510,11 @@ public void deleteAspect( authentication,
true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, DELETE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); + } + entityService.deleteAspect( opContext, entityUrn, lookupAspectSpec(urn, aspectName).getName(), Map.of(), true); } @@ -521,12 +539,6 @@ public ResponseEntity createAspect( Urn urn = validatedUrn(entityUrn); EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); Authentication authentication = AuthenticationContext.getAuthentication(); - - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, CREATE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -537,6 +549,11 @@ public ResponseEntity createAspect( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, CREATE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); + } + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); ChangeMCP upsert = toUpsertItem( @@ -586,11 +603,6 @@ public ResponseEntity patchAspect( Urn urn = validatedUrn(entityUrn); EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, UPDATE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + UPDATE + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -601,6 +613,11 @@ public ResponseEntity patchAspect( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, UPDATE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + UPDATE + " entities."); + } + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectSpec.getName(), 0); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericRelationshipController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericRelationshipController.java index efc3d9375e09e7..44f2f8ea03643a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericRelationshipController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericRelationshipController.java @@ -17,15 +17,19 @@ import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.search.utils.QueryUtils; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.UnauthorizedException; import io.datahubproject.openapi.models.GenericScrollResult; import io.datahubproject.openapi.v2.models.GenericRelationship; import io.swagger.v3.oas.annotations.Operation; +import jakarta.servlet.http.HttpServletRequest; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; import 
org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.*; @@ -36,6 +40,10 @@ public abstract class GenericRelationshipController { @Autowired private ElasticSearchGraphService graphService; @Autowired private AuthorizerChain authorizationChain; + @Qualifier("systemOperationContext") + @Autowired + protected OperationContext systemOperationContext; + /** * Returns relationship edges by type * @@ -47,12 +55,26 @@ public abstract class GenericRelationshipController { @GetMapping(value = "/{relationshipType}", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Scroll relationships of the given type.") public ResponseEntity<GenericScrollResult<GenericRelationship>> getRelationshipsByType( + HttpServletRequest request, @PathVariable("relationshipType") String relationshipType, @RequestParam(value = "count", defaultValue = "10") Integer count, @RequestParam(value = "scrollId", required = false) String scrollId) { Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorized(authentication, authorizationChain, RELATIONSHIP, READ)) { + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "getRelationshipsByType", + List.of()), + authorizationChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorized(opContext, RELATIONSHIP, READ)) { throw new UnauthorizedException( authentication.getActor().toUrnStr() + " is unauthorized to " @@ -76,8 +98,7 @@ public ResponseEntity<GenericScrollResult<GenericRelationship>> getRelationships null); if (!AuthUtil.isAPIAuthorizedUrns( - authentication, - authorizationChain, + opContext, RELATIONSHIP, READ, result.getEntities().stream() @@ -114,6 +135,7 @@ public ResponseEntity<GenericScrollResult<GenericRelationship>> getRelationships @GetMapping(value = "/{entityName}/{entityUrn}", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Scroll relationships from a given entity.") public ResponseEntity<GenericScrollResult<GenericRelationship>> getRelationshipsByEntity( + HttpServletRequest request, @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, @RequestParam(value = "relationshipType[]", required = false, defaultValue = "*") @@ -125,12 +147,21 @@ public ResponseEntity<GenericScrollResult<GenericRelationship>> getRelationships final RelatedEntitiesScrollResult result; Authentication authentication = AuthenticationContext.getAuthentication(); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "getRelationshipsByEntity", + List.of()), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedUrns( - authentication, - authorizationChain, - RELATIONSHIP, - READ, - List.of(UrnUtils.getUrn(entityUrn)))) { + opContext, RELATIONSHIP, READ, List.of(UrnUtils.getUrn(entityUrn)))) { throw new UnauthorizedException( authentication.getActor().toUrnStr() + " is unauthorized to " @@ -178,8 +209,7 @@ public ResponseEntity<GenericScrollResult<GenericRelationship>> getRelationships } if (!AuthUtil.isAPIAuthorizedUrns( - authentication, - authorizationChain, + opContext, RELATIONSHIP, READ, result.getEntities().stream() diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java
b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index ea72bac73edf38..083e515d055f5d 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -99,12 +99,19 @@ public void initBinder(WebDataBinder binder) { @Tag(name = "ElasticSearchOperations") @GetMapping(path = "/getTaskStatus", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get Task Status") - public ResponseEntity getTaskStatus(String task) { + public ResponseEntity getTaskStatus(HttpServletRequest request, String task) { Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - if (!AuthUtil.isAPIAuthorized( - authentication, authorizerChain, PoliciesConfig.GET_ES_TASK_STATUS_PRIVILEGE)) { + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi(actorUrnStr, request, "getTaskStatus", List.of()), + authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.GET_ES_TASK_STATUS_PRIVILEGE)) { return ResponseEntity.status(HttpStatus.FORBIDDEN) .body(String.format(actorUrnStr + " is not authorized to get ElasticSearch task status")); } @@ -139,11 +146,6 @@ public ResponseEntity getIndexSizes(HttpServletRequest request) { Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - if (!AuthUtil.isAPIAuthorized( - authentication, authorizerChain, PoliciesConfig.GET_TIMESERIES_INDEX_SIZES_PRIVILEGE)) { - return ResponseEntity.status(HttpStatus.FORBIDDEN) - .body(String.format(actorUrnStr + " is not authorized to get timeseries index sizes")); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -152,6 +154,11 @@ public ResponseEntity getIndexSizes(HttpServletRequest request) { authentication, true); + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.GET_TIMESERIES_INDEX_SIZES_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN) + .body(String.format(actorUrnStr + " is not authorized to get timeseries index sizes")); + } + List indexSizeResults = timeseriesAspectService.getIndexSizes(opContext); JSONObject j = new JSONObject(); @@ -230,12 +237,6 @@ public ResponseEntity explainSearchQuery( Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - - if (!AuthUtil.isAPIAuthorized( - authentication, authorizerChain, PoliciesConfig.ES_EXPLAIN_QUERY_PRIVILEGE)) { - log.error("{} is not authorized to get explain queries", actorUrnStr); - return ResponseEntity.status(HttpStatus.FORBIDDEN).body(null); - } OperationContext opContext = systemOperationContext .asSession( @@ -254,6 +255,11 @@ public ResponseEntity explainSearchQuery( } }); + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.ES_EXPLAIN_QUERY_PRIVILEGE)) { + log.error("{} is not authorized to get explain queries", actorUrnStr); + return ResponseEntity.status(HttpStatus.FORBIDDEN).body(null); + } + ExplainResponse response = searchService.explain( opContext, @@ -339,11 +345,6 @@ public ResponseEntity explainSearchQueryDiff( Authentication authentication = AuthenticationContext.getAuthentication(); String 
actorUrnStr = authentication.getActor().toUrnStr(); - if (!AuthUtil.isAPIAuthorized( - authentication, authorizerChain, PoliciesConfig.ES_EXPLAIN_QUERY_PRIVILEGE)) { - log.error("{} is not authorized to get explain queries", actorUrnStr); - return ResponseEntity.status(HttpStatus.FORBIDDEN).body(null); - } OperationContext opContext = systemOperationContext .asSession( @@ -362,6 +363,11 @@ public ResponseEntity explainSearchQueryDiff( } }); + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.ES_EXPLAIN_QUERY_PRIVILEGE)) { + log.error("{} is not authorized to get explain queries", actorUrnStr); + return ResponseEntity.status(HttpStatus.FORBIDDEN).body(null); + } + ExplainResponse responseA = searchService.explain( opContext, @@ -433,10 +439,6 @@ public ResponseEntity<List<RestoreIndicesResult>> restoreIndices( @RequestParam(required = false, name = "lePitEpochMs") @Nullable Long lePitEpochMs) { Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorized( - authentication, authorizerChain, PoliciesConfig.RESTORE_INDICES_PRIVILEGE)) { - return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -447,6 +449,10 @@ public ResponseEntity<List<RestoreIndicesResult>> restoreIndices( authentication, true); + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.RESTORE_INDICES_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); + } + RestoreIndicesArgs args = new RestoreIndicesArgs() .aspectName(aspectName) @@ -476,10 +482,6 @@ public ResponseEntity<List<RestoreIndicesResult>> restoreIndices( throws RemoteInvocationException, URISyntaxException { Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorized( - authentication, authorizerChain, PoliciesConfig.RESTORE_INDICES_PRIVILEGE)) { - return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -490,6 +492,10 @@ public ResponseEntity<List<RestoreIndicesResult>> restoreIndices( authentication, true); + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.RESTORE_INDICES_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); + } + return ResponseEntity.of( Optional.of( entityService.restoreIndices( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java new file mode 100644 index 00000000000000..3c44d94428f428 --- /dev/null +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/throttle/ThrottleController.java @@ -0,0 +1,113 @@ +package io.datahubproject.openapi.operations.throttle; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; +import com.datahub.authorization.AuthorizerChain; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.gms.factory.entity.throttle.ManualThrottleSensor; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityServiceImpl; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import
jakarta.servlet.http.HttpServletRequest; +import java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/operations/throttle") +@Slf4j +@Tag(name = "GMS Throttle Control", description = "An API for GMS throttle control.") +public class ThrottleController { + + private final OperationContext systemOperationContext; + private final AuthorizerChain authorizerChain; + private final EntityServiceImpl entityService; + private final ObjectMapper objectMapper; + private final ManualThrottleSensor manualThrottleSensor; + + public ThrottleController( + @Qualifier("systemOperationContext") OperationContext systemOperationContext, + EntityServiceImpl entityService, + AuthorizerChain authorizerChain, + ObjectMapper objectMapper, + ManualThrottleSensor manualThrottleSensor) { + this.systemOperationContext = systemOperationContext; + this.authorizerChain = authorizerChain; + this.entityService = entityService; + this.objectMapper = objectMapper; + this.manualThrottleSensor = manualThrottleSensor; + } + + @Tag(name = "API Requests") + @GetMapping(path = "/requests", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Get API Requests Throttle") + public ResponseEntity<Map<String, Object>> getManualAPIRequestsThrottle( + HttpServletRequest httpServletRequest) { + Authentication authentication = AuthenticationContext.getAuthentication(); + String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + actorUrnStr, httpServletRequest, "getManualAPIRequestsThrottle", List.of()), + authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.MANAGE_SYSTEM_OPERATIONS_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN) + .body( + Map.of( + "error", + String.format(actorUrnStr + " is not authorized for system operations."))); + } + + return ResponseEntity.ok( + objectMapper.convertValue(entityService.getThrottleEvents(), new TypeReference<>() {})); + } + + @Tag(name = "API Requests") + @PostMapping(path = "/requests/manual", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Set API Requests Manual Throttle") + public ResponseEntity<Map<String, Object>> setAPIRequestManualThrottle( + HttpServletRequest httpServletRequest, @RequestParam(name = "enabled") boolean enabled) { + + Authentication authentication = AuthenticationContext.getAuthentication(); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + httpServletRequest, + "setAPIRequestManualThrottle", + List.of()), + authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorized(opContext, PoliciesConfig.MANAGE_SYSTEM_OPERATIONS_PRIVILEGE)) { + return ResponseEntity.status(HttpStatus.FORBIDDEN).build(); + } + + manualThrottleSensor.setThrottle(enabled); + + return
getManualAPIRequestsThrottle(httpServletRequest); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java index 99eede15629d20..03050868efdcab 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/entities/EntitiesController.java @@ -115,10 +115,6 @@ public ResponseEntity<UrnResponseMap> getEntities( log.debug("GET ENTITIES {}", entityUrns); Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - - if (!AuthUtil.isAPIAuthorizedEntityUrns(authentication, _authorizerChain, READ, entityUrns)) { - throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -135,6 +131,10 @@ public ResponseEntity<UrnResponseMap> getEntities( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, READ, entityUrns)) { + throw new UnauthorizedException(actorUrnStr + " is unauthorized to get entities."); + } + if (entityUrns.size() <= 0) { return ResponseEntity.ok(UrnResponseMap.builder().responses(Collections.emptyMap()).build()); } @@ -209,9 +209,7 @@ public ResponseEntity<List<String>> postEntities( Ingest Authorization Checks */ List<Pair<MetadataChangeProposal, Integer>> exceptions = - isAPIAuthorized( - authentication, _authorizerChain, ENTITY, opContext.getEntityRegistry(), proposals) - .stream() + isAPIAuthorized(opContext, ENTITY, opContext.getEntityRegistry(), proposals).stream() .filter(p -> p.getSecond() != com.linkedin.restli.common.HttpStatus.S_200_OK.getCode()) .collect(Collectors.toList()); if (!exceptions.isEmpty()) { @@ -277,10 +275,6 @@ public ResponseEntity<List<RollbackRunResultDto>> deleteEntities( .map(UrnUtils::getUrn) .collect(Collectors.toSet()); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, _authorizerChain, DELETE, entityUrns)) { - throw new UnauthorizedException(actorUrnStr + " is unauthorized to delete entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -294,6 +288,10 @@ public ResponseEntity<List<RollbackRunResultDto>> deleteEntities( authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, DELETE, entityUrns)) { + throw new UnauthorizedException(actorUrnStr + " is unauthorized to delete entities."); + } + if (!soft) { return ResponseEntity.ok( entityUrns.stream() diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java index f91ebb61123f0c..d7baac3e10561a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/relationships/RelationshipsController.java @@ -16,6 +16,8 @@ import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.UnauthorizedException; import
io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; @@ -23,6 +25,7 @@ import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.responses.ApiResponse; import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; import java.net.URLDecoder; import java.nio.charset.Charset; import java.util.Arrays; @@ -58,6 +61,7 @@ public enum RelationshipDirection { } private static final int MAX_DOWNSTREAM_CNT = 200; + private final OperationContext systemOperationContext; private final GraphService _graphService; private final AuthorizerChain _authorizerChain; @@ -114,6 +118,7 @@ private RelatedEntitiesResult getRelatedEntities( content = @Content(schema = @Schema(implementation = RelatedEntitiesResult.class))) }) public ResponseEntity<RelatedEntitiesResult> getRelationships( + HttpServletRequest request, @Parameter( name = "urn", required = true, @@ -158,8 +163,16 @@ public ResponseEntity<RelatedEntitiesResult> getRelationships( Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - if (!AuthUtil.isAPIAuthorizedUrns( - authentication, _authorizerChain, RELATIONSHIP, READ, List.of(entityUrn))) { + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi(actorUrnStr, request, "getRelationships", entityUrn.getEntityType()), + _authorizerChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorizedUrns(opContext, RELATIONSHIP, READ, List.of(entityUrn))) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to get relationships."); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java index 9456843a3d8105..30cdb632d54773 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v1/timeline/TimelineControllerV1.java @@ -14,8 +14,11 @@ import com.linkedin.metadata.timeline.TimelineService; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeTransaction; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.UnauthorizedException; import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; import java.net.URISyntaxException; import java.util.List; import java.util.Set; @@ -42,6 +45,7 @@ "An API for retrieving historical updates to entities and their related documentation.") public class TimelineControllerV1 { + private final OperationContext systemOperationContext; private final TimelineService _timelineService; private final AuthorizerChain _authorizerChain; @@ -60,6 +64,7 @@ public class TimelineControllerV1 { */ @GetMapping(path = "/{urn}", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity<List<ChangeTransaction>> getTimeline( + HttpServletRequest request, @PathVariable("urn") String rawUrn, @RequestParam(name = "startTime", defaultValue = "-1") long startTime, @RequestParam(name = "endTime", defaultValue = "0") long endTime, @@ -72,14 +77,23 @@ public ResponseEntity<List<ChangeTransaction>> getTimeline( Urn urn = Urn.createFromString(rawUrn); Authentication authentication =
AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi(actorUrnStr, request, "getTimeline", urn.getEntityType()), + _authorizerChain, + authentication, + true); + EntitySpec resourceSpec = new EntitySpec(urn.getEntityType(), rawUrn); DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup( ImmutableList.of( new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.GET_TIMELINE_PRIVILEGE.getType())))); - if (restApiAuthorizationEnabled - && !AuthUtil.isAuthorized(_authorizerChain, actorUrnStr, orGroup, resourceSpec)) { + if (restApiAuthorizationEnabled && !AuthUtil.isAuthorized(opContext, orGroup, resourceSpec)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to edit entities."); } return ResponseEntity.ok( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 1207eb331b795e..d20acbee79b227 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -87,10 +87,6 @@ public ResponseEntity> g List<Urn> urns = request.getUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns(authentication, authorizationChain, READ, urns)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -104,6 +100,11 @@ public ResponseEntity> g authentication, true); + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, READ, urns)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + return ResponseEntity.of( Optional.of( BatchGetUrnResponseV2.builder() diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java index 87c72064ad7a77..6c99d972dde03f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/PlatformEntitiesController.java @@ -91,9 +91,7 @@ public ResponseEntity<List<String>> postEntities( Ingest Authorization Checks */ List<Pair<MetadataChangeProposal, Integer>> exceptions = - isAPIAuthorized( - authentication, _authorizerChain, ENTITY, opContext.getEntityRegistry(), proposals) - .stream() + isAPIAuthorized(opContext, ENTITY, opContext.getEntityRegistry(), proposals).stream() .filter(p -> p.getSecond() != com.linkedin.restli.common.HttpStatus.S_200_OK.getCode()) .collect(Collectors.toList()); if (!exceptions.isEmpty()) { diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java index
29b7de6ae5e8f7..f3d0d5188b1e7e 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimelineControllerV2.java @@ -14,8 +14,11 @@ import com.linkedin.metadata.timeline.TimelineService; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeTransaction; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.UnauthorizedException; import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; import java.net.URISyntaxException; import java.util.List; import java.util.Set; @@ -38,6 +41,7 @@ "An API for retrieving historical updates to entities and their related documentation.") public class TimelineControllerV2 { + private final OperationContext systemOperationContext; private final TimelineService _timelineService; private final AuthorizerChain _authorizerChain; @@ -56,6 +60,7 @@ public class TimelineControllerV2 { */ @GetMapping(path = "/{urn}", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity<List<ChangeTransaction>> getTimeline( + HttpServletRequest request, @PathVariable("urn") String rawUrn, @RequestParam(name = "startTime", defaultValue = "-1") long startTime, @RequestParam(name = "endTime", defaultValue = "0") long endTime, @@ -68,14 +73,23 @@ public ResponseEntity<List<ChangeTransaction>> getTimeline( Urn urn = Urn.createFromString(rawUrn); Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi(actorUrnStr, request, "getTimeline", urn.getEntityType()), + _authorizerChain, + authentication, + true); + EntitySpec resourceSpec = new EntitySpec(urn.getEntityType(), rawUrn); DisjunctivePrivilegeGroup orGroup = new DisjunctivePrivilegeGroup( ImmutableList.of( new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.GET_TIMELINE_PRIVILEGE.getType())))); - if (restApiAuthorizationEnabled - && !AuthUtil.isAuthorized(_authorizerChain, actorUrnStr, orGroup, resourceSpec)) { + if (restApiAuthorizationEnabled && !AuthUtil.isAuthorized(opContext, orGroup, resourceSpec)) { throw new UnauthorizedException(actorUrnStr + " is unauthorized to edit entities."); } return ResponseEntity.ok( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java index 4e8c0abcb0c227..3b896dc5000822 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java @@ -69,12 +69,6 @@ public ResponseEntity<List<GenericTimeseriesAspect>> getAspects( throws URISyntaxException { Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorized(authentication, authorizationChain, TIMESERIES, READ) - || !AuthUtil.isAPIAuthorizedEntityType( - authentication, authorizationChain, READ, entityName)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " " + TIMESERIES); - }
OperationContext opContext = OperationContext.asSession( systemOperationContext, @@ -85,6 +79,12 @@ public ResponseEntity> getAspects( authentication, true); + if (!AuthUtil.isAPIAuthorized(opContext, TIMESERIES, READ) + || !AuthUtil.isAPIAuthorizedEntityType(opContext, READ, entityName)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " " + TIMESERIES); + } + AspectSpec aspectSpec = entityRegistry.getEntitySpec(entityName).getAspectSpec(aspectName); if (!aspectSpec.isTimeseries()) { throw new IllegalArgumentException("Only timeseries aspects are supported."); @@ -108,8 +108,7 @@ public ResponseEntity> getAspects( endTimeMillis); if (!AuthUtil.isAPIAuthorizedUrns( - authentication, - authorizationChain, + opContext, TIMESERIES, READ, result.getDocuments().stream() diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index f6f248be77c670..bc487e72b680ce 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -257,7 +257,17 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) { .in(NAME_PATH) .name("urn") .description("The entity's unique URN id.") - .schema(new Schema().type(TYPE_STRING)))) + .schema(new Schema().type(TYPE_STRING)), + new Parameter() + .in(NAME_QUERY) + .name("clear") + .description("Delete all aspects, preserving the entity's key aspect.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .$ref( + String.format( + "#/components/parameters/%s", + aspectParameterName + MODEL_VERSION)))) .tags(List.of(entity.getName() + " Entity")) .responses(new ApiResponses().addApiResponse("200", successDeleteResponse)); @@ -507,13 +517,13 @@ private static Parameter buildParameterSchema( .items( new Schema() .type(TYPE_STRING) - ._enum(aspectNames) + ._enum(aspectNames.stream().sorted().toList()) ._default(aspectNames.stream().findFirst().orElse(null))); return new Parameter() .in(NAME_QUERY) .name("aspects") .explode(true) - .description("Aspects to include in response.") + .description("Aspects to include.") .example(aspectNames) .schema(schema); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index fbc9bf2956cfd3..d7694f3aed9334 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -103,8 +103,7 @@ public ResponseEntity> getEntityBatch( authentication, true); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, READ, requestMap.keySet())) { + if (!AuthUtil.isAPIAuthorizedEntityUrns(opContext, READ, requestMap.keySet())) { throw new UnauthorizedException( authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); } diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 3e352403c88bca..8b530b218532d0 
100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -14,6 +14,7 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.TransactionContext; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.event.EventProducer; import io.datahubproject.metadata.context.OperationContext; @@ -69,14 +70,14 @@ public void setup() OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization(); AspectDao aspectDao = Mockito.mock(AspectDao.class); when(aspectDao.runInTransactionWithRetry( - ArgumentMatchers.>>any(), + ArgumentMatchers.>>any(), any(AspectsBatch.class), anyInt())) .thenAnswer( i -> List.of( - ((Function>) i.getArgument(0)) - .apply(Mockito.mock(Transaction.class)))); + ((Function>) i.getArgument(0)) + .apply(TransactionContext.empty(Mockito.mock(Transaction.class), 0)))); EventProducer mockEntityEventProducer = Mockito.mock(EventProducer.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 60425fc7e756ed..2dc915f4aaee79 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -1,5 +1,6 @@ package io.datahubproject.openapi.v3.controller; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyMap; @@ -7,6 +8,9 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.nullable; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; import static org.testng.Assert.assertNotNull; @@ -26,6 +30,7 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; +import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortOrder; @@ -68,6 +73,7 @@ public class EntityControllerTest extends AbstractTestNGSpringContextTests { @Autowired private MockMvc mockMvc; @Autowired private SearchService mockSearchService; @Autowired private EntityService mockEntityService; + @Autowired private EntityRegistry entityRegistry; @Test public void initTest() { @@ -171,6 +177,57 @@ public void testSearchOrderPreserved() throws Exception { MockMvcResultMatchers.jsonPath("$.entities[2].urn").value(TEST_URNS.get(0).toString())); } + @Test + public void testDeleteEntity() throws Exception { + Urn TEST_URN = 
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,4,PROD)"); + + // test delete entity + mockMvc + .perform( + MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()); + + // test delete entity by aspect key + mockMvc + .perform( + MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + .param("aspects", "datasetKey") + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()); + + verify(mockEntityService, times(2)).deleteUrn(any(), eq(TEST_URN)); + + // test delete entity by non-key aspect + reset(mockEntityService); + mockMvc + .perform( + MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + .param("aspects", "status") + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()); + verify(mockEntityService, times(1)) + .deleteAspect(any(), eq(TEST_URN.toString()), eq("status"), anyMap(), eq(true)); + + // test delete entity clear + reset(mockEntityService); + mockMvc + .perform( + MockMvcRequestBuilders.delete(String.format("/v3/entity/dataset/%s", TEST_URN)) + .param("clear", "true") + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()); + + entityRegistry.getEntitySpec(DATASET_ENTITY_NAME).getAspectSpecs().stream() + .map(AspectSpec::getName) + .filter(aspectName -> !"datasetKey".equals(aspectName)) + .forEach( + aspectName -> + verify(mockEntityService) + .deleteAspect( + any(), eq(TEST_URN.toString()), eq(aspectName), anyMap(), eq(true))); + } + @TestConfiguration public static class EntityControllerTestConfig { @MockBean public EntityServiceImpl entityService; diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json index c4532cba9e6be3..e8cc193f3458d0 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.analytics.analytics.snapshot.json @@ -56,9 +56,11 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { + "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", + "DESCENDANTS_INCL" : "Represent the relation: URN field matches any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", @@ -68,6 +70,7 @@ "IS_NULL" : "Represent the relation: field is null, e.g. platform is null", "LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3", "LESS_THAN_OR_EQUAL_TO" : "Represent the relation less than or equal to, e.g.
ownerCount <= 3", + "RELATED_INCL" : "Represent the relation: URN field matches any nested child or parent in addition to the given URN", + "START_WITH" : "Represent the relation: String field starts with value, e.g. name starts with PageView" } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 3688311b1f2345..bc4d222e316b0e 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -162,9 +162,11 @@ "type" : "enum", "name" : "Condition", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { + "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", + "DESCENDANTS_INCL" : "Represent the relation: URN field matches any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", @@ -174,6 +176,7 @@ "IS_NULL" : "Represent the relation: field is null, e.g. platform is null", "LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3", "LESS_THAN_OR_EQUAL_TO" : "Represent the relation less than or equal to, e.g. ownerCount <= 3", + "RELATED_INCL" : "Represent the relation: URN field matches any nested child or parent in addition to the given URN", "START_WITH" : "Represent the relation: String field starts with value, e.g.
name starts with PageView" } }, diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 59894ed083a2ef..982a409ef8e4ba 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -4542,7 +4542,11 @@ "name" : "description", "type" : "string", "doc" : "Documentation of the MLPrimaryKey", - "optional" : true + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } }, { "name" : "dataType", "type" : "com.linkedin.common.MLFeatureDataType", @@ -6032,15 +6036,31 @@ "doc" : "include restricted entities in results (default is to filter)", "default" : false, "optional" : true + }, { + "name" : "customHighlightingFields", + "type" : { + "type" : "array", + "items" : "string" + }, + "doc" : "Include mentioned fields inside elastic highlighting query", + "optional" : true + }, { + "name" : "rewriteQuery", + "type" : "boolean", + "doc" : "invoke query rewrite chain for filters based on configured rewriters", + "default" : true, + "optional" : true } ] }, { "type" : "enum", "name" : "Condition", "namespace" : "com.linkedin.metadata.query.filter", "doc" : "The matching condition in a filter criterion", - "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH" ], + "symbols" : [ "CONTAIN", "END_WITH", "EQUAL", "IS_NULL", "EXISTS", "GREATER_THAN", "GREATER_THAN_OR_EQUAL_TO", "IN", "LESS_THAN", "LESS_THAN_OR_EQUAL_TO", "START_WITH", "DESCENDANTS_INCL", "ANCESTORS_INCL", "RELATED_INCL" ], "symbolDocs" : { + "ANCESTORS_INCL" : "Represent the relation: URN field matches any nested parent in addition to the given URN", "CONTAIN" : "Represent the relation: String field contains value, e.g. name contains Profile", + "DESCENDANTS_INCL" : "Represent the relation: URN field matches any nested children in addition to the given URN", "END_WITH" : "Represent the relation: String field ends with value, e.g. name ends with Event", "EQUAL" : "Represent the relation: field = value, e.g. platform = hdfs", "EXISTS" : "Represents the relation: field exists and is non-empty, e.g. owners is not null and != [] (empty)", @@ -6050,6 +6070,7 @@ "IS_NULL" : "Represent the relation: field is null, e.g. platform is null", "LESS_THAN" : "Represent the relation less than, e.g. ownerCount < 3", "LESS_THAN_OR_EQUAL_TO" : "Represent the relation less than or equal to, e.g. ownerCount <= 3", + "RELATED_INCL" : "Represent the relation: URN field matches any nested child or parent in addition to the given URN", "START_WITH" : "Represent the relation: String field starts with value, e.g.
name starts with PageView" } }, { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 3d16550db1e0f1..1a35b52474e4f6 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -4536,7 +4536,11 @@ "name" : "description", "type" : "string", "doc" : "Documentation of the MLPrimaryKey", - "optional" : true + "optional" : true, + "Searchable" : { + "fieldType" : "TEXT", + "hasValuesFieldName" : "hasDescription" + } }, { "name" : "dataType", "type" : "com.linkedin.common.MLFeatureDataType", diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClientCache.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClientCache.java index 959bee565acfc6..340632a2fde9dd 100644 --- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClientCache.java +++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClientCache.java @@ -4,6 +4,7 @@ import com.github.benmanes.caffeine.cache.LoadingCache; import com.github.benmanes.caffeine.cache.Weigher; +import com.google.common.annotations.VisibleForTesting; import com.linkedin.common.client.ClientCache; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; @@ -80,6 +81,8 @@ public Map batchGetV2( Set responses = envelopedAspects.entrySet().stream() + // Exclude cached nulls + .filter(entry -> !(entry.getValue() instanceof NullEnvelopedAspect)) .map(entry -> Pair.of(entry.getKey().getUrn(), entry.getValue())) .collect( Collectors.groupingBy( @@ -104,6 +107,11 @@ public Map batchGetV2( return response; } + @VisibleForTesting + ClientCache getCache() { + return this.cache; + } + private static EntityResponse toEntityResponse( Urn urn, Collection envelopedAspects) { final EntityResponse response = new EntityResponse(); @@ -130,9 +138,13 @@ private EntityClientCacheBuilder loadFunction( public EntityClientCache build( @Nonnull final Function> fetchFunction, Class metricClazz) { + // estimate size Weigher weighByEstimatedSize = - (key, value) -> value.getValue().data().toString().getBytes().length; + (key, value) -> + value instanceof NullEnvelopedAspect + ? 
key.getUrn().toString().getBytes().length + : value.getValue().data().toString().getBytes().length; // batch loads data from entity client (restli or java) Function, Map> loader = @@ -192,37 +204,53 @@ private static Map loadByEntity( String contextId, Map> keysByEntity, Function> loadFunction) { - return keysByEntity.entrySet().stream() - .flatMap( - entry -> { - Set urns = - entry.getValue().stream().map(Key::getUrn).collect(Collectors.toSet()); - Set aspects = - entry.getValue().stream().map(Key::getAspectName).collect(Collectors.toSet()); - return loadFunction - .apply( - CollectionKey.builder() - .contextId(contextId) - .urns(urns) - .aspectNames(aspects) - .build()) - .entrySet() - .stream(); - }) - .flatMap( - resp -> - resp.getValue().getAspects().values().stream() - .map( - envAspect -> { - Key key = - Key.builder() - .contextId(contextId) - .urn(resp.getKey()) - .aspectName(envAspect.getName()) - .build(); - return Map.entry(key, envAspect); - })) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + Map result = + keysByEntity.entrySet().stream() + .flatMap( + entry -> { + Set urns = + entry.getValue().stream().map(Key::getUrn).collect(Collectors.toSet()); + Set aspects = + entry.getValue().stream().map(Key::getAspectName).collect(Collectors.toSet()); + return loadFunction + .apply( + CollectionKey.builder() + .contextId(contextId) + .urns(urns) + .aspectNames(aspects) + .build()) + .entrySet() + .stream(); + }) + .flatMap( + resp -> + resp.getValue().getAspects().values().stream() + .map( + envAspect -> { + Key key = + Key.builder() + .contextId(contextId) + .urn(resp.getKey()) + .aspectName(envAspect.getName()) + .build(); + return Map.entry(key, envAspect); + })) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + /* + * Traditionally responses from the API omit non-existent aspects. For the cache, + * we re-introduce the missing keys. 
+ */ + Map missingAspects = + keysByEntity.values().stream() + .flatMap(Set::stream) + .filter(key -> !result.containsKey(key)) + .map(missingKey -> Map.entry(missingKey, NullEnvelopedAspect.NULL)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + result.putAll(missingAspects); + + return result; } @Data @@ -244,4 +272,10 @@ public static class CollectionKey { private final Set urns; private final Set aspectNames; } + + /** Represents a cached null aspect */ + @VisibleForTesting + static class NullEnvelopedAspect extends EnvelopedAspect { + private static final NullEnvelopedAspect NULL = new NullEnvelopedAspect(); + } } diff --git a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java index e6d53fc98e2e37..817b76f74268c8 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java @@ -1,5 +1,7 @@ package com.linkedin.entity.client; +import static com.linkedin.metadata.Constants.DATASET_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; @@ -14,7 +16,6 @@ import com.linkedin.entity.Aspect; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.metadata.Constants; import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; import com.linkedin.parseq.retry.backoff.ConstantBackoff; import com.linkedin.r2.RemoteInvocationException; @@ -57,7 +58,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), TEST_URN, - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), responseStatusTrue, "Expected un-cached Status.removed=true result"); @@ -67,7 +68,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), TEST_URN, - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), responseStatusFalse, "Expected un-cached Status.removed=false result"); @@ -81,7 +82,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { cacheConfig.setEnabled(true); cacheConfig.setMaxBytes(100); cacheConfig.setEntityAspectTTLSeconds( - Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); + Map.of(TEST_URN.getEntityType(), Map.of(STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = new SystemRestliEntityClient( @@ -93,7 +94,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), TEST_URN, - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), responseStatusTrue, "Expected initial un-cached Status.removed=true result"); @@ -103,7 +104,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), TEST_URN, - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), responseStatusTrue, 
"Expected CACHED Status.removed=true result"); @@ -131,7 +132,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), Set.of(TEST_URN), - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), Map.of(TEST_URN, responseStatusTrue), "Expected un-cached Status.removed=true result"); @@ -141,7 +142,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), Set.of(TEST_URN), - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), Map.of(TEST_URN, responseStatusFalse), "Expected un-cached Status.removed=false result"); @@ -155,7 +156,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio cacheConfig.setEnabled(true); cacheConfig.setMaxBytes(100); cacheConfig.setEntityAspectTTLSeconds( - Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); + Map.of(TEST_URN.getEntityType(), Map.of(STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = new SystemRestliEntityClient( @@ -167,7 +168,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), Set.of(TEST_URN), - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), Map.of(TEST_URN, responseStatusTrue), "Expected initial un-cached Status.removed=true result"); @@ -177,19 +178,111 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio TestOperationContexts.systemContextNoSearchAuthorization(), TEST_URN.getEntityType(), Set.of(TEST_URN), - Set.of(Constants.STATUS_ASPECT_NAME)), + Set.of(STATUS_ASPECT_NAME)), Map.of(TEST_URN, responseStatusTrue), "Expected CACHED Status.removed=true result"); verify(mockRestliClient, times(1)).sendRequest(any(Request.class)); } + @Test + public void testCacheNullValue() throws RemoteInvocationException, URISyntaxException { + Client mockRestliClient = mock(Client.class); + + // Test No Cache Config + EntityClientCacheConfig noCacheConfig = new EntityClientCacheConfig(); + noCacheConfig.setEnabled(true); + + SystemRestliEntityClient noCacheTest = + new SystemRestliEntityClient( + mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1, 2); + + com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); + com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); + + mockResponse(mockRestliClient, responseStatusTrue); + assertEquals( + noCacheTest.getV2( + TestOperationContexts.systemContextNoSearchAuthorization(), + TEST_URN.getEntityType(), + TEST_URN, + Set.of(STATUS_ASPECT_NAME)), + responseStatusTrue, + "Expected un-cached Status.removed=true result"); + + mockResponse(mockRestliClient, responseStatusFalse); + assertEquals( + noCacheTest.getV2( + TestOperationContexts.systemContextNoSearchAuthorization(), + TEST_URN.getEntityType(), + TEST_URN, + Set.of(STATUS_ASPECT_NAME)), + responseStatusFalse, + "Expected un-cached Status.removed=false result"); + + verify(mockRestliClient, times(2)).sendRequest(any(Request.class)); + + // Test Cache Config + reset(mockRestliClient); + + // Enable caching for MULTIPLE entity/aspect + EntityClientCacheConfig cacheConfig = new EntityClientCacheConfig(); + cacheConfig.setEnabled(true); + cacheConfig.setMaxBytes(100); + cacheConfig.setEntityAspectTTLSeconds( + 
Map.of( + TEST_URN.getEntityType(), + Map.of( + STATUS_ASPECT_NAME, 60, + DATASET_PROPERTIES_ASPECT_NAME, 60))); + + SystemRestliEntityClient cacheTest = + new SystemRestliEntityClient( + mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1, 2); + + mockResponse(mockRestliClient, responseStatusTrue); + assertEquals( + cacheTest.getV2( + TestOperationContexts.systemContextNoSearchAuthorization(), + TEST_URN.getEntityType(), + TEST_URN, + Set.of(STATUS_ASPECT_NAME, DATASET_PROPERTIES_ASPECT_NAME)), + responseStatusTrue, + "Expected initial un-cached Status.removed=true result with no DatasetProperties (since it doesn't exist in this scenario)"); + + mockResponse(mockRestliClient, responseStatusFalse); + assertEquals( + cacheTest.getV2( + TestOperationContexts.systemContextNoSearchAuthorization(), + TEST_URN.getEntityType(), + TEST_URN, + Set.of(STATUS_ASPECT_NAME)), + responseStatusTrue, + "Expected CACHED Status.removed=true result with no DatasetProperties (since it doesn't exist in this scenario)"); + + verify(mockRestliClient, times(1)).sendRequest(any(Request.class)); + + // However in this scenario we DO expect a cached null + assertEquals( + cacheTest + .getEntityClientCache() + .getCache() + .get( + EntityClientCache.Key.builder() + .urn(TEST_URN) + .aspectName(DATASET_PROPERTIES_ASPECT_NAME) + .contextId("1379821641") + .build()), + new EntityClientCache.NullEnvelopedAspect(), + "Expected null object for the non-existent cache entry"); + } + private static com.linkedin.entity.EntityResponse buildStatusResponse(boolean value) { EnvelopedAspectMap aspects = new EnvelopedAspectMap(); aspects.put( - Constants.STATUS_ASPECT_NAME, + STATUS_ASPECT_NAME, new EnvelopedAspect() - .setName(Constants.STATUS_ASPECT_NAME) + .setName(STATUS_ASPECT_NAME) .setValue(new Aspect(new Status().setRemoved(value).data()))); return new com.linkedin.entity.EntityResponse() .setUrn(TEST_URN) diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java index 753dd9b807fd12..9bbe1bb35fc654 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/analytics/Analytics.java @@ -73,17 +73,17 @@ public Task getTimeseriesStats( return RestliUtils.toTask( () -> { final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_GET_TIMESERIES_STATS, entityName), authorizer, auth, true); + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, TIMESERIES, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + entityName); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), - ACTION_GET_TIMESERIES_STATS, entityName), authorizer, auth, true); log.info("Attempting to query timeseries stats"); GetTimeseriesAggregatedStatsResponse resp = new GetTimeseriesAggregatedStatsResponse(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index cbca464d569a83..42265e902cc6fd 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -29,7 +29,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.resources.operations.Utils; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.mxe.MetadataChangeProposal; @@ -133,26 +133,27 @@ public Task get( throws URISyntaxException { log.info("GET ASPECT urn: {} aspect: {} version: {}", urnStr, aspectName, version); final Urn urn = Urn.createFromString(urnStr); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication auth = AuthenticationContext.getAuthentication(); - if (!isAPIAuthorizedEntityUrns( - auth, - _authorizer, + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "authorizerChain", urn.getEntityType()), _authorizer, auth, true); + + if (!isAPIAuthorizedEntityUrns( + opContext, READ, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get aspect for " + urn); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "authorizerChain", urn.getEntityType()), _authorizer, auth, true); final VersionedAspect aspect = _entityService.getVersionedAspect(opContext, urn, aspectName, version); if (aspect == null) { log.warn("Did not find urn: {} aspect: {} version: {}", urn, aspectName, version); - throw RestliUtil.nonExceptionResourceNotFound(); + throw RestliUtils.nonExceptionResourceNotFound(); } return new AnyRecord(aspect.data()); }, @@ -182,21 +183,22 @@ public Task getTimeseriesAspectValues( endTimeMillis, limit); final Urn urn = Urn.createFromString(urnStr); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication auth = AuthenticationContext.getAuthentication(); - if (!isAPIAuthorizedUrns( - auth, - _authorizer, + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_GET_TIMESERIES_ASPECT, urn.getEntityType()), _authorizer, auth, true); + + if (!isAPIAuthorizedUrns( + opContext, TIMESERIES, READ, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get timeseries aspect for " + urn); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_GET_TIMESERIES_ASPECT, urn.getEntityType()), _authorizer, auth, true); GetTimeseriesAspectValuesResponse response = new GetTimeseriesAspectValuesResponse(); response.setEntityName(entityName); @@ -277,10 +279,11 @@ private Task ingestProposals( .map(MetadataChangeProposal::getEntityType) .collect(Collectors.toSet()); final OperationContext opContext = 
OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), ACTION_INGEST_PROPOSAL, entityTypes), _authorizer, authentication, true); + systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), + ACTION_INGEST_PROPOSAL, entityTypes), _authorizer, authentication, true); // Ingest Authorization Checks - List> exceptions = isAPIAuthorized(authentication, _authorizer, ENTITY, + List> exceptions = isAPIAuthorized(opContext, ENTITY, opContext.getEntityRegistry(), metadataChangeProposals) .stream().filter(p -> p.getSecond() != HttpStatus.S_200_OK.getCode()) .collect(Collectors.toList()); @@ -295,7 +298,7 @@ private Task ingestProposals( final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); - return RestliUtil.toTask(() -> { + return RestliUtils.toTask(() -> { log.debug("Proposals: {}", metadataChangeProposals); try { final AspectsBatch batch = AspectsBatchImpl.builder() @@ -329,19 +332,20 @@ private Task ingestProposals( public Task getCount( @ActionParam(PARAM_ASPECT) @Nonnull String aspectName, @ActionParam(PARAM_URN_LIKE) @Optional @Nullable String urnLike) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication authentication = AuthenticationContext.getAuthentication(); - if (!isAPIAuthorized( - authentication, - _authorizer, + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), + getContext(), ACTION_GET_COUNT), _authorizer, authentication, true); + + if (!isAPIAuthorized( + opContext, COUNTS, READ)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get aspect counts."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), ACTION_GET_COUNT, List.of()), _authorizer, authentication, true); return _entityService.getCountAspect(opContext, aspectName, urnLike); }, @@ -360,11 +364,16 @@ public Task restoreIndices( @ActionParam("limit") @Optional @Nullable Integer limit, @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { + + Authentication authentication = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), + getContext(), ACTION_RESTORE_INDICES), _authorizer, authentication, true); + if (!isAPIAuthorized( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, PoliciesConfig.RESTORE_INDICES_PRIVILEGE)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to update entities."); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index 599bbf9ce4df60..ebbfc6bb6c2983 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ 
b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -18,7 +18,7 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.AspectRowSummaryArray; import com.linkedin.metadata.run.IngestionRunSummary; @@ -87,15 +87,16 @@ public Task rollback( throws Exception { Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "rollback", List.of()), authorizer, auth, true); + + if (!AuthUtil.isAPIAuthorized( - auth, - authorizer, + opContext, ENTITY, MANAGE)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to update entity"); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "rollback", List.of()), authorizer, auth, true); log.info("ROLLBACK RUN runId: {} dry run: {}", runId, dryRun); @@ -107,7 +108,7 @@ public Task rollback( "Both Safe & hardDelete flags were defined, honouring safe flag as hardDelete is deprecated"); } try { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { try { @@ -135,7 +136,7 @@ public Task list( @ActionParam("includeSoft") @Optional @Nullable Boolean includeSoft) { log.info("LIST RUNS offset: {} size: {}", pageOffset, pageSize); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { List summaries = systemMetadataService.listRuns( @@ -159,19 +160,20 @@ public Task describe( @ActionParam("includeAspect") @Optional @Nullable Boolean includeAspect) { log.info("DESCRIBE RUN runId: {}, start: {}, count: {}", runId, start, count); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "describe", List.of()), authorizer, auth, true); + if (!AuthUtil.isAPIAuthorized( - auth, - authorizer, + opContext, ENTITY, READ)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity"); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "describe", List.of()), authorizer, auth, true); List summaries = systemMetadataService.findByRunId( diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 8a5473da95ba2a..16901853245604 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -21,6 +21,7 @@ import com.datahub.authorization.AuthUtil; import com.datahub.authorization.EntitySpec; +import 
com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.metadata.services.RestrictedService; @@ -52,7 +53,7 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.AspectRowSummaryArray; import com.linkedin.metadata.run.DeleteEntityResponse; @@ -193,18 +194,19 @@ public Task get( final Urn urn = Urn.createFromString(urnStr); Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "restrictedService", urn.getEntityType()), authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, READ, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + urn); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "restrictedService", urn.getEntityType()), authorizer, auth, true); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null @@ -212,7 +214,7 @@ public Task get( : new HashSet<>(Arrays.asList(aspectNames)); final Entity entity = entityService.getEntity(opContext, urn, projectedAspects, true); if (entity == null) { - throw RestliUtil.resourceNotFoundException(String.format("Did not find %s", urnStr)); + throw RestliUtils.resourceNotFoundException(String.format("Did not find %s", urnStr)); } return new AnyRecord(entity.data()); }, @@ -233,18 +235,19 @@ public Task> batchGet( } Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "batchGet", urnStrs), authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, READ, urns)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entities: " + urnStrs); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "batchGet", urnStrs), authorizer, auth, true); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null @@ -269,16 +272,17 @@ public Task ingest( Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); final Urn urn = com.datahub.util.ModelUtils.getUrnFromSnapshotUnion(entity.getValue()); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), + ACTION_INGEST, urn.getEntityType()), authorizer, authentication, true); + if (!isAPIAuthorizedEntityUrns( - authentication, - authorizer, + opContext, CREATE, List.of(urn))) { throw new RestLiServiceException( 
HttpStatus.S_403_FORBIDDEN, "User is unauthorized to edit entity " + urn); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), ACTION_INGEST, urn.getEntityType()), authorizer, authentication, true); try { validateOrThrow(entity); @@ -293,7 +297,7 @@ public Task ingest( // variables referenced in lambdas are required to be final final SystemMetadata finalSystemMetadata = systemMetadata; - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { entityService.ingestEntity(opContext, entity, auditStamp, finalSystemMetadata); return null; @@ -311,20 +315,20 @@ public Task batchIngest( Authentication authentication = AuthenticationContext.getAuthentication(); String actorUrnStr = authentication.getActor().toUrnStr(); - List urns = Arrays.stream(entities) .map(Entity::getValue) .map(com.datahub.util.ModelUtils::getUrnFromSnapshotUnion).collect(Collectors.toList()); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), + getContext(), ACTION_BATCH_INGEST, urns.stream().map(Urn::getEntityType).collect(Collectors.toList())), + authorizer, authentication, true); + if (!isAPIAuthorizedEntityUrns( - authentication, - authorizer, + opContext, CREATE, urns)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to edit entities."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(authentication.getActor().toUrnStr(), getContext(), ACTION_BATCH_INGEST, urns.stream() - .map(Urn::getEntityType).collect(Collectors.toList())), authorizer, authentication, true); for (Entity entity : entities) { try { @@ -342,7 +346,7 @@ public Task batchIngest( } if (entities.length != systemMetadataList.length) { - throw RestliUtil.invalidArgumentsException("entities and systemMetadata length must match"); + throw RestliUtils.invalidArgumentsException("entities and systemMetadata length must match"); } final List finalSystemMetadataList = @@ -350,7 +354,7 @@ public Task batchIngest( .map(SystemMetadataUtils::generateSystemMetadataIfEmpty) .collect(Collectors.toList()); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { entityService.ingestEntities(opContext, Arrays.asList(entities), auditStamp, finalSystemMetadataList); @@ -374,24 +378,24 @@ public Task search( @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession(systemOperationContext, + RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SEARCH, entityName), authorizer, auth, true) + .withSearchFlags(flags -> searchFlags != null ? searchFlags : new SearchFlags().setFulltext(Boolean.TRUE.equals(fulltext))); + + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - OperationContext opContext = OperationContext.asSession(systemOperationContext, - RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SEARCH, entityName), authorizer, auth, true) - .withSearchFlags(flags -> searchFlags != null ? 
searchFlags : new SearchFlags().setFulltext(Boolean.TRUE.equals(fulltext))); - List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("GET SEARCH RESULTS for {} with query {}", entityName, input); // TODO - change it to use _searchService once we are confident on it's latency - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final SearchResult result; // This API is not used by the frontend for search bars so we default to structured @@ -400,8 +404,7 @@ public Task search( List.of(entityName), input, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized get entity."); @@ -427,13 +430,13 @@ public Task searchAcrossEntities( final Authentication auth = AuthenticationContext.getAuthentication(); OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SEARCH_ACROSS_ENTITIES, entities), authorizer, auth, true) + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_SEARCH_ACROSS_ENTITIES, entities), authorizer, auth, true) .withSearchFlags(flags -> searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true)); List entityList = searchService.getEntitiesToSearch(opContext, entities == null ? Collections.emptyList() : Arrays.asList(entities), count); if (!isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityList)) { throw new RestLiServiceException( @@ -443,12 +446,11 @@ public Task searchAcrossEntities( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("GET SEARCH RESULTS ACROSS ENTITIES for {} with query {}", entityList, input); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { SearchResult result = searchService.searchAcrossEntities(opContext, entityList, input, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized get entity."); @@ -486,13 +488,13 @@ public Task scrollAcrossEntities( final Authentication auth = AuthenticationContext.getAuthentication(); OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SCROLL_ACROSS_ENTITIES, entities), authorizer, auth, true) + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_SCROLL_ACROSS_ENTITIES, entities), authorizer, auth, true) .withSearchFlags(flags -> searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true)); List entityList = searchService.getEntitiesToSearch(opContext, entities == null ? 
Collections.emptyList() : Arrays.asList(entities), count); if (!isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityList)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); @@ -506,7 +508,7 @@ public Task scrollAcrossEntities( input, scrollId); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { ScrollResult result = searchService.scrollAcrossEntities( opContext, @@ -518,8 +520,7 @@ public Task scrollAcrossEntities( keepAlive, count); if (!isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized get entity."); @@ -549,9 +550,16 @@ public Task searchAcrossLineage( @Optional @Nullable @ActionParam(PARAM_SEARCH_FLAGS) SearchFlags searchFlags) throws URISyntaxException { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_SEARCH_ACROSS_LINEAGE, entities), authorizer, auth, true) + .withSearchFlags(flags -> (searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true)) + .setIncludeRestricted(true)) + .withLineageFlags(flags -> flags.setStartTimeMillis(startTimeMillis, SetMode.REMOVE_IF_NULL) + .setEndTimeMillis(endTimeMillis, SetMode.REMOVE_IF_NULL)); + if (!isAPIAuthorized( - auth, - authorizer, + opContext, LINEAGE, READ)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); @@ -559,13 +567,6 @@ public Task searchAcrossLineage( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SEARCH_ACROSS_LINEAGE, entities), authorizer, auth, true) - .withSearchFlags(flags -> (searchFlags != null ? searchFlags : new SearchFlags().setFulltext(true)) - .setIncludeRestricted(true)) - .withLineageFlags(flags -> flags.setStartTimeMillis(startTimeMillis, SetMode.REMOVE_IF_NULL) - .setEndTimeMillis(endTimeMillis, SetMode.REMOVE_IF_NULL)); - Urn urn = Urn.createFromString(urnStr); List entityList = entities == null ? Collections.emptyList() : Arrays.asList(entities); log.info( @@ -574,7 +575,7 @@ public Task searchAcrossLineage( direction, entityList, input); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> validateLineageSearchResult(opContext, lineageSearchService.searchAcrossLineage( opContext, urn, @@ -611,22 +612,21 @@ public Task scrollAcrossLineage( throws URISyntaxException { final Authentication auth = AuthenticationContext.getAuthentication(); - if (!isAPIAuthorized( - auth, - authorizer, - LINEAGE, READ)) { - throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); - } - OperationContext opContext = OperationContext.asSession( systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_SCROLL_ACROSS_LINEAGE, entities), - authorizer, auth, true) + authorizer, auth, true) .withSearchFlags(flags -> (searchFlags != null ? 
searchFlags : new SearchFlags().setSkipCache(true)) .setIncludeRestricted(true)) .withLineageFlags(flags -> flags.setStartTimeMillis(startTimeMillis, SetMode.REMOVE_IF_NULL) .setEndTimeMillis(endTimeMillis, SetMode.REMOVE_IF_NULL)); + if (!isAPIAuthorized( + opContext, + LINEAGE, READ)) { + throw new RestLiServiceException( + HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); + } + Urn urn = Urn.createFromString(urnStr); List entityList = entities == null ? Collections.emptyList() : Arrays.asList(entities); log.info( @@ -638,7 +638,7 @@ public Task scrollAcrossLineage( List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> validateLineageScrollResult(opContext, lineageSearchService.scrollAcrossLineage( @@ -669,27 +669,26 @@ public Task list( @ActionParam(PARAM_COUNT) int count) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_LIST, entityName), authorizer, auth, true) + .withSearchFlags(flags -> new SearchFlags().setFulltext(false)); + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_LIST, entityName), authorizer, auth, true) - .withSearchFlags(flags -> new SearchFlags().setFulltext(false)); - List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("GET LIST RESULTS for {} with filter {}", entityName, filter); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { SearchResult result = entitySearchService.filter(opContext, entityName, filter, sortCriterionList, start, count); if (!AuthUtil.isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized get entity."); @@ -712,24 +711,23 @@ public Task autocomplete( @ActionParam(PARAM_SEARCH_FLAGS) @Optional @Nullable SearchFlags searchFlags) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_AUTOCOMPLETE, entityName), authorizer, auth, true) + .withSearchFlags(flags -> searchFlags != null ? searchFlags : flags); + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_AUTOCOMPLETE, entityName), authorizer, auth, true) - .withSearchFlags(flags -> searchFlags != null ? 
searchFlags : flags); - - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { AutoCompleteResult result = entitySearchService.autoComplete(opContext, entityName, query, field, filter, limit); if (!isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity."); @@ -750,25 +748,24 @@ public Task browse( @ActionParam(PARAM_SEARCH_FLAGS) @Optional @Nullable SearchFlags searchFlags) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_BROWSE, entityName), authorizer, auth, true) + .withSearchFlags(flags -> searchFlags != null ? searchFlags : flags); + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_BROWSE, entityName), authorizer, auth, true) - .withSearchFlags(flags -> searchFlags != null ? searchFlags : flags); - log.info("GET BROWSE RESULTS for {} at path {}", entityName, path); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { BrowseResult result = entitySearchService.browse(opContext, entityName, path, filter, start, limit); if (!isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity."); @@ -787,9 +784,12 @@ public Task getBrowsePaths( @ActionParam(value = PARAM_URN, typeref = com.linkedin.common.Urn.class) @Nonnull Urn urn) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_GET_BROWSE_PATHS, urn.getEntityType()), authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, READ, List.of(urn))) { throw new RestLiServiceException( @@ -797,10 +797,7 @@ public Task getBrowsePaths( } log.info("GET BROWSE PATHS for {}", urn); - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_GET_BROWSE_PATHS, urn.getEntityType()), authorizer, auth, true); - - return RestliUtil.toTask( + return RestliUtils.toTask( () -> new StringArray(entitySearchService.getBrowsePaths(opContext, urnToEntityName(urn), urn)), MetricRegistry.name(this.getClass(), "getBrowsePaths")); } @@ -840,7 +837,7 @@ public Task deleteEntities( ComparableVersion finalRegistryVersion = registryVersion; String finalRegistryName1 = registryName; ComparableVersion finalRegistryVersion1 = registryVersion; - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { RollbackResponse response = new RollbackResponse(); List aspectRowsToDelete = @@ -858,16 +855,17 @@ public Task deleteEntities( .keySet(); final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "deleteAll", urns),
authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, DELETE, urns.stream().map(UrnUtils::getUrn).collect(Collectors.toSet()))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entities."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "deleteAll", urns), authorizer, auth, true); response.setEntitiesAffected(urns.size()); response.setEntitiesDeleted( @@ -909,18 +907,19 @@ public Task deleteEntity( Urn urn = Urn.createFromString(urnStr); final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_DELETE, urn.getEntityType()), authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, DELETE, List.of(urn))) { throw new RestLiServiceException( - HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity: " + urnStr); + HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity: " + urnStr); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_DELETE, urn.getEntityType()), authorizer, auth, true); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { // Find the timeseries aspects to delete. If aspectName is null, delete all. List timeseriesAspectNames = @@ -971,16 +970,17 @@ private Long deleteTimeseriesAspects( long totalNumberOfDocsDeleted = 0; final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "deleteTimeseriesAspects", urn.getEntityType()), authorizer, auth, true); + if (!isAPIAuthorizedUrns( - auth, - authorizer, + opContext, TIMESERIES, DELETE, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity " + urn); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "deleteTimeseriesAspects", urn.getEntityType()), authorizer, auth, true); // Construct the filter. 
List criteria = new ArrayList<>(); @@ -1027,18 +1027,19 @@ public Task deleteReferencesTo( Urn urn = Urn.createFromString(urnStr); final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "deleteReferences", urn.getEntityType()), authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, DELETE, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to delete entity " + urnStr); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "deleteReferences", urn.getEntityType()), authorizer, auth, true); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> deleteEntityService.deleteReferencesTo(opContext, urn, dryRun), MetricRegistry.name(this.getClass(), "deleteReferences")); } @@ -1052,15 +1053,19 @@ public Task deleteReferencesTo( public Task setWriteable( @ActionParam(PARAM_VALUE) @Optional("true") @Nonnull Boolean value) { + final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "setWriteable"), authorizer, auth, true); + if (!isAPIAuthorized( - AuthenticationContext.getAuthentication(), - authorizer, + opContext, PoliciesConfig.SET_WRITEABLE_PRIVILEGE)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to enable and disable write mode."); } log.info("setting entity resource to be writable"); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { entityService.setWritable(value); return null; @@ -1073,16 +1078,18 @@ public Task setWriteable( public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String entityName) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "getTotalEntityCount", entityName), authorizer, auth, true); + if (!isAPIAuthorized( - auth, - authorizer, + opContext, COUNTS, READ)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity counts."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "getTotalEntityCount", entityName), authorizer, auth, true); - return RestliUtil.toTask(() -> entitySearchService.docCount(opContext, entityName)); + + return RestliUtils.toTask(() -> entitySearchService.docCount(opContext, entityName)); } @Action(name = "batchGetTotalEntityCount") @@ -1091,16 +1098,18 @@ public Task getTotalEntityCount(@ActionParam(PARAM_ENTITY) @Nonnull String public Task batchGetTotalEntityCount( @ActionParam(PARAM_ENTITIES) @Nonnull String[] entityNames) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "batchGetTotalEntityCount", entityNames), authorizer, auth, true); + if (!isAPIAuthorized( - auth, - authorizer, 
+ opContext, COUNTS, READ)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity counts."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "batchGetTotalEntityCount", entityNames), authorizer, auth, true); - return RestliUtil.toTask( + + return RestliUtils.toTask( () -> new LongMap(searchService.docCountPerEntity(opContext, Arrays.asList(entityNames)))); } @@ -1114,22 +1123,22 @@ public Task listUrns( throws URISyntaxException { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_LIST_URNS, entityName), authorizer, auth, true); + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_LIST_URNS, entityName), authorizer, auth, true); log.info("LIST URNS for {} with start {} and count {}", entityName, start, count); - return RestliUtil.toTask(() -> { + return RestliUtils.toTask(() -> { ListUrnsResult result = entityService.listUrns(opContext, entityName, start, count); if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, READ, result.getEntities())) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity counts."); @@ -1155,18 +1164,19 @@ public Task applyRetention( } final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_APPLY_RETENTION, resourceSpec.getType()), authorizer, auth, true); + if (!isAPIAuthorized( - auth, - authorizer, + opContext, PoliciesConfig.APPLY_RETENTION_PRIVILEGE, resourceSpec)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to apply retention."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_APPLY_RETENTION, resourceSpec.getType()), authorizer, auth, true); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> entityService.batchApplyRetention(opContext, start, count, attemptWithVersion, aspectName, urn), ACTION_APPLY_RETENTION); } @@ -1183,24 +1193,25 @@ public Task filter( @ActionParam(PARAM_COUNT) int count) { final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_FILTER, entityName), authorizer, auth, true); + if (!AuthUtil.isAPIAuthorizedEntityType( - auth, - authorizer, + opContext, READ, entityName)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to search."); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_FILTER, entityName), authorizer, 
auth, true); List sortCriterionList = getSortCriteria(sortCriteria, sortCriterion); log.info("FILTER RESULTS for {} with filter {}", entityName, filter); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { - SearchResult result = entitySearchService.filter(opContext.withSearchFlags(flags -> flags.setFulltext(true)), entityName, filter, sortCriterionList, start, count); + SearchResult result = entitySearchService.filter(opContext.withSearchFlags(flags -> flags.setFulltext(true)), + entityName, filter, sortCriterionList, start, count); if (!isAPIAuthorizedResult( - auth, - authorizer, + opContext, result)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity counts."); @@ -1219,19 +1230,20 @@ public Task exists(@ActionParam(PARAM_URN) @Nonnull String urnStr, @Act Urn urn = UrnUtils.getUrn(urnStr); final Authentication auth = AuthenticationContext.getAuthentication(); + OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_EXISTS, urn.getEntityType()), authorizer, auth, true); if (!isAPIAuthorizedEntityUrns( - auth, - authorizer, + opContext, EXISTS, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to check entity existence: " + urnStr); } - OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_EXISTS, urn.getEntityType()), authorizer, auth, true); + log.info("EXISTS for {}", urnStr); final boolean includeRemoved = includeSoftDelete == null || includeSoftDelete; - return RestliUtil.toTask( + return RestliUtils.toTask( () -> entityService.exists(opContext, urn, includeRemoved), MetricRegistry.name(this.getClass(), "exists")); } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java index 9052f0240266ad..20209ddf44d643 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityV2Resource.java @@ -16,7 +16,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -70,18 +70,19 @@ public Task get( final Urn urn = Urn.createFromString(urnStr); final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "getEntityV2", urn.getEntityType()), _authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - _authorizer, + opContext, READ, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity " + urn); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext,
RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "getEntityV2", urn.getEntityType()), _authorizer, auth, true); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final String entityName = urnToEntityName(urn); final Set projectedAspects = @@ -114,22 +115,23 @@ public Task> batchGet( } final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "getEntityV2", urns.stream().map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - _authorizer, + opContext, READ, urns)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entities " + urnStrs); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "getEntityV2", urns.stream().map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, auth, true); if (urns.size() <= 0) { return Task.value(Collections.emptyMap()); } final String entityName = urnToEntityName(urns.iterator().next()); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java index d6c91ba7dcaa35..73b2d1a6c5cb87 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityVersionedV2Resource.java @@ -18,7 +18,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -78,24 +78,25 @@ public Task> batchGetVersioned( .map(versionedUrn -> UrnUtils.getUrn(versionedUrn.getUrn())).collect(Collectors.toSet()); Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "authorizerChain", urns.stream() + .map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - _authorizer, + opContext, READ, urns)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entities " + versionedUrnStrs); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "authorizerChain", urns.stream() - .map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, auth, true); + log.debug("BATCH GET VERSIONED V2 {}", versionedUrnStrs); if (versionedUrnStrs.size() <= 0) { return Task.value(Collections.emptyMap()); } - return RestliUtil.toTask( + 
return RestliUtils.toTask( () -> { final Set projectedAspects = aspectNames == null diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java index d04efcaa85e49f..738f33db63a8b7 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/lineage/Relationships.java @@ -13,6 +13,7 @@ import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; import com.codahale.metrics.MetricRegistry; +import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; @@ -27,7 +28,7 @@ import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.query.filter.RelationshipDirection; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; @@ -40,6 +41,8 @@ import com.linkedin.restli.server.annotations.RestLiSimpleResource; import com.linkedin.restli.server.annotations.RestMethod; import com.linkedin.restli.server.resources.SimpleResourceTemplate; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Arrays; @@ -70,6 +73,10 @@ public final class Relationships extends SimpleResourceTemplate get( @QueryParam("count") @Optional @Nullable Integer count) { Urn urn = UrnUtils.getUrn(rawUrn); + final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "getRelationships", urn.getEntityType()), _authorizer, auth, true); + if (!isAPIAuthorizedUrns( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, LINEAGE, READ, List.of(urn))) { throw new RestLiServiceException( @@ -126,7 +137,7 @@ public Task get( } RelationshipDirection direction = RelationshipDirection.valueOf(rawDirection); final List relationshipTypes = Arrays.asList(relationshipTypesParam); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final RelatedEntitiesResult relatedEntitiesResult = getRelatedEntities(rawUrn, relationshipTypes, direction, start, count); @@ -162,9 +173,13 @@ public Task get( public UpdateResponse delete(@QueryParam("urn") @Nonnull String rawUrn) throws Exception { Urn urn = Urn.createFromString(rawUrn); + final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "deleteRelationships", urn.getEntityType()), _authorizer, auth, true); + if (!isAPIAuthorizedUrns( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, LINEAGE, DELETE, List.of(urn))) { throw new RestLiServiceException( @@ -187,15 +202,19 @@ 
public Task getLineage( log.info("GET LINEAGE {} {} {} {} {}", urnStr, direction, start, count, maxHops); final Urn urn = Urn.createFromString(urnStr); + final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "getLineage", urn.getEntityType()), _authorizer, auth, true); + if (!isAPIAuthorizedUrns( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, LINEAGE, READ, List.of(urn))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get entity lineage: " + urnStr); } - return RestliUtil.toTask( + return RestliUtils.toTask( () -> _graphService.getLineage( urn, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java index 42d0bf11c505d8..ea329ce0809fba 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java @@ -17,7 +17,7 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.BatchWriteOperationsOptions; @@ -91,7 +91,7 @@ public OperationsResource() {} OperationsResource(OperationContext systemOperationContext, TimeseriesAspectService timeseriesAspectService) { this._timeseriesAspectService = timeseriesAspectService; this.systemOperationContext = systemOperationContext; - this._authorizer = systemOperationContext.getAuthorizerContext().getAuthorizer(); + this._authorizer = systemOperationContext.getAuthorizationContext().getAuthorizer(); } @Action(name = ACTION_RESTORE_INDICES) @@ -106,7 +106,7 @@ public Task restoreIndices( @ActionParam("limit") @Optional @Nullable Integer limit, @ActionParam("gePitEpochMs") @Optional @Nullable Long gePitEpochMs, @ActionParam("lePitEpochMs") @Optional @Nullable Long lePitEpochMs) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> Utils.restoreIndices(systemOperationContext, getContext(), aspectName, urn, urnLike, start, batchSize, limit, gePitEpochMs, lePitEpochMs, _authorizer, _entityService), MetricRegistry.name(this.getClass(), "restoreIndices")); @@ -131,12 +131,16 @@ public Task getTaskStatus( @ActionParam(PARAM_NODE_ID) @Optional String nodeId, @ActionParam(PARAM_TASK_ID) @Optional("0") long taskId, @ActionParam(PARAM_TASK) @Optional String task) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { + final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_GET_ES_TASK_STATUS), _authorizer, auth, true); + if (!isAPIAuthorized( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, PoliciesConfig.GET_ES_TASK_STATUS_PRIVILEGE)) { 
throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get ES task status"); @@ -190,19 +194,20 @@ public Task getTaskStatus( @Nonnull @WithSpan public Task getIndexSizes() { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_GET_INDEX_SIZES, List.of()), _authorizer, auth, true); + if (!isAPIAuthorized( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, PoliciesConfig.GET_TIMESERIES_INDEX_SIZES_PRIVILEGE)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to get index sizes."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_GET_INDEX_SIZES, List.of()), _authorizer, auth, true); TimeseriesIndicesSizesResult result = new TimeseriesIndicesSizesResult(); result.setIndexSizes( @@ -224,15 +229,16 @@ String executeTruncateTimeseriesAspect( @Nullable Boolean forceReindex) { final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + "executeTruncateTimeseriesAspect", entityType), _authorizer, auth, true); + if (!isAPIAuthorized( - auth, - _authorizer, + opContext, PoliciesConfig.TRUNCATE_TIMESERIES_INDEX_PRIVILEGE)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to truncate timeseries index"); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), "executeTruncateTimeseriesAspect", entityType), _authorizer, auth, true); if (forceDeleteByQuery != null && forceDeleteByQuery.equals(forceReindex)) { return "please only set forceReindex OR forceDeleteByQuery flags"; @@ -313,7 +319,7 @@ public Task truncateTimeseriesAspect( @ActionParam(PARAM_TIMEOUT_SECONDS) @Optional @Nullable Long timeoutSeconds, @ActionParam(PARAM_FORCE_DELETE_BY_QUERY) @Optional @Nullable Boolean forceDeleteByQuery, @ActionParam(PARAM_FORCE_REINDEX) @Optional @Nullable Boolean forceReindex) { - return RestliUtil.toTask( + return RestliUtils.toTask( () -> executeTruncateTimeseriesAspect( entityType, diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 54c1029edcab04..734fe0cd606d98 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -52,16 +52,17 @@ public static String restoreIndices( resourceSpec = new EntitySpec(resource.getEntityType(), resource.toString()); } final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), resourceContext, + "restoreIndices", List.of()), authorizer, auth, true); + if 
(!isAPIAuthorized( - auth, - authorizer, + opContext, PoliciesConfig.RESTORE_INDICES_PRIVILEGE, resourceSpec)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to restore indices."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), resourceContext, "restoreIndices", List.of()), authorizer, auth, true); RestoreIndicesArgs args = new RestoreIndicesArgs() diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java index 5b2f19c661dabc..46fab05133651a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/platform/PlatformResource.java @@ -2,13 +2,15 @@ import static com.datahub.authorization.AuthUtil.isAPIAuthorized; +import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.plugins.auth.authorization.Authorizer; +import com.linkedin.common.urn.Urn; import com.linkedin.entity.Entity; import com.linkedin.metadata.authorization.Disjunctive; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.event.EventProducer; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.mxe.PlatformEvent; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; @@ -18,12 +20,16 @@ import com.linkedin.restli.server.annotations.Optional; import com.linkedin.restli.server.annotations.RestLiCollection; import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.opentelemetry.extension.annotations.WithSpan; import javax.annotation.Nonnull; import javax.inject.Inject; import javax.inject.Named; import lombok.extern.slf4j.Slf4j; +import java.util.stream.Collectors; + /** DataHub Platform Actions */ @Slf4j @RestLiCollection(name = "platform", namespace = "com.linkedin.platform") @@ -39,6 +45,10 @@ public class PlatformResource extends CollectionResourceTaskTemplate producePlatformEvent( @ActionParam("key") @Optional String key, @ActionParam("event") @Nonnull PlatformEvent event) { + final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_PRODUCE_PLATFORM_EVENT), _authorizer, + auth, true); + if (!isAPIAuthorized( - AuthenticationContext.getAuthentication(), - _authorizer, + opContext, PoliciesConfig.PRODUCE_PLATFORM_EVENT_PRIVILEGE)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to produce platform events."); } log.info(String.format("Emitting platform event. 
name: %s, key: %s", eventName, key)); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { _eventProducer.producePlatformEvent(eventName, key, event); return null; diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java index 0ca8eb49308b32..185874fac1382d 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/restli/RestliUtils.java @@ -1,5 +1,10 @@ package com.linkedin.metadata.resources.restli; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import com.linkedin.metadata.dao.throttle.APIThrottleException; +import com.linkedin.metadata.restli.NonExceptionHttpErrorResponse; +import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.parseq.Task; import com.linkedin.restli.common.HttpStatus; import com.linkedin.restli.server.RestLiServiceException; @@ -27,20 +32,41 @@ public static Task toTask(@Nonnull Supplier supplier) { return Task.value(supplier.get()); } catch (Throwable throwable) { + final RestLiServiceException finalException; + // Convert IllegalArgumentException to BAD REQUEST if (throwable instanceof IllegalArgumentException || throwable.getCause() instanceof IllegalArgumentException) { - throwable = badRequestException(throwable.getMessage()); - } - - if (throwable instanceof RestLiServiceException) { - throw (RestLiServiceException) throwable; + finalException = badRequestException(throwable.getMessage()); + } else if (throwable instanceof APIThrottleException) { + finalException = apiThrottled(throwable.getMessage()); + } else if (throwable instanceof RestLiServiceException) { + finalException = (RestLiServiceException) throwable; + } else { + finalException = new RestLiServiceException(HttpStatus.S_500_INTERNAL_SERVER_ERROR, throwable); } - throw new RestLiServiceException(HttpStatus.S_500_INTERNAL_SERVER_ERROR, throwable); + throw finalException; } } + @Nonnull + public static Task toTask(@Nonnull Supplier supplier, String metricName) { + Timer.Context context = MetricUtils.timer(metricName).time(); + // Stop timer on success and failure + return toTask(supplier) + .transform( + orig -> { + context.stop(); + if (orig.isFailed()) { + MetricUtils.counter(MetricRegistry.name(metricName, "failed")).inc(); + } else { + MetricUtils.counter(MetricRegistry.name(metricName, "success")).inc(); + } + return orig; + }); + } + /** * Similar to {@link #toTask(Supplier)} but the supplier is expected to return an {@link Optional} * instead. 
A {@link RestLiServiceException} with 404 HTTP status code will be thrown if the @@ -59,6 +85,11 @@ public static RestLiServiceException resourceNotFoundException() { return resourceNotFoundException(null); } + @Nonnull + public static RestLiServiceException nonExceptionResourceNotFound() { + return new NonExceptionHttpErrorResponse(HttpStatus.S_404_NOT_FOUND); + } + @Nonnull public static RestLiServiceException resourceNotFoundException(@Nullable String message) { return new RestLiServiceException(HttpStatus.S_404_NOT_FOUND, message); @@ -73,4 +104,9 @@ public static RestLiServiceException badRequestException(@Nullable String messag public static RestLiServiceException invalidArgumentsException(@Nullable String message) { return new RestLiServiceException(HttpStatus.S_412_PRECONDITION_FAILED, message); } + + @Nonnull + public static RestLiServiceException apiThrottled(@Nullable String message) { + return new RestLiServiceException(HttpStatus.S_429_TOO_MANY_REQUESTS, message); + } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java index 1b003fec82e8b8..a0c3d460951605 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/usage/UsageStats.java @@ -24,7 +24,7 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.restli.RestliUtil; +import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.elastic.UsageServiceUtil; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; @@ -100,22 +100,23 @@ public class UsageStats extends SimpleResourceTemplate { @WithSpan public Task batchIngest(@ActionParam(PARAM_BUCKETS) @Nonnull UsageAggregation[] buckets) { log.info("Ingesting {} usage stats aggregations", buckets.length); - return RestliUtil.toTask( + return RestliUtils.toTask( () -> { final Authentication auth = AuthenticationContext.getAuthentication(); Set urns = Arrays.stream(buckets).sequential().map(UsageAggregation::getResource).collect(Collectors.toSet()); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_BATCH_INGEST, urns.stream().map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, + auth, true); + if (!isAPIAuthorizedEntityUrns( - auth, - _authorizer, + opContext, UPDATE, urns)) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to edit entities."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_BATCH_INGEST, urns.stream() - .map(Urn::getEntityType).collect(Collectors.toList())), _authorizer, auth, true); for (UsageAggregation agg : buckets) { this.ingest(opContext, agg); @@ -139,21 +140,22 @@ public Task query( log.info( "Querying usage stats for resource: {}, duration: {}, start time: {}, end time: {}, max buckets: {}", resource, duration, startTime, endTime, maxBuckets); - return 
RestliUtil.toTask( + return RestliUtils.toTask( () -> { Urn resourceUrn = UrnUtils.getUrn(resource); final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_QUERY, resourceUrn.getEntityType()), _authorizer, auth, true); + if (!isAPIAuthorized( - auth, - _authorizer, + opContext, PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE, new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to query usage."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_QUERY, resourceUrn.getEntityType()), _authorizer, auth, true); return UsageServiceUtil.query(opContext, _timeseriesAspectService, resource, duration, startTime, endTime, maxBuckets); }, @@ -170,19 +172,20 @@ public Task queryRange( Urn resourceUrn = UrnUtils.getUrn(resource); final Authentication auth = AuthenticationContext.getAuthentication(); + final OperationContext opContext = OperationContext.asSession( + systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), + ACTION_QUERY_RANGE, resourceUrn.getEntityType()), _authorizer, auth, true); + + if (!isAPIAuthorized( - auth, - _authorizer, + opContext, PoliciesConfig.VIEW_DATASET_USAGE_PRIVILEGE, new EntitySpec(resourceUrn.getEntityType(), resourceUrn.toString()))) { throw new RestLiServiceException( HttpStatus.S_403_FORBIDDEN, "User is unauthorized to query usage."); } - final OperationContext opContext = OperationContext.asSession( - systemOperationContext, RequestContext.builder().buildRestli(auth.getActor().toUrnStr(), getContext(), ACTION_QUERY_RANGE, resourceUrn.getEntityType()), _authorizer, auth, true); - - return RestliUtil.toTask( + return RestliUtils.toTask( () -> UsageServiceUtil.queryRange(opContext, _timeseriesAspectService, resource, duration, range), MetricRegistry.name(this.getClass(), "queryRange")); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index ed14dec4ed940a..3769507d5e5071 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -49,6 +49,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -342,9 +343,9 @@ private void deleteReference( */ private void deleteAspect( @Nonnull OperationContext opContext, Urn urn, String aspectName, RecordTemplate prevAspect) { - final RollbackResult rollbackResult = + final Optional rollbackResult = _entityService.deleteAspect(opContext, urn.toString(), aspectName, new HashMap<>(), true); - if (rollbackResult == null || rollbackResult.getNewValue() != null) { + if (rollbackResult.isEmpty() || rollbackResult.get().getNewValue() != null) { log.error( "Failed to delete aspect with references. 
Before {}, after: null, please check GMS logs" + " for more information", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 283cff5d2e19da..66f7ff50a36245 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -26,6 +26,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Future; import java.util.function.Consumer; @@ -458,7 +459,7 @@ void ingestEntity( void setRetentionService(RetentionService retentionService); - default RollbackResult deleteAspect( + default Optional deleteAspect( @Nonnull OperationContext opContext, String urn, String aspectName, @@ -468,7 +469,8 @@ default RollbackResult deleteAspect( new AspectRowSummary().setUrn(urn).setAspectName(aspectName); return rollbackWithConditions(opContext, List.of(aspectRowSummary), conditions, hardDelete) .getRollbackResults() - .get(0); + .stream() + .findFirst(); } RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 730a2886ab2bf0..f6a37f958c30db 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -32,6 +32,7 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import javax.validation.constraints.Null; import org.apache.commons.collections.CollectionUtils; public class QueryUtils { @@ -57,6 +58,26 @@ public static Criterion newCriterion( .setCondition(condition); } + // Creates new Criterion with field and value, using EQUAL condition. + @Nullable + public static Criterion newCriterion(@Nonnull String field, @Nonnull List values) { + return newCriterion(field, values, Condition.EQUAL); + } + + // Creates new Criterion with field, value and condition. + @Null + public static Criterion newCriterion( + @Nonnull String field, @Nonnull List values, @Nonnull Condition condition) { + if (values.isEmpty()) { + return null; + } + return new Criterion() + .setField(field) + .setValue(values.get(0)) // Hack! This is due to bad modeling. + .setValues(new StringArray(values)) + .setCondition(condition); + } + // Creates new Filter from a map of Criteria by removing null-valued Criteria and using EQUAL // condition (default).
@Nonnull diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java index 403665120c6868..7c5a17c91b95e8 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java @@ -3,7 +3,6 @@ import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; import static com.linkedin.metadata.authorization.ApiOperation.DELETE; -import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationException; import com.datahub.authorization.AuthUtil; import com.datahub.plugins.auth.authorization.Authorizer; @@ -78,7 +77,7 @@ public RollbackResponse rollbackIngestion( } List aspectRowsToDelete = rollbackTargetAspects(runId, hardDelete); - if (!isAuthorized(authorizer, aspectRowsToDelete, opContext.getSessionAuthentication())) { + if (!isAuthorized(opContext, aspectRowsToDelete)) { throw new AuthenticationException("User is NOT authorized to delete entities."); } @@ -287,13 +286,10 @@ public void updateExecutionRequestStatus( } private boolean isAuthorized( - final Authorizer authorizer, - @Nonnull List rowSummaries, - @Nonnull Authentication authentication) { + @Nonnull OperationContext opContext, @Nonnull List rowSummaries) { return AuthUtil.isAPIAuthorizedEntityUrns( - authentication, - authorizer, + opContext, DELETE, rowSummaries.stream() .map(AspectRowSummary::getUrn) diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index 07f91bb52fe109..f56cbc36e4a66c 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import io.datahubproject.metadata.context.OperationContext; import jakarta.servlet.http.HttpServlet; @@ -35,22 +36,27 @@ @Slf4j public class ConfigSearchExport extends HttpServlet { - private ConfigurationProvider getConfigProvider(WebApplicationContext ctx) { + private static ConfigurationProvider getConfigProvider(WebApplicationContext ctx) { return (ConfigurationProvider) ctx.getBean("configurationProvider"); } - private AspectRetriever getAspectRetriever(WebApplicationContext ctx) { + private static AspectRetriever getAspectRetriever(WebApplicationContext ctx) { return (AspectRetriever) ctx.getBean("aspectRetriever"); } - private OperationContext getOperationContext(WebApplicationContext ctx) { + private static OperationContext getOperationContext(WebApplicationContext ctx) { return (OperationContext) ctx.getBean("systemOperationContext"); } + private static QueryFilterRewriteChain getQueryFilterRewriteChain(WebApplicationContext ctx) { + return ctx.getBean(QueryFilterRewriteChain.class); + } + private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { SearchConfiguration searchConfiguration = getConfigProvider(ctx).getElasticSearch().getSearch();
AspectRetriever aspectRetriever = getAspectRetriever(ctx); EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); + QueryFilterRewriteChain queryFilterRewriteChain = getQueryFilterRewriteChain(ctx); CSVWriter writer = CSVWriter.builder().printWriter(pw).build(); @@ -85,7 +91,8 @@ private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { entitySpecOpt -> { EntitySpec entitySpec = entitySpecOpt.get(); SearchRequest searchRequest = - SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, null) + SearchRequestHandler.getBuilder( + entitySpec, searchConfiguration, null, queryFilterRewriteChain) .getSearchRequest( getOperationContext(ctx) .withSearchFlags( diff --git a/metadata-service/war/src/main/resources/boot/policies.json b/metadata-service/war/src/main/resources/boot/policies.json index e62d0a33e7cd05..e0f26b908c4991 100644 --- a/metadata-service/war/src/main/resources/boot/policies.json +++ b/metadata-service/war/src/main/resources/boot/policies.json @@ -37,7 +37,8 @@ "MANAGE_BUSINESS_ATTRIBUTE", "MANAGE_STRUCTURED_PROPERTIES", "MANAGE_DOCUMENTATION_FORMS", - "MANAGE_FEATURES" + "MANAGE_FEATURES", + "MANAGE_SYSTEM_OPERATIONS" ], "displayName": "Root User - All Platform Privileges", "description": "Grants all platform privileges to root user.", diff --git a/metadata-service/war/src/main/resources/boot/retention.yaml b/metadata-service/war/src/main/resources/boot/retention.yaml index 630d2ce7ad0e67..639d1aa6bfc47e 100644 --- a/metadata-service/war/src/main/resources/boot/retention.yaml +++ b/metadata-service/war/src/main/resources/boot/retention.yaml @@ -3,7 +3,7 @@ config: retention: version: - maxVersions: 5 + maxVersions: 1 - entity: "*" aspect: "*" config: @@ -17,4 +17,4 @@ # version: # maxVersions: 10 # time: -# maxAgeInSeconds: 2592000 # 30 days \ No newline at end of file +# maxAgeInSeconds: 2592000 # 30 days diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index a282c6be673d0e..7a5a34d0f36301 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -167,6 +167,12 @@ public class PoliciesConfig { Privilege.of( "MANAGE_FEATURES", "Manage Features", "Umbrella privilege to manage all features."); + public static final Privilege MANAGE_SYSTEM_OPERATIONS_PRIVILEGE = + Privilege.of( + "MANAGE_SYSTEM_OPERATIONS", + "Manage System Operations", + "Allow access to system operations APIs and controls."); + public static final List PLATFORM_PRIVILEGES = ImmutableList.of( MANAGE_POLICIES_PRIVILEGE, @@ -194,7 +200,8 @@ public class PoliciesConfig { MANAGE_CONNECTIONS_PRIVILEGE, MANAGE_STRUCTURED_PROPERTIES_PRIVILEGE, MANAGE_DOCUMENTATION_FORMS_PRIVILEGE, - MANAGE_FEATURES_PRIVILEGE); + MANAGE_FEATURES_PRIVILEGE, + MANAGE_SYSTEM_OPERATIONS_PRIVILEGE); // Resource Privileges // diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java deleted file mode 100644 index c9b1d5a8a82de5..00000000000000 --- a/metadata-utils/src/main/java/com/linkedin/metadata/restli/RestliUtil.java +++ /dev/null @@ -1,101 +0,0 @@ -package com.linkedin.metadata.restli; - -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Timer; -import com.linkedin.metadata.utils.metrics.MetricUtils; -import 
com.linkedin.parseq.Task; -import com.linkedin.restli.common.HttpStatus; -import com.linkedin.restli.server.RestLiServiceException; -import java.util.Optional; -import java.util.function.Supplier; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -public class RestliUtil { - - private RestliUtil() { - // Utils class - } - - /** - * Executes the provided supplier and convert the results to a {@link Task}. Exceptions thrown - * during the execution will be properly wrapped in {@link RestLiServiceException}. - * - * @param supplier The supplier to execute - * @return A parseq {@link Task} - */ - @Nonnull - public static Task toTask(@Nonnull Supplier supplier) { - try { - return Task.value(supplier.get()); - } catch (Throwable throwable) { - - // Convert IllegalArgumentException to BAD REQUEST - if (throwable instanceof IllegalArgumentException - || throwable.getCause() instanceof IllegalArgumentException) { - throwable = badRequestException(throwable.getMessage()); - } - - if (throwable instanceof RestLiServiceException) { - throw (RestLiServiceException) throwable; - } - - throw new RestLiServiceException(HttpStatus.S_500_INTERNAL_SERVER_ERROR, throwable); - } - } - - @Nonnull - public static Task toTask(@Nonnull Supplier supplier, String metricName) { - Timer.Context context = MetricUtils.timer(metricName).time(); - // Stop timer on success and failure - return toTask(supplier) - .transform( - orig -> { - context.stop(); - if (orig.isFailed()) { - MetricUtils.counter(MetricRegistry.name(metricName, "failed")).inc(); - } else { - MetricUtils.counter(MetricRegistry.name(metricName, "success")).inc(); - } - return orig; - }); - } - - /** - * Similar to {@link #toTask(Supplier)} but the supplier is expected to return an {@link Optional} - * instead. A {@link RestLiServiceException} with 404 HTTP status code will be thrown if the - * optional is emtpy. 
- * - * @param supplier The supplier to execute - * @return A parseq {@link Task} - */ - @Nonnull - public static Task toTaskFromOptional(@Nonnull Supplier> supplier) { - return toTask(() -> supplier.get().orElseThrow(RestliUtil::resourceNotFoundException)); - } - - @Nonnull - public static RestLiServiceException resourceNotFoundException() { - return resourceNotFoundException(null); - } - - @Nonnull - public static RestLiServiceException nonExceptionResourceNotFound() { - return new NonExceptionHttpErrorResponse(HttpStatus.S_404_NOT_FOUND); - } - - @Nonnull - public static RestLiServiceException resourceNotFoundException(@Nullable String message) { - return new RestLiServiceException(HttpStatus.S_404_NOT_FOUND, message); - } - - @Nonnull - public static RestLiServiceException badRequestException(@Nullable String message) { - return new RestLiServiceException(HttpStatus.S_400_BAD_REQUEST, message); - } - - @Nonnull - public static RestLiServiceException invalidArgumentsException(@Nullable String message) { - return new RestLiServiceException(HttpStatus.S_412_PRECONDITION_FAILED, message); - } -} diff --git a/smoke-test/tests/openapi/v1/timeline.json b/smoke-test/tests/openapi/v1/timeline.json index 36459d1b9e8243..e59407fd8188f8 100644 --- a/smoke-test/tests/openapi/v1/timeline.json +++ b/smoke-test/tests/openapi/v1/timeline.json @@ -351,7 +351,8 @@ "parameters": { "fieldPath": "[version=2.0].[type=struct].[type=struct].service", "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service)", - "nullable": true + "nullable": true, + "modificationCategory": "OTHER" }, "semVerChange": "MINOR", "description": "A forwards & backwards compatible change due to the newly added field 'service'." @@ -364,7 +365,8 @@ "parameters": { "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type", "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.type)", - "nullable": true + "nullable": true, + "modificationCategory": "OTHER" }, "semVerChange": "MINOR", "description": "A forwards & backwards compatible change due to the newly added field 'service.type'." @@ -377,7 +379,8 @@ "parameters": { "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider", "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider)", - "nullable": true + "nullable": true, + "modificationCategory": "OTHER" }, "semVerChange": "MINOR", "description": "A forwards & backwards compatible change due to the newly added field 'service.provider'." @@ -390,7 +393,8 @@ "parameters": { "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id", "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)", - "nullable": true + "nullable": true, + "modificationCategory": "OTHER" }, "semVerChange": "MINOR", "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id'." 
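The fixture hunks above and below reflect a payload extension: each schema-field change event now carries a `modificationCategory` parameter (`OTHER` for additions and removals, `RENAME` for renames) alongside `nullable`. A minimal sketch of how a consumer of such a payload might bucket events by that parameter, using plain Jackson; the `changeEvents`/`parameters` traversal path is an assumption for illustration, not the documented response shape:

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.Map;

public class TimelineCategoryCounter {
  // Counts timeline change events per modificationCategory value.
  public static Map<String, Integer> countByCategory(String timelineJson) throws Exception {
    JsonNode root = new ObjectMapper().readTree(timelineJson);
    Map<String, Integer> counts = new HashMap<>();
    for (JsonNode transaction : root) { // assumed: top-level array of transactions
      for (JsonNode event : transaction.path("changeEvents")) { // assumed field name
        String category = event.path("parameters").path("modificationCategory").asText("UNSET");
        counts.merge(category, 1, Integer::sum);
      }
    }
    return counts;
  }
}
```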
diff --git a/smoke-test/tests/openapi/v1/timeline.json b/smoke-test/tests/openapi/v1/timeline.json
index 36459d1b9e8243..e59407fd8188f8 100644
--- a/smoke-test/tests/openapi/v1/timeline.json
+++ b/smoke-test/tests/openapi/v1/timeline.json
@@ -351,7 +351,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service'."
@@ -364,7 +365,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.type)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.type'."
@@ -377,7 +379,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider'."
@@ -390,7 +393,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id'."
@@ -403,7 +407,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'."
@@ -416,7 +421,8 @@
         "parameters": {
           "fieldPath": "property_id",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),property_id)",
-          "nullable": false
+          "nullable": false,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'property_id'."
@@ -438,7 +444,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "RENAME"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id to service.provider.id2'."
@@ -451,7 +458,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id3)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id3'."
@@ -464,7 +472,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MAJOR",
         "description": "A backwards incompatible change due to removal of field: 'service.provider.name'."
@@ -486,7 +495,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id2",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id2)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "RENAME"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id2 to service.provider.id'."
@@ -499,7 +509,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.id3)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MAJOR",
         "description": "A backwards incompatible change due to removal of field: 'service.provider.id3'."
@@ -512,7 +523,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV1,PROD),service.provider.name)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'."
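Note: both timeline fixtures (v1 above, v2 below) now pin the new modificationCategory parameter on each change event: plain additions and removals report OTHER, while detected renames report RENAME. A sketch of reading it with Jackson follows; the top-level changeEvents path and the payload variable are assumptions about the response shape, not taken from this PR.

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class TimelineCategoryReader {
      // Illustrative only: path(...) plus a default value keeps this tolerant
      // of older payloads that predate modificationCategory.
      public static void printRenames(String payload) throws Exception {
        JsonNode events = new ObjectMapper().readTree(payload).path("changeEvents");
        for (JsonNode event : events) {
          String category =
              event.path("parameters").path("modificationCategory").asText("OTHER");
          if ("RENAME".equals(category)) {
            System.out.println("rename: " + event.path("description").asText());
          }
        }
      }
    }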
diff --git a/smoke-test/tests/openapi/v2/timeline.json b/smoke-test/tests/openapi/v2/timeline.json
index ceee67b39a6d0f..ccf33ebd9d1c8b 100644
--- a/smoke-test/tests/openapi/v2/timeline.json
+++ b/smoke-test/tests/openapi/v2/timeline.json
@@ -351,7 +351,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service'."
@@ -364,7 +365,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=string].type",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.type)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.type'."
@@ -377,7 +379,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider'."
@@ -390,7 +393,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id'."
@@ -403,7 +407,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'."
@@ -416,7 +421,8 @@
         "parameters": {
           "fieldPath": "property_id",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),property_id)",
-          "nullable": false
+          "nullable": false,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'property_id'."
@@ -438,7 +444,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "RENAME"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id to service.provider.id2'."
@@ -451,7 +458,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id3)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.id3'."
@@ -464,7 +472,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MAJOR",
         "description": "A backwards incompatible change due to removal of field: 'service.provider.name'."
@@ -486,7 +495,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id2",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id2)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "RENAME"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to renaming of the field 'service.provider.id2 to service.provider.id'."
@@ -499,7 +509,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=int].id3",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.id3)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MAJOR",
         "description": "A backwards incompatible change due to removal of field: 'service.provider.id3'."
@@ -512,7 +523,8 @@
         "parameters": {
           "fieldPath": "[version=2.0].[type=struct].[type=struct].service.[type=struct].provider.[type=string].name",
           "fieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:test,datasetTimelineV2,PROD),service.provider.name)",
-          "nullable": true
+          "nullable": true,
+          "modificationCategory": "OTHER"
         },
         "semVerChange": "MINOR",
         "description": "A forwards & backwards compatible change due to the newly added field 'service.provider.name'."
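Note: the v2 fixture mirrors v1 exactly (only the dataset name changes to datasetTimelineV2), so both API versions assert the same category/severity pairing: additions (OTHER) and renames (RENAME) stay MINOR, while removals of existing fields are MAJOR. A compact, illustrative restatement of that rule (the helper name is not project code):

    // Illustrative only: encodes the expectation exercised by both fixtures above.
    static String expectedSemVerChange(boolean isRemovalOfExistingField) {
      // Removing a field breaks old readers => MAJOR; adds and renames stay MINOR.
      return isRemovalOfExistingField ? "MAJOR" : "MINOR";
    }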