diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 587833a41ba16c..32f29c9b0950d1 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -1029,6 +1029,7 @@ jobs: TEST_STRATEGY: ${{ matrix.test_strategy }} run: | echo "$DATAHUB_VERSION" + ./gradlew --stop ./smoke-test/smoke.sh - name: Disk Check run: df -h . && docker images @@ -1042,8 +1043,9 @@ jobs: uses: actions/upload-artifact@v3 if: failure() with: - name: docker logs + name: docker-logs-${{ matrix.test_strategy }} path: "docker_logs/*.log" + retention-days: 5 - name: Upload screenshots uses: actions/upload-artifact@v3 if: failure() diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml index a3d1b85df08188..1ae3edae7aa90b 100644 --- a/.github/workflows/pr-labeler.yml +++ b/.github/workflows/pr-labeler.yml @@ -61,18 +61,14 @@ jobs: contains( fromJson('[ "siladitya2", - "sgomezvillamor", - "ngamanda", - "HarveyLeo", - "frsann", "bossenti", - "nikolakasev", "PatrickfBraz", "cuong-pham", "sudhakarast", "tkdrahn", "rtekal", - "sgm44" + "mikeburke24", + "DSchmidtDev" ]'), github.actor ) diff --git a/build.gradle b/build.gradle index 07ca1f09e813c3..a83d878f46965f 100644 --- a/build.gradle +++ b/build.gradle @@ -34,8 +34,8 @@ buildscript { // Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md ext.pegasusVersion = '29.57.0' ext.mavenVersion = '3.6.3' - ext.springVersion = '6.1.6' - ext.springBootVersion = '3.2.6' + ext.springVersion = '6.1.13' + ext.springBootVersion = '3.2.9' ext.springKafkaVersion = '3.1.6' ext.openTelemetryVersion = '1.18.0' ext.neo4jVersion = '5.14.0' @@ -222,10 +222,10 @@ project.ext.externalDependency = [ 'playServer': "com.typesafe.play:play-server_2.12:$playVersion", 'playTest': "com.typesafe.play:play-test_2.12:$playVersion", 'playFilters': "com.typesafe.play:filters-helpers_2.12:$playVersion", - 'pac4j': 'org.pac4j:pac4j-oidc:4.5.7', + 'pac4j': 
'org.pac4j:pac4j-oidc:4.5.8', 'playPac4j': 'org.pac4j:play-pac4j_2.12:9.0.2', 'postgresql': 'org.postgresql:postgresql:42.3.9', - 'protobuf': 'com.google.protobuf:protobuf-java:3.19.6', + 'protobuf': 'com.google.protobuf:protobuf-java:3.25.5', 'grpcProtobuf': 'io.grpc:grpc-protobuf:1.53.0', 'rangerCommons': 'org.apache.ranger:ranger-plugins-common:2.3.0', 'reflections': 'org.reflections:reflections:0.9.9', @@ -267,7 +267,7 @@ project.ext.externalDependency = [ 'testContainersOpenSearch': 'org.opensearch:opensearch-testcontainers:2.0.0', 'typesafeConfig':'com.typesafe:config:1.4.1', 'wiremock':'com.github.tomakehurst:wiremock:2.10.0', - 'zookeeper': 'org.apache.zookeeper:zookeeper:3.7.2', + 'zookeeper': 'org.apache.zookeeper:zookeeper:3.6.2', 'wire': 'com.squareup.wire:wire-compiler:3.7.1', 'charle': 'com.charleskorn.kaml:kaml:0.53.0', 'common': 'commons-io:commons-io:2.7', diff --git a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java index 080ca236630bf3..f982944071498c 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java @@ -243,6 +243,9 @@ public Builder from(final com.typesafe.config.Config configs, final String ssoSe Optional.ofNullable(getOptional(configs, OIDC_PREFERRED_JWS_ALGORITHM, null)); } + grantType = Optional.ofNullable(getOptional(configs, OIDC_GRANT_TYPE, null)); + acrValues = Optional.ofNullable(getOptional(configs, OIDC_ACR_VALUES, null)); + return this; } diff --git a/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java index 67ec5d78add838..3a0a247cb761e2 100644 --- a/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java +++ b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcClient.java @@ -18,7 +18,7 @@ public CustomOidcClient(final OidcConfiguration configuration) { protected void clientInit() { 
CommonHelper.assertNotNull("configuration", getConfiguration()); getConfiguration().init(); - defaultRedirectionActionBuilder(new OidcRedirectionActionBuilder(getConfiguration(), this)); + defaultRedirectionActionBuilder(new CustomOidcRedirectionActionBuilder(getConfiguration(), this)); defaultCredentialsExtractor(new OidcExtractor(getConfiguration(), this)); defaultAuthenticator(new CustomOidcAuthenticator(this)); defaultProfileCreator(new OidcProfileCreator<>(getConfiguration(), this)); diff --git a/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcRedirectionActionBuilder.java b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcRedirectionActionBuilder.java new file mode 100644 index 00000000000000..bdeeacc895af35 --- /dev/null +++ b/datahub-frontend/app/auth/sso/oidc/custom/CustomOidcRedirectionActionBuilder.java @@ -0,0 +1,56 @@ +package auth.sso.oidc.custom; + +import java.util.Map; +import java.util.Optional; +import org.pac4j.core.context.WebContext; +import org.pac4j.core.exception.http.RedirectionAction; +import org.pac4j.core.exception.http.RedirectionActionHelper; +import org.pac4j.oidc.client.OidcClient; +import org.pac4j.oidc.config.OidcConfiguration; +import org.pac4j.oidc.redirect.OidcRedirectionActionBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Redirection action builder that makes sure the {@code acr_values} custom parameter from the + * OIDC configuration is present on the authentication request URL. + */ +public class CustomOidcRedirectionActionBuilder extends OidcRedirectionActionBuilder { + + private static final Logger logger = LoggerFactory.getLogger(CustomOidcRedirectionActionBuilder.class); + public CustomOidcRedirectionActionBuilder(OidcConfiguration configuration, OidcClient client) { + super(configuration, client); + } + + /** + * Builds the redirect to the authentication request URL, appending {@code acr_values} from the + * configuration's custom parameters when the URL does not already contain it. + */ + @Override + public Optional getRedirectionAction(WebContext context) { + Map params = this.buildParams(); + String computedCallbackUrl = this.client.computeFinalCallbackUrl(context); + params.put("redirect_uri", computedCallbackUrl); + this.addStateAndNonceParameters(context, params); + if (this.configuration.getMaxAge() != null) { + params.put("max_age", 
this.configuration.getMaxAge().toString()); + } + + String location = this.buildAuthenticationRequestUrl(params); + + logger.debug("Custom parameters: {}", this.configuration.getCustomParams()); + + String acrValues = this.configuration.getCustomParam("acr_values"); + + // The built URL may already carry acr_values; append it only when absent, and URL-encode + // the value so reserved characters (spaces, '&', '#') cannot break the query string. + if (acrValues != null && !location.contains("acr_values=")) { + location += (location.contains("?") ? "&" : "?") + "acr_values=" + java.net.URLEncoder.encode(acrValues, java.nio.charset.StandardCharsets.UTF_8); + } + + logger.debug("Authentication request url: {}", location); + return Optional.of(RedirectionActionHelper.buildRedirectUrlAction(context, location)); + } + +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java index 72643ccac6325c..9ba2778c285aaf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java @@ -78,7 +78,8 @@ public List getTimeseriesChart( Optional dimension, // Length 1 for now Map> filters, Map> mustNotFilters, - Optional uniqueOn) { + Optional uniqueOn, + String dateRangeField) { log.debug( String.format( @@ -87,11 +88,11 @@ public List getTimeseriesChart( + String.format("filters: %s, uniqueOn: %s", filters, uniqueOn)); AggregationBuilder filteredAgg = - getFilteredAggregation(filters, mustNotFilters, Optional.of(dateRange)); + getFilteredAggregation(filters, mustNotFilters, Optional.of(dateRange), dateRangeField); AggregationBuilder dateHistogram = AggregationBuilders.dateHistogram(DATE_HISTOGRAM) - .field("timestamp") + .field(dateRangeField) .calendarInterval(new DateHistogramInterval(granularity.name().toLowerCase())); uniqueOn.ifPresent(s -> dateHistogram.subAggregation(getUniqueQuery(s))); @@ -128,6 +129,25 @@ public List getTimeseriesChart( } } + public List getTimeseriesChart( + String indexName, 
+ DateRange dateRange, + DateInterval granularity, + Optional dimension, // Length 1 for now + Map> filters, + Map> mustNotFilters, + Optional uniqueOn) { + return getTimeseriesChart( + indexName, + dateRange, + granularity, + dimension, + filters, + mustNotFilters, + uniqueOn, + "timestamp"); + } + private int extractCount(MultiBucketsAggregation.Bucket bucket, boolean didUnique) { return didUnique ? (int) bucket.getAggregations().get(UNIQUE).getValue() @@ -323,20 +343,38 @@ private Filter executeAndExtract(SearchRequest searchRequest) { } } + // Make dateRangeField as customizable private AggregationBuilder getFilteredAggregation( Map> mustFilters, Map> mustNotFilters, - Optional dateRange) { + Optional dateRange, + String dateRangeField) { BoolQueryBuilder filteredQuery = QueryBuilders.boolQuery(); mustFilters.forEach((key, values) -> filteredQuery.must(QueryBuilders.termsQuery(key, values))); mustNotFilters.forEach( (key, values) -> filteredQuery.mustNot(QueryBuilders.termsQuery(key, values))); - dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range))); + dateRange.ifPresent(range -> filteredQuery.must(dateRangeQuery(range, dateRangeField))); return AggregationBuilders.filter(FILTERED, filteredQuery); } + private AggregationBuilder getFilteredAggregation( + Map> mustFilters, + Map> mustNotFilters, + Optional dateRange) { + // Use timestamp as dateRangeField + return getFilteredAggregation(mustFilters, mustNotFilters, dateRange, "timestamp"); + } + private QueryBuilder dateRangeQuery(DateRange dateRange) { - return QueryBuilders.rangeQuery("timestamp").gte(dateRange.getStart()).lt(dateRange.getEnd()); + // Use timestamp as dateRangeField + return dateRangeQuery(dateRange, "timestamp"); + } + + // Make dateRangeField as customizable + private QueryBuilder dateRangeQuery(DateRange dateRange, String dateRangeField) { + return QueryBuilders.rangeQuery(dateRangeField) + .gte(dateRange.getStart()) + .lt(dateRange.getEnd()); } private AggregationBuilder 
getUniqueQuery(String uniqueOn) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java index 2c058eb60a7ee3..fff1dfee7ef9c1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java @@ -14,6 +14,7 @@ import com.linkedin.datahub.graphql.generated.ScrollAcrossLineageResults; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.common.mappers.LineageFlagsInputMapper; +import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.datahub.graphql.types.mappers.UrnScrollAcrossLineageResultsMapper; import com.linkedin.entity.client.EntityClient; @@ -89,7 +90,6 @@ public CompletableFuture get(DataFetchingEnvironment if (lineageFlags.getEndTimeMillis() == null && endTimeMillis != null) { lineageFlags.setEndTimeMillis(endTimeMillis); } - ; com.linkedin.metadata.graph.LineageDirection resolvedDirection = com.linkedin.metadata.graph.LineageDirection.valueOf(lineageDirection.toString()); @@ -107,17 +107,13 @@ public CompletableFuture get(DataFetchingEnvironment count); final SearchFlags searchFlags; - final com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = - input.getSearchFlags(); + com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { - searchFlags = - new SearchFlags() - .setSkipCache(inputFlags.getSkipCache()) - .setFulltext(inputFlags.getFulltext()) - .setMaxAggValues(inputFlags.getMaxAggValues()); + searchFlags = SearchFlagsInputMapper.INSTANCE.apply(context, 
inputFlags); } else { searchFlags = null; } + return UrnScrollAcrossLineageResultsMapper.map( context, _entityClient.scrollAcrossLineage( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java index c432281ec16848..f5c101ba2bf64c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.structuredproperties; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import com.linkedin.common.urn.Urn; @@ -21,17 +22,21 @@ import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PropertyCardinality; import com.linkedin.structured.PropertyValue; +import com.linkedin.structured.StructuredPropertyDefinition; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.Objects; import java.util.concurrent.CompletableFuture; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class UpdateStructuredPropertyResolver implements DataFetcher> { private final EntityClient _entityClient; + private static final String ALLOWED_TYPES = "allowedTypes"; + public UpdateStructuredPropertyResolver(@Nonnull final EntityClient entityClient) { _entityClient = Objects.requireNonNull(entityClient, "entityClient must not be null"); } @@ -52,6 +57,8 @@ public CompletableFuture get(final DataFetchingEnviron 
"Unable to update structured property. Please contact your admin."); } final Urn propertyUrn = UrnUtils.getUrn(input.getUrn()); + StructuredPropertyDefinition existingDefinition = + getExistingStructuredProperty(context, propertyUrn); StructuredPropertyDefinitionPatchBuilder builder = new StructuredPropertyDefinitionPatchBuilder().urn(propertyUrn); @@ -65,7 +72,7 @@ public CompletableFuture get(final DataFetchingEnviron builder.setImmutable(input.getImmutable()); } if (input.getTypeQualifier() != null) { - buildTypeQualifier(input, builder); + buildTypeQualifier(input, builder, existingDefinition); } if (input.getNewAllowedValues() != null) { buildAllowedValues(input, builder); @@ -97,10 +104,16 @@ public CompletableFuture get(final DataFetchingEnviron private void buildTypeQualifier( @Nonnull final UpdateStructuredPropertyInput input, - @Nonnull final StructuredPropertyDefinitionPatchBuilder builder) { + @Nonnull final StructuredPropertyDefinitionPatchBuilder builder, + @Nullable final StructuredPropertyDefinition existingDefinition) { if (input.getTypeQualifier().getNewAllowedTypes() != null) { final StringArrayMap typeQualifier = new StringArrayMap(); StringArray allowedTypes = new StringArray(); + if (existingDefinition != null + && existingDefinition.getTypeQualifier() != null + && existingDefinition.getTypeQualifier().get(ALLOWED_TYPES) != null) { + allowedTypes.addAll(existingDefinition.getTypeQualifier().get(ALLOWED_TYPES)); + } allowedTypes.addAll(input.getTypeQualifier().getNewAllowedTypes()); typeQualifier.put("allowedTypes", allowedTypes); builder.setTypeQualifier(typeQualifier); @@ -127,4 +140,18 @@ private void buildAllowedValues( builder.addAllowedValue(value); }); } + + private StructuredPropertyDefinition getExistingStructuredProperty( + @Nonnull final QueryContext context, @Nonnull final Urn propertyUrn) throws Exception { + EntityResponse response = + _entityClient.getV2( + context.getOperationContext(), STRUCTURED_PROPERTY_ENTITY_NAME, 
propertyUrn, null); + + if (response != null + && response.getAspects().containsKey(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + return new StructuredPropertyDefinition( + response.getAspects().get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME).getValue().data()); + } + return null; + } } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index a2e2fe9163f536..fd112c9524ac9a 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -9157,6 +9157,10 @@ enum PolicyMatchCondition { Whether the field matches the value """ EQUALS + """ + Whether the field value starts with the value + """ + STARTS_WITH } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolverTest.java new file mode 100644 index 00000000000000..a12f593253b533 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolverTest.java @@ -0,0 +1,155 @@ +package com.linkedin.datahub.graphql.resolvers.search; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyList; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.UrnArrayArray; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import 
com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.LineageDirection; +import com.linkedin.datahub.graphql.generated.ScrollAcrossLineageInput; +import com.linkedin.datahub.graphql.generated.ScrollAcrossLineageResults; +import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResult; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.AggregationMetadataArray; +import com.linkedin.metadata.search.LineageScrollResult; +import com.linkedin.metadata.search.LineageSearchEntity; +import com.linkedin.metadata.search.LineageSearchEntityArray; +import com.linkedin.metadata.search.MatchedFieldArray; +import com.linkedin.metadata.search.SearchResultMetadata; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.io.InputStream; +import java.util.Collections; +import java.util.List; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ScrollAcrossLineageResolverTest { + private static final String SOURCE_URN_STRING = + "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)"; + private static final String TARGET_URN_STRING = + "urn:li:dataset:(urn:li:dataPlatform:foo,baz,PROD)"; + private static final String QUERY = ""; + private static final int START = 0; + private static final int COUNT = 10; + private static final Long START_TIMESTAMP_MILLIS = 0L; + private static final Long END_TIMESTAMP_MILLIS = 1000L; + private EntityClient _entityClient; + private DataFetchingEnvironment _dataFetchingEnvironment; + private Authentication _authentication; + private 
ScrollAcrossLineageResolver _resolver; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @BeforeMethod + public void setupTest() { + _entityClient = mock(EntityClient.class); + _dataFetchingEnvironment = mock(DataFetchingEnvironment.class); + _authentication = mock(Authentication.class); + _resolver = new ScrollAcrossLineageResolver(_entityClient); + } + + @Test + public void testAllEntitiesInitialization() { + InputStream inputStream = ClassLoader.getSystemResourceAsStream("entity-registry.yml"); + EntityRegistry entityRegistry = new ConfigEntityRegistry(inputStream); + SearchAcrossLineageResolver resolver = + new SearchAcrossLineageResolver(_entityClient, entityRegistry); + assertTrue(resolver._allEntities.contains("dataset")); + assertTrue(resolver._allEntities.contains("dataFlow")); + // Test for case sensitivity + assertFalse(resolver._allEntities.contains("dataflow")); + } + + @Test + public void testSearchAcrossLineage() throws Exception { + final QueryContext mockContext = getMockAllowContext(); + when(mockContext.getAuthentication()).thenReturn(_authentication); + + when(_dataFetchingEnvironment.getContext()).thenReturn(mockContext); + + final SearchFlags searchFlags = new SearchFlags(); + searchFlags.setFulltext(true); + + final ScrollAcrossLineageInput input = new ScrollAcrossLineageInput(); + input.setCount(COUNT); + input.setDirection(LineageDirection.DOWNSTREAM); + input.setOrFilters(Collections.emptyList()); + input.setQuery(QUERY); + input.setTypes(Collections.emptyList()); + input.setStartTimeMillis(START_TIMESTAMP_MILLIS); + input.setEndTimeMillis(END_TIMESTAMP_MILLIS); + input.setUrn(SOURCE_URN_STRING); + input.setSearchFlags(searchFlags); + when(_dataFetchingEnvironment.getArgument(eq("input"))).thenReturn(input); + + final LineageScrollResult lineageSearchResult = new 
LineageScrollResult(); + lineageSearchResult.setNumEntities(1); + lineageSearchResult.setPageSize(10); + + final SearchResultMetadata searchResultMetadata = new SearchResultMetadata(); + searchResultMetadata.setAggregations(new AggregationMetadataArray()); + lineageSearchResult.setMetadata(searchResultMetadata); + + final LineageSearchEntity lineageSearchEntity = new LineageSearchEntity(); + lineageSearchEntity.setEntity(UrnUtils.getUrn(TARGET_URN_STRING)); + lineageSearchEntity.setScore(15.0); + lineageSearchEntity.setDegree(1); + lineageSearchEntity.setMatchedFields(new MatchedFieldArray()); + lineageSearchEntity.setPaths(new UrnArrayArray()); + lineageSearchResult.setEntities(new LineageSearchEntityArray(lineageSearchEntity)); + ArgumentCaptor opContext = ArgumentCaptor.forClass(OperationContext.class); + + when(_entityClient.scrollAcrossLineage( + opContext.capture(), + eq(UrnUtils.getUrn(SOURCE_URN_STRING)), + eq(com.linkedin.metadata.graph.LineageDirection.DOWNSTREAM), + anyList(), + eq(QUERY), + eq(null), + any(), + eq(null), + nullable(String.class), + nullable(String.class), + eq(COUNT))) + .thenReturn(lineageSearchResult); + + final ScrollAcrossLineageResults results = _resolver.get(_dataFetchingEnvironment).join(); + assertEquals(results.getCount(), 10); + assertEquals(results.getTotal(), 1); + assertEquals( + opContext.getValue().getSearchContext().getLineageFlags().getStartTimeMillis(), + START_TIMESTAMP_MILLIS); + assertEquals( + opContext.getValue().getSearchContext().getLineageFlags().getEndTimeMillis(), + END_TIMESTAMP_MILLIS); + + final List entities = results.getSearchResults(); + assertEquals(entities.size(), 1); + final SearchAcrossLineageResult entity = entities.get(0); + assertEquals(entity.getEntity().getUrn(), TARGET_URN_STRING); + assertEquals(entity.getEntity().getType(), EntityType.DATASET); + } +} diff --git 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java index 971a53de9473b5..b818bcfb7d7f4f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpdateStructuredPropertyResolverTest.java @@ -89,8 +89,8 @@ public void testGetFailure() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); - // Validate that ingest was called, but that caused a failure - Mockito.verify(mockEntityClient, Mockito.times(1)) + // Validate that ingest was not called since there was a get failure before ingesting + Mockito.verify(mockEntityClient, Mockito.times(0)) .ingestProposal(any(), any(MetadataChangeProposal.class), Mockito.eq(false)); } diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 5d814dd876679e..e808f9e87687c0 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -121,6 +121,37 @@ task run(type: Exec) { "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdate" } +/** + * Runs RestoreIndices against a locally running system. The batchSize is set to + * exercise pagination and is not tuned for optimal performance. + */ +task runRestoreIndices(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the restore indices process locally." 
+ environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n", + "-jar", + "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api", + "-Dserver.port=8083", + bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100" +} + +/** + * Same invocation as runRestoreIndices, with the additional argument urnBasedPagination=true. + */ +task runRestoreIndicesUrn(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the restore indices process locally with URN-based pagination." + environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + commandLine "java", "-agentlib:jdwp=transport=dt_socket,address=5003,server=y,suspend=n", + "-jar", + "-Dkafka.schemaRegistry.url=http://localhost:8080/schema-registry/api", + "-Dserver.port=8083", + bootJar.getArchiveFile().get(), "-u", "RestoreIndices", "-a", "batchSize=100", "-a", "urnBasedPagination=true" +} + docker { name "${docker_registry}/${docker_repo}:v${version}" version "v${version}" diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index dea98c5cbcb132..8b33e4e7c21649 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -12,12 +12,13 @@ import com.linkedin.gms.factory.kafka.common.TopicConventionFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.dao.producer.KafkaEventProducer; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import 
com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.graph.SystemGraphRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.SearchServiceSearchRetriever; @@ -145,7 +146,7 @@ protected OperationContext javaSystemOperationContext( @Nonnull final EntityRegistry entityRegistry, @Nonnull final EntityService entityService, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever, + @Nonnull final GraphService graphService, @Nonnull final SearchService searchService, @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components) { @@ -159,6 +160,9 @@ protected OperationContext javaSystemOperationContext( SearchServiceSearchRetriever searchServiceSearchRetriever = SearchServiceSearchRetriever.builder().searchService(searchService).build(); + SystemGraphRetriever systemGraphRetriever = + SystemGraphRetriever.builder().graphService(graphService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -168,11 +172,12 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) - .graphRetriever(graphRetriever) + .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return systemOperationContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java similarity index 85% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java index 4956254062ff96..a973876c6715f0 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java @@ -1,7 +1,8 @@ -package com.linkedin.datahub.upgrade.config; +package com.linkedin.datahub.upgrade.config.graph; +import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; -import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import io.datahubproject.metadata.context.OperationContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java new file mode 100644 index 00000000000000..97715573eb51ff --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java @@ -0,0 +1,31 @@ +package com.linkedin.datahub.upgrade.config.graph; + +import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.datahub.upgrade.system.graph.edgestatus.ReindexEdgeStatus; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import 
io.datahubproject.metadata.context.OperationContext; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class ReindexEdgeStatusConfig { + + @Bean + public NonBlockingSystemUpgrade reindexEdgeStatus( + final OperationContext opContext, + final EntityService entityService, + final AspectDao aspectDao, + @Value("${elasticsearch.search.graph.graphStatusEnabled}") final boolean featureEnabled, + @Value("${systemUpdate.edgeStatus.enabled}") final boolean enabled, + @Value("${systemUpdate.edgeStatus.batchSize}") final Integer batchSize, + @Value("${systemUpdate.edgeStatus.delayMs}") final Integer delayMs, + @Value("${systemUpdate.edgeStatus.limit}") final Integer limit) { + return new ReindexEdgeStatus( + opContext, entityService, aspectDao, featureEnabled && enabled, batchSize, delayMs, limit); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index 8e62db444a5655..902a80ec107fd2 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -189,7 +190,16 @@ public Function executable() { context.report().addLine(String.format("Rows processed this loop %d", rowsProcessed)); start += args.batchSize; } catch (InterruptedException | ExecutionException e) { - 
return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + if (e.getCause() instanceof NoSuchElementException) { + context.report().addLine("End of data."); + break; + } else { + // Restore the interrupt flag so code further up the stack can observe the interruption. + if (e instanceof InterruptedException) { + Thread.currentThread().interrupt(); + } + return new DefaultUpgradeStepResult(id(), DataHubUpgradeState.FAILED); + } } } } else { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java new file mode 100644 index 00000000000000..6b7286a6a06393 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java @@ -0,0 +1,50 @@ +package com.linkedin.datahub.upgrade.system.graph.edgestatus; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * A job that reindexes all status aspects as part of the graph edges containing status information. + * This is required to make sure previously written status information is present in the graph + * index. 
+ */ +@Slf4j +public class ReindexEdgeStatus implements NonBlockingSystemUpgrade { + + private final List _steps; + + public ReindexEdgeStatus( + @Nonnull OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + boolean enabled, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + if (enabled) { + _steps = + ImmutableList.of( + new ReindexReindexEdgeStatusStep( + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return this.getClass().getName(); + } + + @Override + public List steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java new file mode 100644 index 00000000000000..6543f82e745635 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java @@ -0,0 +1,56 @@ +package com.linkedin.datahub.upgrade.system.graph.edgestatus; + +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; + +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +@Slf4j +public class ReindexReindexEdgeStatusStep extends AbstractMCLStep { + + public ReindexReindexEdgeStatusStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); + } 
+ + @Override + public String id() { + return "edge-status-reindex-v1"; + } + + @Nonnull + @Override + protected String getAspectName() { + return STATUS_ASPECT_NAME; + } + + @Nullable + @Override + protected String getUrnLike() { + return null; + } + + @Override + /** + * Returns whether the upgrade should be skipped. Uses previous run history or the environment + * variable to determine whether to skip. + */ + public boolean skip(UpgradeContext context) { + boolean envFlagRecommendsSkip = Boolean.parseBoolean(System.getenv("SKIP_REINDEX_EDGE_STATUS")); + if (envFlagRecommendsSkip) { + log.info("Environment variable SKIP_REINDEX_EDGE_STATUS is set to true. Skipping."); + } + return (super.skip(context) || envFlagRecommendsSkip); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java similarity index 95% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java index fc0b44f57ab494..7a4ca9586f155d 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.system.vianodes; +package com.linkedin.datahub.upgrade.system.graph.vianodes; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.upgrade.UpgradeStep; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java similarity 
index 96% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java index cf580670ee3a9a..e3e07f99bb1ee7 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.system.vianodes; +package com.linkedin.datahub.upgrade.system.graph.vianodes; import static com.linkedin.metadata.Constants.*; diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java index 55a52f072a0caf..df27d33f3a117e 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -9,7 +9,7 @@ import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; -import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer; import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer; import com.linkedin.metadata.config.kafka.KafkaConfiguration; diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index a1b3a94ace6406..dcaef6004d7022 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -45,7 +45,7 @@ "dayjs": "^1.11.7", "deepmerge": "^4.2.2", "diff": "^5.0.0", - "dompurify": "^2.3.8", + "dompurify": 
"^2.5.4", "dotenv": "^8.2.0", "faker": "5.5.3", "graphql": "^15.5.0", diff --git a/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx b/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx index 2485fb1e48fbed..7666eb04612e5c 100644 --- a/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx +++ b/datahub-web-react/src/app/entity/group/AddGroupMembersModal.tsx @@ -17,7 +17,7 @@ type Props = { const SelectInput = styled(Select)` > .ant-select-selector { - height: 36px; + height: 'auto'; } `; diff --git a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx index 26c90edd82b696..0749ff369c1251 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/search/EmbeddedListSearchResults.tsx @@ -10,6 +10,8 @@ import { SearchFiltersSection } from '../../../../../search/SearchFiltersSection import { EntitySearchResults, EntityActionProps } from './EntitySearchResults'; import MatchingViewsLabel from './MatchingViewsLabel'; import { ANTD_GRAY } from '../../../constants'; +import { useIsShowSeparateSiblingsEnabled } from '../../../../../useAppConfig'; +import { combineSiblingsInSearchResults } from '../../../../../search/utils/combineSiblingsInSearchResults'; const SearchBody = styled.div` height: 100%; @@ -129,6 +131,12 @@ export const EmbeddedListSearchResults = ({ onLineageClick, isLineageTab = false, }: Props) => { + const showSeparateSiblings = useIsShowSeparateSiblingsEnabled(); + const combinedSiblingSearchResults = combineSiblingsInSearchResults( + showSeparateSiblings, + searchResponse?.searchResults, + ); + const pageStart = searchResponse?.start || 0; const pageSize = searchResponse?.count || 0; const totalResults = searchResponse?.total || 0; @@ -169,9 +177,9 @@ export const 
EmbeddedListSearchResults = ({ )} {!loading && !isServerOverloadError && ( ({ + combinedSiblingSearchResults?.map((searchResult) => ({ // when we add impact analysis, we will want to pipe the path to each element to the result this // eslint-disable-next-line @typescript-eslint/dot-notation degree: searchResult['degree'], diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx index cc2e1bb7b386e7..537750ec279f99 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx @@ -111,4 +111,5 @@ export const DATA_QUALITY_ASSERTION_TYPES = new Set([ AssertionType.Sql, AssertionType.Field, AssertionType.Dataset, + AssertionType.Custom, ]); diff --git a/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx b/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx index a0a7db63381dfe..485c00c7984d8c 100644 --- a/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx +++ b/datahub-web-react/src/app/lineage/LineageEntityEdge.tsx @@ -26,11 +26,11 @@ const StyledEyeOutlined = styled(EyeOutlined)` type Props = { edge: VizEdge; - key: string; + edgeKey: string; isHighlighted: boolean; }; -export default function LineageEntityEdge({ edge, key, isHighlighted }: Props) { +export default function LineageEntityEdge({ edge, edgeKey, isHighlighted }: Props) { const createdOnTimestamp = edge?.createdOn; const updatedOnTimestamp = edge?.updatedOn; const createdOn = createdOnTimestamp ? 
dayjs(createdOnTimestamp).format('ll') : undefined; @@ -59,7 +59,7 @@ export default function LineageEntityEdge({ edge, key, isHighlighted }: Props) { undefined } > - + { diff --git a/datahub-web-react/src/app/lineage/LineageExplorer.tsx b/datahub-web-react/src/app/lineage/LineageExplorer.tsx index 6d5815afe7d308..2d42d164f6626e 100644 --- a/datahub-web-react/src/app/lineage/LineageExplorer.tsx +++ b/datahub-web-react/src/app/lineage/LineageExplorer.tsx @@ -77,12 +77,12 @@ export default function LineageExplorer({ urn, type }: Props) { const [isDrawerVisible, setIsDrawVisible] = useState(false); const [selectedEntity, setSelectedEntity] = useState(undefined); - const [asyncEntities, setAsyncEntities] = useState({}); + const [asyncEntities, setAsyncEntities] = useState(new Map()); // In the case that any URL params change, we want to reset asyncEntities. If new parameters are added, // they should be added to the dependency array below. useEffect(() => { - setAsyncEntities({}); + setAsyncEntities(new Map()); // this can also be our hook for emitting the tracking event analytics.event({ @@ -93,7 +93,7 @@ export default function LineageExplorer({ urn, type }: Props) { useEffect(() => { if (showColumns) { - setAsyncEntities({}); + setAsyncEntities(new Map()); } }, [showColumns]); @@ -101,7 +101,7 @@ export default function LineageExplorer({ urn, type }: Props) { const maybeAddAsyncLoadedEntity = useCallback( (entityAndType: EntityAndType) => { - if (entityAndType?.entity.urn && !asyncEntities[entityAndType?.entity.urn]?.fullyFetched) { + if (entityAndType?.entity.urn && !asyncEntities.get(entityAndType?.entity.urn)?.fullyFetched) { // record that we have added this entity let newAsyncEntities = extendAsyncEntities( fineGrainedMap, @@ -145,10 +145,10 @@ export default function LineageExplorer({ urn, type }: Props) { // set asyncEntity to have fullyFetched: false so we can update it in maybeAddAsyncLoadedEntity function resetAsyncEntity(entityUrn: string) { - 
setAsyncEntities({ - ...asyncEntities, - [entityUrn]: { ...asyncEntities[entityUrn], fullyFetched: false }, - }); + const newAsyncEntities = new Map(asyncEntities); + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + newAsyncEntities.set(entityUrn, { ...asyncEntities.get(entityUrn)!, fullyFetched: false }); + setAsyncEntities(newAsyncEntities); } const handleClose = () => { diff --git a/datahub-web-react/src/app/lineage/LineageTree.tsx b/datahub-web-react/src/app/lineage/LineageTree.tsx index 8b5de4e78ff17c..46156baacd611b 100644 --- a/datahub-web-react/src/app/lineage/LineageTree.tsx +++ b/datahub-web-react/src/app/lineage/LineageTree.tsx @@ -25,7 +25,7 @@ type LineageTreeProps = { setIsDraggingNode: (isDraggingNode: boolean) => void; draggedNodes: Record; setDraggedNodes: (draggedNodes: Record) => void; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; setUpdatedLineages: React.Dispatch>; }; diff --git a/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx b/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx index bec83c80107b31..2e6fc2997d4ca1 100644 --- a/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx +++ b/datahub-web-react/src/app/lineage/LineageTreeNodeAndEdgeRenderer.tsx @@ -77,7 +77,7 @@ export default function LineageTreeNodeAndEdgeRenderer({ link.target.data.urn }${link.targetField && `-${link.targetField}`}-${link.target.direction}`; - return ; + return ; })} {nodesToRender.map((node, index) => { const isSelected = node.data.urn === selectedEntity?.urn; diff --git a/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx b/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx index ab86dfcb335f0d..4d4ac317e06f59 100644 --- a/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx +++ b/datahub-web-react/src/app/lineage/LineageVizInsideZoom.tsx @@ -36,7 +36,7 @@ const ZoomButton = styled(Button)` type Props = { margin: { top: number; right: 
number; bottom: number; left: number }; entityAndType?: EntityAndType | null; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; onEntityClick: (EntitySelectParams) => void; onEntityCenter: (EntitySelectParams) => void; onLineageExpand: (data: EntityAndType) => void; diff --git a/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx b/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx index 434fb1562bc2a0..8aa556ae12983d 100644 --- a/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx +++ b/datahub-web-react/src/app/lineage/LineageVizRootSvg.tsx @@ -10,7 +10,7 @@ import constructTree from './utils/constructTree'; type Props = { margin: { top: number; right: number; bottom: number; left: number }; entityAndType?: EntityAndType | null; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; onEntityClick: (EntitySelectParams) => void; onEntityCenter: (EntitySelectParams) => void; onLineageExpand: (data: EntityAndType) => void; diff --git a/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx b/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx index 0852a8a32dbfa6..571ca51c46b099 100644 --- a/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx +++ b/datahub-web-react/src/app/lineage/__tests__/LineageTree.test.tsx @@ -9,7 +9,7 @@ import { dataset6WithLineage, mocks, } from '../../../Mocks'; -import { Direction, EntityAndType, FetchedEntities } from '../types'; +import { Direction, EntityAndType } from '../types'; import constructTree from '../utils/constructTree'; import LineageTree from '../LineageTree'; import extendAsyncEntities from '../utils/extendAsyncEntities'; @@ -50,7 +50,7 @@ describe('LineageTree', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const downstreamData = constructTree( diff --git a/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx 
b/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx index ffcc7433426a00..11cc69a142320b 100644 --- a/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx +++ b/datahub-web-react/src/app/lineage/__tests__/adjustVXTreeLayout.test.tsx @@ -14,7 +14,7 @@ import { import constructTree from '../utils/constructTree'; import extendAsyncEntities from '../utils/extendAsyncEntities'; import adjustVXTreeLayout from '../utils/adjustVXTreeLayout'; -import { NodeData, Direction, FetchedEntities, EntityAndType } from '../types'; +import { NodeData, Direction, EntityAndType } from '../types'; import { getTestEntityRegistry } from '../../../utils/test-utils/TestPageContainer'; import { Dataset, Entity, EntityType } from '../../../types.generated'; @@ -37,7 +37,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const downstreamData = hierarchy( @@ -88,7 +88,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( @@ -144,7 +144,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity, type: EntityType.Dataset } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( @@ -189,7 +189,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( @@ -234,7 +234,7 @@ describe('adjustVXTreeLayout', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const upstreamData = hierarchy( diff --git a/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts 
b/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts index 2e41fb9ea07bff..245265cd722967 100644 --- a/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts +++ b/datahub-web-react/src/app/lineage/__tests__/constructTree.test.ts @@ -12,7 +12,7 @@ import { } from '../../../Mocks'; import { DataPlatform, Dataset, Entity, EntityType, RelationshipDirection } from '../../../types.generated'; import { getTestEntityRegistry } from '../../../utils/test-utils/TestPageContainer'; -import { Direction, EntityAndType, FetchedEntities, UpdatedLineages } from '../types'; +import { Direction, EntityAndType, FetchedEntity, UpdatedLineages } from '../types'; import constructTree from '../utils/constructTree'; import extendAsyncEntities from '../utils/extendAsyncEntities'; @@ -23,7 +23,7 @@ const airflowPlatform: DataPlatform = dataFlow1.platform; describe('constructTree', () => { it('handles nodes without any lineage', () => { - const mockFetchedEntities = {}; + const mockFetchedEntities = new Map(); expect( constructTree( { entity: dataset3, type: EntityType.Dataset }, @@ -61,7 +61,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( @@ -113,7 +113,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( @@ -166,7 +166,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( @@ -261,7 +261,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const tree = constructTree( @@ -292,7 +292,7 @@ describe('constructTree', () => { { entity: entry.entity as Entity, type: EntityType.Dataset } as 
EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( constructTree( @@ -379,7 +379,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: entry.entity.type as EntityType } as EntityAndType, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); expect( constructTree( @@ -434,7 +434,7 @@ describe('constructTree', () => { { entity: entry.entity as Dataset, type: EntityType.Dataset }, entry.fullyFetched, ), - {} as FetchedEntities, + new Map(), ); const updatedLineages: UpdatedLineages = { diff --git a/datahub-web-react/src/app/lineage/types.ts b/datahub-web-react/src/app/lineage/types.ts index b4d73d0c9185e1..0edc3a3a7c0393 100644 --- a/datahub-web-react/src/app/lineage/types.ts +++ b/datahub-web-react/src/app/lineage/types.ts @@ -111,7 +111,7 @@ export type ColumnEdge = { targetField: string; }; -export type FetchedEntities = { [x: string]: FetchedEntity }; +export type FetchedEntities = Map; export enum Direction { Upstream = 'Upstream', @@ -126,7 +126,7 @@ export type LineageExplorerParams = { export type TreeProps = { margin?: { top: number; right: number; bottom: number; left: number }; entityAndType?: EntityAndType | null; - fetchedEntities: { [x: string]: FetchedEntity }; + fetchedEntities: Map; onEntityClick: (EntitySelectParams) => void; onEntityCenter: (EntitySelectParams) => void; onLineageExpand: (data: EntityAndType) => void; diff --git a/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx b/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx index c11d8fe90cfa99..235fce08a85ddb 100644 --- a/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx +++ b/datahub-web-react/src/app/lineage/utils/__tests__/columnLineageUtils.test.tsx @@ -103,9 +103,7 @@ describe('getPopulatedColumnsByUrn', () => { }, ] as FineGrainedLineage[], }; - const fetchedEntities = { - [dataJobWithCLL.urn]: dataJobWithCLL as FetchedEntity, - }; + 
const fetchedEntities = new Map([[dataJobWithCLL.urn, dataJobWithCLL as FetchedEntity]]); const columnsByUrn = getPopulatedColumnsByUrn({}, fetchedEntities); expect(columnsByUrn).toMatchObject({ diff --git a/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts b/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts index ad28bccbbd85a2..6af9b1b2c0d970 100644 --- a/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts +++ b/datahub-web-react/src/app/lineage/utils/__tests__/extendAsyncEntities.test.ts @@ -19,16 +19,20 @@ describe('extendColumnLineage', () => { }, ] as FineGrainedLineage[], }; - const fetchedEntities = { - [dataJobWithCLL.urn]: dataJobWithCLL as FetchedEntity, - }; + const fetchedEntities = new Map([[dataJobWithCLL.urn, dataJobWithCLL as FetchedEntity]]); const fineGrainedMap = { forward: {}, reverse: {} }; extendColumnLineage(dataJobWithCLL, fineGrainedMap, {}, fetchedEntities); expect(fineGrainedMap).toMatchObject({ forward: { - [dataJob1.urn]: { test1: { [dataset2.urn]: ['test2'] }, test3: { [dataset2.urn]: ['test4'] } }, - [dataset1.urn]: { test1: { [dataJob1.urn]: ['test1'] }, test3: { [dataJob1.urn]: ['test3'] } }, + [dataJob1.urn]: { + test1: { [dataset2.urn]: ['test2'] }, + test3: { [dataset2.urn]: ['test4'] }, + }, + [dataset1.urn]: { + test1: { [dataJob1.urn]: ['test1'] }, + test3: { [dataJob1.urn]: ['test3'] }, + }, }, reverse: { [dataJob1.urn]: { test1: { [dataset1.urn]: ['test1'] }, test3: { [dataset1.urn]: ['test3'] } }, diff --git a/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts b/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts index 60b16984441683..c3483f229f7a09 100644 --- a/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts +++ b/datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts @@ -71,10 +71,10 @@ export function convertInputFieldsToSchemaFields(inputFields?: InputFields) { */ export function 
getPopulatedColumnsByUrn( columnsByUrn: Record, - fetchedEntities: { [x: string]: FetchedEntity }, + fetchedEntities: Map, ) { let populatedColumnsByUrn = { ...columnsByUrn }; - Object.entries(fetchedEntities).forEach(([urn, fetchedEntity]) => { + Array.from(fetchedEntities.entries()).forEach(([urn, fetchedEntity]) => { if (fetchedEntity.schemaMetadata && !columnsByUrn[urn]) { populatedColumnsByUrn = { ...populatedColumnsByUrn, @@ -122,7 +122,7 @@ export function getPopulatedColumnsByUrn( export function populateColumnsByUrn( columnsByUrn: Record, - fetchedEntities: { [x: string]: FetchedEntity }, + fetchedEntities: Map, setColumnsByUrn: (colsByUrn: Record) => void, ) { setColumnsByUrn(getPopulatedColumnsByUrn(columnsByUrn, fetchedEntities)); diff --git a/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts b/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts index 778d0e325f7cb3..12d4cca352bb3f 100644 --- a/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts +++ b/datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts @@ -36,7 +36,7 @@ export default function constructFetchedNode( } const newConstructionPath = [...constructionPath, urn]; - const fetchedNode = fetchedEntities[urn]; + const fetchedNode = fetchedEntities.get(urn); if (constructedNodes[urn]) { return constructedNodes[urn]; @@ -53,7 +53,7 @@ export default function constructFetchedNode( subtype: fetchedNode.subtype, icon: fetchedNode.icon, unexploredChildren: - fetchedNode?.[childrenKey]?.filter((childUrn) => !(childUrn.entity.urn in fetchedEntities)).length || 0, + fetchedNode?.[childrenKey]?.filter((childUrn) => !fetchedEntities.has(childUrn.entity.urn)).length || 0, countercurrentChildrenUrns: fetchedNode?.[direction === Direction.Downstream ? 
'upstreamChildren' : 'downstreamChildren']?.map( (child) => child.entity.urn, @@ -88,7 +88,7 @@ export default function constructFetchedNode( ); }) ?.filter((child) => { - const childEntity = fetchedEntities[child?.urn || '']; + const childEntity = fetchedEntities.get(child?.urn || ''); const parentChildren = fetchedNode[childrenKey]; return shouldIncludeChildEntity(direction, parentChildren, childEntity, fetchedNode); }) diff --git a/datahub-web-react/src/app/lineage/utils/constructTree.ts b/datahub-web-react/src/app/lineage/utils/constructTree.ts index 7da6fc56b57bd4..38a865ea9e093d 100644 --- a/datahub-web-react/src/app/lineage/utils/constructTree.ts +++ b/datahub-web-react/src/app/lineage/utils/constructTree.ts @@ -62,15 +62,14 @@ export default function constructTree( const constructedNodes = {}; let updatedFetchedEntities = fetchedEntities; - Object.entries(updatedFetchedEntities).forEach((entry) => { - const [urn, fetchedEntity] = entry; + Array.from(updatedFetchedEntities.entries()).forEach(([urn, fetchedEntity]) => { if (urn in updatedLineages) { - updatedFetchedEntities[urn] = updateFetchedEntity(fetchedEntity, updatedLineages); + updatedFetchedEntities.set(urn, updateFetchedEntity(fetchedEntity, updatedLineages)); } }); Object.values(updatedLineages).forEach((updatedLineage) => { (updatedLineage as any).entitiesToAdd.forEach((entity) => { - if (!(entity.urn in updatedFetchedEntities)) { + if (!updatedFetchedEntities.has(entity.urn)) { updatedFetchedEntities = extendAsyncEntities( {}, {}, @@ -125,7 +124,7 @@ export default function constructTree( ]); }) ?.filter((child) => { - const childEntity = updatedFetchedEntities[child?.urn || '']; + const childEntity = updatedFetchedEntities.get(child?.urn || ''); return shouldIncludeChildEntity(direction, children, childEntity, fetchedEntity); }) ?.filter(Boolean) as Array; diff --git a/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts b/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts 
index 30e81a37dc3801..7deca50b154c7b 100644 --- a/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts +++ b/datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts @@ -115,8 +115,8 @@ export function extendColumnLineage( // if this upstreamEntityUrn is a sibling of one of the already rendered nodes, // update the fine grained map with the rendered node instead of its sibling - Object.keys(fetchedEntities).forEach((urn) => { - fetchedEntities[urn].siblings?.siblings?.forEach((sibling) => { + Array.from(fetchedEntities.keys()).forEach((urn) => { + fetchedEntities.get(urn)?.siblings?.siblings?.forEach((sibling) => { if (sibling && sibling.urn === upstreamEntityUrn) { updateFineGrainedMap( fineGrainedMap, @@ -188,7 +188,7 @@ export default function extendAsyncEntities( entityAndType: EntityAndType, fullyFetched = false, ): FetchedEntities { - if (fetchedEntities[entityAndType.entity.urn]?.fullyFetched) { + if (fetchedEntities.get(entityAndType.entity.urn)?.fullyFetched) { return fetchedEntities; } @@ -198,11 +198,7 @@ export default function extendAsyncEntities( extendColumnLineage(lineageVizConfig, fineGrainedMap, fineGrainedMapForSiblings, fetchedEntities); - return { - ...fetchedEntities, - [entityAndType.entity.urn]: { - ...lineageVizConfig, - fullyFetched, - }, - }; + const newFetchedEntities = new Map(fetchedEntities); + newFetchedEntities.set(entityAndType.entity.urn, { ...lineageVizConfig, fullyFetched }); + return newFetchedEntities; } diff --git a/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts b/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts index dc0d3ea2f03768..d27764d6ed7b59 100644 --- a/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts +++ b/datahub-web-react/src/app/lineage/utils/useSortColumnsBySelectedField.ts @@ -12,7 +12,7 @@ import { } from './columnLineageUtils'; import { LineageExplorerContext } from './LineageExplorerContext'; -export default function 
useSortColumnsBySelectedField(fetchedEntities: { [x: string]: FetchedEntity }) { +export default function useSortColumnsBySelectedField(fetchedEntities: Map) { const { highlightedEdges, selectedField, columnsByUrn, setColumnsByUrn } = useContext(LineageExplorerContext); const previousSelectedField = usePrevious(selectedField); @@ -37,15 +37,15 @@ export default function useSortColumnsBySelectedField(fetchedEntities: { [x: str setColumnsByUrn(updatedColumnsByUrn); } else if (!selectedField && previousSelectedField !== selectedField) { Object.entries(columnsByUrn).forEach(([urn, columns]) => { - const fetchedEntity = fetchedEntities[urn]; - if (fetchedEntity && fetchedEntity.schemaMetadata) { + const fetchedEntity = fetchedEntities.get(urn); + if (fetchedEntity?.schemaMetadata) { updatedColumnsByUrn = sortColumnsByDefault( updatedColumnsByUrn, columns, convertFieldsToV1FieldPath(fetchedEntity.schemaMetadata.fields), urn, ); - } else if (fetchedEntity && fetchedEntity.inputFields) { + } else if (fetchedEntity?.inputFields) { updatedColumnsByUrn = sortColumnsByDefault( updatedColumnsByUrn, columns, diff --git a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx index 88a1388ba95891..37349585fa4c92 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyDetailsModal.tsx @@ -3,9 +3,14 @@ import { Link } from 'react-router-dom'; import { Button, Divider, Modal, Tag, Typography } from 'antd'; import styled from 'styled-components'; import { useEntityRegistry } from '../../useEntityRegistry'; -import { Maybe, Policy, PolicyState, PolicyType } from '../../../types.generated'; +import { Maybe, Policy, PolicyMatchCondition, PolicyState, PolicyType } from '../../../types.generated'; import { useAppConfig } from '../../useAppConfig'; -import { convertLegacyResourceFilter, getFieldValues, 
mapResourceTypeToDisplayName } from './policyUtils'; +import { + convertLegacyResourceFilter, + getFieldValues, + getFieldCondition, + mapResourceTypeToDisplayName, +} from './policyUtils'; import AvatarsGroup from '../AvatarsGroup'; type PrivilegeOptionType = { @@ -70,6 +75,7 @@ export default function PolicyDetailsModal({ policy, open, onClose, privileges } const resourceTypes = getFieldValues(resources?.filter, 'TYPE') || []; const dataPlatformInstances = getFieldValues(resources?.filter, 'DATA_PLATFORM_INSTANCE') || []; const resourceEntities = getFieldValues(resources?.filter, 'URN') || []; + const resourceFilterCondition = getFieldCondition(resources?.filter, 'URN') || PolicyMatchCondition.Equals; const domains = getFieldValues(resources?.filter, 'DOMAIN') || []; const { @@ -104,6 +110,10 @@ export default function PolicyDetailsModal({ policy, open, onClose, privileges } ); }; + const getWildcardUrnTag = (criterionValue) => { + return {criterionValue.value}*; + }; + const resourceOwnersField = (actors) => { if (!actors?.resourceOwners) { return No; @@ -166,7 +176,10 @@ export default function PolicyDetailsModal({ policy, open, onClose, privileges } return ( // eslint-disable-next-line react/no-array-index-key - {getEntityTag(value)} + {resourceFilterCondition && + resourceFilterCondition === PolicyMatchCondition.StartsWith + ? 
getWildcardUrnTag(value) + : getEntityTag(value)} ); })) || All} diff --git a/datahub-web-react/src/app/permissions/policy/policyUtils.ts b/datahub-web-react/src/app/permissions/policy/policyUtils.ts index 725e39d82d62e0..b71a38f80fc256 100644 --- a/datahub-web-react/src/app/permissions/policy/policyUtils.ts +++ b/datahub-web-react/src/app/permissions/policy/policyUtils.ts @@ -118,6 +118,10 @@ export const getFieldValues = (filter: Maybe | undefined, res return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.values || []; }; +export const getFieldCondition = (filter: Maybe | undefined, resourceFieldType: string) => { + return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.condition || null; +}; + export const getFieldValuesOfTags = (filter: Maybe | undefined, resourceFieldType: string) => { return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.values || []; }; diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 18742397517238..17ad6f881b0abd 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -758,16 +758,6 @@ fragment schemaFieldFields on SchemaField { } } } - parent { - urn - type - ...entityDisplayNameFields - ... 
on Dataset { - platform { - ...platformFields - } - } - } } } diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 558711251d490e..8d5899d9891f18 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -5435,10 +5435,10 @@ domino@^2.1.6: resolved "https://registry.yarnpkg.com/domino/-/domino-2.1.6.tgz#fe4ace4310526e5e7b9d12c7de01b7f485a57ffe" integrity sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ== -dompurify@^2.3.8: - version "2.3.8" - resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.8.tgz#224fe9ae57d7ebd9a1ae1ac18c1c1ca3f532226f" - integrity sha512-eVhaWoVibIzqdGYjwsBWodIQIaXFSB+cKDf4cfxLMsK0xiud6SE+/WCVx/Xw/UwQsa4cS3T2eITcdtmTg2UKcw== +dompurify@^2.5.4: + version "2.5.4" + resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.5.4.tgz#347e91070963b22db31c7c8d0ce9a0a2c3c08746" + integrity sha512-l5NNozANzaLPPe0XaAwvg3uZcHtDBnziX/HjsY1UcDj1MxTK8Dd0Kv096jyPK5HRzs/XM5IMj20dW8Fk+HnbUA== dot-case@^3.0.4: version "3.0.4" @@ -5459,9 +5459,9 @@ dotenv@^8.2.0: integrity sha512-IrPdXQsk2BbzvCBGBOTmmSH5SodmqZNt4ERAZDmW4CT+tL8VtvinqywuANaFu4bOMWki16nqf0e4oC0QIaDr/g== dset@^3.1.2: - version "3.1.3" - resolved "https://registry.yarnpkg.com/dset/-/dset-3.1.3.tgz#c194147f159841148e8e34ca41f638556d9542d2" - integrity sha512-20TuZZHCEZ2O71q9/+8BwKwZ0QtD9D8ObhrihJPr+vLLYlSuAU3/zL4cSlgbfeoGHTjCSJBa7NGcrF9/Bx/WJQ== + version "3.1.4" + resolved "https://registry.yarnpkg.com/dset/-/dset-3.1.4.tgz#f8eaf5f023f068a036d08cd07dc9ffb7d0065248" + integrity sha512-2QF/g9/zTaPDc3BjNcVTGoBbXBgYfMTTceLaYcFJ/W9kggFUkhxD/hMEeuLKbugyef9SqAx8cpgwlIP/jinUTA== duplexer@^0.1.2: version "0.1.2" @@ -8215,9 +8215,9 @@ path-root@^0.1.1: path-root-regex "^0.1.0" path-to-regexp@^1.7.0: - version "1.8.0" - resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-1.8.0.tgz#887b3ba9d84393e87a0a0b9f4cb756198b53548a" - integrity 
sha512-n43JRhlUKUAlibEJhPeir1ncUID16QnEjNpwzNdO3Lm4ywrBpBZ5oLD0I6br9evr1Y9JTqwRtAh7JLoOzAQdVA== + version "1.9.0" + resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-1.9.0.tgz#5dc0753acbf8521ca2e0f137b4578b917b10cf24" + integrity sha512-xIp7/apCFJuUHdDLWe8O1HIkb0kQrOMb/0u6FXQjemHn/ii5LrIzU6bdECnsiTF/GjZkMEKg1xdiZwNqDYlZ6g== dependencies: isarray "0.0.1" diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index c9448fa34c6870..6e3e5780506ac0 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev - ${DATAHUB_LOCAL_GMS_ENV:-empty2.env} environment: &datahub-gms-dev-env <<: [*datahub-dev-telemetry-env, *datahub-gms-env] - ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml} + ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml} SKIP_ELASTICSEARCH_CHECK: false JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001' BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false diff --git a/docs/cli.md b/docs/cli.md index 1f1e6dfa26be71..c109d02e0ad517 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -34,9 +34,9 @@ datahub init # authenticate your datahub CLI with your datahub instance ``` -If you run into an error, try checking the [_common setup issues_](../metadata-ingestion/developing.md#Common-setup-issues). +If you run into an error, try checking the [_common setup issues_](../metadata-ingestion/developing.md#common-setup-issues). 
-Other installation options such as installation from source and running the cli inside a container are available further below in the guide [here](#alternate-installation-options) +Other installation options such as installation from source and running the cli inside a container are available further below in the guide [here](#alternate-installation-options). ## Starter Commands @@ -672,7 +672,6 @@ Old Entities Migrated = {'urn:li:dataset:(urn:li:dataPlatform:hive,logging_event ### Using docker [![Docker Hub](https://img.shields.io/docker/pulls/acryldata/datahub-ingestion?style=plastic)](https://hub.docker.com/r/acryldata/datahub-ingestion) -[![datahub-ingestion docker](https://github.com/acryldata/datahub/workflows/datahub-ingestion%20docker/badge.svg)](https://github.com/acryldata/datahub/actions/workflows/docker-ingestion.yml) If you don't want to install locally, you can alternatively run metadata ingestion within a Docker container. We have prebuilt images available on [Docker hub](https://hub.docker.com/r/acryldata/datahub-ingestion). All plugins will be installed and enabled automatically. diff --git a/docs/how/restore-indices.md b/docs/how/restore-indices.md index 368b385ae5ea54..447e08c2dc6f07 100644 --- a/docs/how/restore-indices.md +++ b/docs/how/restore-indices.md @@ -21,6 +21,7 @@ datahub docker quickstart --restore-indices :::info Using the `datahub` CLI to restore the indices when using the quickstart images will also clear the search and graph indices before restoring. +::: See [this section](../quickstart.md#restore-datahub) for more information. @@ -34,6 +35,7 @@ If you are on a custom docker-compose deployment, run the following command (you :::info By default this command will not clear the search and graph indices before restoring, thous potentially leading to inconsistencies between the local database and the indices, in case aspects were previously deleted in the local database but were not removed from the correponding index. 
+::: If you need to clear the search and graph indices before restoring, add `-a clean` to the end of the command. Please take note that the search and graph services might not be fully functional during reindexing when the indices are cleared. @@ -67,6 +69,7 @@ Once the job completes, your indices will have been restored. :::info By default the restore indices job template will not clear the search and graph indices before restoring, thous potentially leading to inconsistencies between the local database and the indices, in case aspects were previously deleted in the local database but were not removed from the correponding index. +::: If you need to clear the search and graph indices before restoring, modify the `values.yaml` for your deployment and overwrite the default arguments of the restore indices job template to include the `-a clean` argument. Please take note that the search and graph services might not be fully functional during reindexing when the indices are cleared. diff --git a/docs/how/search.md b/docs/how/search.md index c809ab1efba12d..5c1ba266ee2ae5 100644 --- a/docs/how/search.md +++ b/docs/how/search.md @@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference. If you want to: - Exact match on term or phrase - - ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22) - - ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema) + - ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22) + - ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile) - Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization. 
- Exclude terms diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java index 8777be57e1bd8f..e999471488dd73 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java @@ -14,6 +14,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; @@ -36,6 +37,10 @@ public class Edge { @EqualsAndHashCode.Include private Urn lifecycleOwner; // An entity through which the edge between source and destination is created @EqualsAndHashCode.Include private Urn via; + @EqualsAndHashCode.Exclude @Nullable private Boolean sourceStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean destinationStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean viaStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean lifecycleOwnerStatus; // For backwards compatibility public Edge( @@ -57,6 +62,38 @@ public Edge( updatedActor, properties, null, + null, + null, + null, + null, + null); + } + + public Edge( + Urn source, + Urn destination, + String relationshipType, + Long createdOn, + Urn createdActor, + Long updatedOn, + Urn updatedActor, + Map properties, + Urn lifecycleOwner, + Urn via) { + this( + source, + destination, + relationshipType, + createdOn, + createdActor, + updatedOn, + updatedActor, + properties, + lifecycleOwner, + via, + null, + null, + null, null); } @@ -91,6 +128,10 @@ public String toDocId(@Nonnull String idHashAlgo) { public static final String EDGE_FIELD_LIFECYCLE_OWNER = "lifecycleOwner"; public static final String EDGE_SOURCE_URN_FIELD = "source.urn"; public static final String EDGE_DESTINATION_URN_FIELD = "destination.urn"; + public static 
final String EDGE_SOURCE_STATUS = "source.removed"; + public static final String EDGE_DESTINATION_STATUS = "destination.removed"; + public static final String EDGE_FIELD_VIA_STATUS = "viaRemoved"; + public static final String EDGE_FIELD_LIFECYCLE_OWNER_STATUS = "lifecycleOwnerRemoved"; public static final List> KEY_SORTS = ImmutableList.of( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java new file mode 100644 index 00000000000000..2fc2f4b588e8b2 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.aspect.models.graph; + +public enum EdgeUrnType { + SOURCE, + DESTINATION, + VIA, + LIFECYCLE_OWNER +} diff --git a/gradle.properties b/gradle.properties index 4bdbd3d89286ce..e42e18dab677b1 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,10 +1,14 @@ -org.gradle.daemon=false org.gradle.configureondemand=true org.gradle.parallel=true org.gradle.caching=true +# Cycle daemons after 30m +org.gradle.daemon.idletimeout=1800000 + # Increase gradle JVM memory to 5GB to allow tests to run locally -org.gradle.jvmargs=-Xmx5120m +org.gradle.jvmargs=-Xmx5120m -XX:MaxMetaspaceSize=512m +org.gradle.workers.max=4 + # Increase retries to 5 (from default of 3) and increase interval from 125ms to 1s. # Based on this thread https://github.com/gradle/gradle/issues/4629, it's unclear # if we should be using systemProp or not. We're using both for now. 
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java index b53d868e6e8781..7583a4efd6425e 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java @@ -4,8 +4,10 @@ import java.util.Map; import java.util.Objects; import javax.annotation.Nonnull; +import lombok.EqualsAndHashCode; /** Class representing an authenticated actor accessing DataHub. */ +@EqualsAndHashCode public class Authentication { private final Actor authenticatedActor; diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index 9293fc7a369dc7..19a18c5275a3b9 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -55,7 +55,6 @@ logger.debug("this is the sample debug line") #3. click on the `log` option ``` - > **P.S. if you are not able to see the log lines, then restart the `airflow scheduler` and rerun the DAG** ### (Optional) Set up your Python environment for developing on Dagster Plugin @@ -70,6 +69,7 @@ datahub version # should print "DataHub CLI version: unavailable (installed in ``` ### (Optional) Set up your Python environment for developing on Prefect Plugin + From the repository root: ```shell @@ -127,6 +127,18 @@ This sometimes happens if there's a version mismatch between the Kafka's C libra +
+ Conflict: acryl-datahub requires pydantic 1.10 + +The base `acryl-datahub` package supports both Pydantic 1.x and 2.x. However, some of our specific sources require Pydantic 1.x because of transitive dependencies. + +If you're primarily using `acryl-datahub` for the SDKs, you can install `acryl-datahub` and some extras, like `acryl-datahub[sql-parser]`, without getting conflicts related to Pydantic versioning. + +We recommend not installing full ingestion sources into your main environment (e.g. avoid having a dependency on `acryl-datahub[snowflake]` or other ingestion sources). +Instead, we recommend using UI-based ingestion or isolating the ingestion pipelines using [virtual environments](https://docs.python.org/3/library/venv.html). If you're using an orchestrator, they often have first-class support for virtual environments - here's an [example for Airflow](./schedule_docs/airflow.md). + +
+ ### Using Plugins in Development The syntax for installing plugins is slightly different in development. For example: @@ -286,4 +298,4 @@ tox -- --update-golden-files # Update golden files for a specific environment. tox -e py310-airflow26 -- --update-golden-files -``` \ No newline at end of file +``` diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 8b778048c34757..bf80172441405f 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -101,7 +101,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "acryl-sqlglot[rs]==25.20.2.dev5", + "acryl-sqlglot[rs]==25.20.2.dev6", } classification_lib = { diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/__init__.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py new file mode 100644 index 00000000000000..fc164c84793658 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py @@ -0,0 +1,133 @@ +import dataclasses +import json +from typing import Dict, Iterable, Optional + +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + DatasetPropertiesClass, + GenericAspectClass, + MetadataChangeProposalClass, + OperationClass, + TimeStampClass, +) +from datahub.specific.dataset import DatasetPatchBuilder +from datahub.utilities.urns.urn import guess_entity_type + + +@dataclasses.dataclass +class TimestampPair: + last_modified_dataset_props: Optional[ + TimeStampClass + ] # last_modified of datasetProperties aspect + last_updated_timestamp_dataset_props: 
Optional[ + int + ] # lastUpdatedTimestamp of the operation aspect + + +def try_aspect_from_metadata_change_proposal_class( + wu: MetadataWorkUnit, +) -> Optional[DatasetPropertiesClass]: + if ( + isinstance(wu.metadata, MetadataChangeProposalClass) + and wu.metadata.aspectName == "datasetProperties" + and wu.metadata.changeType == ChangeTypeClass.PATCH + and isinstance(wu.metadata.aspect, GenericAspectClass) + ): + patch_dataset_properties = json.loads(wu.metadata.aspect.value) + for operation in patch_dataset_properties: + if operation.get("path") == "/lastModified": + # Deserializing `lastModified` as the `auto_patch_last_modified` function relies on this property + # to decide if a patch aspect for the datasetProperties aspect should be generated + return DatasetPropertiesClass( + lastModified=TimeStampClass(time=operation["value"]["time"]) + ) + + return None + + +def auto_patch_last_modified( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """ + Generate a patch request for datasetProperties aspect in-case + 1. `lastModified` of datasetProperties is not set + 2. And there are operation aspects + in this case set the `lastModified` of datasetProperties to max value of operation aspects `lastUpdatedTimestamp`. + + We need this functionality to support sort by `last modified` on UI. 
+ """ + candidate_dataset_for_patch: Dict[str, TimestampPair] = {} + + for wu in stream: + if ( + guess_entity_type(wu.get_urn()) != "dataset" + ): # we are only processing datasets + yield wu + continue + + dataset_properties_aspect = wu.get_aspect_of_type( + DatasetPropertiesClass + ) or try_aspect_from_metadata_change_proposal_class(wu) + dataset_operation_aspect = wu.get_aspect_of_type(OperationClass) + + timestamp_pair = candidate_dataset_for_patch.get(wu.get_urn()) + + if timestamp_pair: + # Update the timestamp_pair + if dataset_properties_aspect and dataset_properties_aspect.lastModified: + timestamp_pair.last_modified_dataset_props = ( + dataset_properties_aspect.lastModified + ) + + if ( + dataset_operation_aspect + and dataset_operation_aspect.lastUpdatedTimestamp + ): + timestamp_pair.last_updated_timestamp_dataset_props = max( + timestamp_pair.last_updated_timestamp_dataset_props or 0, + dataset_operation_aspect.lastUpdatedTimestamp, + ) + + else: + # Create new TimestampPair + last_modified_dataset_props: Optional[TimeStampClass] = None + last_updated_timestamp_dataset_props: Optional[int] = None + + if dataset_properties_aspect: + last_modified_dataset_props = dataset_properties_aspect.lastModified + + if dataset_operation_aspect: + last_updated_timestamp_dataset_props = ( + dataset_operation_aspect.lastUpdatedTimestamp + ) + + candidate_dataset_for_patch[wu.get_urn()] = TimestampPair( + last_modified_dataset_props=last_modified_dataset_props, + last_updated_timestamp_dataset_props=last_updated_timestamp_dataset_props, + ) + + yield wu + + # Emit a patch datasetProperties aspect for dataset where last_modified is None + for entity_urn, timestamp_pair in candidate_dataset_for_patch.items(): + # Emit patch if last_modified is not set and last_updated_timestamp is set + if ( + timestamp_pair.last_modified_dataset_props is None + and timestamp_pair.last_updated_timestamp_dataset_props + ): + dataset_patch_builder = DatasetPatchBuilder(urn=entity_urn) + + 
dataset_patch_builder.set_last_modified( + timestamp=TimeStampClass( + time=timestamp_pair.last_updated_timestamp_dataset_props + ) + ) + + yield from [ + MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(mcp), + mcp_raw=mcp, + ) + for mcp in dataset_patch_builder.build() + ] diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 3dea3d36f41f17..85ae17ddf65291 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -28,6 +28,9 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.source_common import PlatformInstanceConfigMixin from datahub.emitter.mcp_builder import mcps_from_mce +from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( + auto_patch_last_modified, +) from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit from datahub.ingestion.api.report import Report @@ -443,6 +446,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ), browse_path_processor, partial(auto_workunit_reporter, self.get_report()), + auto_patch_last_modified, ] @staticmethod diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index c783d9a35814b3..0fdb7bb537457d 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -527,6 +527,50 @@ def get_aspects_for_entity( return result + def get_entity_as_mcps( + self, entity_urn: str, aspects: Optional[List[str]] = None + ) -> List[MetadataChangeProposalWrapper]: + """Get all non-timeseries aspects for an entity. + + By formatting the entity's aspects as MCPWs, we can also include SystemMetadata. 
+ + Warning: Do not use this method to determine if an entity exists! This method will always return + something, even if the entity doesn't actually exist in DataHub. + + Args: + entity_urn: The urn of the entity + aspects: Optional list of aspect names being requested (e.g. ["schemaMetadata", "datasetProperties"]) + + Returns: + A list of MCPWs. + """ + + response_json = self.get_entity_raw(entity_urn, aspects) + + # Now, we parse the response into proper aspect objects. + results: List[MetadataChangeProposalWrapper] = [] + for aspect_name, aspect_json in response_json.get("aspects", {}).items(): + aspect_type = ASPECT_NAME_MAP.get(aspect_name) + if aspect_type is None: + logger.warning(f"Ignoring unknown aspect type {aspect_name}") + continue + + post_json_obj = post_json_transform(aspect_json) + aspect_value = aspect_type.from_obj(post_json_obj["value"]) + + system_metadata_raw = post_json_obj["systemMetadata"] + system_metadata = SystemMetadataClass.from_obj(system_metadata_raw) + + mcpw = MetadataChangeProposalWrapper( + entityUrn=entity_urn, + aspect=aspect_value, + systemMetadata=system_metadata, + ) + + results.append(mcpw) + + return results + def get_entity_semityped( self, entity_urn: str, aspects: Optional[List[str]] = None ) -> AspectBag: @@ -545,19 +589,12 @@ def get_entity_semityped( not be present in the dictionary. The entity's key aspect will always be present. """ - response_json = self.get_entity_raw(entity_urn, aspects) + mcps = self.get_entity_as_mcps(entity_urn, aspects) - # Now, we parse the response into proper aspect objects. 
result: AspectBag = {} - for aspect_name, aspect_json in response_json.get("aspects", {}).items(): - aspect_type = ASPECT_NAME_MAP.get(aspect_name) - if aspect_type is None: - logger.warning(f"Ignoring unknown aspect type {aspect_name}") - continue - - post_json_obj = post_json_transform(aspect_json) - aspect_value = aspect_type.from_obj(post_json_obj["value"]) - result[aspect_name] = aspect_value # type: ignore + for mcp in mcps: + if mcp.aspect: + result[mcp.aspect.get_aspect_name()] = mcp.aspect # type: ignore return result diff --git a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py index 3d50ef5f254a0f..9059dcca3e2b85 100644 --- a/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/datahub/ingestion/sink/datahub_rest.py @@ -74,6 +74,7 @@ class DatahubRestSinkConfig(DatahubClientConfig): @dataclasses.dataclass class DataHubRestSinkReport(SinkReport): + mode: Optional[RestSinkMode] = None max_threads: Optional[int] = None gms_version: Optional[str] = None pending_requests: int = 0 @@ -126,6 +127,7 @@ def __post_init__(self) -> None: .get("acryldata/datahub", {}) .get("version", None) ) + self.report.mode = self.config.mode self.report.max_threads = self.config.max_threads logger.debug("Setting env variables to override config") logger.debug("Setting gms config") diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 1866599fa21c67..b39e05a8db4de1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -75,8 +75,37 @@ def set_metadata_endpoint(cls, values: dict) -> dict: def infer_metadata_endpoint(access_url: str) -> Optional[str]: - # See https://docs.getdbt.com/docs/cloud/about-cloud/access-regions-ip-addresses#api-access-urls - # and 
https://docs.getdbt.com/docs/dbt-cloud-apis/discovery-querying#discovery-api-endpoints + """Infer the dbt metadata endpoint from the access URL. + + See https://docs.getdbt.com/docs/cloud/about-cloud/access-regions-ip-addresses#api-access-urls + and https://docs.getdbt.com/docs/dbt-cloud-apis/discovery-querying#discovery-api-endpoints + for more information. + + Args: + access_url: The dbt Cloud access URL. This is the URL of the dbt Cloud UI. + + Returns: + The metadata endpoint, or None if it couldn't be inferred. + + Examples: + # Standard multi-tenant deployments. + >>> infer_metadata_endpoint("https://cloud.getdbt.com") + 'https://metadata.cloud.getdbt.com/graphql' + + >>> infer_metadata_endpoint("https://au.dbt.com") + 'https://metadata.au.dbt.com/graphql' + + >>> infer_metadata_endpoint("https://emea.dbt.com") + 'https://metadata.emea.dbt.com/graphql' + + # Cell-based deployment. + >>> infer_metadata_endpoint("https://prefix.us1.dbt.com") + 'https://prefix.metadata.us1.dbt.com/graphql' + + # Test with an "internal" URL. + >>> infer_metadata_endpoint("http://dbt.corp.internal") + 'http://metadata.dbt.corp.internal/graphql' + """ try: parsed_uri = urlparse(access_url) @@ -86,13 +115,18 @@ def infer_metadata_endpoint(access_url: str) -> Optional[str]: logger.debug(f"Unable to parse access URL {access_url}: {e}", exc_info=e) return None - if parsed_uri.hostname.endswith(".dbt.com"): + if parsed_uri.hostname.endswith(".getdbt.com") or parsed_uri.hostname in { + # Two special cases of multi-tenant deployments that use the dbt.com domain + # instead of getdbt.com. + "au.dbt.com", + "emea.dbt.com", + }: + return f"{parsed_uri.scheme}://metadata.{parsed_uri.netloc}/graphql" + elif parsed_uri.hostname.endswith(".dbt.com"): # For cell-based deployments. 
# prefix.region.dbt.com -> prefix.metadata.region.dbt.com hostname_parts = parsed_uri.hostname.split(".", maxsplit=1) return f"{parsed_uri.scheme}://{hostname_parts[0]}.metadata.{hostname_parts[1]}/graphql" - elif parsed_uri.hostname.endswith(".getdbt.com"): - return f"{parsed_uri.scheme}://metadata.{parsed_uri.netloc}/graphql" else: # The self-hosted variants also have the metadata. prefix. return f"{parsed_uri.scheme}://metadata.{parsed_uri.netloc}/graphql" @@ -403,10 +437,12 @@ def _parse_into_dbt_node(self, node: Dict) -> DBTNode: columns = [] if "columns" in node and node["columns"] is not None: # columns will be empty for ephemeral models - columns = [ - self._parse_into_dbt_column(column) - for column in sorted(node["columns"], key=lambda c: c["index"]) - ] + columns = list( + sorted( + [self._parse_into_dbt_column(column) for column in node["columns"]], + key=lambda c: c.index, + ) + ) test_info = None test_result = None @@ -494,7 +530,10 @@ def _parse_into_dbt_column( name=column["name"], comment=column.get("comment", ""), description=column["description"], - index=column["index"], + # For some reason, the index sometimes comes back as None from the dbt Cloud API. + # In that case, we just assume that the column is at the end of the table by + # assigning it a very large index. 
+ index=column["index"] if column["index"] is not None else 10**6, data_type=column["type"], meta=column["meta"], tags=column["tags"], diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index 1aad806e958f85..04de763370c951 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -7,6 +7,7 @@ import dateutil.parser import requests +from packaging import version from pydantic import BaseModel, Field, validator from datahub.configuration.git import GitReference @@ -41,14 +42,17 @@ class DBTCoreConfig(DBTCommonConfig): manifest_path: str = Field( - description="Path to dbt manifest JSON. See https://docs.getdbt.com/reference/artifacts/manifest-json Note this can be a local file or a URI." + description="Path to dbt manifest JSON. See https://docs.getdbt.com/reference/artifacts/manifest-json Note " + "this can be a local file or a URI." ) catalog_path: str = Field( - description="Path to dbt catalog JSON. See https://docs.getdbt.com/reference/artifacts/catalog-json Note this can be a local file or a URI." + description="Path to dbt catalog JSON. See https://docs.getdbt.com/reference/artifacts/catalog-json Note this " + "can be a local file or a URI." ) sources_path: Optional[str] = Field( default=None, - description="Path to dbt sources JSON. See https://docs.getdbt.com/reference/artifacts/sources-json. If not specified, last-modified fields will not be populated. Note this can be a local file or a URI.", + description="Path to dbt sources JSON. See https://docs.getdbt.com/reference/artifacts/sources-json. If not " + "specified, last-modified fields will not be populated. 
Note this can be a local file or a URI.", ) run_results_paths: List[str] = Field( default=[], @@ -569,16 +573,26 @@ def load_nodes(self) -> Tuple[List[DBTNode], Dict[str, Optional[str]]]: ) = self.loadManifestAndCatalog() # If catalog_version is between 1.7.0 and 1.7.2, report a warning. - if ( - catalog_version - and catalog_version.startswith("1.7.") - and catalog_version < "1.7.3" - ): - self.report.report_warning( - "dbt_catalog_version", - f"Due to a bug in dbt, dbt version {catalog_version} will have incomplete metadata on sources. " - "Please upgrade to dbt version 1.7.3 or later. " - "See https://github.com/dbt-labs/dbt-core/issues/9119 for details on the bug.", + try: + if ( + catalog_version + and catalog_version.startswith("1.7.") + and version.parse(catalog_version) < version.parse("1.7.3") + ): + self.report.report_warning( + title="Dbt Catalog Version", + message="Due to a bug in dbt version between 1.7.0 and 1.7.2, you will have incomplete metadata " + "source", + context=f"Due to a bug in dbt, dbt version {catalog_version} will have incomplete metadata on " + f"sources." + "Please upgrade to dbt version 1.7.3 or later. 
" + "See https://github.com/dbt-labs/dbt-core/issues/9119 for details on the bug.", + ) + except Exception as e: + self.report.info( + title="Dbt Catalog Version", + message="Failed to determine the catalog version", + exc=e, ) additional_custom_props = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 73427d9084dd3c..56b8ce00a4d1f2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -15,6 +15,8 @@ import yaml from liquid import Template, Undefined from pydantic import Field, validator +from requests.adapters import HTTPAdapter, Retry +from requests.exceptions import ConnectionError from requests.models import HTTPBasicAuth, HTTPError from sqllineage.runner import LineageRunner from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential @@ -127,6 +129,10 @@ class ModeAPIConfig(ConfigModel): max_attempts: int = Field( default=5, description="Maximum number of attempts to retry before failing" ) + timeout: int = Field( + default=40, + description="Timout setting, how long to wait for the Mode rest api to send data before giving up", + ) class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase): @@ -299,7 +305,15 @@ def __init__(self, ctx: PipelineContext, config: ModeConfig): self.report = ModeSourceReport() self.ctx = ctx - self.session = requests.session() + self.session = requests.Session() + # Handling retry and backoff + retries = 3 + backoff_factor = 10 + retry = Retry(total=retries, backoff_factor=backoff_factor) + adapter = HTTPAdapter(max_retries=retry) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + self.session.auth = HTTPBasicAuth( self.config.token, self.config.password.get_secret_value(), @@ -1469,15 +1483,16 @@ def _get_request_json(self, url: str) -> Dict: 
multiplier=self.config.api_options.retry_backoff_multiplier, max=self.config.api_options.max_retry_interval, ), - retry=retry_if_exception_type(HTTPError429), + retry=retry_if_exception_type((HTTPError429, ConnectionError)), stop=stop_after_attempt(self.config.api_options.max_attempts), ) @r.wraps def get_request(): try: - response = self.session.get(url) - response.raise_for_status() + response = self.session.get( + url, timeout=self.config.api_options.timeout + ) return response.json() except HTTPError as http_error: error_response = http_error.response diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index 56e4c806eb0c30..71245353101f60 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -347,6 +347,9 @@ def detach_ctes( dialect = get_dialect(platform) statement = parse_statement(sql, dialect=dialect) + if not cte_mapping: + return statement + def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: if ( isinstance(node, sqlglot.exp.Identifier) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json index a7d46a2412b6cd..631b28c64f14df 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json @@ -498,5 +498,27 @@ "runId": "bigquery-2022_02_03-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1643871600000 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json index a2d21b84f19e8c..1b79e8464c05f1 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json @@ -12588,5 +12588,555 @@ "runId": "bigquery-queries-2024_08_19-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322481569 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322505477 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322464098 + } + } + ] 
+ }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322500148 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322502689 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322510656 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322478955 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322508214 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322460257 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322472836 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322491425 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 
1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322457731 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322471500 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322476091 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322484293 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": 
"bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322465459 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322495660 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322467835 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322462741 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322498418 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322477705 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322497080 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322452660 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322448864 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322445357 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index f7ee62201a8630..95671b4f5a09c4 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -400,5 +400,27 @@ "runId": "delta-lake-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1672531200000, + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 24344d6a266648..0e88106d79175b 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -1838,5 +1838,93 @@ "runId": "allow_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831649788 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831866541 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 717481b2534292..d04cc789711904 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -1785,5 +1785,93 @@ "runId": "inner_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831649788 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831866541 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json index f446c2deb6a847..0b1a8140cd6495 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -368,5 +368,27 @@ "runId": "relative_path.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index 100f93fdaf5d36..cba70b2f54b188 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -724,5 +724,27 @@ "runId": "single_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json index cb40e152f67cca..28e517cc8c319d 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json @@ -1841,5 +1841,137 @@ "runId": "tables_with_nested_datatypes.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_1,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709535906725 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709110542636 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708329078869 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array_of_struct,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708329897384 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_2,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709536366367 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_nested_array_of_numbers,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708330178404 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json index a6a685672bda00..ed00dc5734680d 100644 --- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json +++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json @@ -806,9 +806,9 @@ "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", "schemaField": { - "fieldPath": "amount", + "fieldPath": "payment_date", "nullable": false, "type": { "type": { @@ -828,9 +828,9 @@ 
} }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", "schemaField": { - "fieldPath": "payment_date", + "fieldPath": "amount", "nullable": false, "type": { "type": { @@ -1075,5 +1075,27 @@ "runId": "mode-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1639177973273 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/test_mode.py b/metadata-ingestion/tests/integration/mode/test_mode.py index 7ea6597460de20..ce7533d5611e49 100644 --- a/metadata-ingestion/tests/integration/mode/test_mode.py +++ b/metadata-ingestion/tests/integration/mode/test_mode.py @@ -45,8 +45,12 @@ def __init__(self, error_list, status_code): def json(self): return self.json_data - def get(self, url): + def mount(self, prefix, adaptor): + return self + + def get(self, url, timeout=40): self.url = url + self.timeout = timeout response_json_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}" with open(response_json_path) as file: data = json.loads(file.read()) @@ -74,7 +78,7 @@ def mocked_requests_failure(*args, **kwargs): @freeze_time(FROZEN_TIME) def test_mode_ingest_success(pytestconfig, tmp_path): with patch( - "datahub.ingestion.source.mode.requests.session", + "datahub.ingestion.source.mode.requests.Session", side_effect=mocked_requests_sucess, ): pipeline = Pipeline.create( @@ -111,7 +115,7 @@ def test_mode_ingest_success(pytestconfig, tmp_path): @freeze_time(FROZEN_TIME) def 
test_mode_ingest_failure(pytestconfig, tmp_path): with patch( - "datahub.ingestion.source.mode.requests.session", + "datahub.ingestion.source.mode.requests.Session", side_effect=mocked_requests_failure, ): global test_resources_dir diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json index 77db69d197c787..7c0ecd8a07ddf4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json @@ -146,7 +146,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445129021 + "lastUpdatedTimestamp": 1586808250000 } }, "systemMetadata": { @@ -1370,5 +1370,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808250000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json index b35fb24d43bf38..fb1ab3a869648d 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json 
@@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,7 +60,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -72,26 +72,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -117,7 +117,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445089021 + "lastUpdatedTimestamp": 1586808220000 } }, "systemMetadata": { @@ -1307,7 +1307,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -1319,14 +1319,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -1343,7 +1343,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -1355,14 +1355,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -1388,7 +1388,7 @@ "type": "FULL_TABLE" }, "operationType": 
"UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1798,5 +1798,49 @@ "runId": "folder_no_partition.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808220000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json index 62ba688990e2be..5c330a1953549e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,14 +60,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - 
"com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -84,7 +84,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -96,14 +96,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1458,5 +1458,27 @@ "runId": "folder_no_partition_exclude.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json index 346c2e9bcd83ac..a14cfdfb6f635f 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json @@ -48,7 +48,7 @@ }, "fields": [ { - 
"fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,7 +60,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -72,26 +72,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -117,7 +117,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445089021 + "lastUpdatedTimestamp": 1586808220000 } }, "systemMetadata": { @@ -1307,7 +1307,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -1319,14 +1319,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -1343,7 +1343,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -1355,14 +1355,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -1388,7 +1388,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 
1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1798,5 +1798,49 @@ "runId": "folder_no_partition_filename.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808220000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json index 6bff1bf1b14687..e695804f24f5dc 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,14 +60,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - 
"com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -84,7 +84,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -96,14 +96,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1458,5 +1458,27 @@ "runId": "folder_no_partition_glob.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json index 6668e4e6a26c09..4b78aae2a36425 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + 
"fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - 
"nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": 
"flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - 
"fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_basic.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json index 7f8bcfec6d3140..201828842b84b9 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": 
false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - 
"nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - 
"fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_keyval.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json index 35efe529948378..52aead01fb2fc5 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json +++ 
b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": 
"effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": 
"flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { 
- "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + 
"lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_update_schema.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json deleted file mode 100644 index adb3686309e6c4..00000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - 
"lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - 
"lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - 
"aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", 
- "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": 
"containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, 
-{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": 
"urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - 
"Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - 
"uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - 
{ - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - 
"json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 
4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": 
"browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { 
- "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - 
"type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - 
"fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', 
url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', 
language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', 
url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', 
url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', 
language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', 
url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', 
url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), 
Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', 
url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json deleted file mode 100644 index 80f584788fdb26..00000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": 
"schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - 
"container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - 
"urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - 
"aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": 
"urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, 
- "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": 
"yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": 
"UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", 
- "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 
9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - 
"frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - 
{ - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - 
"partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - 
"[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), 
version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', 
url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), 
version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 
\uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), 
Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut 
effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), 
version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), 
Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - 
"aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json deleted file mode 100644 index a384a8f1e501de..00000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - 
"platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - 
"aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": 
"subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - 
"aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": 
"urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, 
-{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - 
"changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - 
"lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - 
"yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - 
"customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 
0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - 
"value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": 
"urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - 
{ - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, 
- "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - 
"sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', 
url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), 
version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', 
url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c 
\uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), 
Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut 
effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), 
version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), 
Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": 
"status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index 4f98d68f8ae621..6ae2ec160035e6 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -458,7 +458,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444889021 + "lastUpdatedTimestamp": 1586808010000 } }, "systemMetadata": { @@ -2896,38 +2896,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -2965,7 +2965,7 @@ "type": "FULL_TABLE" }, 
"operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -3541,7 +3541,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -3974,17 +3974,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json", - "number_of_files": "1", - "size_in_bytes": "4646" - }, - "name": "countries_json.json", - "description": "", - "tags": [] + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" } }, "systemMetadata": { @@ -3997,11 +3991,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json", + "number_of_files": "1", + "size_in_bytes": "4646" + }, + "name": "countries_json.json", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -4036,14 +4036,14 @@ }, "fields": [ { - "fieldPath": "countries", + "fieldPath": "countries.name", "nullable": false, 
"type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, @@ -4060,14 +4060,14 @@ "isPartOfKey": false }, { - "fieldPath": "countries.name", + "fieldPath": "countries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false } @@ -4093,7 +4093,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444919021 + "lastUpdatedTimestamp": 1586808040000 } }, "systemMetadata": { @@ -4205,6 +4205,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", @@ -4254,7 +4271,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -4266,14 +4283,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, 
"isPartOfKey": false }, @@ -4290,7 +4307,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -4302,14 +4319,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -4335,24 +4352,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444929021 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "lastUpdatedTimestamp": 1586808050000 } }, "systemMetadata": { @@ -4739,17 +4739,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv", - "number_of_files": "1", - "size_in_bytes": "172" - }, - "name": "small.csv", - "description": "", - "tags": [] + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" } }, "systemMetadata": { @@ 
-4762,11 +4756,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv", + "number_of_files": "1", + "size_in_bytes": "172" + }, + "name": "small.csv", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -4882,7 +4882,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444939021 + "lastUpdatedTimestamp": 1586808060000 } }, "systemMetadata": { @@ -5124,6 +5124,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", @@ -5566,7 +5583,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444949021 + "lastUpdatedTimestamp": 1586808070000 } }, "systemMetadata": { @@ 
-7807,13 +7824,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "removed": false } }, "systemMetadata": { @@ -7824,7 +7840,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7840,7 +7856,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7856,7 +7872,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7872,7 +7888,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7888,7 +7904,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7904,7 +7920,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7918,15 +7934,153 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808010000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808040000 + } + 
} + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808050000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808060000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "status", + "changeType": "PATCH", + "aspectName": "datasetProperties", "aspect": { - "json": { - "removed": false - } + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808070000 + } + } + ] }, "systemMetadata": { "lastObserved": 1615443388097, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json index 491cbdf8b9704b..9bb8412b64f915 
100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1619,7 +1619,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -2075,5 +2075,49 @@ "runId": "multiple_spec_for_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": 
{ + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json index da7c76876d4156..fea1929b98ab53 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": 
"ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1619,7 +1619,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -2075,5 +2075,49 @@ "runId": "multiple_specs_of_different_buckets.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json index 76e562142e39e0..a31a721fbbadde 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1504,5 +1504,27 @@ "runId": "single_file.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + 
"aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json index abc6eb1b471b25..63888d6bc4351f 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json @@ -174,7 +174,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -269,7 +269,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -384,7 +384,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -503,7 +503,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -626,7 +626,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -806,5 +806,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847850000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_inference_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index 1c022fabf91584..8087ea591beef2 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -806,5 +806,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847850000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json index 41484bec81935c..64c1505414ff83 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json +++ 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json @@ -817,5 +817,49 @@ "runId": "folder_no_partition.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json index 0b28381fce8ffd..f86c652462fd45 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json @@ -627,5 +627,27 @@ "runId": "folder_no_partition_exclude.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": 
"add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json index c4cfed8bfc7ac0..2575db41ca8b73 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json @@ -817,5 +817,49 @@ "runId": "folder_no_partition_filename.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json index ae81f60ac8dcce..272beb57e85e18 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json @@ -627,5 +627,27 @@ "runId": "folder_no_partition_glob.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json index 684af901e68323..21623e2216565c 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_update_schema_with_partition_autodetect.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 
1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json index 4d23cadbbc4d3d..154bce421e18aa 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_with_partition_autodetect_traverse_all.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json index 6017a27a88895f..f483f806e61935 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": 
{ + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 90a361219c1bff..38ce5188e0a8e3 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -2625,5 +2625,159 @@ "runId": "multiple_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847610000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847670000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847660000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847640000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847650000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index c67977ef7fa1bc..7f657cb69180a1 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -911,5 +911,49 @@ "runId": "multiple_spec_for_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json index d96bdce9120822..6e2e966f1f7b48 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json @@ -1338,5 +1338,49 @@ "runId": "multiple_specs_of_different_buckets.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index 7703d137ddd29b..be3d2efed088e2 
100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -684,5 +684,27 @@ "runId": "single_file.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/test_s3.py b/metadata-ingestion/tests/integration/s3/test_s3.py index 54156610c68720..0e73cdca006bd0 100644 --- a/metadata-ingestion/tests/integration/s3/test_s3.py +++ b/metadata-ingestion/tests/integration/s3/test_s3.py @@ -242,6 +242,7 @@ def test_data_lake_local_ingest( golden_path=f"{test_resources_dir}/golden-files/local/golden_mces_{source_file}", ignore_paths=[ r"root\[\d+\]\['aspect'\]\['json'\]\['lastUpdatedTimestamp'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\[\d+\]\['value'\]\['time'\]", r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['created'\]\['time'\]", # root[41]['aspect']['json']['fieldProfiles'][0]['sampleValues'][0] r"root\[\d+\]\['aspect'\]\['json'\]\['fieldProfiles'\]\[\d+\]\['sampleValues'\]", diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 6a3ce983950b0b..82c760458ca148 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -1514,8 +1514,8 @@ "json": { 
"timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:user@mydomain.com", "operationType": "CREATE", @@ -1537,8 +1537,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:user@mydomain.com", "operationType": "ALTER", @@ -2023,8 +2023,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 3, "columnCount": 15 @@ -2099,5 +2099,27 @@ "runId": "salesforce-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:salesforce,Property__c,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1652784043000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index d995404ad69a53..97f65f1bd6a5b7 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -14,6 +14,9 @@ make_dataset_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( + auto_patch_last_modified, +) from datahub.ingestion.api.source_helpers import ( _prepend_platform_instance, auto_browse_path_v2, 
@@ -21,8 +24,15 @@ auto_lowercase_urns, auto_status_aspect, auto_workunit, + create_dataset_props_patch_builder, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + DatasetPropertiesClass, + OperationTypeClass, + TimeStampClass, +) +from datahub.specific.dataset import DatasetPatchBuilder _base_metadata: List[ Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass] @@ -146,7 +156,6 @@ def _make_browse_path_entries(path: List[str]) -> List[models.BrowsePathEntryCla def prepend_platform_instance( path: List[models.BrowsePathEntryClass], ) -> List[models.BrowsePathEntryClass]: - platform = "platform" instance = "instance" return _prepend_platform_instance(path, platform, instance) @@ -656,3 +665,150 @@ def test_auto_empty_dataset_usage_statistics_invalid_timestamp( changeType=models.ChangeTypeClass.CREATE, ).as_workunit(), ] + + +def get_sample_mcps(mcps_to_append: List = []) -> List[MetadataChangeProposalWrapper]: + mcps = [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + aspect=models.OperationClass( + timestampMillis=10, + lastUpdatedTimestamp=12, + operationType=OperationTypeClass.CREATE, + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + aspect=models.OperationClass( + timestampMillis=11, + lastUpdatedTimestamp=20, + operationType=OperationTypeClass.CREATE, + ), + ), + ] + mcps.extend(mcps_to_append) + return mcps + + +def to_patch_work_units(patch_builder: DatasetPatchBuilder) -> List[MetadataWorkUnit]: + return [ + MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(patch_mcp), mcp_raw=patch_mcp + ) + for patch_mcp in patch_builder.build() + ] + + +def get_auto_generated_wu() -> List[MetadataWorkUnit]: + dataset_patch_builder = DatasetPatchBuilder( + urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)" + 
).set_last_modified(TimeStampClass(time=20)) + + auto_generated_work_units = to_patch_work_units(dataset_patch_builder) + + return auto_generated_work_units + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_no_change(): + mcps = [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.StatusClass(removed=False), + ) + ] + + initial_wu = list(auto_workunit(mcps)) + + expected = initial_wu + + assert ( + list(auto_patch_last_modified(initial_wu)) == expected + ) # There should be no change + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_max_last_updated_timestamp(): + mcps = get_sample_mcps() + + expected = list(auto_workunit(mcps)) + + auto_generated_work_units = get_auto_generated_wu() + + expected.extend(auto_generated_work_units) + + # work unit should contain a path of datasetProperties with lastModified set to max of operation.lastUpdatedTime + # i.e., 20 + assert list(auto_patch_last_modified(auto_workunit(mcps))) == expected + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_multi_patch(): + mcps = get_sample_mcps() + + dataset_patch_builder = DatasetPatchBuilder( + urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)" + ) + + dataset_patch_builder.set_display_name("foo") + dataset_patch_builder.set_description("it is fake") + + patch_work_units = to_patch_work_units(dataset_patch_builder) + + work_units = [*list(auto_workunit(mcps)), *patch_work_units] + + auto_generated_work_units = get_auto_generated_wu() + + expected = [*work_units, *auto_generated_work_units] + + # In this case, the final work units include two patch units: one originating from the source and + # the other from auto_patch_last_modified. 
+ assert list(auto_patch_last_modified(work_units)) == expected + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_last_modified_patch_exist(): + mcps = get_sample_mcps() + + patch_builder = create_dataset_props_patch_builder( + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + dataset_properties=DatasetPropertiesClass( + name="foo", + description="dataset for collection of foo", + lastModified=TimeStampClass(time=20), + ), + ) + + work_units = [ + *list(auto_workunit(mcps)), + *to_patch_work_units(patch_builder), + ] + # The input and output should align since the source is generating a patch for datasetProperties with the + # lastModified attribute. + # Therefore, `auto_patch_last_modified` should not create any additional patch. + assert list(auto_patch_last_modified(work_units)) == work_units + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_last_modified_patch_not_exist(): + mcps = get_sample_mcps() + + patch_builder = create_dataset_props_patch_builder( + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + dataset_properties=DatasetPropertiesClass( + name="foo", + description="dataset for collection of foo", + ), + ) + + work_units = [ + *list(auto_workunit(mcps)), + *to_patch_work_units(patch_builder), + ] + + expected = [ + *work_units, + *get_auto_generated_wu(), # The output should include an additional patch for the `lastModified` attribute. 
+ ] + + assert list(auto_patch_last_modified(work_units)) == expected diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 90ff78b16f652b..7d01ecd034523d 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -1,3 +1,4 @@ +import doctest from datetime import timedelta from typing import Dict, List, Union from unittest import mock @@ -7,10 +8,8 @@ from datahub.emitter import mce_builder from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.source.dbt.dbt_cloud import ( - DBTCloudConfig, - infer_metadata_endpoint, -) +from datahub.ingestion.source.dbt import dbt_cloud +from datahub.ingestion.source.dbt.dbt_cloud import DBTCloudConfig from datahub.ingestion.source.dbt.dbt_core import ( DBTCoreConfig, DBTCoreSource, @@ -401,17 +400,7 @@ def test_dbt_cloud_config_with_defined_metadata_endpoint(): def test_infer_metadata_endpoint() -> None: - assert ( - infer_metadata_endpoint("https://cloud.getdbt.com") - == "https://metadata.cloud.getdbt.com/graphql" - ) - assert ( - infer_metadata_endpoint("https://prefix.us1.dbt.com") - == "https://prefix.metadata.us1.dbt.com/graphql" - ) - assert ( - infer_metadata_endpoint("http://dbt.corp.internal") - ) == "http://metadata.dbt.corp.internal/graphql" + assert doctest.testmod(dbt_cloud, raise_on_error=True).attempted > 0 def test_dbt_time_parsing() -> None: diff --git a/metadata-integration/java/datahub-protobuf-example/build.gradle b/metadata-integration/java/datahub-protobuf-example/build.gradle index 1efb43360457a0..3dea6554bd2646 100644 --- a/metadata-integration/java/datahub-protobuf-example/build.gradle +++ b/metadata-integration/java/datahub-protobuf-example/build.gradle @@ -13,8 +13,8 @@ repositories { } ext { - protobuf_version = '3.19.3' - datahub_protobuf_version = '0.8.45-SNAPSHOT' + protobuf_version = '3.25.5' + datahub_protobuf_version = '0.14.1' } configurations 
{ @@ -66,13 +66,12 @@ task publishSchema(dependsOn: build) { fileTree("schema").matching { exclude "protobuf/meta/**" - }.each {f -> + }.each { f -> doLast { javaexec { - executable = javaLauncher.get().getExecutablePath().getAsFile().getAbsolutePath() classpath = configurations.datahub - main = "datahub.protobuf.Proto2DataHub" - args = ["--descriptor", "${projectDir}/build/descriptors/main.dsc", "--file", file(f).getAbsoluteFile()] + mainClass = "datahub.protobuf.Proto2DataHub" + args = ["--descriptor", "${projectDir}/build/descriptors/main.dsc", "--file", file(f).absolutePath] } } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java index c54ba4a222b73d..12c59324e3f7cf 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.utils.QueryUtils; +import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -19,10 +20,13 @@ @Slf4j public class JavaGraphClient implements GraphClient { - GraphService _graphService; + private final OperationContext systemOpContext; + private final GraphService graphService; - public JavaGraphClient(@Nonnull GraphService graphService) { - this._graphService = graphService; + public JavaGraphClient( + @Nonnull OperationContext systemOpContext, @Nonnull GraphService graphService) { + this.systemOpContext = systemOpContext; + this.graphService = graphService; } /** @@ -43,7 +47,8 @@ public EntityRelationships getRelatedEntities( count = count == null ? 
DEFAULT_PAGE_SIZE : count; RelatedEntitiesResult relatedEntitiesResult = - _graphService.findRelatedEntities( + graphService.findRelatedEntities( + systemOpContext, null, QueryUtils.newFilter("urn", rawUrn), null, @@ -91,7 +96,8 @@ public EntityLineageResult getLineageEntities( @Nullable Integer count, int maxHops, String actor) { - return _graphService.getLineage( + return graphService.getLineage( + systemOpContext, UrnUtils.getUrn(rawUrn), direction, start != null ? start : 0, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java index 0dff2870808422..f9287ab34cf192 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java @@ -64,13 +64,7 @@ public EntityLineageResult getLineage( if (separateSiblings) { return ValidationUtils.validateEntityLineageResult( opContext, - _graphService.getLineage( - entityUrn, - direction, - offset, - count, - maxHops, - opContext.getSearchContext().getLineageFlags()), + _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops), _entityService); } @@ -81,13 +75,7 @@ public EntityLineageResult getLineage( } EntityLineageResult entityLineage = - _graphService.getLineage( - entityUrn, - direction, - offset, - count, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops); Siblings siblingAspectOfEntity = (Siblings) _entityService.getLatestAspect(opContext, entityUrn, SIBLINGS_ASPECT_NAME); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java new file mode 100644 index 00000000000000..33cb1a7130f142 --- /dev/null +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java @@ -0,0 +1,48 @@ +package com.linkedin.metadata.graph; + +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipFilter; +import com.linkedin.metadata.query.filter.SortCriterion; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Setter; + +@Builder +public class SystemGraphRetriever implements GraphRetriever { + @Setter private OperationContext systemOperationContext; + @Nonnull private final GraphService graphService; + + @Nonnull + @Override + public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriteria, + @Nullable String scrollId, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + return graphService.scrollRelatedEntities( + systemOperationContext, + sourceTypes, + sourceEntityFilter, + destinationTypes, + destinationEntityFilter, + relationshipTypes, + relationshipFilter, + sortCriteria, + scrollId, + count, + startTimeMillis, + endTimeMillis); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index 6703e07bfd915c..352e89baefc25b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -19,6 +19,7 @@ 
import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import io.datahubproject.metadata.context.OperationContext; import io.dgraph.DgraphClient; import io.dgraph.DgraphProto.Mutation; import io.dgraph.DgraphProto.NQuad; @@ -453,6 +454,7 @@ public void removeEdge(final Edge edge) { @Nonnull @Override public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -662,7 +664,7 @@ protected static List getRelatedEntitiesFromResponseData( } @Override - public void removeNode(@Nonnull Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull Urn urn) { String query = String.format("query {\n" + " node as var(func: eq(urn, \"%s\"))\n" + "}", urn); String deletion = "uid(node) * * ."; @@ -679,6 +681,7 @@ public void removeNode(@Nonnull Urn urn) { @Override public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull Urn urn, @Nonnull List relationshipTypes, @Nonnull RelationshipFilter relationshipFilter) { @@ -782,6 +785,7 @@ public void clear() { @Nonnull @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 50e5aa6ba893d4..40fa79a0ef1719 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -14,6 +14,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import 
com.linkedin.data.template.IntegerArray; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; import com.linkedin.metadata.graph.LineageDirection; @@ -34,14 +35,17 @@ import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -105,8 +109,7 @@ public class ESGraphQueryDAO { static final String GROUP_BY_DESTINATION_AGG = "group_by_destination"; static final String TOP_DOCUMENTS_AGG = "top_documents"; - @Nonnull - public static void addFilterToQueryBuilder( + private static void addFilterToQueryBuilder( @Nonnull Filter filter, @Nullable String node, BoolQueryBuilder rootQuery) { BoolQueryBuilder orQuery = new BoolQueryBuilder(); for (ConjunctiveCriterion conjunction : filter.getOr()) { @@ -231,7 +234,7 @@ private SearchResponse executeGroupByLineageSearchQuery( } } - private BoolQueryBuilder getAggregationFilter( + private static BoolQueryBuilder getAggregationFilter( Pair pair, RelationshipDirection direction) { BoolQueryBuilder subFilter = QueryBuilders.boolQuery(); TermQueryBuilder relationshipTypeTerm = @@ -258,6 +261,7 @@ private BoolQueryBuilder getAggregationFilter( } public SearchResponse getSearchResponse( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final 
List destinationTypes, @@ -268,6 +272,8 @@ public SearchResponse getSearchResponse( final int count) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -279,6 +285,8 @@ public SearchResponse getSearchResponse( } public static BoolQueryBuilder buildQuery( + @Nonnull final OperationContext opContext, + @Nonnull final GraphQueryConfiguration graphQueryConfiguration, @Nullable final List sourceTypes, @Nullable final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -286,6 +294,8 @@ public static BoolQueryBuilder buildQuery( @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { return buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -296,6 +306,8 @@ public static BoolQueryBuilder buildQuery( } public static BoolQueryBuilder buildQuery( + @Nonnull final OperationContext opContext, + @Nonnull final GraphQueryConfiguration graphQueryConfiguration, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -345,19 +357,23 @@ public static BoolQueryBuilder buildQuery( if (lifecycleOwner != null) { finalQuery.filter(QueryBuilders.termQuery(EDGE_FIELD_LIFECYCLE_OWNER, lifecycleOwner)); } + if (!Optional.ofNullable(opContext.getSearchContext().getSearchFlags().isIncludeSoftDeleted()) + .orElse(false)) { + applyExcludeSoftDelete(graphQueryConfiguration, finalQuery); + } return finalQuery; } @WithSpan public LineageResponse getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { + int maxHops) { Map result = new HashMap<>(); long currentTime = System.currentTimeMillis(); long remainingTime = graphQueryConfiguration.getTimeoutSeconds() * 1000; @@ -388,6 
+404,7 @@ public LineageResponse getLineage( // Do one hop on the lineage graph Stream intermediateStream = processOneHopLineage( + opContext, currentLevel, remainingTime, direction, @@ -398,7 +415,6 @@ public LineageResponse getLineage( existingPaths, exploreMultiplePaths, result, - lineageFlags, i); currentLevel = intermediateStream.collect(Collectors.toList()); currentTime = System.currentTimeMillis(); @@ -421,6 +437,7 @@ public LineageResponse getLineage( } private Stream processOneHopLineage( + @Nonnull OperationContext opContext, List currentLevel, Long remainingTime, LineageDirection direction, @@ -431,7 +448,6 @@ private Stream processOneHopLineage( Map existingPaths, boolean exploreMultiplePaths, Map result, - LineageFlags lineageFlags, int i) { // Do one hop on the lineage graph @@ -439,6 +455,7 @@ private Stream processOneHopLineage( int remainingHops = maxHops - numHops; List oneHopRelationships = getLineageRelationshipsInBatches( + opContext, currentLevel, direction, graphFilters, @@ -448,8 +465,10 @@ private Stream processOneHopLineage( remainingHops, remainingTime, existingPaths, - exploreMultiplePaths, - lineageFlags); + exploreMultiplePaths); + + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); + for (LineageRelationship oneHopRelnship : oneHopRelationships) { if (result.containsKey(oneHopRelnship.getEntity())) { log.debug("Urn encountered again during graph walk {}", oneHopRelnship.getEntity()); @@ -487,6 +506,7 @@ private Stream processOneHopLineage( if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( + opContext, additionalCurrentLevel, remainingTime, direction, @@ -497,7 +517,6 @@ private Stream processOneHopLineage( existingPaths, exploreMultiplePaths, result, - lineageFlags, i); intermediateStream = Stream.concat(intermediateStream, ignoreAsHopUrns); } @@ -560,6 +579,7 @@ private LineageRelationship mergeLineageRelationships( // Get 1-hop lineage relationships asynchronously 
in batches with timeout @WithSpan public List getLineageRelationshipsInBatches( + @Nonnull final OperationContext opContext, @Nonnull List entityUrns, @Nonnull LineageDirection direction, GraphFilters graphFilters, @@ -569,8 +589,7 @@ public List getLineageRelationshipsInBatches( int remainingHops, long remainingTime, Map existingPaths, - boolean exploreMultiplePaths, - @Nullable LineageFlags lineageFlags) { + boolean exploreMultiplePaths) { List> batches = Lists.partition(entityUrns, graphQueryConfiguration.getBatchSize()); return ConcurrencyUtils.getAllCompleted( batches.stream() @@ -579,6 +598,7 @@ public List getLineageRelationshipsInBatches( CompletableFuture.supplyAsync( () -> getLineageRelationships( + opContext, batchUrns, direction, graphFilters, @@ -587,8 +607,7 @@ public List getLineageRelationshipsInBatches( numHops, remainingHops, existingPaths, - exploreMultiplePaths, - lineageFlags))) + exploreMultiplePaths))) .collect(Collectors.toList()), remainingTime, TimeUnit.MILLISECONDS) @@ -600,6 +619,7 @@ public List getLineageRelationshipsInBatches( // Get 1-hop lineage relationships @WithSpan private List getLineageRelationships( + @Nonnull final OperationContext opContext, @Nonnull List entityUrns, @Nonnull LineageDirection direction, GraphFilters graphFilters, @@ -608,8 +628,8 @@ private List getLineageRelationships( int numHops, int remainingHops, Map existingPaths, - boolean exploreMultiplePaths, - @Nullable LineageFlags lineageFlags) { + boolean exploreMultiplePaths) { + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); Map> urnsPerEntityType = entityUrns.stream().collect(Collectors.groupingBy(Urn::getEntityType)); Map> edgesPerEntityType = @@ -628,7 +648,7 @@ private List getLineageRelationships( .collect(Collectors.toSet()); QueryBuilder finalQuery = - getLineageQuery(urnsPerEntityType, edgesPerEntityType, graphFilters, lineageFlags); + getLineageQuery(opContext, urnsPerEntityType, edgesPerEntityType, graphFilters); 
SearchResponse response; if (lineageFlags != null && lineageFlags.getEntitiesExploredPerHopLimit() != null) { response = @@ -660,11 +680,12 @@ private List getLineageRelationships( } @VisibleForTesting - public QueryBuilder getLineageQuery( + public static QueryBuilder getLineageQuery( + @Nonnull OperationContext opContext, @Nonnull Map> urnsPerEntityType, @Nonnull Map> edgesPerEntityType, - @Nonnull GraphFilters graphFilters, - @Nullable LineageFlags lineageFlags) { + @Nonnull GraphFilters graphFilters) { + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); BoolQueryBuilder entityTypeQueries = QueryBuilders.boolQuery(); // Get all relation types relevant to the set of urns to hop from urnsPerEntityType.forEach( @@ -690,7 +711,7 @@ public QueryBuilder getLineageQuery( && lineageFlags.getStartTimeMillis() != null && lineageFlags.getEndTimeMillis() != null) { finalQuery.filter( - TimeFilterUtils.getEdgeTimeFilterQuery( + GraphFilterUtils.getEdgeTimeFilterQuery( lineageFlags.getStartTimeMillis(), lineageFlags.getEndTimeMillis())); } else { log.debug("Empty time filter range provided. Skipping application of time filters"); @@ -700,7 +721,7 @@ public QueryBuilder getLineageQuery( } @VisibleForTesting - public QueryBuilder getLineageQueryForEntityType( + static QueryBuilder getLineageQueryForEntityType( @Nonnull List urns, @Nonnull List lineageEdges, @Nonnull GraphFilters graphFilters) { @@ -769,7 +790,7 @@ private void addViaNodeBoostQuery(final SearchSourceBuilder sourceBuilder) { * the Graph Store. 
*/ @VisibleForTesting - public static void addEdgeToPaths( + static void addEdgeToPaths( @Nonnull final Map existingPaths, @Nonnull final Urn parentUrn, @Nonnull final Urn childUrn) { @@ -782,7 +803,7 @@ private static boolean containsCycle(final UrnArray path) { return (path.size() != urnSet.size()); } - public static boolean addEdgeToPaths( + static boolean addEdgeToPaths( @Nonnull final Map existingPaths, @Nonnull final Urn parentUrn, final Urn viaUrn, @@ -1317,6 +1338,7 @@ public static class LineageResponse { } public SearchResponse getSearchResponse( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nullable final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -1329,6 +1351,8 @@ public SearchResponse getSearchResponse( BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -1371,4 +1395,17 @@ private SearchResponse executeScrollSearchQuery( throw new ESQueryException("Search query failed:", e); } } + + private static void applyExcludeSoftDelete( + GraphQueryConfiguration graphQueryConfiguration, BoolQueryBuilder boolQueryBuilder) { + if (graphQueryConfiguration.isGraphStatusEnabled()) { + Arrays.stream(EdgeUrnType.values()) + .map( + edgeUrnType -> + QueryBuilders.termsQuery( + GraphFilterUtils.getUrnStatusFieldName(edgeUrnType), "true")) + .filter(statusQuery -> !boolQueryBuilder.mustNot().contains(statusQuery)) + .forEach(boolQueryBuilder::mustNot); + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java index ddbd00f90ef684..ba481bdfa109f3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java @@ -4,10 +4,12 @@ import static 
com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME; import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import io.datahubproject.metadata.context.OperationContext; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -17,7 +19,9 @@ import org.opensearch.action.update.UpdateRequest; import org.opensearch.common.xcontent.XContentType; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.reindex.BulkByScrollResponse; +import org.opensearch.script.Script; @Slf4j @RequiredArgsConstructor @@ -25,8 +29,7 @@ public class ESGraphWriteDAO { private final IndexConvention indexConvention; private final ESBulkProcessor bulkProcessor; private final int numRetries; - - private static final String ES_WRITES_METRIC = "num_elasticSearch_writes"; + private final GraphQueryConfiguration graphQueryConfiguration; /** * Updates or inserts the given search document. @@ -56,6 +59,7 @@ public void deleteDocument(@Nonnull String docId) { } public BulkByScrollResponse deleteByQuery( + @Nonnull final OperationContext opContext, @Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter, @Nullable final String destinationType, @@ -64,6 +68,8 @@ public BulkByScrollResponse deleteByQuery( @Nonnull final RelationshipFilter relationshipFilter) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceType == null ? ImmutableList.of() : ImmutableList.of(sourceType), sourceEntityFilter, destinationType == null ? 
ImmutableList.of() : ImmutableList.of(destinationType), @@ -77,6 +83,7 @@ public BulkByScrollResponse deleteByQuery( } public BulkByScrollResponse deleteByQuery( + @Nonnull final OperationContext opContext, @Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter, @Nullable final String destinationType, @@ -86,6 +93,8 @@ public BulkByScrollResponse deleteByQuery( String lifecycleOwner) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceType == null ? ImmutableList.of() : ImmutableList.of(sourceType), sourceEntityFilter, destinationType == null ? ImmutableList.of() : ImmutableList.of(destinationType), @@ -98,4 +107,12 @@ public BulkByScrollResponse deleteByQuery( .deleteByQuery(finalQuery, indexConvention.getIndexName(INDEX_NAME)) .orElse(null); } + + @Nullable + public BulkByScrollResponse updateByQuery( + @Nonnull Script script, @Nonnull final QueryBuilder query) { + return bulkProcessor + .updateByQuery(script, query, indexConvention.getIndexName(INDEX_NAME)) + .orElse(null); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index e1532ea4e26c06..1769c53e4cd9bb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -1,12 +1,16 @@ package com.linkedin.metadata.graph.elastic; import static com.linkedin.metadata.aspect.models.graph.Edge.*; +import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusFieldName; +import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusQuery; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import 
com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; @@ -17,7 +21,6 @@ import com.linkedin.metadata.graph.LineageRelationshipArray; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.models.registry.LineageRegistry; -import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -35,6 +38,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; @@ -51,13 +55,15 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.search.SearchResponse; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; +import org.opensearch.script.ScriptType; import org.opensearch.search.SearchHit; @Slf4j @RequiredArgsConstructor public class ElasticSearchGraphService implements GraphService, ElasticSearchIndexed { - private final LineageRegistry _lineageRegistry; private final ESBulkProcessor _esBulkProcessor; private final IndexConvention _indexConvention; @@ -68,7 +74,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd public static final String INDEX_NAME = "graph_service_v1"; private static final Map 
EMPTY_HASH = new HashMap<>(); - private String toDocument(@Nonnull final Edge edge) { + private static String toDocument(@Nonnull final Edge edge) { final ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); final ObjectNode sourceObject = JsonNodeFactory.instance.objectNode(); @@ -114,6 +120,18 @@ private String toDocument(@Nonnull final Edge edge) { if (edge.getVia() != null) { searchDocument.put(EDGE_FIELD_VIA, edge.getVia().toString()); } + if (edge.getViaStatus() != null) { + searchDocument.put(EDGE_FIELD_VIA_STATUS, edge.getViaStatus()); + } + if (edge.getLifecycleOwnerStatus() != null) { + searchDocument.put(EDGE_FIELD_LIFECYCLE_OWNER_STATUS, edge.getLifecycleOwnerStatus()); + } + if (edge.getSourceStatus() != null) { + searchDocument.put(EDGE_SOURCE_STATUS, edge.getSourceStatus()); + } + if (edge.getDestinationStatus() != null) { + searchDocument.put(EDGE_DESTINATION_STATUS, edge.getDestinationStatus()); + } log.debug("Search doc for write {}", searchDocument); return searchDocument.toString(); @@ -142,8 +160,10 @@ public void removeEdge(@Nonnull final Edge edge) { _graphWriteDAO.deleteDocument(docId); } + @Override @Nonnull public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -161,6 +181,7 @@ public RelatedEntitiesResult findRelatedEntities( SearchResponse response = _graphReadDAO.getSearchResponse( + opContext, sourceTypes, sourceEntityFilter, destinationTypes, @@ -188,35 +209,16 @@ public RelatedEntitiesResult findRelatedEntities( @Override @Deprecated public EntityLineageResult getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, int maxHops) { - ESGraphQueryDAO.LineageResponse lineageResponse = - _graphReadDAO.getLineage(entityUrn, direction, graphFilters, offset, 
count, maxHops, null); - return new EntityLineageResult() - .setRelationships(new LineageRelationshipArray(lineageResponse.getLineageRelationships())) - .setStart(offset) - .setCount(count) - .setTotal(lineageResponse.getTotal()); - } - - @Nonnull - @WithSpan - @Override - public EntityLineageResult getLineage( - @Nonnull Urn entityUrn, - @Nonnull LineageDirection direction, - GraphFilters graphFilters, - int offset, - int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { ESGraphQueryDAO.LineageResponse lineageResponse = _graphReadDAO.getLineage( - entityUrn, direction, graphFilters, offset, count, maxHops, lineageFlags); + opContext, entityUrn, direction, graphFilters, offset, count, maxHops); return new EntityLineageResult() .setRelationships(new LineageRelationshipArray(lineageResponse.getLineageRelationships())) .setStart(offset) @@ -224,13 +226,14 @@ public EntityLineageResult getLineage( .setTotal(lineageResponse.getTotal()); } - private Filter createUrnFilter(@Nonnull final Urn urn) { + private static Filter createUrnFilter(@Nonnull final Urn urn) { Filter filter = new Filter(); CriterionArray criterionArray = new CriterionArray(); Criterion criterion = new Criterion(); criterion.setCondition(Condition.EQUAL); criterion.setField("urn"); criterion.setValue(urn.toString()); + criterion.setValues(new StringArray(urn.toString())); criterionArray.add(criterion); filter.setOr( new ConjunctiveCriterionArray( @@ -239,7 +242,7 @@ private Filter createUrnFilter(@Nonnull final Urn urn) { return filter; } - public void removeNode(@Nonnull final Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull final Urn urn) { Filter urnFilter = createUrnFilter(urn); Filter emptyFilter = new Filter().setOr(new ConjunctiveCriterionArray()); List relationshipTypes = new ArrayList<>(); @@ -250,19 +253,47 @@ public void removeNode(@Nonnull final Urn urn) { new RelationshipFilter().setDirection(RelationshipDirection.INCOMING); 
_graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, outgoingFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, outgoingFilter); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, incomingFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, incomingFilter); // Delete all edges where this entity is a lifecycle owner _graphWriteDAO.deleteByQuery( - null, emptyFilter, null, emptyFilter, relationshipTypes, incomingFilter, urn.toString()); + opContext, + null, + emptyFilter, + null, + emptyFilter, + relationshipTypes, + incomingFilter, + urn.toString()); + } - return; + @Override + public void setEdgeStatus( + @Nonnull Urn urn, boolean removed, @Nonnull EdgeUrnType... edgeUrnTypes) { + + for (EdgeUrnType edgeUrnType : edgeUrnTypes) { + // Update the graph status fields per urn type which do not match target state + QueryBuilder negativeQuery = getUrnStatusQuery(edgeUrnType, urn, !removed); + + // Set up the script to update the boolean field + String scriptContent = + "ctx._source." 
+ getUrnStatusFieldName(edgeUrnType) + " = params.newValue"; + Script script = + new Script( + ScriptType.INLINE, + "painless", + scriptContent, + Collections.singletonMap("newValue", removed)); + + _graphWriteDAO.updateByQuery(script, negativeQuery); + } } public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { @@ -271,7 +302,7 @@ public void removeEdgesFromNode( Filter emptyFilter = new Filter().setOr(new ConjunctiveCriterionArray()); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, relationshipFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, relationshipFilter); } @Override @@ -308,8 +339,8 @@ public boolean supportsMultiHop() { } @Nonnull - @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable List sourceTypes, @Nullable Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -326,6 +357,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( SearchResponse response = _graphReadDAO.getSearchResponse( + opContext, sourceTypes, sourceEntityFilter, destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java similarity index 67% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java rename to metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java index 7ee84ce834cfab..982bcae9b5fd96 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java @@ -1,14 +1,81 @@ package com.linkedin.metadata.graph.elastic; +import static 
com.linkedin.metadata.aspect.models.graph.Edge.EDGE_DESTINATION_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_DESTINATION_URN_FIELD; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_LIFECYCLE_OWNER; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_LIFECYCLE_OWNER_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_VIA; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_VIA_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_SOURCE_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_SOURCE_URN_FIELD; import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.*; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; @Slf4j -public class TimeFilterUtils { +public class GraphFilterUtils { + + public static QueryBuilder getUrnStatusQuery( + @Nonnull EdgeUrnType edgeUrnType, @Nonnull final Urn urn, @Nonnull Boolean removed) { + + final String urnField = getUrnFieldName(edgeUrnType); + final String statusField = getUrnStatusFieldName(edgeUrnType); + + // Create a BoolQueryBuilder + BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); + + // urn filter + finalQuery.filter(QueryBuilders.termQuery(urnField, urn.toString())); + + // status filter + if (removed) { + finalQuery.filter(QueryBuilders.termQuery(statusField, removed.toString())); + } else { + finalQuery.minimumShouldMatch(1); + finalQuery.should(QueryBuilders.termQuery(statusField, removed.toString())); + finalQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(statusField))); + } + + return finalQuery; + } + + public static String 
getUrnStatusFieldName(EdgeUrnType edgeUrnType) { + switch (edgeUrnType) { + case SOURCE: + return EDGE_SOURCE_STATUS; + case DESTINATION: + return EDGE_DESTINATION_STATUS; + case VIA: + return EDGE_FIELD_VIA_STATUS; + case LIFECYCLE_OWNER: + return EDGE_FIELD_LIFECYCLE_OWNER_STATUS; + default: + throw new IllegalStateException( + String.format("Unhandled EdgeUrnType. Found: %s", edgeUrnType)); + } + } + + public static String getUrnFieldName(EdgeUrnType edgeUrnType) { + switch (edgeUrnType) { + case SOURCE: + return EDGE_SOURCE_URN_FIELD; + case DESTINATION: + return EDGE_DESTINATION_URN_FIELD; + case VIA: + return EDGE_FIELD_VIA; + case LIFECYCLE_OWNER: + return EDGE_FIELD_LIFECYCLE_OWNER; + default: + throw new IllegalStateException( + String.format("Unhandled EdgeUrnType. Found: %s", edgeUrnType)); + } + } /** * In order to filter for edges that fall into a specific filter window, we perform a @@ -141,5 +208,5 @@ private static QueryBuilder buildManualLineageFilter() { return QueryBuilders.termQuery(String.format("%s.%s", PROPERTIES, SOURCE), UI); } - private TimeFilterUtils() {} + private GraphFilterUtils() {} } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java index 7a6c7701fde5f2..164bf3ad17d8cd 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java @@ -20,6 +20,8 @@ public static Map getMappings() { mappings.put(EDGE_FIELD_PROPERTIES, getMappingsForEdgeProperties()); mappings.put(EDGE_FIELD_LIFECYCLE_OWNER, getMappingsForKeyword()); mappings.put(EDGE_FIELD_VIA, getMappingsForKeyword()); + mappings.put(EDGE_FIELD_LIFECYCLE_OWNER_STATUS, getMappingsForBoolean()); + mappings.put(EDGE_FIELD_VIA_STATUS, getMappingsForBoolean()); return 
ImmutableMap.of("properties", mappings); } @@ -27,12 +29,17 @@ private static Map getMappingsForKeyword() { return ImmutableMap.builder().put("type", "keyword").build(); } + private static Map getMappingsForBoolean() { + return ImmutableMap.builder().put("type", "boolean").build(); + } + private static Map getMappingsForEntity() { Map mappings = ImmutableMap.builder() .put("urn", getMappingsForKeyword()) .put("entityType", getMappingsForKeyword()) + .put("removed", getMappingsForBoolean()) .build(); return ImmutableMap.of("properties", mappings); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 9fe9c242fe48c7..75d993f52680a7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -32,7 +32,7 @@ import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; -import io.opentelemetry.extension.annotations.WithSpan; +import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; @@ -250,33 +250,24 @@ public void removeEdge(final Edge edge) { } @Nonnull - @WithSpan @Override public EntityLineageResult getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, int maxHops) { - return getLineage(entityUrn, direction, graphFilters, offset, count, maxHops, null); - } - - @Nonnull - @Override - public EntityLineageResult getLineage( - @Nonnull Urn entityUrn, - @Nonnull LineageDirection direction, - GraphFilters graphFilters, - int offset, - int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { 
log.debug(String.format("Neo4j getLineage maxHops = %d", maxHops)); final var statementAndParams = generateLineageStatementAndParameters( - entityUrn, direction, graphFilters, maxHops, lineageFlags); + entityUrn, + direction, + graphFilters, + maxHops, + opContext.getSearchContext().getLineageFlags()); final var statement = statementAndParams.getFirst(); final var parameters = statementAndParams.getSecond(); @@ -457,6 +448,7 @@ private Pair> generateLineageStatementAndParameters( @Nonnull public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -600,7 +592,7 @@ private String computeEntityTypeWhereClause( return whereClause; } - public void removeNode(@Nonnull final Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull final Urn urn) { log.debug(String.format("Removing Neo4j node with urn: %s", urn)); final String srcNodeLabel = urn.getEntityType(); @@ -627,6 +619,7 @@ public void removeNode(@Nonnull final Urn urn) { * @param relationshipFilter Query relationship filter */ public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { @@ -915,6 +908,7 @@ private boolean isSourceDestReversed( @Nonnull @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 435731a3f9d041..ec9c44e42f7f43 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -173,13 +173,7 @@ public LineageSearchResult searchAcrossLineage( if (cachedLineageResult == null || finalOpContext.getSearchContext().getSearchFlags().isSkipCache()) { lineageResult = - _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops); if (cacheEnabled) { try { cache.put( @@ -210,12 +204,7 @@ public LineageSearchResult searchAcrossLineage( // we have to refetch EntityLineageResult result = _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - finalMaxHops, - opContext.getSearchContext().getLineageFlags()); + opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, finalMaxHops); cache.put(cacheKey, result); log.debug("Refilled Cached lineage entry for: {}.", sourceUrn); } else { @@ -770,13 +759,7 @@ public LineageScrollResult scrollAcrossLineage( if (cachedLineageResult == null) { maxHops = maxHops != null ? 
maxHops : 1000; lineageResult = - _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops); if (cacheEnabled) { cache.put( cacheKey, new CachedEntityLineageResult(lineageResult, System.currentTimeMillis())); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 2d04e997740502..6de79b6c4b181e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -87,6 +87,8 @@ public class ESIndexBuilder { @Getter private final GitVersion gitVersion; + @Getter private final int maxReindexHours; + private static final RequestOptions REQUEST_OPTIONS = RequestOptions.DEFAULT.toBuilder() .setRequestConfig(RequestConfig.custom().setSocketTimeout(180 * 1000).build()) @@ -106,6 +108,34 @@ public ESIndexBuilder( boolean enableStructuredPropertiesReindex, ElasticSearchConfiguration elasticSearchConfiguration, GitVersion gitVersion) { + this( + searchClient, + numShards, + numReplicas, + numRetries, + refreshIntervalSeconds, + indexSettingOverrides, + enableIndexSettingsReindex, + enableIndexMappingsReindex, + enableStructuredPropertiesReindex, + elasticSearchConfiguration, + gitVersion, + 0); + } + + public ESIndexBuilder( + RestHighLevelClient searchClient, + int numShards, + int numReplicas, + int numRetries, + int refreshIntervalSeconds, + Map> indexSettingOverrides, + boolean enableIndexSettingsReindex, + boolean enableIndexMappingsReindex, + boolean enableStructuredPropertiesReindex, + ElasticSearchConfiguration elasticSearchConfiguration, + GitVersion gitVersion, + int 
maxReindexHours) { this._searchClient = searchClient; this.numShards = numShards; this.numReplicas = numReplicas; @@ -117,6 +147,7 @@ public ESIndexBuilder( this.elasticSearchConfiguration = elasticSearchConfiguration; this.enableStructuredPropertiesReindex = enableStructuredPropertiesReindex; this.gitVersion = gitVersion; + this.maxReindexHours = maxReindexHours; RetryConfig config = RetryConfig.custom() @@ -348,10 +379,10 @@ private static String getNextIndexName(String base, long startTime) { private void reindex(ReindexConfig indexState) throws Throwable { final long startTime = System.currentTimeMillis(); - final int maxReindexHours = 8; final long initialCheckIntervalMilli = 1000; final long finalCheckIntervalMilli = 60000; - final long timeoutAt = startTime + (1000 * 60 * 60 * maxReindexHours); + final long timeoutAt = + maxReindexHours > 0 ? startTime + (1000L * 60 * 60 * maxReindexHours) : Long.MAX_VALUE; String tempIndexName = getNextIndexName(indexState.name(), startTime); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java index fc29aca4117845..63a9c731a2d396 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java @@ -23,6 +23,8 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.reindex.BulkByScrollResponse; import org.opensearch.index.reindex.DeleteByQueryRequest; +import org.opensearch.index.reindex.UpdateByQueryRequest; +import org.opensearch.script.Script; @Slf4j @Builder(builderMethodName = "hiddenBuilder") @@ -30,6 +32,7 @@ public class ESBulkProcessor implements Closeable { private static final String ES_WRITES_METRIC = "num_elasticSearch_writes"; private static final String ES_BATCHES_METRIC = 
"num_elasticSearch_batches_submitted"; private static final String ES_DELETE_EXCEPTION_METRIC = "delete_by_query"; + private static final String ES_UPDATE_EXCEPTION_METRIC = "update_by_query"; private static final String ES_SUBMIT_DELETE_EXCEPTION_METRIC = "submit_delete_by_query_task"; private static final String ES_SUBMIT_REINDEX_METRIC = "reindex_submit"; private static final String ES_REINDEX_SUCCESS_METRIC = "reindex_success"; @@ -97,6 +100,26 @@ public Optional deleteByQuery( return deleteByQuery(queryBuilder, refresh, bulkRequestsLimit, defaultTimeout, indices); } + public Optional updateByQuery( + Script script, QueryBuilder queryBuilder, String... indices) { + // Create an UpdateByQueryRequest + UpdateByQueryRequest updateByQuery = new UpdateByQueryRequest(indices); + updateByQuery.setQuery(queryBuilder); + updateByQuery.setScript(script); + + try { + final BulkByScrollResponse updateResponse = + searchClient.updateByQuery(updateByQuery, RequestOptions.DEFAULT); + MetricUtils.counter(this.getClass(), ES_WRITES_METRIC).inc(updateResponse.getTotal()); + return Optional.of(updateResponse); + } catch (Exception e) { + log.error("ERROR: Failed to update by query. See stacktrace for a more detailed error:", e); + MetricUtils.exceptionCounter(ESBulkProcessor.class, ES_UPDATE_EXCEPTION_METRIC, e); + } + + return Optional.empty(); + } + public Optional deleteByQuery( QueryBuilder queryBuilder, boolean refresh, int limit, TimeValue timeout, String... 
indices) { DeleteByQueryRequest deleteByQueryRequest = diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index 3d31b552db0bb6..b6d9357ecd65e8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -389,13 +389,9 @@ private Optional getNodeForValue( // By default run toString default: String value = fieldValue.toString(); - // If index type is BROWSE_PATH, make sure the value starts with a slash - if (fieldType == FieldType.BROWSE_PATH && !value.startsWith("/")) { - value = "/" + value; - } return value.isEmpty() ? Optional.empty() - : Optional.of(JsonNodeFactory.instance.textNode(fieldValue.toString())); + : Optional.of(JsonNodeFactory.instance.textNode(value)); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java new file mode 100644 index 00000000000000..7549aea2007da5 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -0,0 +1,452 @@ +package com.linkedin.metadata.service; + +import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.createRelationshipFilter; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.InputField; +import com.linkedin.common.InputFields; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import 
com.linkedin.data.template.RecordTemplate; +import com.linkedin.datajob.DataJobInputOutput; +import com.linkedin.dataset.FineGrainedLineage; +import com.linkedin.dataset.FineGrainedLineageArray; +import com.linkedin.dataset.UpstreamLineage; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import com.linkedin.metadata.entity.SearchIndicesService; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.graph.GraphIndexUtils; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.graph.dgraph.DgraphGraphService; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.RelationshipFieldSpec; +import com.linkedin.metadata.models.extractor.FieldExtractor; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.utils.SchemaFieldUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class UpdateGraphIndicesService implements SearchIndicesService { + private static final String DOWNSTREAM_OF = "DownstreamOf"; + + public static 
UpdateGraphIndicesService withService(GraphService graphService) { + return new UpdateGraphIndicesService(graphService); + } + + private final GraphService graphService; + + @Getter private final boolean graphStatusEnabled; + + @Getter @Setter @VisibleForTesting private boolean graphDiffMode; + + private static final Set UPDATE_CHANGE_TYPES = + ImmutableSet.of( + ChangeType.CREATE, + ChangeType.CREATE_ENTITY, + ChangeType.UPSERT, + ChangeType.RESTATE, + ChangeType.PATCH); + + public UpdateGraphIndicesService(GraphService graphService) { + this(graphService, true, true); + } + + public UpdateGraphIndicesService( + GraphService graphService, boolean graphDiffMode, boolean graphStatusEnabled) { + this.graphService = graphService; + this.graphDiffMode = graphDiffMode; + this.graphStatusEnabled = graphStatusEnabled; + } + + @Override + public void handleChangeEvent( + @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { + try { + MCLItemImpl mclItem = + MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + + if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) { + handleUpdateChangeEvent(opContext, mclItem); + + if (graphStatusEnabled && mclItem.getAspectName().equals(STATUS_ASPECT_NAME)) { + handleStatusUpdateChangeEvent(opContext, mclItem); + } + } else if (event.getChangeType() == ChangeType.DELETE) { + handleDeleteChangeEvent(opContext, mclItem); + + if (graphStatusEnabled && mclItem.getAspectName().equals(STATUS_ASPECT_NAME)) { + handleStatusUpdateChangeEvent(opContext, mclItem); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void handleStatusUpdateChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem item) { + final Boolean removed; + if (ChangeType.DELETE.equals(item.getChangeType())) { + removed = false; + } else if (ChangeType.RESTATE.equals(item.getChangeType()) + || item.getPreviousRecordTemplate() == null + || 
!item.getPreviousAspect(Status.class).equals(item.getAspect(Status.class))) { + removed = item.getAspect(Status.class).isRemoved(); + } else { + removed = null; + } + + if (removed != null) { + graphService.setEdgeStatus(item.getUrn(), removed, EdgeUrnType.values()); + } + } + + /** + * This very important method processes {@link MetadataChangeLog} events that represent changes to + * the Metadata Graph. + * + *

In particular, it handles updating the Search, Graph, Timeseries, and System Metadata stores + * in response to a given change type to reflect the changes present in the new aspect. + * + * @param event the change event to be processed. + */ + private void handleUpdateChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem event) throws IOException { + + final AspectSpec aspectSpec = event.getAspectSpec(); + final Urn urn = event.getUrn(); + + RecordTemplate aspect = event.getRecordTemplate(); + RecordTemplate previousAspect = event.getPreviousRecordTemplate(); + + // For all aspects, attempt to update Graph + SystemMetadata systemMetadata = event.getSystemMetadata(); + if (graphDiffMode + && !(graphService instanceof DgraphGraphService) + && (systemMetadata == null + || systemMetadata.getProperties() == null + || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { + updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); + } else { + updateGraphService(opContext, urn, aspectSpec, aspect, event.getMetadataChangeLog()); + } + } + + /** + * This very important method processes {@link MetadataChangeLog} deletion events to cleanup the + * Metadata Graph when an aspect or entity is removed. + * + *

In particular, it handles updating the Search, Graph, Timeseries, and System Metadata stores + * to reflect the deletion of a particular aspect. + * + *

Note that if an entity's key aspect is deleted, the entire entity will be purged from + * search, graph, timeseries, etc. + * + * @param event the change event to be processed. + */ + private void handleDeleteChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem event) { + + final EntitySpec entitySpec = event.getEntitySpec(); + final Urn urn = event.getUrn(); + + AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); + if (aspectSpec == null) { + throw new RuntimeException( + String.format( + "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. Cannot update indices for MCL.", + urn.getEntityType(), event.getAspectName())); + } + + RecordTemplate aspect = event.getRecordTemplate(); + Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); + + if (!aspectSpec.isTimeseries()) { + deleteGraphData( + opContext, urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); + } + } + + // TODO: remove this method once we implement sourceOverride when creating graph edges + private void updateFineGrainedEdgesAndRelationships( + Urn entity, + FineGrainedLineageArray fineGrainedLineageArray, + List edgesToAdd, + HashMap> urnToRelationshipTypesBeingAdded) { + if (fineGrainedLineageArray != null) { + for (FineGrainedLineage fineGrainedLineage : fineGrainedLineageArray) { + if (!fineGrainedLineage.hasDownstreams() || !fineGrainedLineage.hasUpstreams()) { + break; + } + // Fine grained lineage array is present either on datajob (datajob input/output) or dataset + // We set the datajob as the viaEntity in scenario 1, and the query (if present) as the + // viaEntity in scenario 2 + Urn viaEntity = + entity.getEntityType().equals("dataJob") ? 
entity : fineGrainedLineage.getQuery(); + // for every downstream, create an edge with each of the upstreams + for (Urn downstream : fineGrainedLineage.getDownstreams()) { + for (Urn upstream : fineGrainedLineage.getUpstreams()) { + // TODO: add edges uniformly across aspects + edgesToAdd.add( + new Edge( + downstream, + upstream, + DOWNSTREAM_OF, + null, + null, + null, + null, + null, + entity, + viaEntity)); + Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); + relationshipTypes.add(DOWNSTREAM_OF); + urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); + } + } + } + } + } + + // TODO: remove this method once we implement sourceOverride and update inputFields aspect + private void updateInputFieldEdgesAndRelationships( + @Nonnull final Urn urn, + @Nonnull final InputFields inputFields, + @Nonnull final List edgesToAdd, + @Nonnull final HashMap> urnToRelationshipTypesBeingAdded) { + if (inputFields.hasFields()) { + for (final InputField field : inputFields.getFields()) { + if (field.hasSchemaFieldUrn() + && field.hasSchemaField() + && field.getSchemaField().hasFieldPath()) { + final Urn sourceFieldUrn = + SchemaFieldUtils.generateSchemaFieldUrn(urn, field.getSchemaField().getFieldPath()); + // TODO: add edges uniformly across aspects + edgesToAdd.add( + new Edge( + sourceFieldUrn, + field.getSchemaFieldUrn(), + DOWNSTREAM_OF, + null, + null, + null, + null, + null)); + final Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); + relationshipTypes.add(DOWNSTREAM_OF); + urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); + } + } + } + } + + private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final MetadataChangeLog event, + final boolean isNewAspectVersion) { + final List edgesToAdd = new 
ArrayList<>(); + final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); + + // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and + // inputFields + // since @Relationship only links between the parent entity urn and something else. + if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { + UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); + updateFineGrainedEdgesAndRelationships( + urn, + upstreamLineage.getFineGrainedLineages(), + edgesToAdd, + urnToRelationshipTypesBeingAdded); + } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { + final InputFields inputFields = new InputFields(aspect.data()); + updateInputFieldEdgesAndRelationships( + urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); + } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { + DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); + updateFineGrainedEdgesAndRelationships( + urn, + dataJobInputOutput.getFineGrainedLineages(), + edgesToAdd, + urnToRelationshipTypesBeingAdded); + } + + Map> extractedFields = + FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); + + for (Map.Entry> entry : extractedFields.entrySet()) { + Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); + relationshipTypes.add(entry.getKey().getRelationshipName()); + urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); + final List newEdges = + GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); + edgesToAdd.addAll(newEdges); + } + return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); + } + + /** Process snapshot and update graph index */ + private void updateGraphService( + @Nonnull final OperationContext opContext, + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull 
final MetadataChangeLog event) { + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final List edgesToAdd = edgeAndRelationTypes.getFirst(); + final HashMap> urnToRelationshipTypesBeingAdded = + edgeAndRelationTypes.getSecond(); + + log.debug("Here's the relationship types found {}", urnToRelationshipTypesBeingAdded); + if (!urnToRelationshipTypesBeingAdded.isEmpty()) { + for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + newRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } + edgesToAdd.forEach(graphService::addEdge); + } + } + + private void updateGraphServiceDiff( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nullable final RecordTemplate oldAspect, + @Nonnull final RecordTemplate newAspect, + @Nonnull final MetadataChangeLog event) { + Pair, HashMap>> oldEdgeAndRelationTypes = null; + if (oldAspect != null) { + oldEdgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event, false); + } + + final List oldEdges = + oldEdgeAndRelationTypes != null + ? 
oldEdgeAndRelationTypes.getFirst() + : Collections.emptyList(); + final Set oldEdgeSet = new HashSet<>(oldEdges); + + Pair, HashMap>> newEdgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event, true); + + final List newEdges = newEdgeAndRelationTypes.getFirst(); + final Set newEdgeSet = new HashSet<>(newEdges); + + // Edges to add + final List additiveDifference = + newEdgeSet.stream().filter(edge -> !oldEdgeSet.contains(edge)).collect(Collectors.toList()); + + // Edges to remove + final List subtractiveDifference = + oldEdgeSet.stream().filter(edge -> !newEdgeSet.contains(edge)).collect(Collectors.toList()); + + // Edges to update + final List mergedEdges = getMergedEdges(oldEdgeSet, newEdgeSet); + + // Remove any old edges that no longer exist first + if (subtractiveDifference.size() > 0) { + log.debug("Removing edges: {}", subtractiveDifference); + subtractiveDifference.forEach(graphService::removeEdge); + } + + // Then add new edges + if (additiveDifference.size() > 0) { + log.debug("Adding edges: {}", additiveDifference); + additiveDifference.forEach(graphService::addEdge); + } + + // Then update existing edges + if (mergedEdges.size() > 0) { + log.debug("Updating edges: {}", mergedEdges); + mergedEdges.forEach(graphService::upsertEdge); + } + } + + private static List getMergedEdges(final Set oldEdgeSet, final Set newEdgeSet) { + final Map oldEdgesMap = + oldEdgeSet.stream() + .map(edge -> Pair.of(edge.hashCode(), edge)) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + + final List mergedEdges = new ArrayList<>(); + if (!oldEdgesMap.isEmpty()) { + for (Edge newEdge : newEdgeSet) { + if (oldEdgesMap.containsKey(newEdge.hashCode())) { + final Edge oldEdge = oldEdgesMap.get(newEdge.hashCode()); + final Edge mergedEdge = GraphIndexUtils.mergeEdges(oldEdge, newEdge); + mergedEdges.add(mergedEdge); + } + } + } + + return mergedEdges; + } + + private void deleteGraphData( + @Nonnull final OperationContext 
opContext, + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final Boolean isKeyAspect, + @Nonnull final MetadataChangeLog event) { + if (isKeyAspect) { + graphService.removeNode(opContext, urn); + return; + } + + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final HashMap> urnToRelationshipTypesBeingAdded = + edgeAndRelationTypes.getSecond(); + if (urnToRelationshipTypesBeingAdded.size() > 0) { + for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + createRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 2274b0a7c1cd82..3795fd19316b19 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -2,59 +2,37 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.search.transformer.SearchDocumentTransformer.withSystemCreated; -import static com.linkedin.metadata.search.utils.QueryUtils.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; -import com.linkedin.common.InputField; -import com.linkedin.common.InputFields; import com.linkedin.common.Status; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import 
com.linkedin.datajob.DataJobInputOutput; -import com.linkedin.dataset.FineGrainedLineage; -import com.linkedin.dataset.FineGrainedLineageArray; -import com.linkedin.dataset.UpstreamLineage; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.MCLItem; -import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.entity.SearchIndicesService; import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; -import com.linkedin.metadata.graph.GraphIndexUtils; -import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.graph.dgraph.DgraphGraphService; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.RelationshipFieldSpec; -import com.linkedin.metadata.models.extractor.FieldExtractor; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; -import com.linkedin.metadata.utils.SchemaFieldUtils; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; -import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; -import java.util.ArrayList; -import 
java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -63,33 +41,25 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Value; @Slf4j public class UpdateIndicesService implements SearchIndicesService { - private static final String DOWNSTREAM_OF = "DownstreamOf"; - - private final GraphService _graphService; - private final EntitySearchService _entitySearchService; - private final TimeseriesAspectService _timeseriesAspectService; - private final SystemMetadataService _systemMetadataService; - private final SearchDocumentTransformer _searchDocumentTransformer; - private final EntityIndexBuilders _entityIndexBuilders; - @Nonnull private final String idHashAlgo; - @Value("${featureFlags.graphServiceDiffModeEnabled:true}") - private boolean _graphDiffMode; + @VisibleForTesting @Getter private final UpdateGraphIndicesService updateGraphIndicesService; + private final EntitySearchService entitySearchService; + private final TimeseriesAspectService timeseriesAspectService; + private final SystemMetadataService systemMetadataService; + private final SearchDocumentTransformer searchDocumentTransformer; + private final EntityIndexBuilders entityIndexBuilders; + @Nonnull private final String idHashAlgo; - @Value("${featureFlags.searchServiceDiffModeEnabled:true}") - private boolean _searchDiffMode; + @Getter private final boolean searchDiffMode; - @Value("${structuredProperties.enabled}") - private boolean _structuredPropertiesHookEnabled; + @Getter private final boolean structuredPropertiesHookEnabled; - @Value("${structuredProperties.writeEnabled}") - private boolean _structuredPropertiesWriteEnabled; + @Getter private final boolean structuredPropertiesWriteEnabled; private static 
final Set UPDATE_CHANGE_TYPES = ImmutableSet.of( @@ -99,31 +69,48 @@ public class UpdateIndicesService implements SearchIndicesService { ChangeType.RESTATE, ChangeType.PATCH); - @VisibleForTesting - public void setGraphDiffMode(boolean graphDiffMode) { - _graphDiffMode = graphDiffMode; - } - - @VisibleForTesting - public void setSearchDiffMode(boolean searchDiffMode) { - _searchDiffMode = searchDiffMode; + public UpdateIndicesService( + UpdateGraphIndicesService updateGraphIndicesService, + EntitySearchService entitySearchService, + TimeseriesAspectService timeseriesAspectService, + SystemMetadataService systemMetadataService, + SearchDocumentTransformer searchDocumentTransformer, + EntityIndexBuilders entityIndexBuilders, + @Nonnull String idHashAlgo) { + this( + updateGraphIndicesService, + entitySearchService, + timeseriesAspectService, + systemMetadataService, + searchDocumentTransformer, + entityIndexBuilders, + idHashAlgo, + true, + true, + true); } public UpdateIndicesService( - GraphService graphService, + UpdateGraphIndicesService updateGraphIndicesService, EntitySearchService entitySearchService, TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, EntityIndexBuilders entityIndexBuilders, - @Nonnull String idHashAlgo) { - _graphService = graphService; - _entitySearchService = entitySearchService; - _timeseriesAspectService = timeseriesAspectService; - _systemMetadataService = systemMetadataService; - _searchDocumentTransformer = searchDocumentTransformer; - _entityIndexBuilders = entityIndexBuilders; + @Nonnull String idHashAlgo, + boolean searchDiffMode, + boolean structuredPropertiesHookEnabled, + boolean structuredPropertiesWriteEnabled) { + this.updateGraphIndicesService = updateGraphIndicesService; + this.entitySearchService = entitySearchService; + this.timeseriesAspectService = timeseriesAspectService; + this.systemMetadataService = systemMetadataService; 
+ this.searchDocumentTransformer = searchDocumentTransformer; + this.entityIndexBuilders = entityIndexBuilders; this.idHashAlgo = idHashAlgo; + this.searchDiffMode = searchDiffMode; + this.structuredPropertiesHookEnabled = structuredPropertiesHookEnabled; + this.structuredPropertiesWriteEnabled = structuredPropertiesWriteEnabled; } @Override @@ -144,6 +131,9 @@ public void handleChangeEvent( } else if (hookEvent.getChangeType() == ChangeType.DELETE) { handleDeleteChangeEvent(opContext, mclItem); } + + // graph update + updateGraphIndicesService.handleChangeEvent(opContext, event); } } catch (IOException e) { throw new RuntimeException(e); @@ -191,18 +181,6 @@ private void handleUpdateChangeEvent( // Step 2. For all aspects, attempt to update Search updateSearchService(opContext, event); - - // Step 3. For all aspects, attempt to update Graph - SystemMetadata systemMetadata = event.getSystemMetadata(); - if (_graphDiffMode - && !(_graphService instanceof DgraphGraphService) - && (systemMetadata == null - || systemMetadata.getProperties() == null - || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { - updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); - } else { - updateGraphService(urn, aspectSpec, aspect, event.getMetadataChangeLog()); - } } public void updateIndexMappings( @@ -210,9 +188,8 @@ public void updateIndexMappings( EntitySpec entitySpec, AspectSpec aspectSpec, RecordTemplate newValue, - RecordTemplate oldValue) - throws IOException { - if (_structuredPropertiesHookEnabled + RecordTemplate oldValue) { + if (structuredPropertiesHookEnabled && STRUCTURED_PROPERTY_ENTITY_NAME.equals(entitySpec.getName()) && STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(aspectSpec.getName())) { @@ -228,7 +205,7 @@ public void updateIndexMappings( newDefinition.getEntityTypes().removeAll(oldEntityTypes); if (newDefinition.getEntityTypes().size() > 0) { - _entityIndexBuilders + entityIndexBuilders 
.buildReindexConfigsWithNewStructProp(urn, newDefinition) .forEach( reindexState -> { @@ -237,7 +214,7 @@ public void updateIndexMappings( "Applying new structured property {} to index {}", newDefinition, reindexState.name()); - _entityIndexBuilders.getIndexBuilder().applyMappings(reindexState, false); + entityIndexBuilders.getIndexBuilder().applyMappings(reindexState, false); } catch (IOException e) { throw new RuntimeException(e); } @@ -277,236 +254,10 @@ private void handleDeleteChangeEvent( if (!aspectSpec.isTimeseries()) { deleteSystemMetadata(urn, aspectSpec, isDeletingKey); - deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); deleteSearchData(opContext, urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey); } } - // TODO: remove this method once we implement sourceOverride when creating graph edges - private void updateFineGrainedEdgesAndRelationships( - Urn entity, - FineGrainedLineageArray fineGrainedLineageArray, - List edgesToAdd, - HashMap> urnToRelationshipTypesBeingAdded) { - if (fineGrainedLineageArray != null) { - for (FineGrainedLineage fineGrainedLineage : fineGrainedLineageArray) { - if (!fineGrainedLineage.hasDownstreams() || !fineGrainedLineage.hasUpstreams()) { - break; - } - // Fine grained lineage array is present either on datajob (datajob input/output) or dataset - // We set the datajob as the viaEntity in scenario 1, and the query (if present) as the - // viaEntity in scenario 2 - Urn viaEntity = - entity.getEntityType().equals("dataJob") ? 
entity : fineGrainedLineage.getQuery(); - // for every downstream, create an edge with each of the upstreams - for (Urn downstream : fineGrainedLineage.getDownstreams()) { - for (Urn upstream : fineGrainedLineage.getUpstreams()) { - // TODO: add edges uniformly across aspects - edgesToAdd.add( - new Edge( - downstream, - upstream, - DOWNSTREAM_OF, - null, - null, - null, - null, - null, - entity, - viaEntity)); - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); - relationshipTypes.add(DOWNSTREAM_OF); - urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); - } - } - } - } - } - - // TODO: remove this method once we implement sourceOverride and update inputFields aspect - private void updateInputFieldEdgesAndRelationships( - @Nonnull final Urn urn, - @Nonnull final InputFields inputFields, - @Nonnull final List edgesToAdd, - @Nonnull final HashMap> urnToRelationshipTypesBeingAdded) { - if (inputFields.hasFields()) { - for (final InputField field : inputFields.getFields()) { - if (field.hasSchemaFieldUrn() - && field.hasSchemaField() - && field.getSchemaField().hasFieldPath()) { - final Urn sourceFieldUrn = - SchemaFieldUtils.generateSchemaFieldUrn(urn, field.getSchemaField().getFieldPath()); - // TODO: add edges uniformly across aspects - edgesToAdd.add( - new Edge( - sourceFieldUrn, - field.getSchemaFieldUrn(), - DOWNSTREAM_OF, - null, - null, - null, - null, - null)); - final Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); - relationshipTypes.add(DOWNSTREAM_OF); - urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); - } - } - } - } - - private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final MetadataChangeLog event, - final boolean isNewAspectVersion) { - final List edgesToAdd = new 
ArrayList<>(); - final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); - - // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and - // inputFields - // since @Relationship only links between the parent entity urn and something else. - if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { - UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); - updateFineGrainedEdgesAndRelationships( - urn, - upstreamLineage.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); - } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { - final InputFields inputFields = new InputFields(aspect.data()); - updateInputFieldEdgesAndRelationships( - urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); - } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { - DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); - updateFineGrainedEdgesAndRelationships( - urn, - dataJobInputOutput.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); - } - - Map> extractedFields = - FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); - - for (Map.Entry> entry : extractedFields.entrySet()) { - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); - relationshipTypes.add(entry.getKey().getRelationshipName()); - urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); - final List newEdges = - GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); - edgesToAdd.addAll(newEdges); - } - return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); - } - - /** Process snapshot and update graph index */ - private void updateGraphService( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final MetadataChangeLog event) { - Pair, 
HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final List edgesToAdd = edgeAndRelationTypes.getFirst(); - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - - log.debug("Here's the relationship types found {}", urnToRelationshipTypesBeingAdded); - if (!urnToRelationshipTypesBeingAdded.isEmpty()) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - _graphService.removeEdgesFromNode( - entry.getKey(), - new ArrayList<>(entry.getValue()), - newRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); - } - edgesToAdd.forEach(_graphService::addEdge); - } - } - - private void updateGraphServiceDiff( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nullable final RecordTemplate oldAspect, - @Nonnull final RecordTemplate newAspect, - @Nonnull final MetadataChangeLog event) { - Pair, HashMap>> oldEdgeAndRelationTypes = null; - if (oldAspect != null) { - oldEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event, false); - } - - final List oldEdges = - oldEdgeAndRelationTypes != null - ? 
oldEdgeAndRelationTypes.getFirst() - : Collections.emptyList(); - final Set oldEdgeSet = new HashSet<>(oldEdges); - - Pair, HashMap>> newEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event, true); - - final List newEdges = newEdgeAndRelationTypes.getFirst(); - final Set newEdgeSet = new HashSet<>(newEdges); - - // Edges to add - final List additiveDifference = - newEdgeSet.stream().filter(edge -> !oldEdgeSet.contains(edge)).collect(Collectors.toList()); - - // Edges to remove - final List subtractiveDifference = - oldEdgeSet.stream().filter(edge -> !newEdgeSet.contains(edge)).collect(Collectors.toList()); - - // Edges to update - final List mergedEdges = getMergedEdges(oldEdgeSet, newEdgeSet); - - // Remove any old edges that no longer exist first - if (subtractiveDifference.size() > 0) { - log.debug("Removing edges: {}", subtractiveDifference); - subtractiveDifference.forEach(_graphService::removeEdge); - } - - // Then add new edges - if (additiveDifference.size() > 0) { - log.debug("Adding edges: {}", additiveDifference); - additiveDifference.forEach(_graphService::addEdge); - } - - // Then update existing edges - if (mergedEdges.size() > 0) { - log.debug("Updating edges: {}", mergedEdges); - mergedEdges.forEach(_graphService::upsertEdge); - } - } - - private static List getMergedEdges(final Set oldEdgeSet, final Set newEdgeSet) { - final Map oldEdgesMap = - oldEdgeSet.stream() - .map(edge -> Pair.of(edge.hashCode(), edge)) - .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); - - final List mergedEdges = new ArrayList<>(); - if (!oldEdgesMap.isEmpty()) { - for (com.linkedin.metadata.aspect.models.graph.Edge newEdge : newEdgeSet) { - if (oldEdgesMap.containsKey(newEdge.hashCode())) { - final com.linkedin.metadata.aspect.models.graph.Edge oldEdge = - oldEdgesMap.get(newEdge.hashCode()); - final com.linkedin.metadata.aspect.models.graph.Edge mergedEdge = - GraphIndexUtils.mergeEdges(oldEdge, newEdge); - 
mergedEdges.add(mergedEdge); - } - } - } - - return mergedEdges; - } - /** Process snapshot and update search index */ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem event) { Urn urn = event.getUrn(); @@ -520,7 +271,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev Optional previousSearchDocument = Optional.empty(); try { searchDocument = - _searchDocumentTransformer + searchDocumentTransformer .transformAspect(opContext, urn, aspect, aspectSpec, false) .map( objectNode -> @@ -540,16 +291,16 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev return; } - final String docId = _entityIndexBuilders.getIndexConvention().getEntityDocumentId(urn); + final String docId = entityIndexBuilders.getIndexConvention().getEntityDocumentId(urn); - if (_searchDiffMode + if (searchDiffMode && (systemMetadata == null || systemMetadata.getProperties() == null || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { if (previousAspect != null) { try { previousSearchDocument = - _searchDocumentTransformer.transformAspect( + searchDocumentTransformer.transformAspect( opContext, urn, previousAspect, aspectSpec, false); } catch (Exception e) { log.error( @@ -572,7 +323,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev searchDocument.get(), previousSearchDocument.orElse(null)) .toString(); - _entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId); + entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId); } /** Process snapshot and update time-series index */ @@ -597,18 +348,18 @@ private void updateTimeseriesFields( .entrySet() .forEach( document -> { - _timeseriesAspectService.upsertDocument( + timeseriesAspectService.upsertDocument( opContext, entityType, aspectName, document.getKey(), document.getValue()); }); } private void updateSystemMetadata( SystemMetadata systemMetadata, 
Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) { - _systemMetadataService.insert(systemMetadata, urn.toString(), aspectSpec.getName()); + systemMetadataService.insert(systemMetadata, urn.toString(), aspectSpec.getName()); // If processing status aspect update all aspects for this urn to removed if (aspectSpec.getName().equals(Constants.STATUS_ASPECT_NAME)) { - _systemMetadataService.setDocStatus(urn.toString(), ((Status) aspect).isRemoved()); + systemMetadataService.setDocStatus(urn.toString(), ((Status) aspect).isRemoved()); } } @@ -616,41 +367,13 @@ private void deleteSystemMetadata(Urn urn, AspectSpec aspectSpec, Boolean isKeyA if (isKeyAspect) { // Delete all aspects log.debug(String.format("Deleting all system metadata for urn: %s", urn)); - _systemMetadataService.deleteUrn(urn.toString()); + systemMetadataService.deleteUrn(urn.toString()); } else { // Delete all aspects from system metadata service log.debug( String.format( "Deleting system metadata for urn: %s, aspect: %s", urn, aspectSpec.getName())); - _systemMetadataService.deleteAspect(urn.toString(), aspectSpec.getName()); - } - } - - private void deleteGraphData( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final Boolean isKeyAspect, - @Nonnull final MetadataChangeLog event) { - if (isKeyAspect) { - _graphService.removeNode(urn); - return; - } - - Pair, HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - if (urnToRelationshipTypesBeingAdded.size() > 0) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - _graphService.removeEdgesFromNode( - entry.getKey(), - new ArrayList<>(entry.getValue()), - createRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); - } + 
systemMetadataService.deleteAspect(urn.toString(), aspectSpec.getName()); } } @@ -670,14 +393,14 @@ private void deleteSearchData( } if (isKeyAspect) { - _entitySearchService.deleteDocument(opContext, entityName, docId); + entitySearchService.deleteDocument(opContext, entityName, docId); return; } Optional searchDocument; try { searchDocument = - _searchDocumentTransformer + searchDocumentTransformer .transformAspect(opContext, urn, aspect, aspectSpec, true) .map(Objects::toString); // TODO } catch (Exception e) { @@ -690,18 +413,6 @@ private void deleteSearchData( return; } - _entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); - } - - private EntitySpec getEventEntitySpec( - @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { - try { - return opContext.getEntityRegistry().getEntitySpec(event.getEntityType()); - } catch (IllegalArgumentException e) { - throw new RuntimeException( - String.format( - "Failed to retrieve Entity Spec for entity with name %s. 
Cannot update indices for MCL.", - event.getEntityType())); - } + entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index fe3608a2cf71db..d585ff1ce8383f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -37,6 +37,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.sql.Timestamp; +import java.util.List; import java.util.Map; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -79,14 +80,15 @@ public void testDeleteUniqueRefGeneratesValidMCP() { Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", container.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", container.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final EntityResponse entityResponse = new EntityResponse(); @@ -195,14 +197,15 @@ public void testDeleteSearchReferences() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + 
eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = @@ -249,14 +252,15 @@ public void testDeleteNoSearchReferences() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = @@ -308,14 +312,15 @@ public void testDeleteSearchReferencesDryRun() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index 5d9a5079f2a3b3..64ab95b5c68436 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ 
-11,16 +11,16 @@ import com.linkedin.common.urn.DataJobUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.dgraph.DgraphGraphService; import com.linkedin.metadata.graph.neo4j.Neo4jGraphService; -import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.time.Duration; import java.util.ArrayList; @@ -47,7 +47,6 @@ import javax.annotation.Nullable; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.Assert; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -273,20 +272,16 @@ public int compare(RelatedEntity left, RelatedEntity right) { /** Any source and destination type value. */ protected static @Nullable List anyType = null; - protected final GraphQueryConfiguration _graphQueryConfiguration = getGraphQueryConfiguration(); + protected static final GraphQueryConfiguration _graphQueryConfiguration = + getGraphQueryConfiguration(); + protected static final OperationContext operationContext = + TestOperationContexts.systemContextNoSearchAuthorization(); /** Timeout used to test concurrent ops in doTestConcurrentOp. 
*/ protected Duration getTestConcurrentOpTimeout() { return Duration.ofMinutes(1); } - @BeforeMethod - public void disableAssert() { - PathSpecBasedSchemaAnnotationVisitor.class - .getClassLoader() - .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); - } - @Test public void testStaticUrns() { assertNotNull(dataset1Urn); @@ -502,6 +497,7 @@ public void testAddEdge( RelatedEntitiesResult relatedOutgoing = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -514,6 +510,7 @@ public void testAddEdge( RelatedEntitiesResult relatedIncoming = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -531,6 +528,7 @@ public void testPopulatedGraphService() throws Exception { RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -549,6 +547,7 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -574,13 +573,13 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldOne)); EntityLineageResult viaNodeResult = service.getLineage( + operationContext, schemaFieldUrnOne, LineageDirection.UPSTREAM, new GraphFilters(List.of("schemaField")), 0, 1000, - 100, - null); + 100); // Multi-path enabled assertEquals(viaNodeResult.getRelationships().size(), 2); // First one is via node @@ -589,13 +588,13 @@ public void testPopulatedGraphService() throws Exception { EntityLineageResult viaNodeResultNoMulti = getGraphService(false) .getLineage( + operationContext, schemaFieldUrnOne, LineageDirection.UPSTREAM, new GraphFilters(List.of("schemaField")), 0, 1000, - 100, - null); + 100); // Multi-path disabled, still has two because via flow creates both edges in response 
assertEquals(viaNodeResultNoMulti.getRelationships().size(), 2); @@ -612,12 +611,12 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { GraphService service = getLineagePopulatedGraphService(); EntityLineageResult upstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); + service.getLineage(operationContext, dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + service.getLineage(operationContext, dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); Map relationships = @@ -630,7 +629,8 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), consumes); - upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = @@ -641,11 +641,13 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), produces); - downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); - upstreamLineage = 
service.getLineage(dataJobOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = + service.getLineage(operationContext, dataJobOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = @@ -656,7 +658,9 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataset2Urn)); assertEquals(relationships.get(dataset2Urn).getType(), consumes); - downstreamLineage = service.getLineage(dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = + service.getLineage( + operationContext, dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); relationships = @@ -834,6 +838,7 @@ private void doTestFindRelatedEntities( RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, anyType, sourceEntityFilter, anyType, @@ -1118,6 +1123,7 @@ private void doTestFindRelatedEntities( RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, sourceType, EMPTY_FILTER, destinationType, @@ -1139,6 +1145,7 @@ private void doTestFindRelatedEntitiesEntityType( @Nonnull RelatedEntity... 
expectedEntities) { RelatedEntitiesResult actualEntities = service.findRelatedEntities( + operationContext, sourceType, EMPTY_FILTER, destinationType, @@ -1244,6 +1251,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allOutgoingRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1263,6 +1271,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1289,6 +1298,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1301,6 +1311,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1325,6 +1336,7 @@ public void testFindRelatedEntitiesNoRelationshipTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1340,6 +1352,7 @@ public void testFindRelatedEntitiesNoRelationshipTypes() throws Exception { // did not get any related urns? 
RelatedEntitiesResult relatedEntitiesAll = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1358,6 +1371,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), newFilter("urn", dataset1UrnString), ImmutableList.of(userType), @@ -1371,6 +1385,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), newFilter("urn", dataset1UrnString), ImmutableList.of(userType), @@ -1389,6 +1404,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType, userType), newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), @@ -1402,6 +1418,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType, userType), newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), @@ -1421,6 +1438,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult allRelatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1436,6 +1454,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { idx -> individualRelatedEntities.addAll( service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1540,6 +1559,7 @@ public void testRemoveEdgesFromNode( RelatedEntitiesResult actualOutgoingRelatedUrnsBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), 
anyType, @@ -1550,6 +1570,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult actualIncomingRelatedUrnsBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1566,6 +1587,7 @@ public void testRemoveEdgesFromNode( // we expect these do not change RelatedEntitiesResult relatedEntitiesOfOtherOutgoingRelationTypesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1576,6 +1598,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult relatedEntitiesOfOtherIncomingRelationTypesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1585,11 +1608,13 @@ public void testRemoveEdgesFromNode( 0, 100); - service.removeEdgesFromNode(nodeToRemoveFrom, relationTypes, relationshipFilter); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, relationTypes, relationshipFilter); syncAfterWrite(); RelatedEntitiesResult actualOutgoingRelatedUrnsAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1600,6 +1625,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult actualIncomingRelatedUrnsAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1616,6 +1642,7 @@ public void testRemoveEdgesFromNode( // assert these did not change RelatedEntitiesResult relatedEntitiesOfOtherOutgoingRelationTypesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1626,6 +1653,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult relatedEntitiesOfOtherIncomingRelationTypesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", 
nodeToRemoveFrom.toString()), anyType, @@ -1650,6 +1678,7 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1661,12 +1690,15 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // can be replaced with a single removeEdgesFromNode and undirectedRelationships once supported // by all implementations - service.removeEdgesFromNode(nodeToRemoveFrom, Collections.emptyList(), outgoingRelationships); - service.removeEdgesFromNode(nodeToRemoveFrom, Collections.emptyList(), incomingRelationships); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, Collections.emptyList(), outgoingRelationships); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, Collections.emptyList(), incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1680,13 +1712,20 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // does the test actually test something? is the Collections.emptyList() the only reason why we // did not see changes? 
service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), outgoingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships); service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), incomingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemoveAll = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1706,6 +1745,7 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1718,13 +1758,20 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { // can be replaced with a single removeEdgesFromNode and undirectedRelationships once supported // by all implementations service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), outgoingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships); service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), incomingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1740,13 +1787,14 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - 
service.removeNode(dataset2Urn); + service.removeNode(operationContext, dataset2Urn); syncAfterWrite(); // assert the modified graph // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1768,6 +1816,7 @@ public void testRemoveUnknownNode() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult entitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1777,11 +1826,12 @@ public void testRemoveUnknownNode() throws Exception { 0, 100); - service.removeNode(unknownUrn); + service.removeNode(operationContext, unknownUrn); syncAfterWrite(); RelatedEntitiesResult entitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1806,6 +1856,7 @@ public void testClear() throws Exception { // again assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1817,6 +1868,7 @@ public void testClear() throws Exception { Collections.emptyList()); assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, ImmutableList.of(userType), EMPTY_FILTER, anyType, @@ -1828,6 +1880,7 @@ public void testClear() throws Exception { Collections.emptyList()); assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, ImmutableList.of(userType), @@ -1891,6 +1944,7 @@ public void testConcurrentAddEdge() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1937,6 +1991,7 @@ public void testConcurrentRemoveEdgesFromNode() throws Exception { // assert the graph is there RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1956,6 +2011,7 @@ public void 
testConcurrentRemoveEdgesFromNode() throws Exception { edge -> () -> service.removeEdgesFromNode( + operationContext, edge.getSource(), Collections.singletonList(edge.getRelationshipType()), outgoingRelationships)); @@ -1965,6 +2021,7 @@ public void testConcurrentRemoveEdgesFromNode() throws Exception { // assert the graph is gone RelatedEntitiesResult relatedEntitiesAfterDeletion = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1998,6 +2055,7 @@ public void testConcurrentRemoveNodes() throws Exception { // assert the graph is there RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2013,13 +2071,14 @@ public void testConcurrentRemoveNodes() throws Exception { // remove all nodes concurrently // nodes will be removed multiple times Stream operations = - edges.stream().map(edge -> () -> service.removeNode(edge.getSource())); + edges.stream().map(edge -> () -> service.removeNode(operationContext, edge.getSource())); doTestConcurrentOp(operations); syncAfterWrite(); // assert the graph is gone RelatedEntitiesResult relatedEntitiesAfterDeletion = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2094,12 +2153,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath (!((service instanceof Neo4jGraphService) || (service instanceof DgraphGraphService))); EntityLineageResult upstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(operationContext, dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(operationContext, dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); 
assertEquals(downstreamLineage.getTotal().intValue(), 5); assertEquals(downstreamLineage.getRelationships().size(), 5); @@ -2124,7 +2183,8 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); + upstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 3); assertEquals(upstreamLineage.getRelationships().size(), 3); relationships = @@ -2137,7 +2197,8 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); - downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + downstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); } @@ -2156,6 +2217,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { Set expectedRelatedEntities = convertEdgesToRelatedEntities(edges); RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2169,9 +2231,13 @@ public void testHighlyConnectedGraphWalk() throws Exception { expectedRelatedEntities); Urn root = dataset1Urn; + OperationContext limitedHopOpContext = + operationContext.withLineageFlags(f -> f.setEntitiesExploredPerHopLimit(5)); + EntityLineageResult lineageResult = getGraphService(false) .getLineage( + limitedHopOpContext, root, LineageDirection.UPSTREAM, new GraphFilters( @@ -2183,8 +2249,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { 
.collect(Collectors.toList())), 0, 1000, - 100, - new LineageFlags().setEntitiesExploredPerHopLimit(5)); + 100); // Unable to explore all paths because multi is disabled, but will be at least 5 since it will // explore 5 edges assertTrue( @@ -2201,6 +2266,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { EntityLineageResult lineageResultMulti = getGraphService(true) .getLineage( + limitedHopOpContext, root, LineageDirection.UPSTREAM, new GraphFilters( @@ -2212,8 +2278,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { .collect(Collectors.toList())), 0, 1000, - 100, - new LineageFlags().setEntitiesExploredPerHopLimit(5)); + 100); assertTrue( lineageResultMulti.getRelationships().size() >= 5 diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java index e4cefaa1feaa1a..a4a93b29f50c62 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java @@ -1,8 +1,10 @@ package com.linkedin.metadata.graph; import static com.linkedin.metadata.search.utils.QueryUtils.*; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; +import io.datahubproject.metadata.context.OperationContext; import java.util.Arrays; import java.util.Collections; import org.testng.annotations.DataProvider; @@ -220,6 +222,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allOutgoingRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -243,6 +246,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -269,6 +273,7 @@ public 
void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -281,6 +286,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -306,6 +312,7 @@ public void testPopulatedGraphService() throws Exception { RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -328,6 +335,7 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -360,13 +368,14 @@ public void testPopulatedGraphService() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(dataset2Urn); + service.removeNode(mock(OperationContext.class), dataset2Urn); syncAfterWrite(); // assert the modified graph // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java similarity index 92% rename from metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java index 0bf7df1fc8e7c9..b8e3a6e1071289 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.graph.search; +package com.linkedin.metadata.graph.elastic; import com.google.common.collect.ImmutableList; import com.google.common.io.Resources; @@ -9,10 +9,11 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; -import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -21,6 +22,7 @@ import java.util.Map; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class ESGraphQueryDAOTest { @@ -34,8 +36,15 @@ public class ESGraphQueryDAOTest { private static final String TEST_QUERY_FILE_FULL_MULTIPLE_FILTERS = "elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json"; + private OperationContext operationContext; + + @BeforeTest + public void init() { + operationContext = TestOperationContexts.systemContextNoSearchAuthorization(); + } + @Test - private static void testGetQueryForLineageFullArguments() throws Exception { + private void testGetQueryForLineageFullArguments() throws Exception { URL urlLimited = Resources.getResource(TEST_QUERY_FILE_LIMITED); String expectedQueryLimited = Resources.toString(urlLimited, StandardCharsets.UTF_8); @@ -108,21 +117,26 @@ private static void testGetQueryForLineageFullArguments() 
throws Exception { QueryBuilder fullBuilder = graphQueryDAO.getLineageQuery( + operationContext.withLineageFlags( + f -> new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)), urnsPerEntityType, edgesPerEntityType, - graphFilters, - new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)); + graphFilters); QueryBuilder fullBuilderEmptyFilters = graphQueryDAO.getLineageQuery( - urnsPerEntityType, edgesPerEntityType, GraphFilters.emptyGraphFilters, null); + operationContext, + urnsPerEntityType, + edgesPerEntityType, + GraphFilters.emptyGraphFilters); QueryBuilder fullBuilderMultipleFilters = graphQueryDAO.getLineageQuery( + operationContext.withLineageFlags( + f -> new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)), urnsPerEntityTypeMultiple, edgesPerEntityTypeMultiple, - graphFiltersMultiple, - new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)); + graphFiltersMultiple); Assert.assertEquals(limitedBuilder.toString(), expectedQueryLimited); Assert.assertEquals(fullBuilder.toString(), expectedQueryFull); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java new file mode 100644 index 00000000000000..ac96257e8ec41b --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java @@ -0,0 +1,33 @@ +package com.linkedin.metadata.graph.elastic; + +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; + +import com.linkedin.metadata.config.search.GraphQueryConfiguration; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import 
com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; +import org.testng.annotations.Test; + +public class ESGraphWriteDAOTest { + public static final IndexConvention TEST_INDEX_CONVENTION = IndexConventionImpl.noPrefix("md5"); + + @Test + public void testUpdateByQuery() { + ESBulkProcessor mockBulkProcess = mock(ESBulkProcessor.class); + GraphQueryConfiguration config = new GraphQueryConfiguration(); + config.setGraphStatusEnabled(true); + ESGraphWriteDAO test = new ESGraphWriteDAO(TEST_INDEX_CONVENTION, mockBulkProcess, 0, config); + + test.updateByQuery(new Script("test"), QueryBuilders.boolQuery()); + + verify(mockBulkProcess) + .updateByQuery( + eq(new Script("test")), eq(QueryBuilders.boolQuery()), eq("graph_service_v1")); + verifyNoMoreInteractions(mockBulkProcess); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java new file mode 100644 index 00000000000000..1f53b9c4e999eb --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java @@ -0,0 +1,98 @@ +package com.linkedin.metadata.graph.elastic; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import com.linkedin.metadata.entity.TestEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.LineageRegistry; +import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import java.util.Set; +import org.mockito.ArgumentCaptor; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.script.Script; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ElasticSearchGraphServiceTest { + + private ElasticSearchGraphService test; + private ESBulkProcessor mockESBulkProcessor; + private ESGraphWriteDAO mockWriteDAO; + private ESGraphQueryDAO mockReadDAO; + + @BeforeTest + public void beforeTest() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + mockESBulkProcessor = mock(ESBulkProcessor.class); + mockWriteDAO = mock(ESGraphWriteDAO.class); + mockReadDAO = mock(ESGraphQueryDAO.class); + test = + new ElasticSearchGraphService( + new LineageRegistry(entityRegistry), + mockESBulkProcessor, + IndexConventionImpl.noPrefix("md5"), + mockWriteDAO, + mockReadDAO, + mock(ESIndexBuilder.class), + "md5"); + } + + @BeforeMethod + public void beforeMethod() { + reset(mockESBulkProcessor, mockWriteDAO, mockReadDAO); + } + + @Test + public void testSetEdgeStatus() { + final Urn testUrn = UrnUtils.getUrn("urn:li:container:test"); + for (boolean removed : Set.of(true, false)) { + test.setEdgeStatus(testUrn, removed, EdgeUrnType.values()); + + ArgumentCaptor