Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bedrock Agent DataSource custom transformation support #39556

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/39556.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_bedrockagent_data_source: Add `vector_ingestion_configuration.custom_transformation_configuration` argument
```
15 changes: 8 additions & 7 deletions internal/service/bedrockagent/bedrockagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ func TestAccBedrockAgent_serial(t *testing.T) {
"updateOpenSearch": testAccKnowledgeBase_updateOpenSearch,
},
"DataSource": {
acctest.CtBasic: testAccDataSource_basic,
acctest.CtDisappears: testAccDataSource_disappears,
"full": testAccDataSource_full,
"update": testAccDataSource_update,
"semantic": testAccDataSource_fullSemantic,
"hierarchical": testAccDataSource_fullHierarchical,
"parsing": testAccDataSource_parsing,
acctest.CtBasic: testAccDataSource_basic,
acctest.CtDisappears: testAccDataSource_disappears,
"full": testAccDataSource_full,
"update": testAccDataSource_update,
"semantic": testAccDataSource_fullSemantic,
"hierarchical": testAccDataSource_fullHierarchical,
"parsing": testAccDataSource_parsing,
"customtransformation": testAccDataSource_fullCustomTranformation,
},
}

Expand Down
131 changes: 129 additions & 2 deletions internal/service/bedrockagent/data_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,106 @@ func (r *dataSourceResource) Schema(ctx context.Context, request resource.Schema
},
},
},
"custom_transformation_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[customTransformationConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Blocks: map[string]schema.Block{
"intermediate_storage": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[intermediaStorageModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Blocks: map[string]schema.Block{
"s3_location": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[s3LocationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
names.AttrURI: schema.StringAttribute{
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
},
},
},
},
},
"transformation": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[transformationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"step_to_apply": schema.StringAttribute{
CustomType: fwtypes.StringEnumType[awstypes.StepType](),
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
Blocks: map[string]schema.Block{
"transformation_function": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[transformationFunctionModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Blocks: map[string]schema.Block{
"transformation_lambda_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[transformationLambdaConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
listplanmodifier.RequiresReplace(),
},
Validators: []validator.List{
listvalidator.SizeAtMost(1),
},
NestedObject: schema.NestedBlockObject{
Attributes: map[string]schema.Attribute{
"lambda_arn": schema.StringAttribute{
CustomType: fwtypes.ARNType,
Required: true,
PlanModifiers: []planmodifier.String{
stringplanmodifier.RequiresReplace(),
},
},
},
},
},
},
},
},
},
},
},
},
},
},
"parsing_configuration": schema.ListNestedBlock{
CustomType: fwtypes.NewListNestedObjectTypeOf[parsingConfigurationModel](ctx),
PlanModifiers: []planmodifier.List{
Expand Down Expand Up @@ -657,15 +757,42 @@ type serverSideEncryptionConfigurationModel struct {
}

// vectorIngestionConfigurationModel is the Terraform model for the
// vector_ingestion_configuration block. Each field holds one of its nested
// blocks, keyed by the tfsdk tag.
type vectorIngestionConfigurationModel struct {
ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"`
ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"`
ChunkingConfiguration fwtypes.ListNestedObjectValueOf[chunkingConfigurationModel] `tfsdk:"chunking_configuration"`
CustomTransformationConfiguration fwtypes.ListNestedObjectValueOf[customTransformationConfigurationModel] `tfsdk:"custom_transformation_configuration"`
ParsingConfiguration fwtypes.ListNestedObjectValueOf[parsingConfigurationModel] `tfsdk:"parsing_configuration"`
}

// parsingConfigurationModel is the Terraform model for the
// parsing_configuration block: a parsing strategy enum plus an optional
// nested bedrock_foundation_model_configuration block.
type parsingConfigurationModel struct {
ParsingStrategy fwtypes.StringEnum[awstypes.ParsingStrategy] `tfsdk:"parsing_strategy"`
BedrockFoundationModelConfiguration fwtypes.ListNestedObjectValueOf[bedrockFoundationModelConfigurationModel] `tfsdk:"bedrock_foundation_model_configuration"`
}

// customTransformationConfigurationModel is the Terraform model for the
// custom_transformation_configuration block. It carries the nested
// intermediate_storage and transformation blocks.
type customTransformationConfigurationModel struct {
IntermediateStorage fwtypes.ListNestedObjectValueOf[intermediaStorageModel] `tfsdk:"intermediate_storage"`
Transformation fwtypes.ListNestedObjectValueOf[transformationModel] `tfsdk:"transformation"`
}

// intermediaStorageModel is the Terraform model for the intermediate_storage
// block; it wraps a single nested s3_location block.
//
// NOTE(review): the type name looks like a typo for "intermediateStorageModel".
// Renaming it requires updating every reference (schema CustomType and the
// parent model field) in the same change.
type intermediaStorageModel struct {
S3Location fwtypes.ListNestedObjectValueOf[s3LocationModel] `tfsdk:"s3_location"`
}

// s3LocationModel is the Terraform model for the s3_location block and holds
// the value of its "uri" attribute.
type s3LocationModel struct {
Uri types.String `tfsdk:"uri"`
}

// transformationModel is the Terraform model for the transformation block:
// the step_to_apply enum (awstypes.StepType) and the nested
// transformation_function block.
type transformationModel struct {
StepToApply fwtypes.StringEnum[awstypes.StepType] `tfsdk:"step_to_apply"`
TransformationFunction fwtypes.ListNestedObjectValueOf[transformationFunctionModel] `tfsdk:"transformation_function"`
}

// transformationFunctionModel is the Terraform model for the
// transformation_function block; it wraps a single nested
// transformation_lambda_configuration block.
type transformationFunctionModel struct {
TransformationLambdaConfiguration fwtypes.ListNestedObjectValueOf[transformationLambdaConfigurationModel] `tfsdk:"transformation_lambda_configuration"`
}

// transformationLambdaConfigurationModel is the Terraform model for the
// transformation_lambda_configuration block and holds the ARN-typed
// lambda_arn attribute.
type transformationLambdaConfigurationModel struct {
LambdaArn fwtypes.ARN `tfsdk:"lambda_arn"`
}

type bedrockFoundationModelConfigurationModel struct {
ModelArn fwtypes.ARN `tfsdk:"model_arn"`
ParsingPrompt fwtypes.ListNestedObjectValueOf[parsingPromptModel] `tfsdk:"parsing_prompt"`
Expand Down
116 changes: 116 additions & 0 deletions internal/service/bedrockagent/data_source_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,71 @@ func testAccDataSource_fullHierarchical(t *testing.T) {
})
}

// testAccDataSource_fullCustomTranformation creates a data source that
// configures vector_ingestion_configuration.custom_transformation_configuration
// and verifies both the pre-existing attributes and the custom transformation
// attributes, then round-trips the resource through import.
//
// Prerequisites (the test is skipped when any executable is missing):
// * psql run via null_resource/provisioner "local-exec"
// * jq for parsing output from aws cli to retrieve postgres password
// * aws cli, whose output is parsed by jq
func testAccDataSource_fullCustomTranformation(t *testing.T) {
	acctest.SkipIfExeNotOnPath(t, "psql")
	acctest.SkipIfExeNotOnPath(t, "jq")
	acctest.SkipIfExeNotOnPath(t, "aws")

	ctx := acctest.Context(t)
	if testing.Short() {
		t.Skip("skipping long-running test in short mode")
	}

	var dataSource types.DataSource
	rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix)
	resourceName := "aws_bedrockagent_data_source.test"
	foundationModel := "amazon.titan-embed-text-v1"

	// Common attribute path prefix for everything under the block this test targets.
	const customTransformation = "vector_ingestion_configuration.0.custom_transformation_configuration"

	resource.Test(t, resource.TestCase{
		PreCheck: func() {
			acctest.PreCheck(ctx, t)
		},
		ErrorCheck:               acctest.ErrorCheck(t, names.BedrockAgentServiceID),
		ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories,
		ExternalProviders: map[string]resource.ExternalProvider{
			"null": {
				Source:            "hashicorp/null",
				VersionConstraint: "3.2.2",
			},
		},
		CheckDestroy: testAccCheckDataSourceDestroy(ctx),
		Steps: []resource.TestStep{
			{
				Config: testAccDataSourceConfig_fullCustomTransformation(rName, foundationModel),
				Check: resource.ComposeAggregateTestCheckFunc(
					testAccCheckDataSourceExists(ctx, resourceName, &dataSource),
					resource.TestCheckResourceAttr(resourceName, "data_deletion_policy", "RETAIN"),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttrSet(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_arn"),
					resource.TestCheckNoResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.bucket_owner_account_id"),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.#", acctest.Ct1),
					resource.TestCheckTypeSetElemAttr(resourceName, "data_source_configuration.0.s3_configuration.0.inclusion_prefixes.*", "Europe/France/Nouvelle-Aquitaine/Bordeaux"),
					resource.TestCheckResourceAttr(resourceName, "data_source_configuration.0.type", "S3"),
					resource.TestCheckResourceAttrSet(resourceName, "data_source_id"),
					resource.TestCheckResourceAttr(resourceName, names.AttrDescription, "testing"),
					resource.TestCheckResourceAttr(resourceName, names.AttrName, rName),
					resource.TestCheckResourceAttr(resourceName, "server_side_encryption_configuration.#", acctest.Ct0),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.chunking_strategy", "FIXED_SIZE"),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.0.max_tokens", acctest.Ct3),
					resource.TestCheckResourceAttr(resourceName, "vector_ingestion_configuration.0.chunking_configuration.0.fixed_size_chunking_configuration.0.overlap_percentage", "80"),
					// Assert the block under test explicitly rather than relying
					// solely on ImportStateVerify to notice a broken round-trip.
					resource.TestCheckResourceAttr(resourceName, customTransformation+".#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.intermediate_storage.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.intermediate_storage.0.s3_location.#", acctest.Ct1),
					// The config names the intermediate bucket "<rName>-im".
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.intermediate_storage.0.s3_location.0.uri", "s3://"+rName+"-im/customTransform"),
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.transformation.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.transformation.0.step_to_apply", "POST_CHUNKING"),
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.transformation.0.transformation_function.#", acctest.Ct1),
					resource.TestCheckResourceAttr(resourceName, customTransformation+".0.transformation.0.transformation_function.0.transformation_lambda_configuration.#", acctest.Ct1),
					resource.TestCheckResourceAttrPair(resourceName, customTransformation+".0.transformation.0.transformation_function.0.transformation_lambda_configuration.0.lambda_arn", "aws_lambda_function.test_lambda", names.AttrARN),
				),
			},
			{
				ResourceName:      resourceName,
				ImportState:       true,
				ImportStateVerify: true,
			},
		},
	})
}

// Prerequisites:
// * psql run via null_resource/provisioner "local-exec"
// * jq for parsing output from aws cli to retrieve postgres password
Expand Down Expand Up @@ -659,6 +724,57 @@ resource "aws_bedrockagent_data_source" "test" {
`, rName))
}

func testAccDataSourceConfig_fullCustomTransformation(rName, embeddingModel string) string {
return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel),
testAccAgentActionGroupConfig_lambda(rName), fmt.Sprintf(`
resource "aws_bedrockagent_data_source" "test" {
name = %[1]q
knowledge_base_id = aws_bedrockagent_knowledge_base.test.id
data_deletion_policy = "RETAIN"
description = "testing"

data_source_configuration {
type = "S3"

s3_configuration {
bucket_arn = aws_s3_bucket.test.arn
inclusion_prefixes = ["Europe/France/Nouvelle-Aquitaine/Bordeaux"]
}
}

vector_ingestion_configuration {
chunking_configuration {
chunking_strategy = "FIXED_SIZE"

fixed_size_chunking_configuration {
max_tokens = 3
overlap_percentage = 80
}
}
custom_transformation_configuration {
intermediate_storage {
s3_location {
uri = "s3://${aws_s3_bucket.test_im.bucket}/customTransform"
}
}
transformation {
step_to_apply = "POST_CHUNKING"
transformation_function {
transformation_lambda_configuration {
lambda_arn = aws_lambda_function.test_lambda.arn
}
}
}
}
}
}
resource "aws_s3_bucket" "test_im" {
bucket = "%[1]s-im"
}

`, rName))
}

func testAccDataSourceConfig_updated(rName, embeddingModel string) string {
return acctest.ConfigCompose(testAccDataSourceConfig_base(rName, embeddingModel), fmt.Sprintf(`
resource "aws_bedrockagent_data_source" "test" {
Expand Down
33 changes: 33 additions & 0 deletions website/docs/r/bedrockagent_data_source.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ The `server_side_encryption_configuration` configuration block supports the foll
The `vector_ingestion_configuration` configuration block supports the following arguments:

* `chunking_configuration` - (Optional, Forces new resource) Details about how to chunk the documents in the data source. A chunk refers to an excerpt from a data source that is returned when the knowledge base that it belongs to is queried. See [`chunking_configuration` block](#chunking_configuration-block) for details.
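* `custom_transformation_configuration` - (Optional, Forces new resource) Configuration for custom transformation of data source documents. See [`custom_transformation_configuration` block](#custom_transformation_configuration-block) for details.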
* `parsing_configuration` - (Optional, Forces new resource) Configuration for custom parsing of data source documents. See [`parsing_configuration` block](#parsing_configuration-block) for details.

### `chunking_configuration` block
Expand Down Expand Up @@ -107,6 +108,38 @@ The `semantic_chunking_configuration` block supports the following arguments:
* `buffer_size` - (Required, Forces new resource) The buffer size.
* `max_tokens` - (Required, Forces new resource) The maximum number of tokens a chunk can contain.

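### `custom_transformation_configuration` block

The `custom_transformation_configuration` block supports the following arguments:

* `intermediate_storage` - (Required, Forces new resource) The intermediate storage for custom transformation. See [`intermediate_storage` block](#intermediate_storage-block) for details.
* `transformation` - (Required, Forces new resource) The transformation to apply to the data source documents. See [`transformation` block](#transformation-block) for details.

### `intermediate_storage` block

The `intermediate_storage` block supports the following arguments:

* `s3_location` - (Required, Forces new resource) Configuration block for the intermediate S3 storage. See [`s3_location` block](#s3_location-block) for details.

### `s3_location` block

The `s3_location` block supports the following arguments:

* `uri` - (Required, Forces new resource) S3 URI for intermediate storage.

### `transformation` block

The `transformation` block supports the following arguments:

* `step_to_apply` - (Required, Forces new resource) The ingestion step at which the transformation is applied. Currently only `POST_CHUNKING` is supported.
* `transformation_function` - (Required, Forces new resource) The configuration of the transformation function. See [`transformation_function` block](#transformation_function-block) for details.

### `transformation_function` block

The `transformation_function` block supports the following arguments:

* `transformation_lambda_configuration` - (Required, Forces new resource) The Lambda configuration for custom transformation. See [`transformation_lambda_configuration` block](#transformation_lambda_configuration-block) for details.

### `transformation_lambda_configuration` block

The `transformation_lambda_configuration` block supports the following arguments:

* `lambda_arn` - (Required, Forces new resource) The ARN of the Lambda function to use for custom transformation.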

### `parsing_configuration` block

The `parsing_configuration` configuration block supports the following arguments:
Expand Down
Loading