Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace non-utf8 characters in bodies #228

Merged
merged 8 commits into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions sdk/instrumentation/bodyattribute/bodyattribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package bodyattribute // import "github.com/hypertrace/goagent/sdk/instrumentati
import (
"encoding/base64"
"fmt"
"unicode/utf8"

"github.com/hypertrace/goagent/sdk"
)
Expand All @@ -21,7 +22,9 @@ func SetTruncatedBodyAttribute(attrName string, body []byte, bodyMaxSize int, sp
return
}

SetBodyAttribute(attrName, body[:bodyMaxSize], true, span)
truncatedBody := truncateUTF8Bytes(body, bodyMaxSize)

SetBodyAttribute(attrName, truncatedBody, true, span)
}

// SetTruncatedEncodedBodyAttribute is like SetTruncatedBodyAttribute above but also base64 encodes the
Expand All @@ -38,7 +41,8 @@ func SetTruncatedEncodedBodyAttribute(attrName string, body []byte, bodyMaxSize
return
}

SetEncodedBodyAttribute(attrName, body[:bodyMaxSize], true, span)
truncatedBody := truncateUTF8Bytes(body, bodyMaxSize)
SetEncodedBodyAttribute(attrName, truncatedBody, true, span)
}

// SetBodyAttribute sets the body as a span attribute.
Expand Down Expand Up @@ -69,3 +73,24 @@ func SetEncodedBodyAttribute(attrName string, body []byte, truncated bool, span
span.SetAttribute(fmt.Sprintf("%s.truncated", attrName), true)
}
}

// truncateUTF8Bytes returns a prefix of b that is at most maxBytes long and
// that does not cut a multi-byte UTF-8 character in half at the truncation
// point. The returned slice shares b's backing array.
//
// If maxBytes is zero or negative an empty slice is returned; if maxBytes is
// greater than or equal to len(b) there is nothing to truncate and b is
// returned unchanged.
//
// Only the last utf8.UTFMax bytes before the cut are inspected. That window
// may begin in the middle of an earlier character; in that case
// utf8.DecodeRune reports a width of 1 for each stray continuation byte, so
// the scan simply steps forward until it is aligned on a character start
// again. The decoded rune value (including utf8.RuneError) is deliberately
// ignored: the goal is only to avoid splitting the final character, not to
// validate or repair the rest of the body, so bytes that are not valid UTF-8
// are kept as they are.
//
// Largely based on:
// https://github.com/jmacd/opentelemetry-go/blob/e8973b75b230246545cdae072a548c83877cba09/sdk/trace/span.go#L358-L375
func truncateUTF8Bytes(b []byte, maxBytes int) []byte {
	if maxBytes <= 0 {
		return b[:0]
	}
	if maxBytes >= len(b) {
		// Nothing to cut. Returning early also guarantees that b[idx:] below
		// is never empty, so DecodeRune always reports a width of at least 1
		// and the loop always makes progress.
		return b
	}

	// A single character occupies at most utf8.UTFMax bytes, so a character
	// that straddles the cut must start within this window.
	startIndex := maxBytes - utf8.UTFMax
	if startIndex < 0 {
		startIndex = 0
	}

	for idx := startIndex; idx < maxBytes; {
		_, size := utf8.DecodeRune(b[idx:])
		if idx+size > maxBytes {
			// This character extends past maxBytes, so it is left out of the
			// truncated value entirely.
			return b[:idx]
		}
		idx += size
	}

	return b[:maxBytes]
}
22 changes: 22 additions & 0 deletions sdk/instrumentation/bodyattribute/bodyattribute_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,25 @@ func TestSetEncodedBodyAttribute(t *testing.T) {
})
}
}

// TestSetBodyWithoutUtf8 checks that SetTruncatedBodyAttribute does not leave
// a partial multi-byte character at the end of the recorded body: with a
// 23-byte limit on a body of 3-byte characters, the attribute is expected to
// hold the first seven characters (21 bytes).
func TestSetBodyWithoutUtf8(t *testing.T) {
	multiByteCharString := []byte("こんにちは世界こんにちは世界こんにちは世界こんにちは世界こんにちは世界")
	span := mock.NewSpan()
	SetTruncatedBodyAttribute("http.request.body", multiByteCharString, 23, span)

	// Fail with a readable message instead of panicking when the attribute
	// was not set or has an unexpected type.
	got, ok := span.ReadAttribute("http.request.body").(string)
	if !ok {
		t.Fatalf("attribute http.request.body is missing or is not a string")
	}

	// assert.Equal takes (expected, actual); this order keeps failure output
	// labelled correctly.
	assert.Equal(t, "こんにちは世界", got)
	assert.Equal(t, 21, len(got))
}

// TestSetB64BodyWithoutUtf8 checks that SetTruncatedEncodedBodyAttribute does
// not leave a partial multi-byte character at the end of the body before it is
// base64 encoded: with a 23-byte limit on a body of 3-byte characters, the
// decoded attribute is expected to hold the first seven characters (21 bytes).
func TestSetB64BodyWithoutUtf8(t *testing.T) {
	multiByteCharString := []byte("こんにちは世界こんにちは世界こんにちは世界こんにちは世界こんにちは世界")
	span := mock.NewSpan()
	SetTruncatedEncodedBodyAttribute("http.request.body", multiByteCharString, 23, span)

	// Fail with a readable message instead of panicking when the attribute
	// was not set or has an unexpected type.
	encoded, ok := span.ReadAttribute("http.request.body.base64").(string)
	if !ok {
		t.Fatalf("attribute http.request.body.base64 is missing or is not a string")
	}

	decodedBytes, err := base64.StdEncoding.DecodeString(encoded)
	assert.NoError(t, err)

	// assert.Equal takes (expected, actual); this order keeps failure output
	// labelled correctly.
	assert.Equal(t, "こんにちは世界", string(decodedBytes))
	assert.Equal(t, 21, len(decodedBytes))
}
Loading