diff --git a/encoding.go b/encoding.go
index 7cefad8..623ec89 100644
--- a/encoding.go
+++ b/encoding.go
@@ -33,6 +33,8 @@ func ConvertToUTF8(b []byte) string {
 		s, _ = utf16ToUTF8(b[2:], false)
 	} else if hasUTF8Marker(b) {
 		s = string(b[3:])
+	} else if utf8.ValidString(string(b)) {
+		s = string(b)
 	} else if looksLikeLatin1(b) {
 		s = latin1toUTF8(b)
 	} else {
diff --git a/encoding_test.go b/encoding_test.go
index d7dfac5..04a9d79 100644
--- a/encoding_test.go
+++ b/encoding_test.go
@@ -12,6 +12,12 @@ func TestLooksLikeLatin1(t *testing.T) {
 	assert.Equal(t, false, looksLikeLatin1([]byte("hallå")))
 }
 
+func TestTranscodeToUTF8(t *testing.T) {
+	assert.Equal(t, "hallå", ConvertToUTF8([]byte("hall\xe5"))) // from: latin1
+	assert.Equal(t, "hallå", ConvertToUTF8([]byte("hallå"))) // from: utf8 (Swedish)
+	assert.Equal(t, "烟火里的尘埃", ConvertToUTF8([]byte("烟火里的尘埃"))) // from: utf8 (Chinese)
+}
+
 func TestReadFileAsUTF8(t *testing.T) {
 	f, err := os.Open("README.md")
 	assert.Equal(t, nil, err)