From 3b8ff36d8e2b383f9de7f2172f6b00eca00938eb Mon Sep 17 00:00:00 2001 From: Maksym Ostroverkhov Date: Tue, 2 Jul 2024 07:40:07 +0300 Subject: [PATCH] Add utf8 validation utility for text frames payload --- .../websocketx/WebSocketValidationTest.java | 38 ++++++++ .../websocketx/WebSocketFrameListener.java | 90 +++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java b/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java index b4bdb5c..09439e8 100644 --- a/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java +++ b/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java @@ -19,6 +19,8 @@ import io.netty.bootstrap.Bootstrap; import io.netty.bootstrap.ServerBootstrap; import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufAllocator; +import io.netty.buffer.ByteBufUtil; import io.netty.channel.Channel; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInitializer; @@ -37,7 +39,9 @@ import java.net.InetSocketAddress; import java.net.SocketAddress; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Collections; +import java.util.List; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -268,6 +272,40 @@ void invalidFragmentCompletion() throws Exception { } } + @Test + void utf8TextFrameValidator() { + ByteBufAllocator alloc = ByteBufAllocator.DEFAULT; + List utf8 = + Arrays.asList( + ByteBufUtil.writeUtf8(alloc, "ab"), + ByteBufUtil.writeUtf8(alloc, "c"), + ByteBufUtil.writeUtf8(alloc, "def"), + ByteBufUtil.writeUtf8(alloc, "ghijk"), + ByteBufUtil.writeUtf8(alloc, "lmn")); + ByteBuf nonUtf8 = alloc.buffer(2).writeByte(0xc3).writeByte(0x28); + + WebSocketFrameListener.Utf8FrameValidator validator = + WebSocketFrameListener.Utf8FrameValidator.create(); + + try { + Assertions.assertThat(validator.validateTextFrame(utf8.get(0))).isTrue(); + Assertions.assertThat(validator.state).isEqualTo(0); + Assertions.assertThat(validator.codep).isEqualTo(0); + Assertions.assertThat(validator.validateTextFragmentStart(utf8.get(1))).isTrue(); + Assertions.assertThat(validator.validateFragmentContinuation(utf8.get(2))).isTrue(); + Assertions.assertThat(validator.validateFragmentEnd(utf8.get(3))).isTrue(); + Assertions.assertThat(validator.state).isEqualTo(0); + Assertions.assertThat(validator.codep).isEqualTo(0); + Assertions.assertThat(validator.validateTextFrame(utf8.get(4))).isTrue(); + Assertions.assertThat(validator.validateTextFrame(nonUtf8)).isFalse(); + } finally { + for (ByteBuf string : utf8) { + string.release(); + } + nonUtf8.release(); + } + } + static WebSocketDecoderConfig decoderConfig(int maxFramePayloadLength) { return WebSocketDecoderConfig.newBuilder() .allowMaskMismatch(true) diff --git a/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java b/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java index 7c5bfdf..3842e0b 100644 --- a/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java +++ b/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java @@ -18,6 +18,7 @@ import io.netty.buffer.ByteBuf; import io.netty.channel.ChannelHandlerContext; +import io.netty.util.ByteProcessor; import io.netty.util.CharsetUtil; /** @@ -72,4 +73,93 @@ public static String reason(ByteBuf payload) { Short.BYTES, payload.readableBytes() - Short.BYTES, CharsetUtil.UTF_8); } } + + /** + * UTF8 finite state machine based implementation from + * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + */ + final class Utf8FrameValidator implements ByteProcessor { + public static final int UTF8_VALIDATION_ERROR_CODE = 1007; + public static final String UTF8_VALIDATION_ERROR_MESSAGE = + "inbound text frame with non-utf8 contents"; + + private static final int UTF8_ACCEPT = 0; + private static final int UTF8_REJECT = 12; + + private static final byte[] TYPES = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8 + }; + + private static final byte[] STATES = { + 0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, + 12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 + }; + + int state = UTF8_ACCEPT; + int codep; + + private Utf8FrameValidator() {} + + public static Utf8FrameValidator create() { + return new Utf8FrameValidator(); + } + + /** + * @param buffer text frame payload + * @return true if payload is utf8 encoded, false otherwise + */ + public boolean validateTextFrame(ByteBuf buffer) { + buffer.forEachByte(this); + int st = state; + state = UTF8_ACCEPT; + codep = 0; + return st == UTF8_ACCEPT; + } + + /** + * @param buffer text fragment frame payload + * @return true if payload is utf8 encoded, false otherwise + */ + public boolean validateTextFragmentStart(ByteBuf buffer) { + buffer.forEachByte(this); + return state != UTF8_REJECT; + } + + /** + * @param buffer text fragment frame payload + * @return true if payload is utf8 encoded, false otherwise + */ + public boolean validateFragmentContinuation(ByteBuf buffer) { + return validateTextFragmentStart(buffer); + } + + /** + * @param buffer text fragment frame payload + * @return true if payload is utf8 encoded, false otherwise + */ + public boolean validateFragmentEnd(ByteBuf buffer) { + return validateTextFrame(buffer); + } + + @Override + public boolean process(byte bufferByte) { + byte type = TYPES[bufferByte & 0xFF]; + int st = state; + codep = st != UTF8_ACCEPT ? bufferByte & 0x3f | codep << 6 : 0xff >> type & bufferByte; + st = state = STATES[st + type]; + + return st != UTF8_REJECT; + } + } }