diff --git a/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java b/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java
index 09439e8..9843801 100644
--- a/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java
+++ b/netty-websocket-http1-test/src/test/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketValidationTest.java
@@ -39,6 +39,7 @@
 import java.net.InetSocketAddress;
 import java.net.SocketAddress;
 import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -272,6 +273,60 @@ void invalidFragmentCompletion() throws Exception {
     }
   }
 
+  @Test
+  void utf8Validator() {
+    String ascii = "Are those shy Eurasian footwear, cowboy chaps, or jolly earthmoving headgear";
+    String utf8 = "Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів!";
+    List<ByteBuf> asciiList = stringList(ByteBufAllocator.DEFAULT, ascii);
+    List<ByteBuf> utf8List = stringList(ByteBufAllocator.DEFAULT, utf8);
+    try {
+      WebSocketFrameListener.Utf8FrameValidator validator =
+          WebSocketFrameListener.Utf8FrameValidator.create();
+      for (ByteBuf byteBuf : asciiList) {
+        Assertions.assertThat(validator.validateTextFrame(byteBuf)).isTrue();
+      }
+      for (ByteBuf byteBuf : utf8List) {
+        Assertions.assertThat(validator.validateTextFrame(byteBuf)).isTrue();
+      }
+    } finally {
+      for (ByteBuf byteBuf : asciiList) {
+        byteBuf.release();
+      }
+      for (ByteBuf byteBuf : utf8List) {
+        byteBuf.release();
+      }
+    }
+  }
+
+  @Test
+  void utf8ValidatorAsciiInsideSequence() {
+    /*lead byte closes the first 8-byte word, its continuation byte follows 8 ascii bytes*/
+    byte[] bytes = new byte[17];
+    Arrays.fill(bytes, (byte) 'a');
+    bytes[7] = (byte) 0xC3;
+    bytes[16] = (byte) 0xA9;
+    ByteBuf byteBuf = ByteBufAllocator.DEFAULT.buffer(bytes.length).writeBytes(bytes);
+    try {
+      WebSocketFrameListener.Utf8FrameValidator validator =
+          WebSocketFrameListener.Utf8FrameValidator.create();
+      Assertions.assertThat(validator.validateTextFrame(byteBuf)).isFalse();
+    } finally {
+      byteBuf.release();
+    }
+  }
+
+  /** Encodes every non-empty prefix of {@code string} as UTF8 into its own buffer. */
+  static List<ByteBuf> stringList(ByteBufAllocator allocator, String string) {
+    int length = string.length();
+    List<ByteBuf> list = new ArrayList<>(length);
+    for (int i = 0; i < length; i++) {
+      String substring = string.substring(0, i + 1);
+      ByteBuf byteBuf = ByteBufUtil.writeUtf8(allocator, substring);
+      list.add(byteBuf);
+    }
+    return list;
+  }
+
   @Test
   void utf8TextFrameValidator() {
     ByteBufAllocator alloc = ByteBufAllocator.DEFAULT;
diff --git a/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java b/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java
index 3842e0b..9dd7957 100644
--- a/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java
+++ b/netty-websocket-http1/src/main/java/com/jauntsdn/netty/handler/codec/http/websocketx/WebSocketFrameListener.java
@@ -18,7 +18,6 @@
 
 import io.netty.buffer.ByteBuf;
 import io.netty.channel.ChannelHandlerContext;
-import io.netty.util.ByteProcessor;
 import io.netty.util.CharsetUtil;
 
 /**
@@ -76,9 +75,9 @@ public static String reason(ByteBuf payload) {
 
   /**
    * UTF8 finite state machine based implementation from
-   * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+   * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ optimized for ASCII content.
    */
-  final class Utf8FrameValidator implements ByteProcessor {
+  final class Utf8FrameValidator {
     public static final int UTF8_VALIDATION_ERROR_CODE = 1007;
     public static final String UTF8_VALIDATION_ERROR_MESSAGE =
         "inbound text frame with non-utf8 contents";
@@ -120,7 +119,7 @@ public static Utf8FrameValidator create() {
      * @return true if payload is utf8 encoded, false otherwise
      */
     public boolean validateTextFrame(ByteBuf buffer) {
-      buffer.forEachByte(this);
+      checkUtf8(buffer);
       int st = state;
       state = UTF8_ACCEPT;
       codep = 0;
@@ -132,7 +131,7 @@ public boolean validateTextFrame(ByteBuf buffer) {
      * @return true if payload is utf8 encoded, false otherwise
      */
     public boolean validateTextFragmentStart(ByteBuf buffer) {
-      buffer.forEachByte(this);
+      checkUtf8(buffer);
       return state != UTF8_REJECT;
     }
 
@@ -152,8 +151,49 @@ public boolean validateFragmentEnd(ByteBuf buffer) {
       return validateTextFrame(buffer);
     }
 
-    @Override
-    public boolean process(byte bufferByte) {
+    /**
+     * Feeds the readable bytes of {@code buffer} into the UTF8 state machine, reading 8 and then
+     * 4 bytes at a time while enough bytes remain. A word is skipped without per-byte processing
+     * only if all of its bytes are ASCII and no multi-byte sequence is pending. Processing stops
+     * at the first rejected byte.
+     *
+     * @param buffer text payload, read with absolute-index getters starting at its reader index
+     */
+    private void checkUtf8(ByteBuf buffer) {
+      int from = buffer.readerIndex();
+      int to = from + buffer.readableBytes();
+      boolean cont = true;
+      int step = Long.BYTES;
+      while (cont && to - from >= step) {
+        long bytes = buffer.getLong(from);
+        /*ascii-only word may be skipped only if the previous byte completed a code point*/
+        if ((bytes & 0x8080808080808080L) != 0 || state != UTF8_ACCEPT) {
+          for (int i = 0; cont && i < step; i++) {
+            byte b = (byte) ((bytes >> 8 * (step - (i + 1))) & 0xFF);
+            cont = checkUtf8(b);
+          }
+        }
+        from += step;
+      }
+      step = Integer.BYTES;
+      while (cont && to - from >= step) {
+        int bytes = buffer.getInt(from);
+        /*same rule as for 8-byte words*/
+        if ((bytes & 0x80808080) != 0 || state != UTF8_ACCEPT) {
+          for (int i = 0; cont && i < step; i++) {
+            byte b = (byte) ((bytes >> 8 * (step - (i + 1))) & 0xFF);
+            cont = checkUtf8(b);
+          }
+        }
+        from += step;
+      }
+      while (cont && from < to) {
+        cont = checkUtf8(buffer.getByte(from));
+        from += 1;
+      }
+    }
+
+    private boolean checkUtf8(byte bufferByte) {
       byte type = TYPES[bufferByte & 0xFF];
       int st = state;
       codep = st != UTF8_ACCEPT ? bufferByte & 0x3f | codep << 6 : 0xff >> type & bufferByte;