Skip to content

Commit

Permalink
Add utf8 validation utility for text frames payload
Browse files Browse the repository at this point in the history
  • Loading branch information
mostroverkhov committed Jul 2, 2024
1 parent e700877 commit 3b8ff36
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import io.netty.bootstrap.Bootstrap;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.ByteBufUtil;
import io.netty.channel.Channel;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInitializer;
Expand All @@ -37,7 +39,9 @@
import java.net.InetSocketAddress;
import java.net.SocketAddress;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
Expand Down Expand Up @@ -268,6 +272,40 @@ void invalidFragmentCompletion() throws Exception {
}
}

/**
 * Exercises {@code Utf8FrameValidator} with whole text frames, a fragmented text message and an
 * invalid payload, checking that decoder state is reset once a message completes.
 */
@Test
void utf8TextFrameValidator() {
  ByteBufAllocator allocator = ByteBufAllocator.DEFAULT;

  // Valid utf8 payloads, allocated in the same order they are validated below.
  ByteBuf firstFrame = ByteBufUtil.writeUtf8(allocator, "ab");
  ByteBuf fragmentStart = ByteBufUtil.writeUtf8(allocator, "c");
  ByteBuf fragmentContinuation = ByteBufUtil.writeUtf8(allocator, "def");
  ByteBuf fragmentEnd = ByteBufUtil.writeUtf8(allocator, "ghijk");
  ByteBuf lastFrame = ByteBufUtil.writeUtf8(allocator, "lmn");
  List<ByteBuf> utf8 =
      Arrays.asList(firstFrame, fragmentStart, fragmentContinuation, fragmentEnd, lastFrame);

  // 0xc3 opens a two-byte sequence, 0x28 is not a continuation byte: invalid utf8.
  ByteBuf nonUtf8 = allocator.buffer(2);
  nonUtf8.writeByte(0xc3);
  nonUtf8.writeByte(0x28);

  WebSocketFrameListener.Utf8FrameValidator validator =
      WebSocketFrameListener.Utf8FrameValidator.create();

  try {
    // complete frame: valid, and decoder state is back to its initial values
    Assertions.assertThat(validator.validateTextFrame(firstFrame)).isTrue();
    Assertions.assertThat(validator.state).isEqualTo(0);
    Assertions.assertThat(validator.codep).isEqualTo(0);

    // fragmented message: start, continuation, end
    Assertions.assertThat(validator.validateTextFragmentStart(fragmentStart)).isTrue();
    Assertions.assertThat(validator.validateFragmentContinuation(fragmentContinuation)).isTrue();
    Assertions.assertThat(validator.validateFragmentEnd(fragmentEnd)).isTrue();
    Assertions.assertThat(validator.state).isEqualTo(0);
    Assertions.assertThat(validator.codep).isEqualTo(0);

    // validator is reusable after a fragmented message; invalid payload is rejected
    Assertions.assertThat(validator.validateTextFrame(lastFrame)).isTrue();
    Assertions.assertThat(validator.validateTextFrame(nonUtf8)).isFalse();
  } finally {
    utf8.forEach(ByteBuf::release);
    nonUtf8.release();
  }
}

static WebSocketDecoderConfig decoderConfig(int maxFramePayloadLength) {
return WebSocketDecoderConfig.newBuilder()
.allowMaskMismatch(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import io.netty.buffer.ByteBuf;
import io.netty.channel.ChannelHandlerContext;
import io.netty.util.ByteProcessor;
import io.netty.util.CharsetUtil;

/**
Expand Down Expand Up @@ -72,4 +73,93 @@ public static String reason(ByteBuf payload) {
Short.BYTES, payload.readableBytes() - Short.BYTES, CharsetUtil.UTF_8);
}
}

/**
 * Incremental UTF8 validator for text frame payloads, based on the finite state machine from
 * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
 *
 * <p>The decoder state is kept in instance fields between calls, so a byte sequence that is split
 * across fragments is validated as a whole. {@link #validateTextFrame(ByteBuf)} and {@link
 * #validateFragmentEnd(ByteBuf)} reset the state after checking it; {@link
 * #validateTextFragmentStart(ByteBuf)} and {@link #validateFragmentContinuation(ByteBuf)} do not.
 * Once the reject state is entered, every further byte keeps the machine in the reject state
 * until one of the resetting methods is called.
 */
final class Utf8FrameValidator implements ByteProcessor {
  public static final int UTF8_VALIDATION_ERROR_CODE = 1007;
  public static final String UTF8_VALIDATION_ERROR_MESSAGE =
      "inbound text frame with non-utf8 contents";

  /** State in which all bytes seen so far form complete, valid sequences. */
  private static final int UTF8_ACCEPT = 0;
  /** State entered on the first invalid byte. */
  private static final int UTF8_REJECT = 12;

  /** Maps an unsigned byte value (0..255) to its character class (0..11). */
  private static final byte[] TYPES = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8
  };

  /**
   * Transition table indexed by {@code state + characterClass}; states are multiples of 12 so
   * the sum addresses the row of the current state directly.
   */
  private static final byte[] STATES = {
    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36,
    12, 12, 12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
  };

  /** Current state of the machine; carried over between fragments of one message. */
  int state = UTF8_ACCEPT;
  /** Code point accumulated so far from the bytes of the sequence being decoded. */
  int codep;

  private Utf8FrameValidator() {}

  /** @return new validator in the initial (accepting) state */
  public static Utf8FrameValidator create() {
    return new Utf8FrameValidator();
  }

  /**
   * Validates payload of a complete text frame and resets the validator afterwards.
   *
   * @param buffer text frame payload
   * @return true if payload is utf8 encoded, false otherwise
   */
  public boolean validateTextFrame(ByteBuf buffer) {
    return consumeFinal(buffer);
  }

  /**
   * Validates payload of the first fragment of a text message. State is retained for the
   * fragments that follow, so a payload ending in the middle of a multi-byte sequence is still
   * reported as valid here.
   *
   * @param buffer text fragment frame payload
   * @return true if payload is utf8 encoded, false otherwise
   */
  public boolean validateTextFragmentStart(ByteBuf buffer) {
    return consumePartial(buffer);
  }

  /**
   * Validates payload of an intermediate fragment of a text message, continuing from the state
   * left by the previous fragment.
   *
   * @param buffer text fragment frame payload
   * @return true if payload is utf8 encoded, false otherwise
   */
  public boolean validateFragmentContinuation(ByteBuf buffer) {
    return consumePartial(buffer);
  }

  /**
   * Validates payload of the last fragment of a text message and resets the validator
   * afterwards.
   *
   * @param buffer text fragment frame payload
   * @return true if payload is utf8 encoded, false otherwise
   */
  public boolean validateFragmentEnd(ByteBuf buffer) {
    return consumeFinal(buffer);
  }

  /**
   * Advances the machine by one byte.
   *
   * @param bufferByte next payload byte
   * @return false once the reject state is reached, true otherwise
   */
  @Override
  public boolean process(byte bufferByte) {
    final int characterClass = TYPES[bufferByte & 0xFF];
    final int current = state;

    if (current == UTF8_ACCEPT) {
      // first byte of a sequence: keep only the payload bits of the lead byte
      codep = (0xff >> characterClass) & bufferByte;
    } else {
      // subsequent byte: append its low 6 bits to the code point collected so far
      codep = (bufferByte & 0x3f) | (codep << 6);
    }

    final int next = STATES[current + characterClass];
    state = next;
    return next != UTF8_REJECT;
  }

  /** Feeds the payload; valid as long as the machine has not entered the reject state. */
  private boolean consumePartial(ByteBuf payload) {
    payload.forEachByte(this);
    return state != UTF8_REJECT;
  }

  /** Feeds the payload, requires the machine to end in the accept state, then resets it. */
  private boolean consumeFinal(ByteBuf payload) {
    payload.forEachByte(this);
    final boolean accepted = state == UTF8_ACCEPT;
    state = UTF8_ACCEPT;
    codep = 0;
    return accepted;
  }
}
}

0 comments on commit 3b8ff36

Please sign in to comment.