diff --git a/src/main/java/com/corundumstudio/socketio/protocol/PacketDecoder.java b/src/main/java/com/corundumstudio/socketio/protocol/PacketDecoder.java
index 48462aa..65e92c1 100644
--- a/src/main/java/com/corundumstudio/socketio/protocol/PacketDecoder.java
+++ b/src/main/java/com/corundumstudio/socketio/protocol/PacketDecoder.java
@@ -32,6 +32,8 @@ import com.corundumstudio.socketio.handler.ClientHead;
 
 public class PacketDecoder {
 
+    private final UTF8CharsScanner utf8scanner = new UTF8CharsScanner();
+
     private final ByteBuf QUOTES = Unpooled.copiedBuffer("\"", CharsetUtil.UTF_8);
 
     private final JsonSupport jsonSupport;
@@ -134,9 +136,13 @@ public class PacketDecoder {
         } else if (hasLengthHeader(buffer)) {
             // TODO refactor
             int lengthEndIndex = buffer.bytesBefore((byte)':');
-            int len = (int) readLong(buffer, lengthEndIndex);
+            int lenHeader = (int) readLong(buffer, lengthEndIndex);
+            int len = utf8scanner.getActualLength(buffer, lenHeader);
 
             ByteBuf frame = buffer.slice(buffer.readerIndex() + 1, len);
+            if (lenHeader != len) {
+                frame = Unpooled.wrappedBuffer(frame.toString(CharsetUtil.UTF_8).getBytes(CharsetUtil.ISO_8859_1));
+            }
             // skip this frame
             buffer.readerIndex(buffer.readerIndex() + 1 + len);
             return decode(client, frame);
diff --git a/src/main/java/com/corundumstudio/socketio/protocol/UTF8CharsScanner.java b/src/main/java/com/corundumstudio/socketio/protocol/UTF8CharsScanner.java
new file mode 100644
index 0000000..4d9f8ba
--- /dev/null
+++ b/src/main/java/com/corundumstudio/socketio/protocol/UTF8CharsScanner.java
@@ -0,0 +1,156 @@
+package com.corundumstudio.socketio.protocol;
+
+import io.netty.buffer.ByteBuf;
+
+/**
+ * Maps character counts onto byte offsets for UTF-8 encoded text held in a
+ * {@link ByteBuf}.
+ * <p>
+ * Only the lead byte of each sequence is inspected; continuation bytes are
+ * skipped without being validated.
+ */
+public class UTF8CharsScanner {
+
+    /**
+     * Lookup table used for determining which input characters need special
+     * handling when contained in text segment.
+     */
+    static final int[] sInputCodes;
+    static {
+        /*
+         * 96 would do for most cases (backslash is ascii 92) but if we want to
+         * do lookups by raw bytes it's better to have full table
+         */
+        int[] table = new int[256];
+        // Control chars and non-space white space are not allowed unquoted
+        for (int i = 0; i < 32; ++i) {
+            table[i] = -1;
+        }
+        // And then string end and quote markers are special too
+        table['"'] = 1;
+        table['\\'] = 1;
+        sInputCodes = table;
+    }
+
+    /**
+     * Additionally we can combine UTF-8 decoding info into similar data table.
+     */
+    static final int[] sInputCodesUtf8;
+    static {
+        int[] table = new int[sInputCodes.length];
+        System.arraycopy(sInputCodes, 0, table, 0, sInputCodes.length);
+        for (int c = 128; c < 256; ++c) {
+            int code;
+
+            // We'll add number of bytes needed for decoding
+            if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
+                code = 2;
+            } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
+                code = 3;
+            } else if ((c & 0xF8) == 0xF0) {
+                // 4 bytes; double-char with surrogates and all...
+                code = 4;
+            } else {
+                // And -1 seems like a good "universal" error marker...
+                code = -1;
+            }
+            table[c] = code;
+        }
+        sInputCodesUtf8 = table;
+    }
+
+    /**
+     * Returns the index just past the character whose lead byte is at
+     * {@code i}.
+     *
+     * @param inputBuffer buffer holding the encoded text
+     * @param i absolute index of the character's first byte
+     * @return {@code i} advanced by 2, 3 or 4 for a multi-byte lead byte,
+     *         otherwise by 1
+     */
+    private int getCharTailIndex(ByteBuf inputBuffer, int i) {
+        int code = sInputCodesUtf8[inputBuffer.getByte(i) & 0xFF];
+        // Codes 2..4 are byte counts of multi-byte sequences; every other
+        // table value (0, 1, -1) marks a byte that is stepped over on its own.
+        return i + (code >= 2 ? code : 1);
+    }
+
+    /**
+     * Counts the characters between {@code start} and the buffer's writer
+     * index.
+     *
+     * @param inputBuffer buffer holding the encoded text
+     * @param start absolute index of the first byte to scan
+     * @return number of characters found
+     */
+    public int getLength(ByteBuf inputBuffer, int start) {
+        int len = 0;
+        for (int i = start; i < inputBuffer.writerIndex();) {
+            i = getCharTailIndex(inputBuffer, i);
+            len++;
+        }
+        return len;
+    }
+
+    /**
+     * Computes how many bytes the first {@code length} characters occupy,
+     * scanning from the buffer's reader index.
+     *
+     * @param inputBuffer buffer holding the encoded text
+     * @param length number of characters to measure
+     * @return number of bytes spanned by those characters
+     * @throws IllegalStateException if {@code length} is not positive, if the
+     *         readable bytes hold fewer than {@code length} characters, or if
+     *         the last character's bytes run past the readable region
+     */
+    public int getActualLength(ByteBuf inputBuffer, int length) {
+        if (length <= 0) {
+            // A non-positive count can never be reached by the scan below.
+            throw new IllegalStateException("Character count must be positive: " + length);
+        }
+        int start = inputBuffer.readerIndex();
+        int end = inputBuffer.writerIndex();
+        int len = 0;
+        for (int i = start; i < end;) {
+            i = getCharTailIndex(inputBuffer, i);
+            len++;
+            if (length == len) {
+                if (i > end) {
+                    // Lead byte announced more bytes than are readable.
+                    throw new IllegalStateException("Truncated multi-byte character: need "
+                            + (i - start) + " bytes but only " + (end - start) + " are readable");
+                }
+                return i - start;
+            }
+        }
+        throw new IllegalStateException("Expected " + length + " characters but found only "
+                + len + " in " + (end - start) + " readable bytes");
+    }
+
+    /**
+     * Advances from {@code start} one character at a time until
+     * {@code charsToRead} characters were stepped over or {@code end} is reached.
+     *
+     * @param inputBuffer buffer holding the encoded text
+     * @param start absolute index of the first byte to scan
+     * @param end absolute index at which scanning stops
+     * @param charsToRead number of characters to step over
+     * @return index just past the last character stepped over; may exceed
+     *         {@code end} when that character is a multi-byte sequence
+     *         starting near {@code end}
+     */
+    public int findTailIndex(ByteBuf inputBuffer, int start, int end,
+            int charsToRead) {
+        int len = 0;
+        int i = start;
+        while (i < end) {
+            i = getCharTailIndex(inputBuffer, i);
+            len++;
+            if (charsToRead == len) {
+                break;
+            }
+        }
+        return i;
+    }
+
+}