Polling encoding fixed. #209

10 years ago · 8231d2946e
2 changed files with 118 additions and 1 deletion
--- a/src/main/java/com/corundumstudio/socketio/protocol/PacketDecoder.java
+++ b/src/main/java/com/corundumstudio/socketio/protocol/PacketDecoder.java
@@ -32,6 +32,8 @@ import com.corundumstudio.socketio.handler.ClientHead;

 public class PacketDecoder {

+    private final UTF8CharsScanner utf8scanner = new UTF8CharsScanner();
+
    private final ByteBuf QUOTES = Unpooled.copiedBuffer("\"", CharsetUtil.UTF_8);

    private final JsonSupport jsonSupport;
@@ -134,9 +136,13 @@ public class PacketDecoder {
        } else if (hasLengthHeader(buffer)) {
            // TODO refactor
            int lengthEndIndex = buffer.bytesBefore((byte)':');
-            int len = (int) readLong(buffer, lengthEndIndex);
+            int lenHeader = (int) readLong(buffer, lengthEndIndex);
+            int len = utf8scanner.getActualLength(buffer, lenHeader);

            ByteBuf frame = buffer.slice(buffer.readerIndex() + 1, len);
+            if (lenHeader != len) {
+                frame = Unpooled.wrappedBuffer(frame.toString(CharsetUtil.UTF_8).getBytes(CharsetUtil.ISO_8859_1));
+            }
            // skip this frame
            buffer.readerIndex(buffer.readerIndex() + 1 + len);
            return decode(client, frame);
--- a/src/main/java/com/corundumstudio/socketio/protocol/UTF8CharsScanner.java
+++ b/src/main/java/com/corundumstudio/socketio/protocol/UTF8CharsScanner.java
@@ -0,0 +1,111 @@
+package com.corundumstudio.socketio.protocol;
+
+import io.netty.buffer.ByteBuf;
+
+public class UTF8CharsScanner {
+
+    /**
+     * Lookup table used for determining which input characters need special
+     * handling when contained in text segment.
+     */
+    final static int[] sInputCodes;
+    static {
+        /*
+         * 96 would do for most cases (backslash is ascii 92) but if we want to
+         * do lookups by raw bytes it's better to have full table
+         */
+        int[] table = new int[256];
+        // Control chars and non-space white space are not allowed unquoted
+        for (int i = 0; i < 32; ++i) {
+            table[i] = -1;
+        }
+        // And then string end and quote markers are special too
+        table['"'] = 1;
+        table['\\'] = 1;
+        sInputCodes = table;
+    }
+
+    /**
+     * Additionally we can combine UTF-8 decoding info into similar data table.
+     */
+    final static int[] sInputCodesUtf8;
+    static {
+        int[] table = new int[sInputCodes.length];
+        System.arraycopy(sInputCodes, 0, table, 0, sInputCodes.length);
+        for (int c = 128; c < 256; ++c) {
+            int code;
+            // We'll add number of bytes needed for decoding
+            if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
+                code = 2;
+            } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
+                code = 3;
+            } else if ((c & 0xF8) == 0xF0) {
+                // 4 bytes; double-char with surrogates and all...
+                code = 4;
+            } else {
+                // And -1 seems like a good "universal" error marker...
+                code = -1;
+            }
+            table[c] = code;
+        }
+        sInputCodesUtf8 = table;
+    }
+
+    /**
+     * Returns the index just past the character whose first byte is at
+     * {@code i}; bytes that are not a 2-4 byte UTF-8 lead advance by one.
+     */
+    private int getCharTailIndex(ByteBuf inputBuffer, int i) {
+        int code = sInputCodesUtf8[inputBuffer.getByte(i) & 0xFF];
+        return i + (code > 1 ? code : 1);
+    }
+
+    /** Counts the characters between {@code start} and the writer index. */
+    public int getLength(ByteBuf inputBuffer, int start) {
+        int end = inputBuffer.writerIndex();
+        int len = 0;
+        for (int i = start; i < end; i = getCharTailIndex(inputBuffer, i)) {
+            len++;
+        }
+        return len;
+    }
+
+    /**
+     * Converts a length given in characters into the number of bytes those
+     * characters occupy, scanning from the buffer's reader index.
+     *
+     * @throws IllegalStateException if {@code length} is not positive or exceeds the readable characters
+     */
+    public int getActualLength(ByteBuf inputBuffer, int length) {
+        int start = inputBuffer.readerIndex();
+        int end = start + inputBuffer.readableBytes();
+        int len = 0;
+        for (int i = start; i < end;) {
+            i = getCharTailIndex(inputBuffer, i);
+            if (i > end) {
+                break; // the last sequence is cut off by the end of the readable bytes
+            }
+            if (++len == length) {
+                return i - start;
+            }
+        }
+        throw new IllegalStateException("Can't read " + length + " chars from " + (end - start) + " bytes");
+    }
+
+    /**
+     * Returns the index just past the {@code charsToRead}-th character after
+     * {@code start}, or the index where the scan stopped on reaching {@code end}.
+     */
+    public int findTailIndex(ByteBuf inputBuffer, int start, int end, int charsToRead) {
+        int len = 0;
+        int i = start;
+        while (i < end) {
+            i = getCharTailIndex(inputBuffer, i);
+            if (++len == charsToRead) {
+                break;
+            }
+        }
+        return i;
+    }
+
+}