Add helpers to truncate UTF-8 at code points

This helps to avoid truncating a UTF-8 string in the middle of a code point, which would produce an invalid UTF-8 result.
2019-05-30 19:01:08 +02:00
parent 3aa5426cad
commit 0a7fe7ad57
6 changed files with 121 additions and 1 deletions
--- a/server/src/main/java/com/genymobile/scrcpy/StringUtils.java
+++ b/server/src/main/java/com/genymobile/scrcpy/StringUtils.java
@@ -0,0 +1,41 @@
+package com.genymobile.scrcpy;
+
+public final class StringUtils {
+    private StringUtils() {
+        // not instantiable
+    }
+
+    /**
+     * Compute the length at which a UTF-8 byte array can be truncated so that the result is at most
+     * {@code maxLength} bytes long and does not end in the middle of a code point.
+     * <p>
+     * Malformed input (e.g. an array starting with continuation bytes) never makes the scan run
+     * before the start of the array: in the worst case, 0 is returned.
+     *
+     * @param utf8 the UTF-8 encoded bytes
+     * @param maxLength the maximum number of bytes to keep (must not be negative)
+     * @return the truncation index, between 0 and {@code min(utf8.length, maxLength)} inclusive
+     * @throws IllegalArgumentException if {@code maxLength} is negative
+     */
+    @SuppressWarnings("checkstyle:MagicNumber")
+    public static int getUtf8TruncationIndex(byte[] utf8, int maxLength) {
+        if (maxLength < 0) {
+            throw new IllegalArgumentException("maxLength must not be negative: " + maxLength);
+        }
+        int len = utf8.length;
+        if (len <= maxLength) {
+            // nothing to truncate
+            return len;
+        }
+        len = maxLength;
+        // see UTF-8 encoding <https://en.wikipedia.org/wiki/UTF-8#Description>
+        // continuation bytes match 10xxxxxx; stop at index 0 so that malformed input cannot
+        // push the index out of bounds
+        while (len > 0 && (utf8[len] & 0xc0) == 0x80) {
+            // the next byte is not the start of a new UTF-8 codepoint
+            // so if we would cut there, the character would be truncated
+            len--;
+        }
+        return len;
+    }
+}