Add helpers to truncate UTF-8 at code points

This helps avoid truncating a UTF-8 string in the middle of a code point, which would produce an invalid UTF-8 result.
2019-05-30 19:01:08 +02:00
parent 3aa5426cad
commit 0a7fe7ad57
6 changed files with 121 additions and 1 deletions
--- a/server/src/test/java/com/genymobile/scrcpy/StringUtilsTest.java
+++ b/server/src/test/java/com/genymobile/scrcpy/StringUtilsTest.java
@@ -0,0 +1,44 @@
+package com.genymobile.scrcpy;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+
+/** Verifies that {@code StringUtils.getUtf8TruncationIndex} only returns code point boundaries. */
+public class StringUtilsTest {
+
+    @Test
+    @SuppressWarnings("checkstyle:MagicNumber")
+    public void testUtf8Truncate() {
+        // Encoded layout: a(1 byte) É(2) b(1) Ô(2) c(1) = 7 bytes, so valid cut points are 0, 1, 3, 4, 6, 7.
+        String s = "aÉbÔc";
+        byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
+        Assert.assertEquals(7, utf8.length);
+
+        int count = StringUtils.getUtf8TruncationIndex(utf8, 1);
+        Assert.assertEquals(1, count);
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 2);
+        Assert.assertEquals(1, count); // É is 2 bytes wide: a limit of 2 would split it
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 3);
+        Assert.assertEquals(3, count);
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 4);
+        Assert.assertEquals(4, count);
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 5);
+        Assert.assertEquals(4, count); // Ô is 2 bytes wide: a limit of 5 would split it
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 6);
+        Assert.assertEquals(6, count);
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 7);
+        Assert.assertEquals(7, count);
+
+        count = StringUtils.getUtf8TruncationIndex(utf8, 8);
+        Assert.assertEquals(7, count); // limit exceeds the input: the whole array is kept
+    }
+}