Add helpers to truncate UTF-8 at code points
This helps avoid truncating a UTF-8 string in the middle of a code point, which would produce an invalid UTF-8 result.
This commit is contained in:
@@ -0,0 +1,44 @@
|
||||
package com.genymobile.scrcpy;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
public class StringUtilsTest {
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("checkstyle:MagicNumber")
|
||||
public void testUtf8Trucate() {
|
||||
String s = "aÉbÔc";
|
||||
byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
|
||||
Assert.assertEquals(7, utf8.length);
|
||||
|
||||
int count;
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 1);
|
||||
Assert.assertEquals(1, count);
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 2);
|
||||
Assert.assertEquals(1, count); // É is 2 bytes-wide
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 3);
|
||||
Assert.assertEquals(3, count);
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 4);
|
||||
Assert.assertEquals(4, count);
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 5);
|
||||
Assert.assertEquals(4, count); // Ô is 2 bytes-wide
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 6);
|
||||
Assert.assertEquals(6, count);
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 7);
|
||||
Assert.assertEquals(7, count);
|
||||
|
||||
count = StringUtils.getUtf8TruncationIndex(utf8, 8);
|
||||
Assert.assertEquals(7, count); // no more chars
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user