Skip to content

Instantly share code, notes, and snippets.

@AugustNagro
Created October 26, 2019 07:11
Show Gist options
  • Save AugustNagro/4fa5bd7287870433fb24a1fd645836bd to your computer and use it in GitHub Desktop.
Save AugustNagro/4fa5bd7287870433fb24a1fd645836bd to your computer and use it in GitHub Desktop.
Pattern match a UTF-8 ByteBuffer in Java
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
/**
 * A {@link CharSequence} view over the bytes of a {@link ByteBuffer}, intended
 * to let {@code java.util.regex} run directly against UTF-8 encoded data
 * without first decoding it into a {@code String}.
 *
 * <p>The sequence covers the bytes between the buffer's position and its
 * limit. Each byte is exposed as one {@code char} in the range 0..255, so
 * {@link #length()} and all indices are <em>byte</em> offsets, not character
 * offsets. Only {@link #toString()} performs real UTF-8 decoding.
 *
 * <p>The wrapped buffer is not copied: this object keeps a reference to it and
 * reflects later changes to its position, limit, or content.
 */
public class Utf8CharSeq implements CharSequence {

    private final ByteBuffer b;

    /**
     * Wraps the given buffer without copying it.
     *
     * @param b the buffer whose remaining bytes form this sequence
     */
    public Utf8CharSeq(ByteBuffer b) {
        this.b = b;
    }

    /**
     * Returns the number of bytes between the buffer's position and limit.
     *
     * @return the length of this sequence, in bytes
     */
    @Override
    public int length() {
        return b.remaining();
    }

    /**
     * Returns the byte at the given offset from the buffer's position,
     * widened to an unsigned {@code char} (0..255).
     *
     * @param index zero-based offset relative to the buffer's position
     * @return the unsigned byte value at that offset
     * @throws IndexOutOfBoundsException if {@code index} is negative or not
     *         smaller than {@link #length()}
     */
    @Override
    public char charAt(int index) {
        // Without this check a negative index could silently read bytes that
        // lie before the buffer's position, i.e. outside this sequence.
        if (index < 0) {
            throw new IndexOutOfBoundsException("index: " + index);
        }
        // Absolute get: offset from position() so that indices agree with
        // length(), which counts from position() too. The absolute get itself
        // rejects anything at or past the limit.
        return (char) (0xFF & b.get(b.position() + index));
    }

    /**
     * Returns a view of the bytes in {@code [start, end)}, sharing content
     * with this sequence's buffer.
     *
     * @param start inclusive start offset, relative to the buffer's position
     * @param end exclusive end offset, relative to the buffer's position
     * @return a new sequence covering the requested byte range
     * @throws IndexOutOfBoundsException if {@code start} or {@code end} is
     *         negative, {@code end} is greater than {@link #length()}, or
     *         {@code start} is greater than {@code end}
     */
    @Override
    public CharSequence subSequence(int start, int end) {
        if (start < 0 || end < start || end > b.remaining()) {
            throw new IndexOutOfBoundsException(
                    "start: " + start + ", end: " + end + ", length: " + b.remaining());
        }
        // duplicate() shares the bytes but has independent position/limit,
        // so narrowing the view does not disturb this sequence.
        ByteBuffer view = b.duplicate();
        int base = view.position();
        // Set the limit first: it can only shrink here, and position (still at
        // base) never exceeds it; then move position forward.
        view.limit(base + end);
        view.position(base + start);
        return new Utf8CharSeq(view);
    }

    /**
     * Decodes the remaining bytes as UTF-8 and returns them as a
     * {@code String}. {@code Matcher.group(..)} relies on this to return the
     * matching subsequence as properly decoded text.
     *
     * <p>Decoding is done on a duplicate of the buffer, so calling this
     * method does not consume the underlying buffer and may be repeated.
     *
     * @return the UTF-8 decoded content of this sequence
     */
    @Override
    public String toString() {
        // Charset.decode advances the position of the buffer it is given;
        // decode a duplicate to leave this sequence's buffer untouched.
        return StandardCharsets.UTF_8.decode(b.duplicate()).toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment