Skip to content

Instantly share code, notes, and snippets.

@dmyersturnbull
Created May 19, 2020 21:30
Show Gist options
  • Save dmyersturnbull/073b254dbb8ded3244082c80dd29f56d to your computer and use it in GitHub Desktop.
Save dmyersturnbull/073b254dbb8ded3244082c80dd29f56d to your computer and use it in GitHub Desktop.
Escape characters using RFC 3986 percent-encoding, or escape an arbitrary set of characters with arbitrary rules.
See https://github.com/dmyersturnbull/genomics-io/tree/master/core/src/main/java/org/pharmgkb/parsers/escape
package org.pharmgkb.parsers.escape;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.NotThreadSafe;
import javax.annotation.concurrent.ThreadSafe;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
 * Escapes and unescapes some characters with RFC 3986 percent-encoding: each escaped character is written as
 * {@code %XX}, where {@code XX} is the character's value as exactly two hexadecimal digits.
 * Because only two hex digits are emitted, only characters in the range U+0000 to U+00FF can be encoded;
 * encoding any other character fails with an {@link IllegalArgumentException}.
 * <p>
 * For an example use, this Rfc3986Escaper escapes every character that would not be in a simple floating-point
 * number; that is, does not match {@code [-.0-9]}.
 * <pre>{@code
 * Rfc3986Escaper escaper = new Rfc3986Escaper.Builder()
 *     .inverseLegality() // escape everything NOT in the set instead
 *     .addChars('.', '-')
 *     .addCharRange(0x30, 0x39) // 0-9
 *     .build();
 * String escaped = escaper.escape("ab-1.332cd"); // "%61%62-1.332%63%64"
 * String unescaped = escaper.unescape(escaped); // "ab-1.332cd"
 * }</pre>
 * @author Douglas Myers-Turnbull
 */
@ThreadSafe
public class Rfc3986Escaper extends IllegalCharacterEscaper {

    /** Matches exactly one percent-encoded token: '%' followed by two hex digits of either case. */
    private static final Pattern sf_encodedPattern = Pattern.compile("%(?:\\d|[A-Fa-f]){2}");

    /**
     * Creates an escaper from the builder's inverse flag and an immutable snapshot of its character set.
     * @param builder The builder whose settings are copied
     */
    private Rfc3986Escaper(@Nonnull Builder builder) {
        super(builder.m_inverse, ImmutableSet.copyOf(builder.m_chars));
    }

    /**
     * Returns a function that maps a single character to {@code %xx}, where {@code xx} is the character's value
     * as two lowercase hexadecimal digits (zero-padded).
     * The returned function throws {@link IllegalArgumentException} for any character above U+00FF, because such a
     * character cannot be represented in two hex digits.
     * @return The per-character encoding function
     */
    @Nonnull
    @Override
    protected Function<Character, String> encoder() {
        return c -> {
            int value = c.charValue();
            // Two hex digits can only carry values up to 0xFF; fail loudly rather than truncate.
            Preconditions.checkArgument(value <= 0xFF,
                    "Cannot encode character U+%s as two hex digits; only U+0000 to U+00FF are supported",
                    String.format("%04X", value));
            return "%" + String.format("%02x", value);
        };
    }

    /**
     * Returns a function that maps a single {@code %XX} token back to the character it encodes.
     * The returned function throws {@link IllegalArgumentException} if its argument is not exactly a percent sign
     * followed by two hexadecimal digits (upper- or lowercase).
     * @return The per-token decoding function
     */
    @Nonnull
    @Override
    protected Function<String, Character> unencoder() {
        return s -> {
            Preconditions.checkArgument(sf_encodedPattern.matcher(s).matches(), "Bad RFC3986-encoded string " + s);
            // Skip the leading '%' and parse the two remaining hex digits.
            return (char) Integer.parseInt(s.substring(1), 16);
        };
    }

    /**
     * Builds {@link Rfc3986Escaper} instances.
     */
    @NotThreadSafe
    public static class Builder extends IllegalCharacterEscaper.Builder<Rfc3986Escaper, Builder> {

        /**
         * Builds the escaper after verifying that the percent sign itself will be escaped.
         * @return A new escaper configured from this builder
         * @throws IllegalStateException If the configuration would leave {@code %} unescaped: that is, if the set is
         *         not inverted and lacks {@code %}, or is inverted and contains {@code %}
         */
        @Nonnull
        @Override
        public Rfc3986Escaper build() {
            // '%' is escaped when it is in a non-inverted set, or absent from an inverted one.
            boolean escapesPercent = m_inverse != m_chars.contains('%');
            Preconditions.checkState(escapesPercent,
                    getClass().getSimpleName() + " MUST escape % for escape and unescape to be inverses");
            return new Rfc3986Escaper(this);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment