import java.nio.charset.Charset;

/**
 * Performs percent-encoding and percent-decoding as described in
 * <a href="https://www.rfc-editor.org/rfc/rfc3986#section-2.1">RFC 3986, section 2.1</a>.
 *
 * <p>Complete URIs are not handled by this implementation; that is best done
 * with {@link java.net.URI} from core Java. The purpose of this class is to
 * be a simple tool for encoding/decoding the <em>inner parts</em> of a URI,
 * such as a single path segment (the part between two forward slashes) or a
 * name or value of the query, where all reserved characters must be escaped.
 *
 * <p>Instances only hold the charset handed to the constructor and never
 * modify it after construction.
 *
 * @author kai
 */
public class PercentCodec {
  /** Digits used when writing an escape sequence (lowercase, as this class always did). */
  private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

  /** Charset used to turn characters into the bytes that get escaped, and back. */
  private final Charset charset;


  /**
   * Creates a codec that uses the charset with the given name.
   *
   * @param encoding the name of the charset, e.g. {@code "UTF-8"}
   * @throws IllegalArgumentException if the name is {@code null}, syntactically
   *     illegal, or no such charset is available (as thrown by
   *     {@link Charset#forName(String)})
   */
  public PercentCodec(String encoding) {
    this(Charset.forName(encoding));
  }

  /**
   * Creates a codec that uses the given charset.
   *
   * @param charset the charset used for the character/byte conversion
   * @throws NullPointerException if {@code charset} is {@code null}
   */
  public PercentCodec(Charset charset) {
    if (charset == null) {
      throw new NullPointerException("charset");
    }
    this.charset = charset;
  }


  /**
   * Percent-encodes the given text.
   *
   * <p>Unreserved characters (ASCII letters, ASCII digits, {@code '-'},
   * {@code '_'}, {@code '.'} and {@code '~'}) are copied unchanged. Every
   * other code point is converted to bytes with the codec's charset and each
   * byte is written as {@code %xx} with lowercase hexadecimal digits.
   * Code points the charset cannot represent are substituted by the charset's
   * replacement bytes, as done by {@link String#getBytes(Charset)}.
   *
   * @param in the text to encode
   * @return the encoded text; empty if {@code in} is empty
   * @throws NullPointerException if {@code in} is {@code null}
   */
  public String encode(CharSequence in) {
    int length = in.length();
    StringBuilder out = new StringBuilder(length);
    int i = 0;
    while (i < length) {
      // Walk by code point so that a surrogate pair is encoded as one character.
      int codePoint = Character.codePointAt(in, i);
      i += Character.charCount(codePoint);
      if (isUnreserved(codePoint)) {
        // Unreserved characters can (and should!) stay unchanged.
        out.append((char) codePoint);
        continue;
      }
      // Everything else is reserved or special and, hence, must be escaped.
      byte[] bytes = new String(Character.toChars(codePoint)).getBytes(charset);
      for (byte b : bytes) {
        out.append('%');
        out.append(HEX_DIGITS[(b >> 4) & 0xF]);
        out.append(HEX_DIGITS[b & 0xF]);
      }
    }
    return out.toString();
  }

  /**
   * Decodes percent-encoded text.
   *
   * <p>Every run of consecutive {@code %xx} escape sequences is collected
   * into one byte sequence and converted to characters with the codec's
   * charset, so multi-byte characters are reassembled correctly. Hexadecimal
   * digits are accepted in upper and lower case. All characters outside of
   * escape sequences are copied unchanged (in particular, {@code '+'} is
   * <em>not</em> turned into a space). Byte sequences that are malformed in
   * the charset are substituted by the charset's replacement string, as done
   * by {@link String#String(byte[], int, int, Charset)}.
   *
   * @param in the text to decode
   * @return the decoded text; empty if {@code in} is empty
   * @throws IllegalArgumentException if a {@code '%'} is followed by something
   *     other than two ASCII hexadecimal digits, or by fewer than two characters
   * @throws NullPointerException if {@code in} is {@code null}
   */
  public String decode(CharSequence in) {
    int length = in.length();
    StringBuilder out = new StringBuilder(length);
    int i = 0;
    while (i < length) {
      char c = in.charAt(i);
      if (c != '%') {
        out.append(c);
        i++;
        continue;
      }
      // Each escape sequence consumes three characters and yields one byte,
      // so the remaining input can produce at most (length - i) / 3 bytes.
      byte[] bytes = new byte[(length - i) / 3];
      int pos = 0;
      while (i + 2 < length && in.charAt(i) == '%') {
        int high = hexValue(in.charAt(i + 1));
        int low = hexValue(in.charAt(i + 2));
        if (high < 0 || low < 0) {
          throw new IllegalArgumentException(
              "Illegal escape-sequence: " + in.subSequence(i, i + 3));
        }
        bytes[pos++] = (byte) ((high << 4) | low);
        i += 3;
      }
      out.append(new String(bytes, 0, pos, charset));
      // A '%' left over here has fewer than two characters after it.
      if (i < length && in.charAt(i) == '%') {
        throw new IllegalArgumentException(
            "Incomplete escape-sequence: " + in.subSequence(i, length));
      }
    }
    return out.toString();
  }


  /** Tells whether the code point is one of the characters that encode() copies unescaped. */
  private static boolean isUnreserved(int codePoint) {
    return (codePoint >= 'a' && codePoint <= 'z')
        || (codePoint >= 'A' && codePoint <= 'Z')
        || (codePoint >= '0' && codePoint <= '9')
        || codePoint == '-'
        || codePoint == '_'
        || codePoint == '.'
        || codePoint == '~';
  }

  /**
   * Returns the value (0-15) of an ASCII hexadecimal digit, or -1 for any
   * other character. Deliberately ASCII-only: Character.digit() would also
   * accept digits from other scripts, which are not valid in an escape.
   */
  private static int hexValue(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }
}