/**
 * Performs percent-encoding and -decoding as described in RFC 3986.
 * <p>
 * Complete URIs are not handled by this implementation; that is done best
 * with the {@link java.net.URI} class from core Java. The purpose of this
 * class is to be a simple tool for encoding/decoding the inner parts of a
 * URI, like a segment of the URI path (the part between two forward slashes)
 * or a name or value segment of the query, where all reserved characters
 * must be encoded/decoded.
 * <p>
 * Instances only hold an immutable {@code Charset} and keep no state between
 * calls.
 *
 * @author kai
 */
public class PercentCodec {
  /** Charset used to turn characters into bytes (encode) and bytes into characters (decode). */
  private final java.nio.charset.Charset charset;


  /**
   * Creates a codec that uses the given character encoding for the bytes
   * behind the escape sequences.
   *
   * @param encoding the name of the charset, e.g. {@code "UTF-8"}
   * @throws IllegalArgumentException if {@code encoding} is {@code null},
   *         is not a legal charset name or names an unsupported charset
   *         (as thrown by {@code Charset.forName(String)})
   */
  public PercentCodec(String encoding) {
    charset = java.nio.charset.Charset.forName(encoding);
  }


  /**
   * Percent-encodes every character that is not an RFC 3986 "unreserved"
   * character (ASCII letters, ASCII digits, {@code '-'}, {@code '_'},
   * {@code '.'} and {@code '~'}).
   * <p>
   * Each other code point is converted to bytes with the configured charset
   * and every byte is written as {@code %xx} with lowercase hex digits.
   * Code points the charset cannot represent are replaced by the charset's
   * replacement bytes (see {@code String.getBytes(Charset)}), which is
   * {@code '?'} ({@code %3f}) for the common charsets.
   *
   * @param in the text to encode
   * @return the encoded text
   * @throws NullPointerException if {@code in} is {@code null}
   */
  public String encode(CharSequence in) {
    int length = in.length();
    StringBuilder out = new StringBuilder(length);
    int i = 0;
    while (i < length) {
      // Work on code points, so that surrogate pairs are encoded as one character.
      int codePoint = Character.codePointAt(in, i);
      i += Character.charCount(codePoint);
      if (isUnreserved(codePoint)) {
        // Unreserved characters can (and should!) stay unchanged.
        out.append((char) codePoint);
        continue;
      }
      // All other characters are reserved or special characters and, hence,
      // must be encoded. This is done per code point, exactly one character
      // at a time.
      byte[] bytes = new String(Character.toChars(codePoint)).getBytes(charset);
      for (byte b : bytes) {
        out.append('%');
        out.append(Character.forDigit((b >> 4) & 0xF, 16));
        out.append(Character.forDigit(b & 0xF, 16));
      }
    }
    return out.toString();
  }

  /**
   * Decodes all {@code %xx} escape sequences in the given text.
   * <p>
   * A run of directly adjacent escape sequences is collected into one byte
   * sequence and converted to characters with the configured charset, so
   * that multi-byte characters are restored correctly. Upper- and lowercase
   * hex digits are accepted. All other characters are copied unchanged.
   *
   * @param in the text to decode
   * @return the decoded text
   * @throws IllegalArgumentException if a {@code '%'} is followed by
   *         something other than two hex digits, or by fewer than two
   *         characters
   * @throws NullPointerException if {@code in} is {@code null}
   */
  public String decode(CharSequence in) {
    int length = in.length();
    StringBuilder out = new StringBuilder(length);
    int i = 0;
    while (i < length) {
      char c = in.charAt(i);
      if (c != '%') {
        out.append(c);
        i++;
        continue;
      }
      // Every escape sequence takes three characters, so the rest of the
      // input can hold at most (length - i) / 3 encoded bytes.
      byte[] bytes = new byte[(length - i) / 3];
      int pos = 0;
      while (i + 2 < length && in.charAt(i) == '%') {
        int high = hexValue(in.charAt(i + 1));
        int low = hexValue(in.charAt(i + 2));
        if (high < 0 || low < 0) {
          // The subsequence already starts with the '%'.
          throw new IllegalArgumentException("Illegal escape-sequence: " + in.subSequence(i, i + 3));
        }
        bytes[pos++] = (byte) ((high << 4) | low);
        i += 3;
      }
      out.append(new String(bytes, 0, pos, charset));
      // A '%' that is left over here has fewer than two characters behind it.
      if (i < length && in.charAt(i) == '%') {
        throw new IllegalArgumentException("Incomplete escape-sequence: " + in.subSequence(i, length));
      }
    }
    return out.toString();
  }

  /** Tells whether the code point is an "unreserved" character in the sense of RFC 3986, section 2.3. */
  private static boolean isUnreserved(int codePoint) {
    return (codePoint >= 'a' && codePoint <= 'z')
        || (codePoint >= 'A' && codePoint <= 'Z')
        || (codePoint >= '0' && codePoint <= '9')
        || codePoint == '-'
        || codePoint == '_'
        || codePoint == '.'
        || codePoint == '~';
  }

  /** Returns the value (0-15) of an ASCII hex digit, or -1 if the character is not an ASCII hex digit. */
  private static int hexValue(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
    }
    return -1;
  }
}
*/ + if (a == 10 || b == 10 || c == 10) + continue; + StringBuilder input = new StringBuilder(); + input.append(decoded[a]); + input.append(decoded[b]); + input.append(decoded[c]); + StringBuilder expected = new StringBuilder(); + expected.append(encoded_latin1[a]); + expected.append(encoded_latin1[b]); + expected.append(encoded_latin1[c]); + String output = codec.encode(input); + log.debug("{}\t-> {}", input, output); + Assert.assertEquals("\"" + input + "\" was encoded falsely", expected.toString(), output); + } + } + } + } + + @Test + public void testDecodeLatin1() throws Exception { + PercentCodec codec = new PercentCodec("latin1"); + + for (int a = 0; a < decoded.length; a++) { + for (int b = 0; b < decoded.length; b++) { + for (int c = 0; c < decoded.length; c++) { + /** Das Zeichen '€' existiert in Latin1 nicht! */ + if (a == 10 || b == 10 || c == 10) + continue; + StringBuilder input = new StringBuilder(); + input.append(encoded_latin1[a]); + input.append(encoded_latin1[b]); + input.append(encoded_latin1[c]); + StringBuilder expected = new StringBuilder(); + expected.append(decoded[a]); + expected.append(decoded[b]); + expected.append(decoded[c]); + String output = codec.decode(input); + log.debug("{}\t-> {}", input, output); + Assert.assertEquals("\"" + input + "\" was decoded falsely", expected.toString(), output); + } + } + } + } + + @Test + public void testEncodeUtf8() throws Exception { + PercentCodec codec = new PercentCodec("UTF-8"); + + for (int a = 0; a < decoded.length; a++) { + for (int b = 0; b < decoded.length; b++) { + for (int c = 0; c < decoded.length; c++) { + StringBuilder input = new StringBuilder(); + input.append(decoded[a]); + input.append(decoded[b]); + input.append(decoded[c]); + StringBuilder expected = new StringBuilder(); + expected.append(encoded_utf8[a]); + expected.append(encoded_utf8[b]); + expected.append(encoded_utf8[c]); + String output = codec.encode(input); + log.debug("{}\t-> {}", input, output); + 
Assert.assertEquals("\"" + input + "\" was encoded falsely", expected.toString(), output); + } + } + } + } + + @Test + public void testDecodeUtf8() throws Exception { + PercentCodec codec = new PercentCodec("UTF-8"); + + for (int a = 0; a < decoded.length; a++) { + for (int b = 0; b < decoded.length; b++) { + for (int c = 0; c < decoded.length; c++) { + StringBuilder input = new StringBuilder(); + input.append(encoded_utf8[a]); + input.append(encoded_utf8[b]); + input.append(encoded_utf8[c]); + StringBuilder expected = new StringBuilder(); + expected.append(decoded[a]); + expected.append(decoded[b]); + expected.append(decoded[c]); + String output = codec.decode(input); + log.debug("{}\t-> {}", input, output); + Assert.assertEquals("\"" + input + "\" was decoded falsely", expected.toString(), output); + } + } + } + } +} diff --git a/utils/src/test/resources/log4j.xml b/utils/src/test/resources/log4j.xml new file mode 100644 index 00000000..18822a58 --- /dev/null +++ b/utils/src/test/resources/log4j.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + +