PercentCodec implementiert

author Kai Moritz <kai@coolibri.de>

Sun, 10 Jul 2011 15:31:12 +0000 (17:31 +0200)

committer Kai Moritz <kai@coolibri.de>

Sat, 28 Jan 2012 12:03:13 +0000 (13:03 +0100)
author Kai Moritz <kai@coolibri.de>
Sun, 10 Jul 2011 15:31:12 +0000 (17:31 +0200)
committer Kai Moritz <kai@coolibri.de>
Sat, 28 Jan 2012 12:03:13 +0000 (13:03 +0100)
diff --git a/pom.xml b/pom.xml

index f9bb030..c82fe9e 100644 (file)
--- a/pom.xml
+++ b/pom.xml
@@ -25,6 +25,7 @@
  
    <modules>
      <module>cachecontrol</module>
+    <module>utils</module>
    </modules>
  
    <properties>
@@ -34,6 +35,7 @@
  
      <!-- Verwendete Versionen -->
      <junit.version>4.8.1</junit.version>
+    <log4j.version>1.2.15</log4j.version>
      <servlet-api.version>2.5</servlet-api.version>
      <slf4j.version>1.6.1</slf4j.version>
      <springframework.version>3.0.5.RELEASE</springframework.version>
diff --git a/utils/pom.xml b/utils/pom.xml

new file mode 100644 (file)

index 0000000..e9bab03
--- /dev/null
+++ b/utils/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>de.halbekunst</groupId>
+    <artifactId>juplo</artifactId>
+    <version>1.0.1-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>${pom.parent.artifactId}-utils</artifactId>
+  <name>Juplo - Utils</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>${slf4j.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+      </exclusions>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>${log4j.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.mail</groupId>
+          <artifactId>mail</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>javax.jms</groupId>
+          <artifactId>jms</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jdmk</groupId>
+          <artifactId>jmxtools</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jmx</groupId>
+          <artifactId>jmxri</artifactId>
+        </exclusion>
+      </exclusions>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>
diff --git a/utils/src/main/java/de/halbekunst/juplo/utils/PercentCodec.java b/utils/src/main/java/de/halbekunst/juplo/utils/PercentCodec.java

new file mode 100644 (file)

index 0000000..d4c53ad
--- /dev/null
+++ b/utils/src/main/java/de/halbekunst/juplo/utils/PercentCodec.java
@@ -0,0 +1,125 @@
+package de.halbekunst.juplo.utils;
+
+import java.nio.charset.Charset;
+
+/**
+ * This class performs percent-encoding/-decoding as described in RFC 3986.
+ * <p>
+ * Complete URIs are not handled by this implementation.
+ * That is done best with the original {@linkplain java.net.URI}-class from core Java.
+ * The purpose of this class is to have a simple tool to encode/decode the
+ * inner parts of a URI, like a segment of the URI-path (the part between two
+ * forward slashes) or a name or value segment of the query, where all reserved
+ * characters must be encoded/decoded.
+ *
+ * @author kai
+ */
+public class PercentCodec {
+  /** Charset that maps characters to the bytes that get escaped, and back. */
+  private final Charset charset;
+
+
+  /**
+   * Creates a codec for the given character encoding.
+   *
+   * @param encoding name of the charset, as understood by {@link Charset#forName(String)}
+   */
+  public PercentCodec(String encoding) {
+    charset = Charset.forName(encoding);
+  }
+
+
+  /**
+   * Percent-encodes every character of the input that is not an unreserved
+   * character (ASCII letter, ASCII digit, '-', '_', '.' or '~').
+   * <p>
+   * Escaped characters are converted to bytes with the charset of this codec
+   * and each byte is written as '%' followed by two lower-case hex digits.
+   *
+   * @param in the raw text
+   * @return the percent-encoded text
+   */
+  public String encode(CharSequence in) {
+    StringBuilder out = new StringBuilder();
+    int i = 0;
+    int length = in.length();
+    while (i < length) {
+      // Walk by code point, so that a surrogate pair is encoded as one character.
+      int codePoint = Character.codePointAt(in, i);
+      i += Character.charCount(codePoint);
+      if (isUnreserved(codePoint)) {
+        // Unreserved characters can (and should!) stay unchanged.
+        out.append((char)codePoint);
+        continue;
+      }
+      // All other characters are reserved or special and, hence, must be encoded.
+      byte[] bytes = new String(Character.toChars(codePoint)).getBytes(charset);
+      for (int j = 0; j < bytes.length; j++) {
+        out.append('%');
+        out.append(Character.forDigit((bytes[j] >> 4) & 0xF, 16));
+        out.append(Character.forDigit(bytes[j] & 0xF, 16));
+      }
+    }
+    return out.toString();
+  }
+
+  /**
+   * Decodes all percent-escapes of the input.
+   * <p>
+   * Consecutive escapes are collected and converted together with the
+   * charset of this codec, so that multi-byte characters are restored.
+   * Characters outside of an escape are copied unchanged.
+   *
+   * @param in the percent-encoded text
+   * @return the decoded text
+   * @throws IllegalArgumentException if a '%' is not followed by two hex digits
+   */
+  public String decode(CharSequence in) {
+    StringBuilder out = new StringBuilder();
+    int i = 0;
+    int length = in.length();
+    while (i < length) {
+      char c = in.charAt(i);
+      if (c != '%') {
+        out.append(c);
+        i++;
+        continue;
+      }
+      // A run of escapes needs three characters per byte, so this is enough room.
+      byte[] bytes = new byte[(length - i) / 3];
+      int pos = 0;
+      while (i + 2 < length && in.charAt(i) == '%') {
+        int high = hexValue(in.charAt(i + 1));
+        int low = hexValue(in.charAt(i + 2));
+        if (high < 0 || low < 0)
+          throw new IllegalArgumentException("Illegal escape-sequence: " + in.subSequence(i, i + 3));
+        bytes[pos++] = (byte)(16 * high + low);
+        i += 3;
+      }
+      out.append(new String(bytes, 0, pos, charset));
+      // A '%' left over here has less than two characters behind it.
+      if (i < length && in.charAt(i) == '%')
+        throw new IllegalArgumentException("Incomplete escape-sequence: " + in.subSequence(i, length));
+    }
+    return out.toString();
+  }
+
+  /** Tells whether the code point is an unreserved character of RFC 3986. */
+  private static boolean isUnreserved(int codePoint) {
+    return (codePoint >= 'a' && codePoint <= 'z')
+        || (codePoint >= 'A' && codePoint <= 'Z')
+        || (codePoint >= '0' && codePoint <= '9')
+        || codePoint == '-' || codePoint == '_' || codePoint == '.' || codePoint == '~';
+  }
+
+  /** Returns the value of an ASCII hex digit, or -1 for any other character. */
+  private static int hexValue(char c) {
+    if (c >= '0' && c <= '9')
+      return c - '0';
+    if (c >= 'a' && c <= 'f')
+      return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F')
+      return c - 'A' + 10;
+    return -1;
+  }
+}
diff --git a/utils/src/test/java/de/halbekunst/juplo/utils/PercentCodecTest.java b/utils/src/test/java/de/halbekunst/juplo/utils/PercentCodecTest.java

new file mode 100644 (file)

index 0000000..a06784e
--- /dev/null
+++ b/utils/src/test/java/de/halbekunst/juplo/utils/PercentCodecTest.java
@@ -0,0 +1,92 @@
+package de.halbekunst.juplo.utils;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Checks {@link PercentCodec} against tables of known escapes.
+ * <p>
+ * Every combination of three sample characters is encoded and decoded, once
+ * with Latin-1 and once with UTF-8, so that escapes are also verified next to
+ * unescaped characters and other escapes.
+ */
+public class PercentCodecTest {
+  private final static Logger log = LoggerFactory.getLogger(PercentCodecTest.class);
+
+  /** Sample characters; the other tables hold the expected escape at the same index. */
+  public final static char[] decoded = { ' ', '+', 'q', 's', '8', '0', 'x', 'ä', 'ß', 'à', '€', '¢', '@', '/', '?', '#', ';','.', '&', '%' };
+  /** Expected Latin-1 escapes; the entry for '€' is never checked. */
+  public final static String[] encoded_latin1 = { "%20", "%2b", "q", "s", "8", "0", "x", "%e4", "%df", "%e0", "%3f", "%a2", "%40", "%2f", "%3f", "%23", "%3b",".", "%26", "%25" };
+  /** Expected UTF-8 escapes. */
+  public final static String[] encoded_utf8 = { "%20", "%2b", "q", "s", "8", "0", "x", "%c3%a4", "%c3%9f", "%c3%a0", "%e2%82%ac", "%c2%a2", "%40", "%2f", "%3f", "%23", "%3b",".", "%26", "%25" };
+
+  /** Index of '€' in {@link #decoded}: that character does not exist in Latin-1. */
+  private final static int EURO = 10;
+
+
+  @Test
+  public void testEncodeLatin1() throws Exception {
+    checkEncode(new PercentCodec("latin1"), encoded_latin1, true);
+  }
+
+  @Test
+  public void testDecodeLatin1() throws Exception {
+    checkDecode(new PercentCodec("latin1"), encoded_latin1, true);
+  }
+
+  @Test
+  public void testEncodeUtf8() throws Exception {
+    checkEncode(new PercentCodec("UTF-8"), encoded_utf8, false);
+  }
+
+  @Test
+  public void testDecodeUtf8() throws Exception {
+    checkDecode(new PercentCodec("UTF-8"), encoded_utf8, false);
+  }
+
+
+  /** Encodes every triple of sample characters and compares with the joined escapes. */
+  private static void checkEncode(PercentCodec codec, String[] encoded, boolean skipEuro) {
+    for (int a = 0; a < decoded.length; a++) {
+      for (int b = 0; b < decoded.length; b++) {
+        for (int c = 0; c < decoded.length; c++) {
+          if (skipEuro && (a == EURO || b == EURO || c == EURO))
+            continue;
+          String plain = plainOf(a, b, c);
+          String output = codec.encode(plain);
+          log.debug("{}\t-> {}", plain, output);
+          Assert.assertEquals("\"" + plain + "\" was encoded falsely", escapedOf(encoded, a, b, c), output);
+        }
+      }
+    }
+  }
+
+  /** Decodes the joined escapes of every triple and compares with the sample characters. */
+  private static void checkDecode(PercentCodec codec, String[] encoded, boolean skipEuro) {
+    for (int a = 0; a < decoded.length; a++) {
+      for (int b = 0; b < decoded.length; b++) {
+        for (int c = 0; c < decoded.length; c++) {
+          if (skipEuro && (a == EURO || b == EURO || c == EURO))
+            continue;
+          String escaped = escapedOf(encoded, a, b, c);
+          String output = codec.decode(escaped);
+          log.debug("{}\t-> {}", escaped, output);
+          Assert.assertEquals("\"" + escaped + "\" was decoded falsely", plainOf(a, b, c), output);
+        }
+      }
+    }
+  }
+
+  /** Joins the three sample characters at the given indices. */
+  private static String plainOf(int a, int b, int c) {
+    return new String(new char[] { decoded[a], decoded[b], decoded[c] });
+  }
+
+  /** Joins the three expected escapes at the given indices of the given table. */
+  private static String escapedOf(String[] encoded, int a, int b, int c) {
+    return encoded[a] + encoded[b] + encoded[c];
+  }
+}
diff --git a/utils/src/test/resources/log4j.xml b/utils/src/test/resources/log4j.xml

new file mode 100644 (file)

index 0000000..18822a5
--- /dev/null
+++ b/utils/src/test/resources/log4j.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE log4j:configuration PUBLIC "-//LOGGER" "log4j.dtd">
+
+<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
+
+       <!-- Appenders -->
+       <appender name="console" class="org.apache.log4j.ConsoleAppender">
+               <param name="Target" value="System.out" />
+               <layout class="org.apache.log4j.PatternLayout">
+                       <param name="ConversionPattern" value="%-5p: %c - %m%n" />
+               </layout>
+       </appender>
+
+       <root>
+               <priority value="debug" />
+               <appender-ref ref="console" />
+       </root>
+
+</log4j:configuration>
author	Kai Moritz <kai@coolibri.de>
	Sun, 10 Jul 2011 15:31:12 +0000 (17:31 +0200)
committer	Kai Moritz <kai@coolibri.de>
	Sat, 28 Jan 2012 12:03:13 +0000 (13:03 +0100)
pom.xml		patch \| blob \| history
utils/pom.xml	[new file with mode: 0644]	patch \| blob
utils/src/main/java/de/halbekunst/juplo/utils/PercentCodec.java	[new file with mode: 0644]	patch \| blob
utils/src/test/java/de/halbekunst/juplo/utils/PercentCodecTest.java	[new file with mode: 0644]	patch \| blob
utils/src/test/resources/log4j.xml	[new file with mode: 0644]	patch \| blob