Implemented aspect to deal with long/bad data in Facebook-Entries
author Kai Moritz <kai@juplo.de>
Thu, 8 Oct 2015 07:14:20 +0000 (09:14 +0200)
committer Kai Moritz <kai@juplo.de>
Tue, 10 Nov 2015 14:51:10 +0000 (15:51 +0100)
pom.xml
src/main/java/de/juplo/facebook/aspects/Sanitize.java [new file with mode: 0644]
src/main/java/de/juplo/facebook/aspects/SanitizeAspect.java [new file with mode: 0644]

diff --git a/pom.xml b/pom.xml
index e5e0440..aa6fefd 100644 (file)
--- a/pom.xml
+++ b/pom.xml
@@ -58,6 +58,7 @@
 
     <!-- used versions -->
     <commons-codec.version>1.7</commons-codec.version>
+    <aspectj.version>1.8.5</aspectj.version>
     <jackson.version>2.6.2</jackson.version>
     <junit.version>4.12</junit.version>
     <logback.version>1.1.3</logback.version>
       <scope>provided</scope>
     </dependency>
 
+    <!-- AspectJ -->
+    <dependency>
+      <groupId>org.aspectj</groupId>
+      <artifactId>aspectjrt</artifactId>
+      <version>${aspectj.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
     <!-- commons-codec -->
     <dependency>
       <groupId>commons-codec</groupId>
           <showWarnings>true</showWarnings>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>aspectj-maven-plugin</artifactId>
+        <version>1.8</version>
+        <configuration>
+          <complianceLevel>1.7</complianceLevel>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>compile</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-source-plugin</artifactId>
diff --git a/src/main/java/de/juplo/facebook/aspects/Sanitize.java b/src/main/java/de/juplo/facebook/aspects/Sanitize.java
new file mode 100644 (file)
index 0000000..0856a04
--- /dev/null
@@ -0,0 +1,18 @@
+package de.juplo.facebook.aspects;
+
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/** Marks a String field, whose assigned values are sanitized by SanitizeAspect.
+ * @author Kai Moritz
+ */
+@Target({ ElementType.FIELD })
+@Retention(RetentionPolicy.RUNTIME)
+public @interface Sanitize
+{
+  int length() default 255; // max. length SanitizeAspect allows; in accordance to @Column(length)
+  boolean fail() default false; // if true, SanitizeAspect throws instead of truncating a too long value
+}
diff --git a/src/main/java/de/juplo/facebook/aspects/SanitizeAspect.java b/src/main/java/de/juplo/facebook/aspects/SanitizeAspect.java
new file mode 100644 (file)
index 0000000..4bdf908
--- /dev/null
@@ -0,0 +1,129 @@
+package de.juplo.facebook.aspects;
+
+
+import org.aspectj.lang.ProceedingJoinPoint;
+import org.aspectj.lang.annotation.Around;
+import org.aspectj.lang.annotation.Aspect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+
+/**
+ * Aspect that sanitizes every value that is assigned to a String field
+ * marked with {@link Sanitize}, before the value is stored in the field.
+ *
+ * @author Kai Moritz
+ */
+@Aspect
+public class SanitizeAspect
+{
+  private static final Logger log =
+      LoggerFactory.getLogger(SanitizeAspect.class);
+
+
+  /**
+   * This advice sanitizes the string, that is assigned to the field:
+   * <ul>
+   * <li>It removes leading and trailing whitespace.</li>
+   * <li>It removes characters, that are not allowed in the XML-output</li>
+   * <li>It checks the allowed length of the string</li>
+   * </ul>
+   *
+   * The advice ensures that the stored String has only
+   * valid XML unicode characters as specified by the
+   * XML 1.0 standard. For reference, please see
+   * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
+   * standard</a>. The input is examined code point by code point, so
+   * that characters beyond the BMP (stored as surrogate pairs) are kept,
+   * whereas unpaired surrogates are removed.
+   * <p>
+   * If the input is null, or empty after trimming, the field is set to
+   * <code>null</code>.
+   * <p>
+   * If the sanitized string is longer than {@link Sanitize#length()}
+   * (counted in UTF-16 code units), an error is logged. Then, the string
+   * is either truncated without splitting a surrogate pair, or, if
+   * {@link Sanitize#fail()} is set, an exception is thrown and the
+   * assignment is not executed.
+   *
+   * @param jp The join-point captured by AspectJ.
+   * @param in The String whose non-valid characters we want to remove.
+   * @param sanitize The annotation, the field was marked with.
+   * @throws RuntimeException if the sanitized string is too long and
+   * {@link Sanitize#fail()} is set.
+   * @throws Throwable if proceeding with the assignment fails.
+   * @see <a href="http://blog.mark-mclaren.info/2007/02/invalid-xml-characters-when-valid-utf8_5873.html">Invalid XML Characters: when valid UTF8 does not mean valid XML</a>
+   * @see <a href="http://up-download.de/up/docs/werkstatt/de/intrexx-werkstatt-ungueltige-zeichen-in-eingabefeldern-abfangen.pdf">Ungültige Zeichen in Eingabefeldern abfangen</a>
+   */
+  @Around("set(String *) && args(in) && @annotation(sanitize)")
+  public void sanitize(
+      ProceedingJoinPoint jp,
+      String in,
+      Sanitize sanitize
+      )
+      throws Throwable
+  {
+    if (in == null)
+    {
+      jp.proceed(new Object[] { null });
+      return;
+    }
+
+    in = in.trim();
+    if ("".equals(in))
+    {
+      jp.proceed(new Object[] { null });
+      return;
+    }
+
+    StringBuilder out = new StringBuilder(in.length()); // Used to hold the output.
+
+    // Iterate over code points, not chars: a char can never exceed 0xFFFF,
+    // hence a char-wise check would drop every supplementary character.
+    for (int i = 0; i < in.length(); )
+    {
+      int current = in.codePointAt(i); // Yields the surrogate itself, if it is unpaired.
+      i += Character.charCount(current);
+      if (isValidXmlChar(current))
+        out.appendCodePoint(current);
+    }
+    if (out.length() > sanitize.length())
+    {
+      log.error(
+          "Maximum length for attribute {} exceeded: should={}, was={}",
+          jp.getSignature().getName(),
+          sanitize.length(),
+          out.length()
+          );
+      if (sanitize.fail())
+        throw new RuntimeException("String is longer than " + sanitize.length());
+
+      int max = sanitize.length();
+      // Do not cut between the two halves of a surrogate pair.
+      if (max > 0 && Character.isHighSurrogate(out.charAt(max - 1)))
+        max--;
+      out.setLength(max);
+    }
+    jp.proceed(new Object[] { out.toString() });
+  }
+
+
+  /**
+   * Checks, if the code point matches the production <code>Char</code>
+   * of the XML 1.0 standard.
+   *
+   * @param c The code point to check.
+   * @return <code>true</code>, if the code point is allowed in XML 1.0.
+   */
+  private static boolean isValidXmlChar(int c)
+  {
+    return
+        (c == 0x9) ||
+        (c == 0xA) ||
+        (c == 0xD) ||
+        ((c >= 0x20) && (c <= 0xD7FF)) ||
+        ((c >= 0xE000) && (c <= 0xFFFD)) ||
+        ((c >= 0x10000) && (c <= 0x10FFFF));
+  }
+}