Implemented aspect to deal with long/bad data in Facebook-Entries
[facebook-errors] / src / main / java / de / juplo / facebook / aspects / SanitizeAspect.java
diff --git a/src/main/java/de/juplo/facebook/aspects/SanitizeAspect.java b/src/main/java/de/juplo/facebook/aspects/SanitizeAspect.java
new file mode 100644 (file)
index 0000000..4bdf908
--- /dev/null
@@ -0,0 +1,93 @@
+package de.juplo.facebook.aspects;
+
+
+import org.aspectj.lang.ProceedingJoinPoint;
+import org.aspectj.lang.annotation.Around;
+import org.aspectj.lang.annotation.Aspect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+
+/**
+ * @author Kai Moritz
+ */
+@Aspect
+public class SanitizeAspect
+{
+  private static final Logger log =
+      LoggerFactory.getLogger(SanitizeAspect.class);
+
+
+  /**
+   * This method sanitizes the given string in all means:
+   * <ul>
+   * <li>It removes leading and trailing whitspace.</li>
+   * <li>It removes characters, that are not allowed in the XML-output</li>
+   * <li>It checks the allowed length of the string</li>
+   * </ul>
+   *
+   * This method ensures that the output String has only
+   * valid XML unicode characters as specified by the
+   * XML 1.0 standard. For reference, please see
+   * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
+   * standard</a>. This method will return an empty
+   * String if the input is null or empty.
+   *
+   * @param jp The join-point captured by AspectJ.
+   * @param in The String whose non-valid characters we want to remove.
+   * @param sanitize The annotation, the field was marked with.
+   * @see <a href="http://blog.mark-mclaren.info/2007/02/invalid-xml-characters-when-valid-utf8_5873.html">Invalid XML Characters: when valid UTF8 does not mean valid XML</a>
+   * @see <a href="http://up-download.de/up/docs/werkstatt/de/intrexx-werkstatt-ungueltige-zeichen-in-eingabefeldern-abfangen.pdf">Ungültige Zeichen in Eingabefeldern abfangen</a>
+   */
+  @Around("set(String *) && args(in) && @annotation(sanitize)")
+  public void sanitize(
+      ProceedingJoinPoint jp,
+      String in,
+      Sanitize sanitize
+      )
+      throws Throwable
+  {
+    if (in == null)
+    {
+      jp.proceed(new Object[] { null });
+      return;
+    }
+
+    in = in.trim();
+    if ("".equals(in))
+    {
+      jp.proceed(new Object[] { null });
+      return;
+    }
+
+    StringBuilder out = new StringBuilder(); // Used to hold the output.
+    char current; // Used to reference the current character.
+
+    for (int i = 0; i < in.length(); i++)
+    {
+      current = in.charAt(i); // NOTE: No IndexOutOfBoundsException caught here; it should not happen.
+      if ((current == 0x9) ||
+          (current == 0xA) ||
+          (current == 0xD) ||
+          ((current >= 0x20) && (current <= 0xD7FF)) ||
+          ((current >= 0xE000) && (current <= 0xFFFD)) ||
+          ((current >= 0x10000) && (current <= 0x10FFFF)))
+        out.append(current);
+    }
+    if (out.length() > sanitize.length())
+    {
+      log.error(
+          "Maximum length for attribute {} exceeded: should={}, was={}",
+          jp.getSignature().getName(),
+          sanitize.length(),
+          out.length()
+          );
+      if (sanitize.fail())
+        throw new RuntimeException("String is longer than " + sanitize.length());
+      else
+        out.setLength(sanitize.length());
+    }
+    jp.proceed(new Object[] { out.toString() });
+  }
+}