View Javadoc
1   package de.juplo.httpresources;
2   
3   import org.slf4j.Logger;
4   import org.slf4j.LoggerFactory;
5   import org.springframework.cache.Cache;
6   import org.springframework.http.HttpHeaders;
7   import org.springframework.http.HttpMethod;
8   import org.springframework.http.HttpStatus;
9   import org.springframework.http.client.ClientHttpRequest;
10  import org.springframework.http.client.ClientHttpRequestFactory;
11  import org.springframework.http.client.ClientHttpResponse;
12  import org.springframework.util.FileCopyUtils;
13  
14  import java.io.IOException;
15  import java.io.InputStream;
16  import java.net.URI;
17  import java.time.Clock;
18  import java.util.HashMap;
19  import java.util.Map;
20  import java.util.function.Predicate;
21  
22  import static org.springframework.http.HttpHeaders.*;
23  
24  public class HttpResourceFetcher
25  {
26    private final static Logger LOG =
27            LoggerFactory.getLogger(HttpResourceFetcher.class);
28  
29    public final static int DEFAULT_TTL = 3600;
30  
31    private final ClientHttpRequestFactory requestFactory;
32    private final Cache cache;
33    private final Clock clock;
34  
35    private final Predicate<String> filter;
36  
37    private final int defaultTTL;
38    private final int minTTL;
39  
40    private final boolean serveStale;
41  
42  
43    public HttpResourceFetcher(ClientHttpRequestFactory factory, Cache cache, Clock clock)
44    {
45      this(factory, cache, clock, DEFAULT_TTL, 0, true);
46    }
47  
48    /**
49     *
50     * @param factory
51     * @param clock
52     * @param defaultTTL default time to live in milliseconds
53     * Default TTL, if no according information is present in the HTTP-headers.
54     * If set to <code>0</code>, caching will be disabled, if no according
55     * HTTP-headers are present.
56     * @param minTTL the minimum time to live, if caching is enabled
57     * A minimum TTL, that will overwrite the time to live, that was extracted
58     * from the HTTP-headers.
59     * The minimum TTL is only applied, if the default TTL is set to a value
60     * greater than zero, hence enabling caching by default.
61     * @param serveStale
62     */
63    public HttpResourceFetcher(
64        ClientHttpRequestFactory factory,
65        Cache cache,
66        Clock clock,
67        int defaultTTL,
68        int minTTL,
69        boolean serveStale)
70    {
71      this.requestFactory = factory;
72      this.cache = cache;
73      this.clock = clock;
74      this.defaultTTL = defaultTTL;
75      this.minTTL = minTTL;
76      this.serveStale = serveStale;
77  
78      /** Filter may pass on more headers and may also be configurable later */
79      filter = (key) -> CONTENT_TYPE.equalsIgnoreCase(key);
80    }
81  
82  
83    /**
84     * Fetches the remote resource and reports, if it was modified.
85     *
86     * This method fetches the remote resource, if was not already fetched.
87     * If the resource was already fetched, it revalidates it, if necessary.
88     *
89     * @return {@code true}, if the resource has changed or was fetched for
90     * the first time, otherwise {@code false}
91     */
92    public HttpData./de/juplo/httpresources/HttpData.html#HttpData">HttpData fetch(URI uri, HttpData data)
93    {
94      HttpData cached = cache.get(uri, HttpData.class);
95      if (cached != null && cached != data)
96      {
97        if (cached.expires >= clock.millis())
98          return cached;
99        else
100         data = cached;
101     }
102 
103     boolean has_existed = data.content != null;
104     boolean must_revalidate = data.revalidate;
105 
106     ClientHttpRequest request = createGetRequest(uri);
107     if (has_existed)
108     {
109       if (data.eTag != null)
110         request.getHeaders().setIfNoneMatch(data.eTag);
111       if (data.lastModified > 0)
112         request.getHeaders().setIfModifiedSince(data.lastModified);
113     }
114 
115     HttpStatus status = HttpStatus.INTERNAL_SERVER_ERROR;
116     long request_time = clock.millis();
117 
118     try (ClientHttpResponse response = request.execute())
119     {
120       status = response.getStatusCode();
121 
122       LOG.debug("{} -- {}", status, uri);
123 
124       switch (status)
125       {
126         case OK:
127 
128           InputStream body = response.getBody();
129           byte[] content = FileCopyUtils.copyToByteArray(body);
130 
131           data = readCachingHeaders(content, response, response.getHeaders(), request_time, defaultTTL);
132           cache.put(uri, data);
133           return data;
134 
135         case NOT_MODIFIED:
136 
137           data = readCachingHeaders(data.content, response, data.headers, request_time, defaultTTL);
138           cache.put(uri, data);
139           return data;
140 
141 
142         case NOT_FOUND:
143 
144           // If stale data is served, 404-responses for already cached
145           // resources are quietly ignored
146           if (!serveStale)
147           {
148             data = readCachingHeaders(null, response, response.getHeaders(), request_time, 0);
149           }
150 
151           cache.put(uri, data);
152           return data;
153 
154         default:
155 
156           // TODO:
157           // A client has at least to distinguish the status-classes, denoted
158           // by the first digit of the code.
159           // See: https://tools.ietf.org/html/rfc7231#section-6
160       }
161     }
162     catch (IOException e)
163     {
164       LOG.error("Cannot retrieve {}: {}", uri, e.toString());
165     }
166 
167 
168     if ((data.content == null) || (data.revalidate && !serveStale))
169     {
170       data = HttpData.SERVER_ERROR;
171     }
172     else
173     {
174       // An existing resource can be considered still existent, if the
175       // request fails and the resource does not have to be revalidated.
176       // It will also be considered as still existent, if revalidation is
177       // enforced through the HTTP-protocol, but serving stale data is
178       // enabled.
179     }
180 
181     cache.put(uri, data);
182     return data;
183   }
184 
185 
186   private HttpData readCachingHeaders(
187       byte[] content,
188       ClientHttpResponse response,
189       HttpHeaders headers,
190       long request_time,
191       long defaultTTL
192       )
193   {
194     HttpHeaders responseHeaders = response.getHeaders();
195     // Calculating Freshness Lifetime
196     // See: https://tools.ietf.org/html/rfc7234#section-4.2.1
197 
198     long now = clock.millis();
199 
200     long date_value = responseHeaders.getDate();
201     long apparent_age = 0L;
202 
203     if (date_value < 0L)
204     {
205       // Use the current time, if no date-header is present
206       // See: https://tools.ietf.org/html/rfc7231#section-7.1.1.2
207       date_value = now;
208     }
209     else
210     {
211       apparent_age = now - date_value;
212       apparent_age = apparent_age < 0 ? 0 : apparent_age;
213     }
214 
215     long corrected_age_value = 0L;
216     String value = responseHeaders.getFirst(AGE);
217     if (value != null)
218     {
219       try
220       {
221         long response_delay = now - request_time;
222         long age_value = Long.parseLong(value) * 1000; // convert s to ms
223         age_value = age_value < 0 ? 0 : age_value;
224         corrected_age_value = age_value + response_delay;
225       }
226       catch (NumberFormatException e) {}
227     }
228 
229     long corrected_initial_age =
230         apparent_age > corrected_age_value
231             ? apparent_age
232             : corrected_age_value;
233 
234     long lastModified = responseHeaders.getLastModified();
235     lastModified = lastModified < 0L ? 0 : lastModified;
236 
237     String eTag = responseHeaders.getETag();
238 
239 
240     Map<String, String> directives = new HashMap<>();
241     for (String field : responseHeaders.getOrEmpty(CACHE_CONTROL))
242     {
243       for (String directive : field.split("\\s*,\\s*"))
244       {
245         String[] splitted = directive.split("\\s*=\\s*");
246         if (splitted.length > 2)
247         {
248           LOG.warn("Ingoring garbled directive: {}", directive);
249           continue;
250         }
251         directives.put(
252             splitted[0].trim(),
253             splitted.length == 2 ? splitted[1] : null
254              );
255       }
256     }
257 
258     long ttl = -1l;
259     try
260     {
261       String maxAge = directives.get("max-age");
262       if (maxAge != null)
263         ttl = Long.parseUnsignedLong(maxAge) *1000;
264     }
265     catch (NumberFormatException e) {}
266 
267     boolean revalidate = directives.containsKey("must-revalidate");
268     long expires = 0l;
269 
270     if (!directives.containsKey("no-cache") ||
271         directives.get("no-cache") != null
272         )
273     {
274       if (ttl < 0)
275       {
276         if (responseHeaders.get(EXPIRES) != null)
277         {
278           expires = responseHeaders.getExpires();
279           // If the "Expires"-header cannot be parsed to a valid date, it has
280           // to be interpreted as a time in the past.
281           // See: https://tools.ietf.org/html/rfc7234#section-5.3
282           expires = expires == -1 ? 0 : expires;
283           return new HttpData(content, filter(headers), revalidate, lastModified, expires, eTag);
284         }
285 
286         ttl = defaultTTL;
287       }
288 
289       expires = date_value + (minTTL > ttl && defaultTTL > 0 ? minTTL : ttl) - corrected_initial_age;
290       return new HttpData(content, filter(headers), revalidate, lastModified, expires, eTag);
291     }
292 
293     expires = minTTL == 0 || defaultTTL == 0 ? date_value : date_value + minTTL - corrected_initial_age;
294     return new HttpData(content, filter(headers), revalidate, lastModified, expires, eTag);
295   }
296 
297   private HttpHeaders filter(HttpHeaders headers)
298   {
299     HttpHeaders result = new HttpHeaders();
300     headers.forEach((key, value) ->
301     {
302       if (filter.test(key))
303         result.put(key, value);
304     });
305     return result;
306   }
307 
308   private ClientHttpRequest createGetRequest(URI uri)
309   {
310     try
311     {
312       ClientHttpRequest request = requestFactory.createRequest(uri, HttpMethod.GET);
313       request.getHeaders().set(ACCEPT_ENCODING, "gzip");
314       return request;
315     }
316     catch (IOException e)
317     {
318       throw new RuntimeException(e);
319     }
320   }
321 }