View Javadoc
1   /**
2    * 
3    */
4   package org.archive.wayback.resourceindex.cdxserver;
5   
6   import java.io.IOException;
7   import java.io.PrintWriter;
8   import java.io.StringWriter;
9   import java.util.ArrayList;
10  import java.util.Arrays;
11  import java.util.Collections;
12  import java.util.List;
13  import java.util.logging.Level;
14  import java.util.logging.Logger;
15  
16  import javax.servlet.http.Cookie;
17  import javax.servlet.http.HttpServletRequest;
18  import javax.servlet.http.HttpServletResponse;
19  
20  import junit.framework.TestCase;
21  
22  import org.archive.cdxserver.CDXQuery;
23  import org.archive.cdxserver.CDXServer;
24  import org.archive.cdxserver.auth.AuthToken;
25  import org.archive.cdxserver.auth.PrivTokenAuthChecker;
26  import org.archive.cdxserver.writer.CDXWriter;
27  import org.archive.cdxserver.writer.HttpCDXWriter;
28  import org.archive.format.cdx.CDXFieldConstants;
29  import org.archive.format.cdx.CDXLine;
30  import org.archive.format.cdx.FieldSplitFormat;
31  import org.archive.format.gzip.zipnum.ZipNumCluster;
32  import org.archive.format.gzip.zipnum.ZipNumParams;
33  import org.archive.util.iterator.CloseableIterator;
34  import org.archive.wayback.accesscontrol.robotstxt.redis.RedisRobotExclusionFilterFactory;
35  import org.archive.wayback.core.CaptureSearchResult;
36  import org.archive.wayback.core.CaptureSearchResults;
37  import org.archive.wayback.core.SearchResults;
38  import org.archive.wayback.core.WaybackRequest;
39  import org.archive.wayback.exception.ResourceNotInArchiveException;
40  import org.archive.wayback.exception.RobotAccessControlException;
41  import org.archive.wayback.resourceindex.filters.ExclusionFilter;
42  import org.archive.wayback.util.ObjectFilter;
43  import org.archive.wayback.util.WrappedCloseableIterator;
44  import org.archive.wayback.util.url.KeyMakerUrlCanonicalizer;
45  import org.archive.wayback.webapp.PerfStats;
46  import org.easymock.EasyMock;
47  import org.easymock.IAnswer;
48  
49  /**
50   * Test {@link EmbeddedCDXServerIndex}.
51   * @author Kenji Nagahashi
52   *
53   */
54  public class EmbeddedCDXServerIndexTest extends TestCase {
55  	
56  	/**
57  	 * fixture CDXServer (unnecessary if CDServer was an interface).
58  	 * <p>
59  	 * Note: {@code testHandleRequest} and {@code testRenderMementoTimemap} uses
60  	 * {@link CDXServer#getCdx(HttpServletRequest, HttpServletResponse, CDXQuery)},
61  	 * which eventually calls {@link #getCdx(CDXQuery, AuthToken, CDXWriter)} here.
62  	 * </p>
63  	 */
64  	public static class TestCDXServer extends CDXServer {
65  		public List<Object[]> capturedArgs = new ArrayList<Object[]>();
66  		public CDXLine[] cdxLines;
67  		
68  		@Override
69  		public void getCdx(CDXQuery query, AuthToken authToken,
70  				CDXWriter responseWriter) throws IOException {
71  			capturedArgs.add(new Object[] { query, authToken, responseWriter });
72  			
73  			responseWriter.begin();
74  			for (CDXLine cdxLine : cdxLines) {
75  				responseWriter.writeLine(cdxLine);
76  			}
77  			responseWriter.end();
78  		}
79  		
80  		public void clearCapturedArgs() {
81  			capturedArgs.clear();
82  		}
83  	}
84  
85  	EmbeddedCDXServerIndex cut;
86  	TestCDXServer testCDXServer;
87  	
88  	/* (non-Javadoc)
89  	 * @see junit.framework.TestCase#setUp()
90  	 */
91  	protected void setUp() throws Exception {
92  		cut = new EmbeddedCDXServerIndex();
93  		cut.setCanonicalizer(new KeyMakerUrlCanonicalizer());
94  		cut.setCdxServer(testCDXServer = new TestCDXServer());
95  
96  		Logger.getLogger(PerfStats.class.getName()).setLevel(Level.WARNING);
97  	}
98  	
99  	/**
100 	 * Set CDX lines TestCDXServer stub returns.
101 	 * Lines are parsed with {@link CDXFieldConstants#CDX_ALL_NAMES}.
102 	 * Note {@link EmbeddedCDXServerIndex#query} will throw {@link ResourceNotInArchiveException}
103 	 * if {@code lines} is empty.
104 	 * @param lines text CDX lines
105 	 */
106 	protected void setCdxLines(String... lines) {
107 		// urlkey, timestamp, original, mimetype, statuscode, digest, redirect,
108 		// robotflags, length, offset, filename.
109 		final FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
110 		testCDXServer.cdxLines = new CDXLine[lines.length];
111 		int i = 0;
112 		for (String line : lines) {
113 			testCDXServer.cdxLines[i++] = new CDXLine(line, fmt);
114 		}
115 	}
116 
117 	// === sample cdx lines ===
118 	
119 	final String CDXLINE1 = "com,example)/ 20101124000000 http://example.com/ text/html 200" +
120 			" ABCDEFGHIJKLMNOPQRSTUVWXYZ012345 - - 2000 0 /a/a.warc.gz";
121 	// for testing ignore-robots
122 	final String CDXLINE2 = "com,norobots)/ 20101124000000 http://example.com/ text/html 200" +
123 			" ABCDEFGHIJKLMNOPQRSTUVWXYZ012345 - - 2000 0 /a/a.warc.gz";
124 	/**
125 	 * capture search. basic options.
126 	 * @throws Exception
127 	 */
128 	public void testQuery() throws Exception {
129 		WaybackRequest wbr = new WaybackRequest();
130 		wbr.setRequestUrl("http://example.com/");
131 		wbr.setCaptureQueryRequest();
132 		
133 		// urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags,
134 		// length, offset, filename.
135 		FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
136 		testCDXServer.cdxLines = new CDXLine[] {
137 				new CDXLine(CDXLINE1, fmt)
138 		};
139 		
140 		SearchResults sr = cut.query(wbr);
141 		
142 		assertEquals(1,  sr.getReturnedCount());
143 		
144 		assertEquals(1, testCDXServer.capturedArgs.size());
145 		
146 		Object[] args = testCDXServer.capturedArgs.get(0);
147 		CDXQuery query = (CDXQuery)args[0];
148 		String[] filter = query.getFilter();
149 		assertEquals(1, filter.length);
150 		assertEquals("!statuscode:(500|502|504)", filter[0]);
151 		
152 		AuthToken authToken = (AuthToken)args[1];
153 		assertFalse(authToken.isIgnoreRobots());
154 	}
155 	/**
156 	 * {@link EmbeddedCDXServerIndex} resolves revisits for replay requests.
157 	 * (This is actually a test of {@link CDXToCaptureSearchResultsWriter}.)
158 	 * @throws Exception
159 	 */
160 	public void testRevisitResolution() throws Exception {
161 		WaybackRequest wbr = WaybackRequest.createReplayRequest(
162 			"http://example.com/", "20101125000000", null, null);
163 		setCdxLines(
164 			"com,example)/ 20101124000000 http://example.com/ text/html 200" +
165 					" XXXX - - 2000 0 /a/a.warc.gz",
166 			"com,example)/ 20101125000000 http://example.com/ warc/revisit 200" +
167 					" XXXX - - 2000 0 /a/b.warc.gz",
168 			"com,example)/ 20101126000000 http://example.com/ text/html 200" +
169 					" XXXX - - 2000 0 /a/c.warc.gz"
170 				);
171 		SearchResults sr = cut.query(wbr);
172 
173 		assertEquals(3, sr.getReturnedCount());
174 
175 		CaptureSearchResults results = (CaptureSearchResults)sr;
176 		List<CaptureSearchResult> list = results.getResults();
177 		CaptureSearchResult capture2 = list.get(1);
178 		assertEquals("20101125000000", capture2.getCaptureTimestamp());
179 		assertEquals("20101124000000", capture2.getDuplicateDigestStoredTimestamp());
180 		assertEquals("/a/a.warc.gz", capture2.getDuplicatePayloadFile());
181 		assertEquals(0, (long)capture2.getDuplicatePayloadOffset());
182 		assertEquals(2000, capture2.getDuplicatePayloadCompressedLength());
183 
184 		assertSame(list.get(0), capture2.getDuplicatePayload());
185 	}
186 
187 	/**
188 	 * {@link CDXToCaptureSearchResultsWriter} resolves revisits for replay requests
189 	 * (reverse order input mode) (Test of {@link CDXToCaptureSearchResultsWriter}.)
190 	 * <p>Since there's no way to put {@code CDXToCaptureSearchResultsWriter}'s in reverse
191 	 * mode, this test calls {@code CDXToCaptureSearchResultWriter} directly.</p>
192 	 * <p>In other words, its reverse mode is never used in practice.</p>
193 	 * @throws Exception
194 	 */
195 	public void testRevisitResolutionReverse() throws Exception {
196 		WaybackRequest wbr = WaybackRequest.createReplayRequest(
197 			"http://example.com/", "20101125000000", null, null);
198 		final String[] CDXLINES = {
199 			"com,example)/ 20101124000000 http://example.com/ text/html 200" +
200 					" XXXX - - 2000 0 /a/a.warc.gz",
201 			"com,example)/ 20101125000000 http://example.com/ warc/revisit 200" +
202 					" XXXX - - 2000 0 /a/b.warc.gz",
203 			"com,example)/ 20101126000000 http://example.com/ text/html 200" +
204 					" XXXX - - 2000 0 /a/c.warc.gz"
205 		};
206 		CDXQuery query = new CDXQuery(wbr.getRequestUrl());
207 		query.setSort(CDXQuery.SortType.reverse);
208 		assertTrue(query.isReverse());
209 		CDXToCaptureSearchResultsWriter cdxw = new CDXToCaptureSearchResultsWriter(query, true, false, null);
210 
211 		final FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
212 		cdxw.begin();
213 		// feed in reverse order
214 		for (int i = CDXLINES.length; i > 0; i--) {
215 			CDXLine line = new CDXLine(CDXLINES[i - 1], fmt);
216 			cdxw.trackLine(line);
217 			cdxw.writeLine(line);
218 		}
219 		cdxw.end();
220 
221 		CaptureSearchResults results = cdxw.getSearchResults();
222 
223 		assertEquals(3, results.getReturnedCount());
224 
225 		List<CaptureSearchResult> list = results.getResults();
226 
227 		CaptureSearchResult capture1 = list.get(0);
228 		// CDXToCaptureSearchResultWriter returns CaptureSearchResult's in chronological
229 		// order (oldest to newer), even when query.isReverse() == true.
230 		assertEquals("20101124000000", capture1.getCaptureTimestamp());
231 
232 		CaptureSearchResult capture2 = list.get(1);
233 		assertEquals("20101125000000", capture2.getCaptureTimestamp());
234 		assertEquals("20101124000000", capture2.getDuplicateDigestStoredTimestamp());
235 		assertEquals("/a/a.warc.gz", capture2.getDuplicatePayloadFile());
236 		assertEquals(0, (long)capture2.getDuplicatePayloadOffset());
237 		assertEquals(2000, capture2.getDuplicatePayloadCompressedLength());
238 
239 		assertSame(capture1, capture2.getDuplicatePayload());
240 	}
241 
242 	/**
243 	 * Test of soft-block feature (regular replay).
244 	 * capture with "X" in {@code robotflags} field does not make its way
245 	 * into {@code CaptureSearchResults}, but still available as payload
246 	 * capture for revisits.
247 	 * @throws Exception
248 	 */
249 	public void testSoftBlock() throws Exception {
250 		WaybackRequest wbr = WaybackRequest.createReplayRequest(
251 			"http://example.com/", "20101125000000", null, null);
252 		setCdxLines(
253 			"com,example)/ 20101124000000 http://example.com/ text/html 200" +
254 					" XXXX - X 2000 0 /a/a.warc.gz",
255 			"com,example)/ 20101125000000 http://example.com/ warc/revisit 200" +
256 					" XXXX - - 2000 0 /a/b.warc.gz",
257 			"com,example)/ 20101126000000 http://example.com/ text/html 200" +
258 					" XXXX - - 2000 0 /a/c.warc.gz"
259 				);
260 		CaptureSearchResults results = (CaptureSearchResults)cut.query(wbr);
261 
262 		assertEquals(2, results.getReturnedCount());
263 
264 		// first line is excluded
265 		List<CaptureSearchResult> list = results.getResults();
266 		assertEquals(2, list.size());
267 
268 		CaptureSearchResult capture1 = list.get(0);
269 		assertEquals("20101125000000", capture1.getCaptureTimestamp());
270 
271 		CaptureSearchResult capture2 = list.get(1);
272 		assertEquals("20101126000000", capture2.getCaptureTimestamp());
273 
274 		// but revisit is resolved to the first line.
275 		assertEquals("20101124000000", capture1.getDuplicateDigestStoredTimestamp());
276 		assertEquals("/a/a.warc.gz", capture1.getDuplicatePayloadFile());
277 		assertEquals(0, (long)capture1.getDuplicatePayloadOffset());
278 		assertEquals(2000, capture1.getDuplicatePayloadCompressedLength());
279 
280 		// payload capture is available via duplicatePayload
281 		CaptureSearchResult captureX = capture1.getDuplicatePayload();
282 		assertNotNull(captureX);
283 		assertEquals("20101124000000", captureX.getCaptureTimestamp());
284 
285 		// test if capture1 pretends to be an ordinary capture.
286 		// we want to hide the fact that it's content is coming from
287 		// blocked capture (this is actually a test of CaptereSearchResult.)
288 		assertFalse(capture1.isDuplicateDigest());
289 }
290 
291 	/**
292 	 * Supplementary test for soft-block feature.
293 	 * Modification to {@code robotflags} made by {@code exclusionFilter} must be
294 	 * properly recognized. As baseline {@code EmbeddedCDXServerIndex} does not have
295 	 * setting up {@code exclusionFilter}, this test deals with
296 	 * {@link CDXToCaptureSearchResultsWriter} directly.
297 	 * @throws Exception
298 	 */
299 	public void testSoftBlock_fieldModificationRecognized() throws Exception {
300 		WaybackRequest wbr = WaybackRequest.createReplayRequest(
301 			"http://example.com/", "20101125000000", null, null);
302 		final String[] CDXLINES = {
303 			// note this line has no "X" in robotflags field (compare with test above)
304 			"com,example)/ 20101124000000 http://example.com/ text/html 200" +
305 					" XXXX - - 2000 0 /a/a.warc.gz",
306 			"com,example)/ 20101125000000 http://example.com/ warc/revisit 200" +
307 					" XXXX - - 2000 0 /a/b.warc.gz",
308 			"com,example)/ 20101126000000 http://example.com/ text/html 200" +
309 					" XXXX - - 2000 0 /a/c.warc.gz"
310 		};
311 		CDXQuery query = new CDXQuery(wbr.getRequestUrl());
312 		ExclusionFilter exclusionFilter = new ExclusionFilter() {
313 			@Override
314 			public int filterObject(CaptureSearchResult o) {
315 				if (o.getCaptureTimestamp().startsWith("20101124")) {
316 					o.setRobotFlag(CaptureSearchResult.CAPTURE_ROBOT_BLOCKED);
317 				}
318 				return FILTER_INCLUDE;
319 			}
320 		};
321 		CDXToCaptureSearchResultsWriter cdxw = new CDXToCaptureSearchResultsWriter(query, true, false, null);
322 		cdxw.setExclusionFilter(exclusionFilter);
323 
324 		final FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
325 		cdxw.begin();
326 		for (String l : CDXLINES) {
327 			CDXLine line = new CDXLine(l, fmt);
328 			cdxw.trackLine(line);
329 			cdxw.writeLine(line);
330 		}
331 		cdxw.end();
332 
333 		CaptureSearchResults results = cdxw.getSearchResults();
334 
335 		// first capture will be removed from the result.
336 		assertEquals(2, results.getReturnedCount());
337 
338 		List<CaptureSearchResult> list = results.getResults();
339 
340 		CaptureSearchResult capture1 = list.get(0);
341 
342 		assertEquals("20101125000000", capture1.getCaptureTimestamp());
343 
344 		CaptureSearchResult captureX = capture1.getDuplicatePayload();
345 		assertNotNull(captureX);
346 		assertEquals("20101124000000", captureX.getCaptureTimestamp());
347 		// modification to robotflags field made by ExclusionFilter must be reflected
348 		// in capture1.
349 		assertEquals("X", captureX.getRobotFlags());
350 	}
351 
352 	/**
353 	 * Test of soft-block feature (URL-agnostic revisit payload lookup).
354 	 * In revisit payload lookup mode, capture with "X" is returned.
355 	 * @throws Exception
356 	 */
357 	public void testSoftBlock_revisitPayloadLookup() throws Exception {
358 		WaybackRequest wbr = WaybackRequest.createReplayRequest(
359 			"http://example.com/", "20101124000000", null, null);
360 		wbr.put(EmbeddedCDXServerIndex.REQUEST_REVISIT_LOOKUP, "true");
361 		setCdxLines(
362 			"com,example)/ 20101124000000 http://example.com/ text/html 200" +
363 					" XXXX - X 2000 0 /a/a.warc.gz",
364 			"com,example)/ 20101125000000 http://example.com/ warc/revisit 200" +
365 					" XXXX - - 2000 0 /a/b.warc.gz",
366 			"com,example)/ 20101126000000 http://example.com/ text/html 200" +
367 					" XXXX - - 2000 0 /a/c.warc.gz"
368 				);
369 		CaptureSearchResults results = (CaptureSearchResults)cut.query(wbr);
370 
371 		CaptureSearchResult capture1 = results.getResults().get(0);
372 		assertEquals("20101124000000", capture1.getCaptureTimestamp());
373 		assertSame(capture1, results.getClosest());
374 	}
375 
376 	/**
377 	 * quick test of {@link EmbeddedCDXServerIndex#buildStatusFilter(String)}
378 	 */
379 	public void testBuildStatusFilter() {
380 		final String[][] CASES = new String[][] {
381 				{ "!500", "!statuscode:500" },
382 				{ "! 400|500|502 ", "!statuscode:400|500|502" },
383 				{ "[23]..", "statuscode:[23].." },
384 				{ "! ", "" },
385 				{ "", "" },
386 				{ null, "" }
387 		};
388 		for (String[] c : CASES) {
389 			assertEquals(c[1], EmbeddedCDXServerIndex.buildStatusFilter(c[0]));
390 		}
391 	}
392 	
393 	/**
394 	 * test of {@link EmbeddedCDXServerIndex#setBaseStatusRegexp(String)}
395 	 * @throws Exception
396 	 */
397 	public void testQueryWithCustomStatusFilter() throws Exception {
398 		WaybackRequest wbr = new WaybackRequest();
399 		wbr.setRequestUrl("http://example.com/");
400 		wbr.setCaptureQueryRequest();
401 		
402 		// urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags,
403 		// length, offset, filename.
404 		setCdxLines(CDXLINE1);
405 		
406 		cut.setBaseStatusRegexp("");
407 		{
408 			@SuppressWarnings("unused")
409 			SearchResults sr = cut.query(wbr);
410 
411 			assertEquals(1, testCDXServer.capturedArgs.size());
412 
413 			Object[] args = testCDXServer.capturedArgs.get(0);
414 			CDXQuery query = (CDXQuery)args[0];
415 			String[] filter = query.getFilter();
416 			assertNull("there should be no filter", filter);
417 		}
418 		
419 		testCDXServer.clearCapturedArgs();
420 		cut.setBaseStatusRegexp("!500");
421 		{
422 			@SuppressWarnings("unused")
423 			SearchResults sr = cut.query(wbr);
424 
425 			assertEquals(1, testCDXServer.capturedArgs.size());
426 			
427 			Object[] args = testCDXServer.capturedArgs.get(0);
428 			CDXQuery query = (CDXQuery)args[0];
429 			String[] filter = query.getFilter();
430 			assertEquals(1, filter.length);
431 			assertEquals("!statuscode:500", filter[0]);
432 		}
433 	}
434 	
435 	/**
436 	 * for those SURT prefixes in {@code ignoreRobotsPaths}, 
437 	 * {@link AuthToken#isIgnoreRobots()} flag is set.
438 	 * @throws Exception
439 	 */
440 	public void testIgnoreRobotPaths() throws Exception {
441 		cut.setIgnoreRobotPaths(Arrays.asList(new String[]{ "com,norobots" }));
442 		WaybackRequest wbr = new WaybackRequest();
443 		wbr.setRequestUrl("http://norobots.com/");
444 		wbr.setCaptureQueryRequest();
445 		
446 		// urlkey, timestamp, original, mimetype, statuscode, digest, redirect, robotflags,
447 		// length, offset, filename.
448 		setCdxLines(CDXLINE2);
449 		
450 		@SuppressWarnings("unused")
451 		SearchResults sr = cut.query(wbr);
452 		
453 		assertEquals(1, testCDXServer.capturedArgs.size());
454 		
455 		Object[] args = testCDXServer.capturedArgs.get(0);
456 		//CDXQuery query = (CDXQuery)args[0];
457 		AuthToken authToken = (AuthToken)args[1];
458 		assertTrue(authToken.isIgnoreRobots());
459 	}
460 	
461 	/**
462 	 * test of timestamp-collapsing.
463 	 * <p>Actual processing happens in {@link CDXServer}. {@link EmbeddedCDXServerIndex}
464 	 * simply passes {@link WaybackRequest#getCollapseTime()} to {@link CDXQuery#setCollapse(String[])}.
465 	 * if {@code collapseTime} is unspecified in {@code WaybackRequest} (-1), default value
466 	 * {@code timestampDedupLength} will be used.
467 	 * @throws Exception
468 	 */
469 	public void testCollapseTime() throws Exception {
470 		WaybackRequest wbr = WaybackRequest.createCaptureQueryRequet(
471 			"http://example.com/", null, null, null);
472 		setCdxLines(CDXLINE1);
473 
474 		{
475 			cut.setTimestampDedupLength(10);
476 			@SuppressWarnings("unused")
477 			SearchResults sr = cut.query(wbr);
478 
479 			Object[] args = testCDXServer.capturedArgs.get(0);
480 			assertEquals(10, ((CDXQuery)args[0]).getCollapseTime());
481 		}
482 		testCDXServer.clearCapturedArgs();
483 		{
484 			wbr.setCollapseTime(8);
485 			@SuppressWarnings("unused")
486 			SearchResults sr = cut.query(wbr);
487 
488 			Object[] args = testCDXServer.capturedArgs.get(0);
489 			assertEquals(8, ((CDXQuery)args[0]).getCollapseTime());
490 		}
491 	}
492 
493 	/**
494 	 * {@link EmbeddedCDXServerIndex#handleRequest(HttpServletRequest, HttpServletResponse)} is
495 	 * a entry point for CDXServer API. It should return all accessible cdx lines, without applying
496 	 * any additional filters not requested by API user.
497 	 * @throws Exception
498 	 */
499 	public void testHandleRequest() throws Exception {
500 		HttpServletRequest request = EasyMock.createNiceMock(HttpServletRequest.class);
501 		EasyMock.expect(request.getParameter("url")).andStubReturn("http://example.com/");
502 		
503 		HttpServletResponse response = EasyMock.createNiceMock(HttpServletResponse.class);
504 		StringWriter sw = new StringWriter();
505 		EasyMock.expect(response.getWriter()).andReturn(new PrintWriter(sw));
506 		
507 		FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
508 		testCDXServer.cdxLines = new CDXLine[] {
509 				new CDXLine(CDXLINE1, fmt)
510 		};
511 
512 		EasyMock.replay(request, response);
513 		cut.handleRequest(request, response);
514 		
515 		assertEquals(1, testCDXServer.capturedArgs.size());
516 		Object[] args = testCDXServer.capturedArgs.get(0);
517 		
518 		CDXQuery query = (CDXQuery)args[0];
519 		assertEquals("API query should not have filter by default", 0, query.getFilter().length);
520 		
521 		assertEquals(CDXLINE1+"\n", sw.toString());
522 	}
523 
524 	/**
525 	 * {@link EmbeddedCDXServerIndex#renderMementoTimemap(WaybackRequest, HttpServletRequest, HttpServletResponse)}
526 	 * is a CDXServer API entry point for Memento format output.
527 	 * @throws Exception
528 	 */
529 	public void testRenderMementoTimemap() throws Exception {
530 		HttpServletRequest request = EasyMock.createNiceMock(HttpServletRequest.class);
531 		// Used in MementoLinkWriter
532 		EasyMock.expect(request.getRequestURL()).andAnswer(new IAnswer<StringBuffer>() {
533 			@Override
534 			public StringBuffer answer() throws Throwable {
535 				return new StringBuffer("/timemap/memento/http://example.com/");
536 			}
537 		});
538 		HttpServletResponse response = EasyMock.createNiceMock(HttpServletResponse.class);
539 		StringWriter sw = new StringWriter();
540 		EasyMock.expect(response.getWriter()).andReturn(new PrintWriter(sw));
541 		
542 		// needs: 
543 		//   getMementoTimemapFormat() - passed to CDXQuery.output
544 		//   getRequestUrl() - passed to CDXQuery
545 		//   get(MementoConstants.PAGE_STARTS) (optional, passed to CDXQuery.from
546 		//   getAccessPoint() - if getMementoTimemapFormat() == MementoConstants.FORMAT_LINK,
547 		//     CDX is looked up by calling AccessPoint#queryIndex(WaybackRequest)
548 		WaybackRequest wbr = new WaybackRequest();
549 		wbr.setRequestUrl("http://example.com/");
550 		wbr.setMementoTimemapFormat("memento");
551 		
552 		FieldSplitFormat fmt = CDXFieldConstants.CDX_ALL_NAMES;
553 		testCDXServer.cdxLines = new CDXLine[] {
554 				new CDXLine(CDXLINE1, fmt)
555 		};
556 		
557 		EasyMock.replay(request, response);
558 		boolean r = cut.renderMementoTimemap(wbr, request, response);
559 		
560 		assertTrue("renderMementoTimemap returns true", r);
561 
562 		assertEquals(1, testCDXServer.capturedArgs.size());
563 		Object[] args = testCDXServer.capturedArgs.get(0);
564 		
565 		CDXQuery query = (CDXQuery)args[0];
566 		assertEquals("API query should not have filter by default", 0, query.getFilter().length);
567 		
568 		// Here we only check if output *looks like* Memento format. Detailed tests
569 		// shall be done by test case for MementoLinkWriter.
570 		//System.out.println("response=" + sw.toString());
571 		assertTrue(sw.toString().startsWith("<http://example.com/>;"));
572 	}
573 
574 	// WaybackAuthChecker wants RedisRobotExclusionFilterFactory for
575 	// robotsExclusions. BAD, BAD, BAD!
576 	public static class ExcludeAllFilterFactory extends RedisRobotExclusionFilterFactory {
577 		@Override
578 		public ExclusionFilter get() {
579 			return new ExclusionFilter() {
580 				@Override
581 				public int filterObject(CaptureSearchResult o) {
582 					return ObjectFilter.FILTER_EXCLUDE;
583 				}
584 			};
585 		}
586 	}
587 	// XXX CDXServer demands ZipNumCluster even though it doesn't
588 	// call methods specific to it. BAD.
589 	public static class StubZipNumCluster extends ZipNumCluster {
590 		List<String> cdxlines;
591 		public StubZipNumCluster(String... cdxlines) {
592 			this.cdxlines = Arrays.asList(cdxlines);
593 		}
594 		// method called by EmbeddedCDXServer.query(WaybackRequest) for
595 		// non-paged queries.
596 		@Override
597 		public CloseableIterator<String> getCDXIterator(String key,
598 				String start, String end, ZipNumParams params)
599 				throws IOException {
600 			return new WrappedCloseableIterator<String>(cdxlines.iterator());
601 		}
602 	}
603 	/**
604 	 * robots.txt exclusion shall be disable for embeds.
605 	 * <p>TODO: This is actually testing classes in {@code wayback-cdx-server}
606 	 * module. Implemented here because it takes more work to do this
607 	 * in wayback-cdx-server module, and it makes little sense to do it before
608 	 * planned refactoring.</p>
609 	 * <p>Ref: WWM-119. A bug in {@link PrivTokenAuthChecker}.</p>
610 	 * @throws Exception
611 	 */
612 	public void testIgnoreRobotsForEmbeds() throws Exception {
613 		CDXServer cdxServer = new CDXServer();
614 		ZipNumCluster cdxSource = new StubZipNumCluster(
615 			"com,example)/style.css 20101124000000 http://example.com/style.css text/css 200"
616 					+ " ABCDEFGHIJKLMNOPQRSTUVWXYZ012345 - - 2000 0 /a/a.warc.gz");
617 		cdxServer.setZipnumSource(cdxSource);
618 		// This is the class being tested here... so AuthChecker shall no be mocked.
619 		// We cannot use PrivTokenAuthCheck class for this test, because it has no
620 		// real support for robots.txt exclusion. This is the main reason why we
621 		// cannot have this test in wayback-cdx-server project.
622 		WaybackAuthChecker authChecker = new WaybackAuthChecker();
623 		authChecker.setRobotsExclusions(new ExcludeAllFilterFactory());
624 		cdxServer.setAuthChecker(authChecker);
625 		cdxServer.afterPropertiesSet();
626 		cut.setCdxServer(cdxServer);
627 
628 		{
629 			WaybackRequest wbRequest = WaybackRequest.createReplayRequest(
630 				"http://example.com/style.css", "20140101000000", null, null);
631 			wbRequest.setCSSContext(true); // i.e. "embed"
632 
633 			try {
634 				cut.query(wbRequest);
635 			} catch (RobotAccessControlException ex) {
636 				fail("robots.txt exclusion is not disabled for embeds");
637 			}
638 		}
639 		// additional tests to make sure robots.txt exclusion is implemented
640 		// right, not just broken. these would have better been in a separate
641 		// test method(s), but just for now... CDX server refactoring will
642 		// break these anyways.
643 		{
644 			WaybackRequest wbRequest = WaybackRequest.createReplayRequest(
645 				"http://example.com/style.css", "20140101000000", null, null);
646 			// not embed
647 			try {
648 				cut.query(wbRequest);
649 				fail("RobotAccessControlException was not thrown");
650 			} catch (RobotAccessControlException ex) {
651 				// expected.
652 			}
653 		}
654 
655 		// check robots.txt exclusion is working for CDX server API entry point
656 		{
657 			HttpServletRequest httpRequest = EasyMock.createNiceMock(HttpServletRequest.class);
658 			EasyMock.expect(httpRequest.getParameter("url")).andStubReturn("http://exmaple.com/style.css");
659 
660 			HttpServletResponse httpResponse = EasyMock.createMock(HttpServletResponse.class);
661 			// expect error response; 403 with error header containing "Robot"
662 			final StringWriter output = new StringWriter();
663 			EasyMock.expect(httpResponse.getWriter()).andReturn(new PrintWriter(output));
664 			httpResponse.setContentType(EasyMock.<String>notNull());
665 			EasyMock.expectLastCall().once();
666 			httpResponse.setStatus(403);
667 			EasyMock.expectLastCall().once();
668 			httpResponse.setHeader(EasyMock.eq(HttpCDXWriter.RUNTIME_ERROR_HEADER), EasyMock.matches("(?i).*Robot.*"));
669 
670 			EasyMock.replay(httpRequest, httpResponse);
671 
672 			cut.handleRequest(httpRequest, httpResponse);
673 
674 			EasyMock.verify(httpResponse);
675 		}
676 
677 		// check if robots.txt exclusion can be disabled by cookie.
678 		{
679 			final String IGNORE_ROBOTS_TOKEN = "DISABLE-ROBOTS-EXCLUSION";
680 			authChecker.setIgnoreRobotsAccessTokens(Collections.singletonList(IGNORE_ROBOTS_TOKEN));
681 
682 			HttpServletRequest httpRequest = EasyMock.createNiceMock(HttpServletRequest.class);
683 			EasyMock.expect(httpRequest.getParameter("url")).andStubReturn("http://exmaple.com/style.css");
684 			EasyMock.expect(httpRequest.getCookies()).andStubReturn(
685 				new Cookie[] { new Cookie(cdxServer.getCookieAuthToken(),
686 					IGNORE_ROBOTS_TOKEN) });
687 
688 			HttpServletResponse httpResponse = EasyMock.createMock(HttpServletResponse.class);
689 			// expect 200 response = robots exclusion is disabled.
690 			final StringWriter output = new StringWriter();
691 			EasyMock.expect(httpResponse.getWriter()).andReturn(new PrintWriter(output));
692 			httpResponse.setContentType(EasyMock.<String>notNull());
693 			EasyMock.expectLastCall().once();
694 			//httpResponse.setStatus(200); // this is not explicitly called
695 			//EasyMock.expectLastCall().once();
696 
697 			EasyMock.replay(httpRequest, httpResponse);
698 
699 			cut.handleRequest(httpRequest, httpResponse);
700 			// if it's not working, EasyMock will report unexpected call to httpResponse.setStatus(403).
701 
702 			EasyMock.verify(httpResponse);
703 
704 			System.out.println(output.toString());
705 		}
706 	}
707 }