Package org.archive.modules.writer

Examples of org.archive.modules.writer.WARCWriterProcessor.process()


            loader().process(curi1);
            assertTrue(curi1.hasContentDigestHistory());
            assertTrue(curi1.getContentDigestHistory().isEmpty());

            warcWriter.process(curi1);
            assertEquals(curi1.getUURI().toString(), curi1.getContentDigestHistory().get(A_ORIGINAL_URL));
            assertEquals(1, curi1.getContentDigestHistory().get(A_CONTENT_DIGEST_COUNT));
            String report = warcWriter.report();
            assertTrue(report.contains("Total CrawlURIs:   1\n"));
            assertTrue(report.contains("Revisit records:   0\n"));
View Full Code Here


            assertTrue(curi2.hasContentDigestHistory());
            assertEquals(curi1.getUURI().toString(), curi2.getContentDigestHistory().get(A_ORIGINAL_URL));
            assertNotSame(curi2.getUURI().toString(), curi2.getContentDigestHistory().get(A_ORIGINAL_URL));
            assertEquals(1, curi2.getContentDigestHistory().get(A_CONTENT_DIGEST_COUNT));

            warcWriter.process(curi2);
            assertTrue(curi2.getAnnotations().contains("duplicate:digest"));
            assertEquals(curi1.getUURI().toString(), curi2.getContentDigestHistory().get(A_ORIGINAL_URL));
            assertNotSame(curi2.getUURI().toString(), curi2.getContentDigestHistory().get(A_ORIGINAL_URL));
            assertEquals(2, curi2.getContentDigestHistory().get(A_CONTENT_DIGEST_COUNT));
            report = warcWriter.report();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.