public class TestWARCRecordInfo
extends org.archive.io.warc.WARCRecordInfo
implements org.archive.format.warc.WARCConstants, org.archive.format.ArchiveFileConstants
TestWARCReader
Modifier and Type | Field and Description |
---|---|
static byte[] |
PAYLOAD_GIF |
static String |
REVISIT_WARC_PROFILE |
contentLength, contentStream, create14DigitDate, enforceLength, extraHeaders, mimetype, recordId, type, url, warcFilename, warcFileOffset
COLON_SPACE, COMPRESSED_WARC_FILE_EXTENSION, CONTENT_DESCRIPTION, CONTENT_LENGTH, CONTENT_TYPE, DEFAULT_ENCODING, DEFAULT_MAX_WARC_FILE_SIZE, DOT_COMPRESSED_WARC_FILE_EXTENSION, DOT_WARC_FILE_EXTENSION, FTP_CONTROL_CONVERSATION_MIMETYPE, HEADER_FIELD_SEPARATOR, HEADER_KEY_BLOCK_DIGEST, HEADER_KEY_CONCURRENT_TO, HEADER_KEY_DATE, HEADER_KEY_ETAG, HEADER_KEY_FILENAME, HEADER_KEY_ID, HEADER_KEY_IP, HEADER_KEY_LAST_MODIFIED, HEADER_KEY_PAYLOAD_DIGEST, HEADER_KEY_PROFILE, HEADER_KEY_REFERS_TO, HEADER_KEY_REFERS_TO_DATE, HEADER_KEY_REFERS_TO_FILE_OFFSET, HEADER_KEY_REFERS_TO_FILENAME, HEADER_KEY_REFERS_TO_TARGET_URI, HEADER_KEY_TRUNCATED, HEADER_KEY_TYPE, HEADER_KEY_URI, HEADER_LINE_ENCODING, HTTP_REQUEST_MIMETYPE, HTTP_RESPONSE_MIMETYPE, MAX_LINE_LENGTH, MAX_WARC_HEADER_LINE_LENGTH, NAMED_FIELD_CHECKSUM_LABEL, NAMED_FIELD_DESCRIPTION, NAMED_FIELD_FILEDESC, NAMED_FIELD_IP_LABEL, NAMED_FIELD_RELATED_LABEL, NAMED_FIELD_TRUNCATED, NAMED_FIELD_TRUNCATED_VALUE_HEAD, NAMED_FIELD_TRUNCATED_VALUE_LENGTH, NAMED_FIELD_TRUNCATED_VALUE_TIME, NAMED_FIELD_TRUNCATED_VALUE_UNSPECIFIED, NAMED_FIELD_WARCFILENAME, PLACEHOLDER_RECORD_LENGTH_STRING, PROFILE_REVISIT_IDENTICAL_DIGEST, PROFILE_REVISIT_NOT_MODIFIED, PROFILE_REVISIT_URI_AGNOSTIC_IDENTICAL_DIGEST, TRUNCATED_VALUE_UNSPECIFIED, TYPE, WARC_FIELDS_TYPE, WARC_FILE_EXTENSION, WARC_HEADER_ENCODING, WARC_ID, WARC_MAGIC, WARC_VERSION, WSP
ABSOLUTE_OFFSET_KEY, CDX, CDX_FILE, CDX_LINE_BUFFER_SIZE, CRLF, DATE_FIELD_KEY, DEFAULT_DIGEST_METHOD, DOT_COMPRESSED_FILE_EXTENSION, DUMP, GZIP_DUMP, HEADER, INVALID_SUFFIX, LENGTH_FIELD_KEY, MIMETYPE_FIELD_KEY, NOHEAD, OCCUPIED_SUFFIX, READER_IDENTIFIER_FIELD_KEY, RECORD_IDENTIFIER_FIELD_KEY, SINGLE_SPACE, TYPE_FIELD_KEY, URL_FIELD_KEY, VERSION_FIELD_KEY
Constructor and Description |
---|
TestWARCRecordInfo(byte[] content) |
Modifier and Type | Method and Description |
---|---|
static byte[] |
buildCompressedHttpResponseBlock(String ctype,
byte[] payloadBytes) |
static byte[] |
buildCompressedHttpResponseBlock(String ctype,
byte[] payloadBytes,
boolean chunked) |
static byte[] |
buildHttpRedirectResponseBlock(String location) |
static byte[] |
buildHttpRedirectResponseBlock(String statusline,
String location) |
static byte[] |
buildHttpResponseBlock(String payload) |
static byte[] |
buildHttpResponseBlock(String ctype,
byte[] payloadBytes)
Shortcut for generating a "200 OK" HTTP response content-block.
|
static byte[] |
buildHttpResponseBlock(String status,
String ctype,
byte[] payloadBytes) |
static byte[] |
buildHttpResponseBlock(String status,
String ctype,
byte[] payloadBytes,
boolean chunked)
Returns the content-block bytes for an HTTP response.
|
static byte[] |
buildRevisitHttpResponseBlock(String ctype,
int len,
boolean withHeader,
boolean gzipContent)
Generates WARC content for a new revisit record.
|
static TestWARCRecordInfo |
createCompressedHttpResponse(String ctype,
byte[] payloadBytes) |
static TestWARCRecordInfo |
createHttpResponse(String payload)
Returns a TestWARCRecordInfo for an HTTP response with entity
payload. |
static TestWARCRecordInfo |
createHttpResponse(String ctype,
byte[] payloadBytes)
Returns a TestWARCRecordInfo for an HTTP response with entity
payload. |
static TestWARCRecordInfo |
createHttpResponse(String status,
String ctype,
byte[] payloadBytes)
Returns a TestWARCRecordInfo for an HTTP response with response status line
status, entity payload of content-type ctype. |
static TestWARCRecordInfo |
createRevisitHttpResponse(String ctype,
int len) |
static TestWARCRecordInfo |
createRevisitHttpResponse(String ctype,
int len,
boolean withHeader) |
static TestWARCRecordInfo |
createRevisitHttpResponse(String ctype,
int len,
boolean withHeader,
boolean gzipContent) |
static TestWARCRecordInfo |
createUrlAgnosticRevisitHttpResponse(String ctype,
int len)
Creates a TestWARCRecordInfo with URL-Agnostic Revisit WARC record content.
|
static String |
dt14ToISOZ(String dt14)
translates DT14 (YYYYmmddHHMMSS) to ISOZ format used in WARC-Date header.
|
void |
setCreate14DigitDateFromDT14(String dt14)
utility method for updating create14DigitDate from DT14.
|
addExtraHeader, getContentLength, getContentStream, getCreate14DigitDate, getEnforceLength, getExtraHeaders, getMimetype, getRecordId, getType, getUrl, getWARCFilename, getWARCFileOffset, isEnforceLength, setContentLength, setContentStream, setCreate14DigitDate, setEnforceLength, setExtraHeaders, setMimetype, setRecordId, setType, setUrl, setWARCFilename, setWARCFileOffset
public static final String REVISIT_WARC_PROFILE
public static final byte[] PAYLOAD_GIF
public static String dt14ToISOZ(String dt14) throws IOException
dt14
- IOException
- dt14 is in bad format (wrapping ParseException to simplify error handling)
public void setCreate14DigitDateFromDT14(String dt14) throws IOException
dt14
- DT14 (YYYYmmddHHMMSS)
IOException
- dt14 is in bad format.
public static TestWARCRecordInfo createHttpResponse(String payload) throws IOException
Returns a TestWARCRecordInfo for an HTTP response with entity payload.
Content-Type is text/plain, and payload is encoded in UTF-8.
payload
- IOException
public static TestWARCRecordInfo createHttpResponse(String ctype, byte[] payloadBytes) throws IOException
Returns a TestWARCRecordInfo for an HTTP response with entity payload.
ctype
- Content-Type value
payloadBytes
- payload bytes
IOException
public static TestWARCRecordInfo createHttpResponse(String status, String ctype, byte[] payloadBytes) throws IOException
Returns a TestWARCRecordInfo for an HTTP response with response status line status,
entity payload of content-type ctype.
status
- status line, such as "200 OK"
ctype
- content-type
payloadBytes
- payload bytes
IOException
public static TestWARCRecordInfo createCompressedHttpResponse(String ctype, byte[] payloadBytes) throws IOException
IOException
public static TestWARCRecordInfo createRevisitHttpResponse(String ctype, int len, boolean withHeader) throws IOException
IOException
public static TestWARCRecordInfo createRevisitHttpResponse(String ctype, int len, boolean withHeader, boolean gzipContent) throws IOException
IOException
public static TestWARCRecordInfo createRevisitHttpResponse(String ctype, int len) throws IOException
IOException
public static TestWARCRecordInfo createUrlAgnosticRevisitHttpResponse(String ctype, int len) throws IOException
Creates a TestWARCRecordInfo with URL-Agnostic Revisit WARC record content.
WARC-Refers-To-Target-URI = http://example.com/
WARC-Refers-To-Date = 2014-01-01T10:10:10Z
ctype
- Content-Type
len
- Content-Length (arbitrary)
IOException
public static byte[] buildHttpResponseBlock(String payload) throws IOException
IOException
public static byte[] buildHttpResponseBlock(String ctype, byte[] payloadBytes) throws IOException
Shortcut for generating a "200 OK" HTTP response content-block.
ctype
- HTTP Content-Type, such as "text/plain", "image/gif"
payloadBytes
- payload bytes
IOException
public static byte[] buildHttpResponseBlock(String status, String ctype, byte[] payloadBytes, boolean chunked) throws IOException
Returns the content-block bytes for an HTTP response.
status
- HTTP status code and status text separated by a space, e.g. "200 OK".
ctype
- HTTP Content-Type
payloadBytes
- payload bytes
chunked
- if true, use chunked transfer-encoding
IOException
public static byte[] buildHttpResponseBlock(String status, String ctype, byte[] payloadBytes) throws IOException
IOException
public static byte[] buildHttpRedirectResponseBlock(String location) throws IOException
IOException
public static byte[] buildHttpRedirectResponseBlock(String statusline, String location) throws IOException
IOException
public static byte[] buildCompressedHttpResponseBlock(String ctype, byte[] payloadBytes, boolean chunked) throws IOException
IOException
public static byte[] buildCompressedHttpResponseBlock(String ctype, byte[] payloadBytes) throws IOException
IOException
public static byte[] buildRevisitHttpResponseBlock(String ctype, int len, boolean withHeader, boolean gzipContent) throws IOException
Generates WARC content for a new revisit record.
ctype
- value for Content-Type
len
- value for Content-Length
withHeader
- include HTTP status line and headers.
Passing false generates an old-style revisit content block.
gzipContent
- if true, block will have a "Content-Encoding: gzip" header
(this should match the compression of the previous capture).
IOException
Copyright © 2005–2015 IIPC. All rights reserved.