public class TestWARCRecordInfo
extends org.archive.io.warc.WARCRecordInfo
implements org.archive.format.warc.WARCConstants, org.archive.format.ArchiveFileConstants
TestWARCReader

| Modifier and Type | Field and Description |
|---|---|
static byte[] |
PAYLOAD_GIF |
static String |
REVISIT_WARC_PROFILE |
Fields inherited from class org.archive.io.warc.WARCRecordInfo:
contentLength, contentStream, create14DigitDate, enforceLength, extraHeaders, mimetype, recordId, type, url, warcFilename, warcFileOffset

Fields inherited from interface org.archive.format.warc.WARCConstants:
COLON_SPACE, COMPRESSED_WARC_FILE_EXTENSION, CONTENT_DESCRIPTION, CONTENT_LENGTH, CONTENT_TYPE, DEFAULT_ENCODING, DEFAULT_MAX_WARC_FILE_SIZE, DOT_COMPRESSED_WARC_FILE_EXTENSION, DOT_WARC_FILE_EXTENSION, FTP_CONTROL_CONVERSATION_MIMETYPE, HEADER_FIELD_SEPARATOR, HEADER_KEY_BLOCK_DIGEST, HEADER_KEY_CONCURRENT_TO, HEADER_KEY_DATE, HEADER_KEY_ETAG, HEADER_KEY_FILENAME, HEADER_KEY_ID, HEADER_KEY_IP, HEADER_KEY_LAST_MODIFIED, HEADER_KEY_PAYLOAD_DIGEST, HEADER_KEY_PROFILE, HEADER_KEY_REFERS_TO, HEADER_KEY_REFERS_TO_DATE, HEADER_KEY_REFERS_TO_FILE_OFFSET, HEADER_KEY_REFERS_TO_FILENAME, HEADER_KEY_REFERS_TO_TARGET_URI, HEADER_KEY_TRUNCATED, HEADER_KEY_TYPE, HEADER_KEY_URI, HEADER_LINE_ENCODING, HTTP_REQUEST_MIMETYPE, HTTP_RESPONSE_MIMETYPE, MAX_LINE_LENGTH, MAX_WARC_HEADER_LINE_LENGTH, NAMED_FIELD_CHECKSUM_LABEL, NAMED_FIELD_DESCRIPTION, NAMED_FIELD_FILEDESC, NAMED_FIELD_IP_LABEL, NAMED_FIELD_RELATED_LABEL, NAMED_FIELD_TRUNCATED, NAMED_FIELD_TRUNCATED_VALUE_HEAD, NAMED_FIELD_TRUNCATED_VALUE_LENGTH, NAMED_FIELD_TRUNCATED_VALUE_TIME, NAMED_FIELD_TRUNCATED_VALUE_UNSPECIFIED, NAMED_FIELD_WARCFILENAME, PLACEHOLDER_RECORD_LENGTH_STRING, PROFILE_REVISIT_IDENTICAL_DIGEST, PROFILE_REVISIT_NOT_MODIFIED, PROFILE_REVISIT_URI_AGNOSTIC_IDENTICAL_DIGEST, TRUNCATED_VALUE_UNSPECIFIED, TYPE, WARC_FIELDS_TYPE, WARC_FILE_EXTENSION, WARC_HEADER_ENCODING, WARC_ID, WARC_MAGIC, WARC_VERSION, WSP

Fields inherited from interface org.archive.format.ArchiveFileConstants:
ABSOLUTE_OFFSET_KEY, CDX, CDX_FILE, CDX_LINE_BUFFER_SIZE, CRLF, DATE_FIELD_KEY, DEFAULT_DIGEST_METHOD, DOT_COMPRESSED_FILE_EXTENSION, DUMP, GZIP_DUMP, HEADER, INVALID_SUFFIX, LENGTH_FIELD_KEY, MIMETYPE_FIELD_KEY, NOHEAD, OCCUPIED_SUFFIX, READER_IDENTIFIER_FIELD_KEY, RECORD_IDENTIFIER_FIELD_KEY, SINGLE_SPACE, TYPE_FIELD_KEY, URL_FIELD_KEY, VERSION_FIELD_KEY

| Constructor and Description |
|---|
TestWARCRecordInfo(byte[] content) |
| Modifier and Type | Method and Description |
|---|---|
static byte[] |
buildCompressedHttpResponseBlock(String ctype,
byte[] payloadBytes) |
static byte[] |
buildCompressedHttpResponseBlock(String ctype,
byte[] payloadBytes,
boolean chunked) |
static byte[] |
buildHttpRedirectResponseBlock(String location) |
static byte[] |
buildHttpRedirectResponseBlock(String statusline,
String location) |
static byte[] |
buildHttpResponseBlock(String payload) |
static byte[] |
buildHttpResponseBlock(String ctype,
byte[] payloadBytes)
Shortcut for generating a "200 OK" HTTP response content-block.
|
static byte[] |
buildHttpResponseBlock(String status,
String ctype,
byte[] payloadBytes) |
static byte[] |
buildHttpResponseBlock(String status,
String ctype,
byte[] payloadBytes,
boolean chunked)
Returns the content-block bytes for an HTTP response.
|
static byte[] |
buildRevisitHttpResponseBlock(String ctype,
int len,
boolean withHeader,
boolean gzipContent)
generates WARC content for new revisit record.
|
static TestWARCRecordInfo |
createCompressedHttpResponse(String ctype,
byte[] payloadBytes) |
static TestWARCRecordInfo |
createHttpResponse(String payload)
return TestWARCRecordInfo for HTTP Response with entity
payload. |
static TestWARCRecordInfo |
createHttpResponse(String ctype,
byte[] payloadBytes)
return TestWARCRecordInfo for HTTP Response with entity
payload. |
static TestWARCRecordInfo |
createHttpResponse(String status,
String ctype,
byte[] payloadBytes)
return TestWARCRecordInfo for HTTP Response with response status line
status,
entity payload of content-type ctype. |
static TestWARCRecordInfo |
createRevisitHttpResponse(String ctype,
int len) |
static TestWARCRecordInfo |
createRevisitHttpResponse(String ctype,
int len,
boolean withHeader) |
static TestWARCRecordInfo |
createRevisitHttpResponse(String ctype,
int len,
boolean withHeader,
boolean gzipContent) |
static TestWARCRecordInfo |
createUrlAgnosticRevisitHttpResponse(String ctype,
int len)
creates TestWARCRecordInfo with URL-Agnostic Revisit WARC record content.
|
static String |
dt14ToISOZ(String dt14)
translates DT14 (YYYYmmddHHMMSS) to ISOZ format used in WARC-Date header.
|
void |
setCreate14DigitDateFromDT14(String dt14)
utility method for updating create14DigitDate from DT14.
|
Methods inherited from class org.archive.io.warc.WARCRecordInfo:
addExtraHeader, getContentLength, getContentStream, getCreate14DigitDate, getEnforceLength, getExtraHeaders, getMimetype, getRecordId, getType, getUrl, getWARCFilename, getWARCFileOffset, isEnforceLength, setContentLength, setContentStream, setCreate14DigitDate, setEnforceLength, setExtraHeaders, setMimetype, setRecordId, setType, setUrl, setWARCFilename, setWARCFileOffset

public static final String REVISIT_WARC_PROFILE
public static final byte[] PAYLOAD_GIF
public static String dt14ToISOZ(String dt14) throws IOException
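Translates DT14 (YYYYmmddHHMMSS) to the ISOZ format used in the WARC-Date header.
Parameters:
dt14 - DT14 (YYYYmmddHHMMSS)
Throws:
IOException - dt14 is in bad format (wrapping ParseException to simplify error handling)

public void setCreate14DigitDateFromDT14(String dt14) throws IOException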
Utility method for updating create14DigitDate from DT14.
Parameters:
dt14 - DT14 (YYYYmmddHHMMSS)
Throws:
IOException - dt14 is in bad format.

public static TestWARCRecordInfo createHttpResponse(String payload) throws IOException
Returns a TestWARCRecordInfo for an HTTP response with entity payload.
Content-Type is text/plain, and payload is encoded in UTF-8.
Parameters:
payload - entity payload
Throws:
IOException

public static TestWARCRecordInfo createHttpResponse(String ctype, byte[] payloadBytes) throws IOException
Returns a TestWARCRecordInfo for an HTTP response with entity payload.
Parameters:
ctype - Content-Type value
payloadBytes - payload bytes
Throws:
IOException

public static TestWARCRecordInfo createHttpResponse(String status, String ctype, byte[] payloadBytes) throws IOException
Returns a TestWARCRecordInfo for an HTTP response with response status line status,
entity payload of content-type ctype.
Parameters:
status - status line, such as "200 OK"
ctype - content-type
payloadBytes - payload bytes
Throws:
IOException

public static TestWARCRecordInfo createCompressedHttpResponse(String ctype, byte[] payloadBytes) throws IOException
Throws:
IOException

public static TestWARCRecordInfo createRevisitHttpResponse(String ctype, int len, boolean withHeader) throws IOException
Throws:
IOException

public static TestWARCRecordInfo createRevisitHttpResponse(String ctype, int len, boolean withHeader, boolean gzipContent) throws IOException
Throws:
IOException

public static TestWARCRecordInfo createRevisitHttpResponse(String ctype, int len) throws IOException
Throws:
IOException

public static TestWARCRecordInfo createUrlAgnosticRevisitHttpResponse(String ctype, int len) throws IOException
Creates a TestWARCRecordInfo with URL-Agnostic Revisit WARC record content.
WARC-Refers-To-Target-URI = http://example.com/
WARC-Refers-To-Date = 2014-01-01T10:10:10Z
Parameters:
ctype - Content-Type
len - Content-Length (arbitrary)
Throws:
IOException

public static byte[] buildHttpResponseBlock(String payload) throws IOException
Throws:
IOException

public static byte[] buildHttpResponseBlock(String ctype, byte[] payloadBytes) throws IOException
Shortcut for generating a "200 OK" HTTP response content-block.
Parameters:
ctype - HTTP Content-Type, such as "text/plain", "image/gif"
payloadBytes - payload bytes
Throws:
IOException

public static byte[] buildHttpResponseBlock(String status, String ctype, byte[] payloadBytes, boolean chunked) throws IOException
Returns the content-block bytes for an HTTP response.
Parameters:
status - HTTP status code and status text separated by a space, e.g. "200 OK".
ctype - HTTP Content-Type
payloadBytes - payload bytes
chunked - if true, use chunked transfer-encoding
Throws:
IOException

public static byte[] buildHttpResponseBlock(String status, String ctype, byte[] payloadBytes) throws IOException
Throws:
IOException

public static byte[] buildHttpRedirectResponseBlock(String location) throws IOException
Throws:
IOException

public static byte[] buildHttpRedirectResponseBlock(String statusline, String location) throws IOException
Throws:
IOException

public static byte[] buildCompressedHttpResponseBlock(String ctype, byte[] payloadBytes, boolean chunked) throws IOException
Throws:
IOException

public static byte[] buildCompressedHttpResponseBlock(String ctype, byte[] payloadBytes) throws IOException
Throws:
IOException

public static byte[] buildRevisitHttpResponseBlock(String ctype, int len, boolean withHeader, boolean gzipContent) throws IOException
Generates WARC content for a new revisit record.
Parameters:
ctype - value for Content-Type
len - value for Content-Length
withHeader - include HTTP status line and headers.
Passing false generates an old-style revisit content block.
gzipContent - if true, block will have a "Content-Encoding: gzip" header
(this should match whether the previous capture was compressed).
Throws:
IOException

Copyright © 2005–2015 IIPC. All rights reserved.