Source code for pywb.warcserver.resource.blockrecordloader

from warcio.bufferedreaders import DecompressingBufferedReader
from warcio.recordloader import ArcWarcRecordLoader

from pywb.utils.loaders import BlockLoader
from pywb.utils.io import BUFF_SIZE


#=================================================================
[docs]class BlockArcWarcRecordLoader(ArcWarcRecordLoader): def __init__(self, loader=None, cookie_maker=None, block_size=BUFF_SIZE, *args, **kwargs): if not loader: loader = BlockLoader(cookie_maker=cookie_maker) self.loader = loader self.block_size = block_size super(BlockArcWarcRecordLoader, self).__init__(*args, **kwargs)
[docs] def load(self, url, offset, length, no_record_parse=False): """ Load a single record from given url at offset with length and parse as either warc or arc record """ try: length = int(length) except: length = -1 stream = self.loader.load(url, int(offset), length) decomp_type = 'gzip' # Create decompressing stream stream = DecompressingBufferedReader(stream=stream, decomp_type=decomp_type, block_size=self.block_size) return self.parse_record_stream(stream, no_record_parse=no_record_parse)