Source code for pywb.manager.autoindex

import gevent
import time
import re
import os
import logging

from pywb.manager.manager import CollectionsManager


#=============================================================================
[docs]class AutoIndexer(object): EXT_RX = re.compile('.*\.w?arc(\.gz)?$') AUTO_INDEX_FILE = 'autoindex.cdxj' def __init__(self, colls_dir=None, interval=30, keep_running=True): self.manager = CollectionsManager('', colls_dir=colls_dir, must_exist=False) self.root_path = self.manager.colls_dir self.keep_running = keep_running self.interval = interval self.last_size = {}
[docs] def is_newer_than(self, path1, path2, track=False): try: mtime1 = os.path.getmtime(path1) mtime2 = os.path.getmtime(path2) newer = mtime1 > mtime2 except: newer = True if track: size = os.path.getsize(path1) try: if size != self.last_size[path1]: newer = True except: pass self.last_size[path1] = size return newer
[docs] def do_index(self, files): logging.info('Auto-Indexing... ' + str(files)) self.manager.index_merge(files, self.AUTO_INDEX_FILE) logging.info('...Done')
[docs] def check_path(self): for coll in os.listdir(self.root_path): coll_dir = os.path.join(self.root_path, coll) if not os.path.isdir(coll_dir): continue self.manager.change_collection(coll) archive_dir = self.manager.archive_dir if not os.path.isdir(archive_dir): continue index_file = os.path.join(self.manager.indexes_dir, self.AUTO_INDEX_FILE) if not os.path.isfile(index_file): try: os.makedirs(self.manager.indexes_dir) except Exception as e: pass logging.info('Checking Collection: ' + coll) to_index = [] for dirpath, dirnames, filenames in os.walk(archive_dir): for filename in filenames: if not self.EXT_RX.match(filename): continue full_filename = os.path.join(dirpath, filename) if self.is_newer_than(full_filename, index_file, True): to_index.append(full_filename) if to_index: self.do_index(to_index)
[docs] def run(self): try: # If running in uwsgi, run AutoIndexer only in first worker! import uwsgi if uwsgi.worker_id() != 1: return except: pass try: while self.keep_running: self.check_path() if not self.interval: break time.sleep(self.interval) except KeyboardInterrupt: # pragma: no cover return
[docs] def start(self): self.ge = gevent.spawn(self.run)
[docs] def stop(self): self.interval = 0 self.keep_running = False