from pywb.rewrite.cookie_rewriter import WbUrlBaseCookieRewriter, HostScopeCookieRewriter
from warcio.timeutils import datetime_to_http_date
from six.moves import zip
import redis
import tldextract
import time
import datetime
import six
# =============================================================================
[docs]class CookieTracker(object):
def __init__(self, redis, expire_time=120):
self.redis = redis
self.expire_time = expire_time
[docs] def get_rewriter(self, url_rewriter, cookie_key):
return DomainCacheCookieRewriter(url_rewriter, self, cookie_key)
[docs] def add_cookie(self, cookie_key, domain, name, value):
if domain[0] != '.':
domain = '.' + domain
with redis.utils.pipeline(self.redis) as pi:
pi.hset(cookie_key + domain, name, value)
pi.expire(cookie_key + domain, self.expire_time)
[docs] @staticmethod
def get_subdomains(url):
tld = tldextract.extract(url)
if not tld.subdomain:
return None
main = tld.domain + '.' + tld.suffix
full = tld.subdomain + '.' + main
def get_all_subdomains(main, full):
doms = []
while main != full:
full = full.split('.', 1)[1]
doms.append(full)
return doms
all_subs = get_all_subdomains(main, full)
return all_subs
# =============================================================================
[docs]class HostScopeNoFilterCookieRewriter(HostScopeCookieRewriter):
def _filter_morsel(self, morsel):
pass
# =============================================================================
[docs]class DomainCacheCookieRewriter(WbUrlBaseCookieRewriter):
def __init__(self, url_rewriter, cookie_tracker, cookie_key):
super(DomainCacheCookieRewriter, self).__init__(url_rewriter)
self.cookie_tracker = cookie_tracker
self.cookie_key = cookie_key
[docs] def rewrite_cookie(self, name, morsel):
# if domain set, no choice but to expand cookie path to root
domain = morsel.pop('domain', '')
if domain and self.cookie_key:
#if morsel.get('max-age'):
# morsel['max-age'] = int(morsel['max-age'])
#self.cookiejar.set_cookie(self.morsel_to_cookie(morsel))
#print(morsel, self.cookie_key + domain)
string = morsel.value
if morsel.get('path'):
string += '; Path=' + morsel.get('path')
if morsel.get('httponly'):
string += '; HttpOnly'
if morsel.get('secure'):
string += '; Secure'
self.cookie_tracker.add_cookie(self.cookie_key,
domain,
morsel.key,
string)
# else set cookie to rewritten path
if morsel.get('path'):
morsel['path'] = self.url_rewriter.rewrite(morsel['path'])
return morsel
[docs] def get_expire_sec(self, morsel):
expires = None
if morsel.get('max-age'):
return int(morsel['max-age'])
expires = morsel.get('expires')
if not expires:
return None
expires = expires.replace(' UTC', ' GMT')
try:
expires = time.strptime(expires, '%a, %d-%b-%Y %H:%M:%S GMT')
except:
pass
try:
expires = time.strptime(expires, '%a, %d %b %Y %H:%M:%S GMT')
except:
pass
expires = time.mktime(expires)
expires = expires - time.timezone - time.time()
return expires
# ============================================================================