Source code for kibitzr.fetcher.simple
import logging
import collections
from time import sleep
import requests
from cachecontrol import CacheControl
from kibitzr import __version__ as version
logger = logging.getLogger(__name__)
class SessionFetcher:
    """Fetch a single URL through a cached ``requests`` session with retries.

    The session is wrapped in ``CacheControl`` and sends a ``Kibitzr/<version>``
    User-agent header. Configuration is read from the ``conf`` mapping:

    * ``url`` (required) -- address to fetch.
    * ``valid_http`` (optional) -- iterable of status codes considered
      successful; defaults to ``[200]``.
    * ``verify-cert`` / ``verify_cert`` (optional) -- passed to ``requests``
      as ``verify``; the dashed spelling takes precedence, default ``True``.
    """

    # Pairs of (exception class, delay). The delay is either a number of
    # seconds or a callable taking the zero-based retry index and returning
    # the number of seconds to wait before the next attempt.
    RETRIABLE_EXCEPTIONS = (
        (requests.HTTPError, 5),
        (requests.ConnectionError, 15),
        # Linear back-off: 60s after the first timeout, 120s after the second.
        (requests.Timeout, lambda retry: 60 * (retry + 1)),
    )
    # Explicitly listing exceptions from above to make pylint happy:
    EXCEPTED = (
        requests.HTTPError,
        requests.ConnectionError,
        requests.Timeout,
    )

    def __init__(self, conf):
        """Build the cached session and read fetch settings from ``conf``."""
        self.conf = conf
        self.session = CacheControl(requests.Session())
        self.session.headers.update({
            'User-agent': 'Kibitzr/' + version,
        })
        self.url = conf['url']
        self.valid_http = set(conf.get('valid_http', [200]))
        self.verify_cert = conf.get(
            'verify-cert',
            conf.get('verify_cert', True),
        )

    def fetch(self):
        """GET the configured URL, retrying on transient network errors.

        Up to three attempts are made. After a failed attempt that is not the
        last one, the fetcher sleeps for the delay configured in
        ``RETRIABLE_EXCEPTIONS`` and tries again.

        Returns:
            A ``(ok, text)`` tuple: ``ok`` is ``True`` when the response
            status code is in ``valid_http``; ``text`` is the response body.

        Raises:
            One of the ``EXCEPTED`` exception types, re-raised unchanged when
            the final attempt fails as well.
        """
        retries = 3
        for retry in range(retries):
            try:
                response = self.session.get(
                    self.url,
                    # requests interprets a 2-tuple as (connect, read) timeouts.
                    timeout=(3.05, 27),
                    verify=self.verify_cert,
                )
            except self.EXCEPTED as exc:
                if retry < retries - 1:
                    self.sleep_on_exception(exc, retry)
                else:
                    # Out of attempts: let the caller see the last failure.
                    raise
            else:
                # No raise_for_status() here: an unexpected status code is
                # reported through the ``ok`` flag rather than an exception.
                ok = (response.status_code in self.valid_http)
                text = response.text
                return ok, text

    def sleep_on_exception(self, exc, retry):
        """Sleep for the delay configured for the type of ``exc``.

        Args:
            exc: The exception raised by the failed attempt.
            retry: Zero-based index of the failed attempt; forwarded to
                callable delays so they can grow with each retry.

        Only the first matching entry of ``RETRIABLE_EXCEPTIONS`` is used.
        If nothing matches, the method returns without sleeping.
        """
        for klass, seconds in self.RETRIABLE_EXCEPTIONS:
            if isinstance(exc, klass):
                # ``collections.Callable`` was removed in Python 3.10; the
                # builtin ``callable()`` is the portable equivalent.
                if callable(seconds):
                    seconds = seconds(retry)
                sleep(seconds)
                break
def requests_fetcher(conf):
    """Return a zero-argument callable that fetches the URL from ``conf``.

    A single ``SessionFetcher`` is created up front and captured by the
    returned closure, so every call goes through the same fetcher instance.
    Each call returns whatever ``SessionFetcher.fetch()`` returns.
    """
    shared_fetcher = SessionFetcher(conf)

    def fetcher():
        # Delegate to the one fetcher instance captured above.
        result = shared_fetcher.fetch()
        return result

    return fetcher