Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvdb committed Mar 8, 2020
1 parent f34937e commit 83a3df0
Show file tree
Hide file tree
Showing 20 changed files with 1,507 additions and 1 deletion.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# Test generated files

dns.pickle
dns-lru.pickle
jsonpickle.stash
pickle.stash
dns.sqlite
disk-cache-dir
diskdict-cache-dir

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
82 changes: 81 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,81 @@
# dns-cache
# dns-cache

`dns-cache` is a Python client side DNS caching framework utilising
[`dnspython`](https://github.com/rthalley/dnspython) v1.15+ for DNS.
It supports various local key stores, caching of lookup failures,
and configurable expiration of cached responses.

Some reasons to use a client side cache include:
- processing data containing many repeated invalid domains,
- running a local DNS caching service is not practical or appropriate,
- adding reporting of DNS activity performed within a job.

## Installation

The recommended way to install `dns-cache` is by using pip as follows:

`pip install dns-cache`

## Getting started

To quickly benefit from client side dns-caching in your existing application, install the system resolver.

```python
import dns_cache
import requests

dns_cache.override_system_resolver()

for i in range(10):
    requests.get('http://www.coala.io/')
```

If you have a fast dns proxy, 10 requests will possibly show no performance improvement.
Even 100 may not perform better in this contrived example.

However when many parts of a system are performing lookups on the same DNS records, or where
sessions are being closed and new ones created and need to access the same DNS records,
the difference becomes more noticeable, especially in jobs which take hours to run.

For long running jobs, use the `min_ttl` argument to increase the default if 5 minutes isn't sufficient.
It can be set to `dns_cache.NO_EXPIRY` for a ttl of one week, which is not recommended except when
accompanied with custom cache expiration logic.

## Key stores

Multiple key stores are supported, and their dependencies need to be added separately as required.

1. `pickle` and [`pickle4`](https://github.com/moreati/pickle4) backport: `dns_cache.pickle.PickableCache`
2. [`diskcache`](https://github.com/grantjenks/python-diskcache): `dns_cache.diskcache.DiskCache`
3. [`stash.py`](https://github.com/fuzeman/stash.py/): `dns_cache.stash.StashCache`
4. [`sqlitedict`](https://github.com/RaRe-Technologies/sqlitedict): `dns_cache.sqlitedict.SqliteDictCache`
5. [`disk_dict`](https://github.com/AWNystrom/DiskDict): `dns_cache.disk_dict.DiskDictCache` (Python 2.7 only)

`stash.py` support uses `pickle` or `jsonpickle` on Python 3, however only `jsonpickle` works on Python 2.7.

## Caching additions

The following classes can be used separately or together.

1. `dns_cache.resolver.AggressiveCachingResolver`: indexes all qnames in the response, increasing the number of keys,
but reducing the number of requests and cached responses when several related records are requested, such as a HTTP redirect
from www.foo.com to foo.com (or vice versa) where one is a CNAME pointing to the other.
2. `dns_cache.resolver.ExceptionCachingResolver`: caches lookup failures.

**Note:** `dns_cache.override_system_resolver()` can be used to install a custom `resolver` or `cache`, which may
be derived from the above classes or your own implementation from scratch.

## TODO

1. Support [`python-benedict`](https://github.com/fabiocaccamo/python-benedict)
2. Use [`dnsbin`](https://github.com/ettic-team/dnsbin) for testing
3. Add redis, memcached and cloud caching backends

## Similar projects

Python:
1. [`velocity`](https://github.com/s0md3v/velocity) is a lighter weight approach, with a [`serious bug`](https://github.com/s0md3v/velocity/issues/2)
2. [`dnsplug`](https://github.com/nresare/dnsplug), unfortunately not available on PyPI.

Go:
1. [`dnscache`](https://github.com/rs/dnscache)
69 changes: 69 additions & 0 deletions dns_cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import atexit
import os
import os.path
import sys

from dns.resolver import override_system_resolver as upstream_override_system_resolver

from .expiration import _NO_EXPIRY as NO_EXPIRY
from .expiration import FIVE_MINS, MinExpirationCache, NoExpirationCache
from .pickle import PickableCache
from .resolver import AggressiveCachingResolver, ExceptionCachingResolver

# Version string of the dns_cache package.
__version__ = "0.1.0"


class Resolver(AggressiveCachingResolver, ExceptionCachingResolver):
    """Resolver combining both caching additions of this package.

    Inherits from ``AggressiveCachingResolver`` and
    ``ExceptionCachingResolver`` and adds no behaviour of its own.
    """


class MinExpirationPickableCache(MinExpirationCache, PickableCache):
    """``PickableCache`` combined with ``MinExpirationCache``.

    The class only composes its two bases; it defines nothing itself.
    """


class NoExpirationPickableCache(NoExpirationCache, PickableCache):
    """``PickableCache`` combined with ``NoExpirationCache``.

    The class only composes its two bases; it defines nothing itself.
    """


def override_system_resolver(
    resolver=None, cache=None, directory=None, min_ttl=FIVE_MINS
):  # pragma: no cover
    """Install a caching resolver as the system resolver and return it.

    The resolver is handed to ``dns.resolver.override_system_resolver``
    after its ``cache`` attribute has been set.

    :param resolver: resolver to install.  When falsy, a ``Resolver`` is
        created with ``configure=False`` and then configured from the
        Windows registry (``sys.platform == "win32"``) or from
        ``/etc/resolv.conf``; if that raises, ``8.8.8.8`` is used as the
        only nameserver.
    :param cache: cache to attach to the resolver.  When falsy, one is
        created from ``directory`` and ``min_ttl`` (see below).
    :param directory: when given (and no cache was supplied), the
        directory is created if needed and a pickle-backed cache stored
        in ``<directory>/dns.pickle`` is used.
    :param min_ttl: minimum TTL passed to the created cache.  Without a
        directory, ``NO_EXPIRY`` selects ``NoExpirationCache`` and any
        other value selects ``MinExpirationCache``.
    :return: the resolver that was installed.
    """
    if not cache:
        if directory:
            try:
                os.makedirs(directory, exist_ok=True)
            except TypeError:
                # ``exist_ok`` is not accepted here (e.g. Python 2.7);
                # retry without it and ignore OSError from the retry.
                try:
                    os.makedirs(directory)
                except OSError:
                    pass

            pickle_path = os.path.join(directory, "dns.pickle")
            # NOTE(review): the original code tested ``min_ttl == NO_EXPIRY``
            # here but both branches built a MinExpirationPickableCache, so
            # the test had no effect.  NoExpirationPickableCache may have
            # been intended for the NO_EXPIRY case -- confirm before
            # changing behaviour.
            cache = MinExpirationPickableCache(filename=pickle_path, min_ttl=min_ttl)
        elif min_ttl == NO_EXPIRY:
            cache = NoExpirationCache(min_ttl=min_ttl)
        else:
            cache = MinExpirationCache(min_ttl=min_ttl)

    if not resolver:
        resolver = Resolver(configure=False)
        on_windows = sys.platform == "win32"
        try:
            if on_windows:
                resolver.read_registry()
            else:
                resolver.read_resolv_conf("/etc/resolv.conf")
        except Exception:
            # Best effort: fall back to a fixed public nameserver when the
            # local configuration cannot be read.
            resolver.nameservers = ["8.8.8.8"]

    resolver.cache = cache
    upstream_override_system_resolver(resolver)

    # Run the cache's finaliser at interpreter exit when it defines one.
    if hasattr(cache, "__del__"):
        atexit.register(cache.__del__)

    return resolver
38 changes: 38 additions & 0 deletions dns_cache/disk_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from __future__ import absolute_import

import jsonpickle

from dns.resolver import Cache

from .key_transform import StringKeyDictBase

from disk_dict import DiskDict


class DiskDict(StringKeyDictBase, DiskDict):  # pragma: no cover
    """``disk_dict.DiskDict`` with tuple keys stored as encoded strings.

    The class deliberately reuses the imported name: the base-class list
    is evaluated before the new class is bound, so the second base is the
    ``DiskDict`` imported from the ``disk_dict`` package.
    """

    def __len__(self):
        """Return the number of keys in the store.

        ``keys()`` is materialised into a list first because the key
        transforming mixin returns a generator, which has no length.
        """
        # The original followed this return with an unreachable
        # ``try``/``except ValueError`` fallback; it could never run and
        # has been removed without changing behaviour.
        return len(list(self.keys()))


class DiskDictCacheBase(object):  # pragma: no cover
    """Mixin that stores a cache's ``data`` in a ``DiskDict``."""

    def __init__(
        self,
        directory,
        serializer=jsonpickle.dumps,
        deserializer=jsonpickle.loads,
        *args,
        **kwargs
    ):  # pragma: no cover
        """Initialise the next class in the MRO, then attach the store.

        :param directory: passed to ``DiskDict`` as ``location``.
        :param serializer: callable passed to ``DiskDict`` as
            ``serializer``; defaults to ``jsonpickle.dumps``.
        :param deserializer: callable passed to ``DiskDict`` as
            ``deserializer``; defaults to ``jsonpickle.loads``.
        :param args: forwarded to the next ``__init__`` in the MRO.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.
        """
        super(DiskDictCacheBase, self).__init__(*args, **kwargs)
        # Assigned after the parent initialiser so this store is the one
        # left in ``self.data``.
        store_options = {
            "location": directory,
            "serializer": serializer,
            "deserializer": deserializer,
        }
        self.data = DiskDict(**store_options)


class DiskDictCache(DiskDictCacheBase, Cache):  # pragma: no cover
    """``dns.resolver.Cache`` combined with ``DiskDictCacheBase``."""

    def __init__(self, *args, **kwargs):
        """Pass every argument through unchanged to the base initialisers."""
        super(DiskDictCache, self).__init__(*args, **kwargs)
21 changes: 21 additions & 0 deletions dns_cache/diskcache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from __future__ import absolute_import

import diskcache as dc

from dns.resolver import Cache, LRUCache


class DiskCacheBase(object):
    """Mixin that stores a cache's ``data`` in a ``diskcache.Cache``."""

    def __init__(self, directory, *args, **kwargs):
        """Initialise the next class in the MRO, then attach the store.

        :param directory: passed to ``diskcache.Cache``.
        :param args: forwarded to the next ``__init__`` in the MRO.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.
        """
        super(DiskCacheBase, self).__init__(*args, **kwargs)
        # Assigned after the parent initialiser so this store is the one
        # left in ``self.data``.
        disk_store = dc.Cache(directory)
        self.data = disk_store


class DiskCache(DiskCacheBase, Cache):
    """``dns.resolver.Cache`` combined with ``DiskCacheBase``."""

    def __init__(self, *args, **kwargs):
        """Pass every argument through unchanged to the base initialisers."""
        super(DiskCache, self).__init__(*args, **kwargs)


class DiskLRUCache(DiskCacheBase, LRUCache):
    """``dns.resolver.LRUCache`` combined with ``DiskCacheBase``."""

    def __init__(self, *args, **kwargs):
        """Pass every argument through unchanged to the base initialisers."""
        super(DiskLRUCache, self).__init__(*args, **kwargs)
59 changes: 59 additions & 0 deletions dns_cache/expiration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import time

from dns.resolver import Cache, LRUCache

# All durations in this module are expressed in seconds.
FIVE_MINS = 5 * 60
TEN_MINS = 10 * 60
SECONDS_PER_DAY = 24 * 60 * 60
SECONDS_PER_WEEK = 7 * SECONDS_PER_DAY

# "No expiry" is represented as a minimum TTL of one week.
_NO_EXPIRY = SECONDS_PER_WEEK

# Default minimum TTL used when a cache is created without one.
MIN_TTL = FIVE_MINS


class MinExpirationCacheBase(object):
    """Mixin that enforces a minimum lifetime on values put in a cache."""

    def __init__(self, min_ttl=None, *args, **kwargs):
        """Initialise the next class in the MRO and record ``min_ttl``.

        :param min_ttl: minimum lifetime in seconds; any falsy value
            (``None`` or ``0``) is replaced by ``MIN_TTL``.
        :param args: forwarded to the next ``__init__`` in the MRO.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.
        """
        super(MinExpirationCacheBase, self).__init__(*args, **kwargs)
        self.min_ttl = min_ttl or MIN_TTL

    def put(self, key, value):
        """Store ``value`` under ``key``, extending its expiration if needed.

        ``value.expiration`` is raised to ``now + min_ttl`` when it would
        otherwise be earlier; later expirations are left untouched.
        """
        earliest_expiry = time.time() + self.min_ttl
        if value.expiration < earliest_expiry:
            value.expiration = earliest_expiry
        super(MinExpirationCacheBase, self).put(key, value)


class NoExpirationCacheBase(MinExpirationCacheBase):
    """Minimum-expiration mixin that defaults to ``_NO_EXPIRY`` and never cleans."""

    def __init__(self, min_ttl=_NO_EXPIRY, *args, **kwargs):
        """Initialise with a minimum TTL that defaults to ``_NO_EXPIRY``.

        :param min_ttl: minimum lifetime in seconds.
        :param args: forwarded to the next ``__init__`` in the MRO.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.

        Extra arguments are forwarded, as ``MinExpirationCacheBase`` does,
        so that classes combining this mixin with another cache can pass
        that cache's own constructor arguments.  Previously any such
        argument raised ``TypeError``.
        """
        super(NoExpirationCacheBase, self).__init__(min_ttl, *args, **kwargs)

    def _maybe_clean(self):
        """Avoid the _maybe_clean phase of dns.resolver.Cache."""
        pass


class MinExpirationCache(MinExpirationCacheBase, Cache):
    """``dns.resolver.Cache`` combined with ``MinExpirationCacheBase``."""

    def __init__(self, cleaning_interval=None, min_ttl=None, *args, **kwargs):
        """Resolve defaults, then initialise the base classes.

        :param cleaning_interval: passed on as ``cleaning_interval``; a
            falsy value is replaced by the larger of ``MIN_TTL`` and the
            resolved ``min_ttl``.
        :param min_ttl: minimum lifetime in seconds; a falsy value is
            replaced by ``MIN_TTL``.
        :param args: forwarded to the next ``__init__`` in the MRO.
        :param kwargs: forwarded to the next ``__init__`` in the MRO.
        """
        min_ttl = min_ttl or MIN_TTL
        cleaning_interval = cleaning_interval or max(MIN_TTL, min_ttl)
        super(MinExpirationCache, self).__init__(
            *args, cleaning_interval=cleaning_interval, min_ttl=min_ttl, **kwargs
        )


class NoExpirationCache(NoExpirationCacheBase, Cache):
    """``dns.resolver.Cache`` combined with ``NoExpirationCacheBase``."""


class MinExpirationLRUCache(MinExpirationCacheBase, LRUCache):
    """``dns.resolver.LRUCache`` combined with ``MinExpirationCacheBase``."""


class NoExpirationLRUCache(NoExpirationCacheBase, LRUCache):
    """``dns.resolver.LRUCache`` combined with ``NoExpirationCacheBase``."""
62 changes: 62 additions & 0 deletions dns_cache/key_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from dns.name import from_text


def key_encode(key):
    """Encode a ``(name, rdtype, rdclass)`` key as a ``!``-separated string.

    Each of the three parts is rendered with ``str.format`` and the
    results are joined with ``"!"``.  A key that does not unpack into
    exactly three parts raises the usual unpacking error.
    """
    qname, rdtype, rdclass = key
    parts = (qname, rdtype, rdclass)
    return "!".join("{}".format(part) for part in parts)


def key_decode(key):
    """Decode a ``name!rdtype!rdclass`` string back into a key tuple.

    The string is split on the last two ``"!"`` characters, so the name
    part may itself contain ``"!"``.  The name is parsed with
    ``dns.name.from_text`` (``None`` is passed as its second argument) and
    the other two parts are converted with ``int``.
    """
    name_text, rdtype_text, rdclass_text = key.rsplit("!", 2)
    name = from_text(name_text, None)
    return (name, int(rdtype_text), int(rdclass_text))


class KeyTransformDictBase(object):
    """Mixin that transforms tuple keys before they reach the mapping.

    Intended to precede a mapping type in the MRO.  Tuple keys are passed
    through ``self.key_encode`` before being handed to the underlying
    mapping; non-tuple keys are passed through untouched.  ``keys()`` and
    ``items()`` decode the stored keys again.

    Subclasses must provide ``key_encode``.  Decoding uses the subclass's
    ``key_decode`` when it defines one, and the module-level
    ``key_decode`` function otherwise.
    """

    def __encode(self, key):
        """Return ``key`` in stored form; only tuples are transformed."""
        if isinstance(key, tuple):
            return self.key_encode(key)
        return key

    def __decode(self, key):
        """Return the decoded form of a stored ``key``.

        Previously the module-level ``key_decode`` was always used, which
        silently ignored a ``key_decode`` supplied by a subclass even
        though ``key_encode`` was taken from the subclass.
        """
        decoder = getattr(self, "key_decode", None)
        if decoder is None:
            # Keep working for subclasses that only define ``key_encode``.
            decoder = key_decode
        return decoder(key)

    def __contains__(self, key):
        """Return whether the (encoded) ``key`` is present."""
        return super(KeyTransformDictBase, self).__contains__(self.__encode(key))

    def __setitem__(self, key, value):
        """Store ``value`` under the (encoded) ``key``."""
        super(KeyTransformDictBase, self).__setitem__(self.__encode(key), value)

    def get(self, key, default=None):
        """Return the value for the (encoded) ``key``, or ``default``."""
        return super(KeyTransformDictBase, self).get(self.__encode(key), default)

    def put(self, key, value):
        """Store ``value`` via the parent's ``put``, else ``__setitem__``.

        The fallback is taken whenever the parent call raises
        ``AttributeError``, e.g. because the underlying mapping has no
        ``put`` method.
        """
        key = self.__encode(key)
        try:
            return super(KeyTransformDictBase, self).put(key, value)
        except AttributeError:
            return super(KeyTransformDictBase, self).__setitem__(key, value)

    def __getitem__(self, key):
        """Return the value stored under the (encoded) ``key``."""
        return super(KeyTransformDictBase, self).__getitem__(self.__encode(key))

    def __delitem__(self, key):
        """Delete the entry stored under the (encoded) ``key``."""
        super(KeyTransformDictBase, self).__delitem__(self.__encode(key))

    def keys(self):
        """Return a generator over the decoded keys."""
        return (
            self.__decode(key) for key in super(KeyTransformDictBase, self).keys()
        )

    def items(self):
        """Yield ``(decoded_key, value)`` pairs."""
        for key, value in super(KeyTransformDictBase, self).items():
            yield self.__decode(key), value


class StringKeyDictBase(KeyTransformDictBase):
    """Key-transforming mixin using this module's string key codec.

    Exposes the module-level ``key_encode`` and ``key_decode`` functions
    as static methods.
    """

    key_decode = staticmethod(key_decode)
    key_encode = staticmethod(key_encode)


class StringKeyDict(StringKeyDictBase, dict):
    """Built-in ``dict`` combined with ``StringKeyDictBase``."""
Loading

0 comments on commit 83a3df0

Please sign in to comment.