Multi-key Python dictionary

September 17th, 2011 by erb

There are several (easily found via Google) Python dictionary variations available, that allow you to map multiple values for a given key.  Essentially allowing a list of values for the key to be stored.

I was looking for the converse: I wanted to (efficiently) hash multiple keys to a single, common value.

Here’s code to do this (after the cut), that provides a (relatively) complete equivalent to the built-in dict methods: (minimally tested!)

>>> d = MultiDict({'a':2, 'b':3})
>>> d
{'a': 2, 'b': 3}
>>> d['c'] = 4 ; d
{'a': 2, 'b': 3, 'c': 4}
>>> d.setdefault(['x','a'])
2
>>> d
{['a', 'x']: 2, 'b': 3, 'c': 4}
>>> d['a']
2
>>> d['x'] = 10 ; d
{['a', 'x']: 10, 'b': 3, 'c': 4}
>>> d = MultiDict([['a', 10], [['b','c','d'], 20], ['e',30]])
>>> d
{'a': 10, ['c', 'b', 'd']: 20, 'e': 30}
>>> d['b']
20
>>> d['d'] = 50 ; d['c']
50
>>> d.keys()
['a', 'c', 'b', 'e', 'd']
>>> d.values()
[10, 50, 50, 30, 50]
>>> d.items()
[('a', 10), ('c', 50), ('b', 50), ('e', 30), ('d', 50)]
>>> d.__sizeof__()
248
>>> 'b' in d
True
>>> del d['b']
>>> 'b' in d
False
>>> d
{'a': 10, ['c', 'd']: 50, 'e': 30}
>>> d.pop('c')
50
>>> d.pop('d')
50
>>> d
{'a': 10, 'e': 30}
>>> d.__sizeof__()
248
>>> d.setdefault(['a','b'], 60)
60
>>> d
{['a', 'b']: 60, 'e': 30}
>>>

Code:

class MultiDict:
    """Dictionary that allows multiple keys to reference common values.
    """
    def __init__(self, init_dict={}):
        self.clear()
        if isinstance(init_dict, dict):
            for key, value in init_dict.iteritems():
                self.__setitem__(key, value)
        elif isinstance(init_dict, list):
            for key, value in init_dict:
                self.__setitem__(key, value)

    def _get_index(self):
        # Note we never use index == 0 (indistinguishable from None)
        self._index += 1
        return self._index

    def _get_hash(self, key):
        if isinstance(key, list):
            count = self._get_key_count(key)
            if count == 0:
                raise KeyError(key)
            elif count == 1:
                return self._hash.get(key[0])
            else:
                raise KeyError("multiple keys already exist")
        else:
            return self._hash.get(key)

    def _set_hash(self, key, hash=None):
        self._hash.update({key: hash or self._get_index()})
        return self._get_hash(key)

    def _get_hash_count(self, hash):
        return len(filter(lambda k: self._hash[k]==hash, self._hash))

    def _get_key_count(self, key):
        if isinstance(key, list):
            return len(set(filter(None, map(self._get_hash, key))))
        else:
            return self.__contains__(key) and 1 or 0

    def __contains__(self, key):
        return self._hash.__contains__(key)

    def __delitem__(self, key):
        hash = self._get_hash(key)
        if not hash:
            raise KeyError(key)
        if 1 == self._get_hash_count(hash):
            del self._dict[hash]
        del self._hash[key]

    def __getitem__(self, key):
        hash = self._get_hash(key)
        if not hash:
            raise KeyError(key)
        return self._dict.get(hash)

    def __iter__(self):
        return self._hash.__iter__()

    def __len__(self):
        return len(self._hash)

    def __repr__(self):
        hashkeys = [(hash, key) for key, hash in self._hash.iteritems()]
        if not hashkeys:
            return "{}"
        hashes = sorted(list(set(zip(*sorted(hashkeys))[0])))
        repr = []
        for hash in hashes:
            value = self._dict.get(hash).__repr__()
            keys = [x[1] for x in filter(lambda x:x[0]==hash, hashkeys)]
            if len(keys) == 1:
                key = keys[0].__repr__()
            else:
                key = "[%s]" % ', '.join([k.__repr__() for k in keys])
            repr.append('%s: %s' % (key, value))
        return "{%s}" % ', '.join(repr)

    def __setitem__(self, key, value=None):
        if isinstance(key, list):
            count = self._get_key_count(key)
            if count == 0:
                hash = self._get_index()
            elif count == 1:
                hashes = filter(None, map(self._get_hash, key))
                hash = hashes[0]
            else:
                raise KeyError("multiple keys already exist")
            for k in key:
                self._set_hash(k, hash)
            if value is not None or count == 0:
                self._dict.update({hash: value})
        else:
            hash = self._get_hash(key) or self._set_hash(key)
            self._dict.update({hash: value})

    def __sizeof__(self):
        return self._hash.__sizeof__() + self._dict.__sizeof__()

    def clear(self):
        self._hash = {}
        self._dict = {}
        self._index = 0

    def copy(self):
        c = MultiDict()
        for key, value in self.iteritems():
            c.__setitem__(key, value)
        return c

    def get(self, key, default=None):
        hash = self._get_hash(key)
        if not hash:
            return default
        return self._dict.get(hash)

    def has_key(self, key):
        return self._hash.has_key(key)

    def items(self):
        return [(key, value) for key, value in self.iteritems()]

    def iteritems(self):
        # Not really an iterator!
        return [(key, self.get(key)) for key in self.iterkeys()]

    def iterkeys(self):
        return self._hash.iterkeys()

    def itervalues(self):
        # Not really an iterator!
        return [self.get(key) for key in self.iterkeys()]

    def keys(self):
        return self._hash.keys()

    def pop(self, key, default="defaultpop"):
        hash = self._get_hash(key)
        if not hash:
            if default != "defaultpop":
                return default
            raise KeyError(key)
        value = self.get(key)
        self.__delitem__(key)
        return value

    def popitem(self):
        # self._hash.popitem() raises KeyError if empty, which is
        # exactly what we want our popitem to do - no try/except
        key, hash = self._hash.popitem()
        value = self.get(hash)
        return (key, value)

    def setdefault(self, key, default=None):
        if isinstance(key, list):
            hash = None
        else:
            hash = self._get_hash(key)
        if not hash:
            self.__setitem__(key, default)
        return self.get(key)

    # Not strictly following dict spec: D.update(E, **F)
    def update(self, other):
        # Only adding support for type(other)==dict, for now
        for key, value in other.iteritems():
            self.__setitem__(key, value)

    def values(self):
        # We do return duplicate values, to maintain compatibility
        # with normal dicts; see Python docs, "items()", regarding
        # ordering; namely, pairs = zip(d.values(), d.keys()) is
        # the same as pairs = zip(d.itervalues(), d.iterkeys()).
        # This is only possible if duplicate values are returned.
        return [value for key, value in self.iteritems()]

TrackBack URI

Leave a Reply

You must be logged in to post a comment.