"""Miscellaneous utilities for scenario evaluation."""

from __future__ import division

from itertools import product, izip
from hashlib import md5
import optparse
import os
import shelve
import shutil

def dictokey(dic):
    """Convert a dictionary to a key usable for a :class:`ResultDB`.

    The key is the hexadecimal MD5 digest of the string representation of the
    sorted item list of `dic`. Anything without an ``items`` method is assumed
    to be an already sorted list of key-value-tuples and is used as it is (so
    iterators have to be turned into a list by the caller).

    >>> dictokey({'a': 1, 'b': 2, 'c': 4})
    'ddfce457873ac799ea0b80470dd6a8c6'

    Keys are independent of item order:

    >>> k1 = dictokey({'a': 1, 'c': 2, 'b': 3})
    >>> k1
    '497fd5957b5df58f5b4094feec7ff849'
    >>> k1 == dictokey({'a': 1, 'b': 3, 'c': 2})
    True

    `dic` may also be an already sorted list of key-value-tuples:

    >>> k1 == dictokey([('a', 1), ('b', 3), ('c', 2)])
    True

    Tuples don't work:

    >>> k1 == dictokey((('a', 1), ('b', 3), ('c', 2)))
    False

    Also, item lists must be sorted to yield the same key:

    >>> k1 == dictokey([('a', 1), ('c', 2), ('b', 3)])
    False

    """
    # Only the attribute lookup is guarded, so an AttributeError raised while
    # sorting the items is not mistaken for "not a dict".
    try:
        pairs = dic.items()
    except AttributeError: # not a dict, assume an already sorted items list
        items = dic
    else:
        items = sorted(pairs)
    text = str(items)
    if not isinstance(text, bytes):
        # Python 3: str is text but hashlib only accepts bytes. On Python 2
        # str already is bytes and is hashed unchanged.
        text = text.encode('utf-8')
    return md5(text).hexdigest()

class ResultDB(object):
    """Evaluation results database.

    This is a thin wrapper around Python's *shelve* module. The main purpose is
    to efficiently access results without the need to load them all into memory.

    A result is a tuple whose first item is a dictionary of meta information.
    It is stored under the key :func:`dictokey` yields for that dictionary.
    All results of one database must use the same set of meta keys. The values
    seen for each meta key are collected in a keymap, which is stored as item
    ``__keymap__`` when a writable database is closed.

    >>> rdb = ResultDB("/var/tmp/rdb", flag='n')
    >>> rdb.add(({'a': 0, 'b': 0}, "r1"))
    >>> rdb.add(({'a': 0, 'b': 1}, "r2"))
    >>> rdb.add(({'a': 1, 'b': 0}, "r3"))
    >>> sorted(rdb.filter({'a': [0]}), key=lambda r: r[1])
    [({'a': 0, 'b': 0}, 'r1'), ({'a': 0, 'b': 1}, 'r2')]
    >>> rdb.close()

    """
    def __init__(self, fname, flag='r'):
        """The database is read from respectively written to `fname`. By
        default it is opened in *read-only* mode, i.e. `flag` is *r*.
        A flag of *w* opens a database in read-write mode while *n* always
        creates a new database.

        """
        self._flag = flag
        if self._flag == 'n' and os.path.exists(fname):
            os.remove(fname)
        self._db = shelve.open(fname, flag=self._flag, protocol=2)
        # maps each meta key to the set of values seen for it
        self._km = self._db.get('__keymap__', {})

    def all(self):
        """Returns an iterator over all results.

        Items whose key starts or ends with a double underscore (the keymap
        and extra information) are skipped.

        """
        def isresultkey(k):
            return not k.startswith("__") and not k.endswith("__")

        # iterating the shelf itself yields its keys lazily
        return (self._db[k] for k in self._db if isresultkey(k))

    def filter(self, mask):
        """Returns an iterator over the results yielded by the meta information
        mask `mask` (a positive filter specifying allowed values for certain
        keys).

        `mask` maps meta keys to iterables of allowed values. For keys missing
        in `mask` all values known from the keymap are allowed.

        Keys not known (according to the global keymap) are ignored, i.e. not
        used for filtering.

        """
        if not self._km:
            return # no meta keys known (empty database), nothing can match
        # complete the mask with all known values for keys it does not restrict
        full = dict(self._km)
        full.update((k, vl) for k, vl in mask.items() if k in self._km)
        mitems = sorted((k, tuple(vl)) for k, vl in full.items())
        mkeys, mvlists = zip(*mitems)
        for mvalues in product(*mvlists):
            # dictokey needs a real (sorted) list, not an iterator
            key = dictokey(list(zip(mkeys, mvalues)))
            try:
                result = self._db[key]
            except KeyError:
                continue # not every possible meta must exist
            yield result

    def get(self, meta):
        """Returns the specific result identified by the meta information
        `meta`. Raises a :exc:`KeyError` if there is no such result.

        """
        key = dictokey(meta)
        return self._db[key]

    def add(self, result):
        """Add a new result.

        `result` is a tuple with the meta information dictionary as first
        item. An existing result with the same meta information is replaced.
        Raises an :exc:`AssertionError` if the database is read-only or if the
        meta keys differ from those of the results already stored.

        """
        # raised explicitly (instead of using assert) to stay active with -O
        if self._flag == 'r':
            raise AssertionError("database is opened read-only")
        meta = sorted(result[0].items())
        if self._km and set(self._km) != set(k for k, _ in meta):
            raise AssertionError("meta keys do not match those of the database")
        for k, v in meta:
            self._km.setdefault(k, set()).add(v)
        key = dictokey(meta)
        self._db[key] = result

    def extend(self, results):
        """Add a list of results."""

        for result in results:
            self.add(result)

    def close(self):
        """Close the result database, writing changes back to disk."""

        if self._flag != 'r':
            # the keymap is only kept in memory until now
            self._db['__keymap__'] = self._km
        self._db.close()

    def xget(self, key):
        """Get some extra information, i.e. an item which is not a result."""
        return self._db["__%s" % key]

    def xadd(self, key, value):
        """Add some extra information, i.e. an item which is not a result."""
        self._db["__%s" % key] = value

def join(rdbl, fname):
    """Join two or more result databases.

    Source DBs are given by their filenames in `rdbl`. The new joined DB is
    saved using file `fname`.

    Results available in one DB but not in another are okay (i.e. both will be
    in the joined DB) but probably this is not useful in case of results from
    randomized order validations, as both DBs probably used different
    randomized orders.

    If a result is available in both DBs and in case it refers to a randomized
    order validation, the corresponding performance lists are joined. The extra
    information is not joined, i.e. the one from the first DB listed in `rdbl`
    is used. In all other cases, i.e. results from original order validations,
    results are not joined (they should be identical at all) but the one from
    the first DB listed in `rdbl` is used.

    Each result is expected to be a 3-tuple of meta information (with an
    ``order`` item), a list of performance lists and some extra information.

    """
    # the first DB is the base of the joined one, the others are merged into it
    shutil.copy(rdbl[0], fname)
    jdb = ResultDB(fname, flag='w')
    try:
        for srcname in rdbl[1:]:
            src = ResultDB(srcname)
            try:
                for meta, pfl, xinfo in src.all():
                    try:
                        jmeta, jpfl, jxinfo = jdb.get(meta)
                    except KeyError:
                        jdb.add((meta, pfl, xinfo))
                        continue # a new result
                    if meta['order'] != 'randomized':
                        continue # do not duplicate results from original order
                    # join performance lists from randomized orders
                    for pf, jpf in zip(pfl, jpfl):
                        jpf.extend(pf)
                    # store the result again to persist the extended lists
                    jdb.add((jmeta, jpfl, jxinfo))
            finally:
                src.close() # do not leak the source DB, even on errors
    finally:
        jdb.close()


# =============================================================================
# command line interface
# =============================================================================

def options(argv=None):
    """Parse and validate the command line options.

    `argv` is the list of arguments to parse. By default (``None``) the
    arguments of the running process, i.e. ``sys.argv[1:]``, are used.

    Returns the parsed options, providing the list of source database file
    names as ``source`` and the destination file name as ``destination``.
    On invalid usage (positional arguments, less than two sources or no
    destination) the parser reports the error and exits the process.

    """
    op = optparse.OptionParser("%prog -s FILE [-s FILE ...] -d FILE")
    op.add_option("-s", "--source", action="append",
                  help="source database [+]", metavar="FILE")
    op.add_option("-d", "--destination",
                  help="destination for joined database", metavar="FILE")
    opts, args = op.parse_args(argv)

    if args:
        op.error("invalid arguments")
    # opts.source is None if no -s option was given at all
    if len(opts.source or ()) < 2:
        op.error("need at least 2 source database")
    if not opts.destination:
        op.error("need a destination file name")

    return opts

def main():
    """Command line entry point: join the source DBs into the destination."""
    cli = options()
    join(cli.source, cli.destination)

# Run the command line interface only when executed as a script, not when the
# module is imported.
if __name__ == '__main__':
    main()

# =============================================================================
# tests
# =============================================================================

def __doctests_resultdb():
    """Doctests for :class:`ResultDB`; this function has no body besides them.

    The tests create (and finally reset) the database ``/var/tmp/rdb``.

    >>> rdb = ResultDB("/var/tmp/rdb", flag='n')
    >>> tuple(rdb.all())
    ()
    >>> rdb.add(({'a': 0, 'b': 0}, "r1"))
    >>> tuple(rdb.all())
    (({'a': 0, 'b': 0}, 'r1'),)
    >>> rdb.get({'a': 0, 'b': 0})
    ({'a': 0, 'b': 0}, 'r1')
    >>> rdb.add(({'a': 0, 'b': 1}, "r2"))
    >>> rdb.add(({'a': 1, 'b': 1}, "r3"))
    >>> tuple(rdb.filter({'b': [1]}))
    (({'a': 0, 'b': 1}, 'r2'), ({'a': 1, 'b': 1}, 'r3'))
    >>> rdb.add(({'a': 1}, "r4")) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    AssertionError
    >>> rdb.extend([({'a': 1, 'b': 0}, "r4"), ({'a': 2, 'b': 1}, "r5")])
    >>> sorted(x[1] for x in rdb.all())
    ['r1', 'r2', 'r3', 'r4', 'r5']

    Add some extra items and check if all() hides them:

    >>> rdb.xadd('foo', 'bar')
    >>> sorted(x[1] for x in rdb.all())
    ['r1', 'r2', 'r3', 'r4', 'r5']
    >>> rdb.xget('foo')
    'bar'
    >>> sorted(k for k in rdb._db.iterkeys() if k.startswith("__"))
    ['__foo']
    >>> rdb.close()

    >>> rdb = ResultDB("/var/tmp/rdb")
    >>> sorted(k for k in rdb._db.iterkeys() if k.startswith("__"))
    ['__foo', '__keymap__']
    >>> sorted(x[1] for x in rdb.all())
    ['r1', 'r2', 'r3', 'r4', 'r5']
    >>> tuple(rdb.filter({'b': [1], 'a': [0,5]}))
    (({'a': 0, 'b': 1}, 'r2'),)
    >>> rdb.add(({'a': 1, 'b': 0}, "rx")) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    AssertionError
    >>> rdb.close()

    >>> rdb = ResultDB("/var/tmp/rdb", flag='n')
    >>> tuple(rdb.all())
    ()
    >>> rdb.close()
    """

def __doctests_join():
    """Doctests for :func:`join`; this function has no body besides them.

    Three source databases ``/var/tmp/rdb1`` to ``/var/tmp/rdb3`` are created
    and joined into ``/var/tmp/rdb4``.

    >>> fn1, fn2, fn3, fn4 = ("/var/tmp/rdb%d" % i for i in (1,2,3,4))
    >>> rdb = ResultDB(fn1, flag='n')
    >>> rdb.add(({'a': 0, 'b': 0, 'order': "randomized"}, [[1], [10], [100]], "x"))
    >>> rdb.add(({'a': 0, 'b': 1, 'order': "randomized"}, [[2], [20], [200]], "x"))
    >>> rdb.close()
    >>> rdb = ResultDB(fn2, flag='n')
    >>> rdb.add(({'a': 0, 'b': 0, 'order': "randomized"}, [[1], [10], [100]], "x"))
    >>> rdb.add(({'a': 1, 'b': 1, 'order': "randomized"}, [[3], [30], [300]], "x"))
    >>> rdb.close()
    >>> rdb = ResultDB(fn3, flag='n')
    >>> rdb.add(({'a': 0, 'b': 0, 'order': "original"}, [[1], [10], [100]], "x"))
    >>> rdb.add(({'a': 1, 'b': 1, 'order': "randomized"}, [[3], [30], [300]], "x"))
    >>> rdb.close()

    >>> join([fn1, fn2, fn3], fn4)
    >>> rdb = ResultDB(fn4)
    >>> for result in sorted(rdb.all()):
    ...    print sorted(result[0].items()), " ".join(repr(x) for x in result[1:])
    [('a', 0), ('b', 0), ('order', 'original')] [[1], [10], [100]] 'x'
    [('a', 0), ('b', 0), ('order', 'randomized')] [[1, 1], [10, 10], [100, 100]] 'x'
    [('a', 0), ('b', 1), ('order', 'randomized')] [[2], [20], [200]] 'x'
    [('a', 1), ('b', 1), ('order', 'randomized')] [[3, 3], [30, 30], [300, 300]] 'x'
    """