"""Scenario analysis and plotting (implements the script
``diles-analyze-scenario``).

"""
from __future__ import division

from itertools import chain, combinations, izip, product, cycle, count
import optparse
import os
import re

from diles.scenario import load, PREPROCESSORS
from diles import stats
from diles.learn import Label
from diles.util import cachedproperty, readfile, writefile

# Index positions of the four order mapping types within the sequence
# returned by ``DisclosureStats.omc``.
OMCPRESERVING, OMCREVERSING, OMCIGNORING, OMCLOOSING = range(4)

def _doh(udiscs):
    """Degree of hierarchy of a set of disclosures.

    Indicates how likely a HML learner is to perform better than a ML
    learner on these disclosures.

    The value is the number of item pairs, taken from two *different*
    disclosures, where one item contains the other one, i.e. where one item
    equals the other item followed by ``"."`` and a suffix. Items within the
    same disclosure are never compared with each other.

    Examples:

    >>> _doh((["a"], ["b"], ["c"]))
    0
    >>> _doh((["a", "a.1"], ["b"], ["c"]))
    0
    >>> _doh((["a"], ["b", "a.1"], ["c"]))
    1
    >>> _doh((["a.1", "a.2"], ["b", "a.2"], ["c"]))
    0
    >>> _doh((["a.1", "a.2"], ["b", "a.2"], ["a", "c"]))
    3
    >>> _doh((["a", "c.c"], ["b", "a.a"], ["c"]))
    2
    """
    def nested(x, y):
        # true if one of the two (distinct) items is a dotted child of the other
        return x != y and (x.startswith(y + ".") or y.startswith(x + "."))

    return sum(1
               for left, right in combinations(udiscs, 2)
               for x, y in product(left, right)
               if nested(x, y))

class DisclosureStats(tuple):
    """Statistics about a list of disclosure situations.

    An instance is a tuple of the given situations. Every situation is
    accessed like a mapping providing a ``'disclosure'`` entry; :attr:`omc`
    additionally copies each situation and reads its ``'persons'`` entry.

    The statistics are exposed as properties created by ``cachedproperty``.

    """

    def __new__(cls, obj):
        """Create the tuple from the situations in `obj` and index them."""
        t = tuple.__new__(cls, obj)
        t._init() # pylint: disable-msg=W0212
        return t

    def __str__(self):
        """Return one ``description: value`` line for each statistic.

        The description of a statistic is the first line of its property's
        docstring, in lower case and without the trailing period.

        """
        cls = type(self)
        names = [x for x in dir(self)
                 if isinstance(getattr(cls, x, None), property)]
        lines = []
        for name in names:
            desc = getattr(cls, name).__doc__.split("\n")[0][:-1].lower()
            lines.append("%s: %s" % (desc, getattr(self, name)))
        # lines not mentioning "disc" come first, then "group" lines last
        # within each class, ties are broken alphabetically
        lines.sort(key=lambda x: ("disc" in x, "group" in x, x))
        return "\n".join(lines)

    def __repr__(self):
        """Same as :meth:`__str__`."""
        return self.__str__()

    def _init(self):
        """Precompute the disclosure sets all statistics are based on."""
        # disclosure of each situation (in order, with duplicates)
        self._discs = tuple(frozenset(x['disclosure']) for x in self)
        # the distinct disclosures
        self._udiscs = set(self._discs)

    @cachedproperty
    def ud(self):
        """Number of unique disclosures."""
        return len(self._udiscs)

    @cachedproperty
    def udi(self):
        """Number of unique disclosure items."""
        return len(set(chain(*self._udiscs)))

    @cachedproperty
    def pw(self):
        """Poset width of disclosures."""
        return stats.posetwidth(self._udiscs)

    @cachedproperty
    def duc(self):
        """Disclosure usage counts."""
        return sorted(self._discs.count(d) for d in self._udiscs)

    @cachedproperty
    def duo(self):
        """Number of disclosures used once."""
        return self.duc.count(1)

    @cachedproperty
    def dum(self):
        """Number of disclosures used multiple times."""
        return len([x for x in self.duc if x > 1])

    @cachedproperty
    def omc(self):
        """Order mapping counts (preserving, reversing, ignoring, loosing).

        Number of order mapping types in percent for disclosure situation pairs
        with unequal but comparable person groups and identical other context
        information, i.e. situations where only the person groups differs.

        Order mappings are defined as follows:

        - preserving: ``g1 > g2 and d1 > d2``
        - reversing: ``g1 > g2 and d1 < d2``
        - ignoring: ``g1 > g2 and d1 == d2``
        - loosing: ``g1 > g2 and d1 || d2``

        The percentages in the returned list follow the index numbers
        given by :const:`OMCPRESERVING`, :const:`OMCREVERSING`,
        :const:`OMCIGNORING`, and :const:`OMCLOOSING`.

        If no pair of situations qualifies, all four percentages are ``0.0``.

        """
        counts = [0, 0, 0, 0]
        for s1, s2 in combinations(self, 2):
            # work on copies, popping entries must not alter the situations
            s1, s2 = s1.copy(), s2.copy()
            g1, g2 = frozenset(s1.pop('persons')), frozenset(s2.pop('persons'))
            d1, d2 = Label(s1.pop('disclosure')), Label(s2.pop('disclosure'))
            if s1 != s2:
                # the remaining context information differs
                continue
            if g1 > g2:
                if d1 > d2:
                    kind = OMCPRESERVING
                elif d1 < d2:
                    kind = OMCREVERSING
                elif d1 == d2:
                    kind = OMCIGNORING
                else:
                    kind = OMCLOOSING
            elif g1 < g2:
                if d1 < d2:
                    kind = OMCPRESERVING
                elif d1 > d2:
                    kind = OMCREVERSING
                elif d1 == d2:
                    kind = OMCIGNORING
                else:
                    kind = OMCLOOSING
            else:
                # equal or incomparable person groups
                continue
            counts[kind] += 1
        n = sum(counts)
        if n == 0:
            # nothing to relate the counts to, avoid a division by zero
            return [0.0] * len(counts)
        # true division, this module imports ``division`` from ``__future__``
        return [x*100/n for x in counts]

    @cachedproperty
    def doh(self):
        """Degree of hierarchy of disclosures."""
        return _doh(self._udiscs)

# =============================================================================
# plot
# =============================================================================

def plot(dstats, fname):
    """Plot disclosure statistics as a bar chart and save it as PNG and PDF.

    Four bars are drawn: the order mapping percentages (stacked, the only
    bar with legend entries), the disclosure usage counts (stacked), the
    number of unique disclosures and the poset width of the disclosures.

    :param dstats: statistics to plot; its attributes ``omc``, ``duc``,
        ``ud`` and ``pw`` are read (see :class:`DisclosureStats`)
    :param fname: path of the image files to write, without extension
        (``.png`` and ``.pdf`` get appended)

    """

    from matplotlib import pyplot as plt
    from diles.scenario.plot import COLORS
    LEGENDFONT = dict(size='small')
    BARWIDTH = 0.8

    plots = []
    legends = []
    xlabels = []
    indices = count(0)
    plt.figure(1, figsize=(3.3,5))

    def plotbar(bname, items, styles, stack=True, legend=True):
        """Draw the bar named `bname` at the next free x position.

        `items` are ``(name, value)`` pairs, `styles` the corresponding
        ``(hatch, color)`` pairs. With `stack` the values are stacked on top
        of each other, otherwise each value is drawn from zero with a
        narrower bar. With `legend` every item gets a legend entry.

        """

        def texbarname(s):
            # spaces become LaTeX line breaks, underscores become spaces
            s = s.replace(" ", r"\\").replace("_", " ")
            return r"\begin{flushright}%s\end{flushright}" % s

        width = BARWIDTH
        bottom = 0
        ind = next(indices)
        # izip (not zip): `styles` may be an endless iterator
        for (name, val), (hatch, color) in izip(items, styles):
            bars = plt.bar([ind], [val], bottom=bottom, width=width, linewidth=0.7,
                           color=COLORS[color], hatch=hatch,
                           edgecolor=COLORS[color, 0.5])
            if stack:
                bottom += val
            else:
                width *= 0.75
            if legend:
                plots.append(bars[0])
                legends.append(name)
        xlabels.append(texbarname(bname))

    styles = product([None], ['visio-1', ('visio-2', 0.85), ('visio-2', 1.15), 'visio-4'])
    items = zip(['preserving', 'reversing', 'ignoring', 'loosing'],
                dstats.omc)
    plotbar(r"mapping types_in_\%", items, styles)
    # list the legend entries top-down, i.e. in reverse stacking order
    legends.reverse()
    plots.reverse()

    styles = cycle([(None, ('visio-3', 1.2))])
    items = [('duc', x) for x in dstats.duc]
    plotbar("usage counts", items, styles, legend=False)

    styles = [(None, ('visio-5', 1.0))]
    items = [('ud', dstats.ud)]
    plotbar("unique disclosures", items, styles, stack=False, legend=False)

    styles = [(None, ('visio-5', 1.4))]
    items = [('pw', dstats.pw)]
    plotbar("poset width", items, styles, stack=False, legend=False)

    # ylim() yields floats but range() only accepts integers
    plt.yticks(range(0, int(plt.ylim()[1]) + 1, 10))
    plt.xticks([i + BARWIDTH/2 for i in range(len(xlabels))], xlabels, rotation=90)
    plt.title("Disclosure analysis")
    plt.legend(plots, legends, loc='upper right', prop=LEGENDFONT)

    # style the axes the bars have been drawn on
    ax = plt.gca()
    ax.yaxis.grid(color='lightgrey', alpha=0.5, zorder=-1, linestyle="solid")
    ax.set_axisbelow(True)

    # save
    plt.savefig(fname + ".png", bbox_inches='tight')
    plt.savefig(fname + ".pdf", bbox_inches='tight')
    plt.close()

# =============================================================================
# info template
# =============================================================================

# Template of the scenario info text; it is filled with the %-operator using
# a mapping with the keys ``desc``, ``subjects``, ``sitspersubject`` and
# ``file``.
SCENARIOTEMPLATE = """
%(desc)s

Statistics
----------

-   number of subjects: `%(subjects)s`
-   situations per subject (min, mean, max): `%(sitspersubject)s`

Scenario File
-------------

    %(file)s
"""

# =============================================================================
# command line interface
# =============================================================================

def options():
    """Parse and validate the command line options.

    Exits with a usage error if positional arguments are given or if one of
    the options ``--scenario`` and ``--dest`` is missing.

    :return: the parsed options, providing ``scenario`` and ``dest``

    """
    parser = optparse.OptionParser("%prog -s FILE -d DIR")
    parser.add_option("-s", "--scenario", metavar="FILE",
                      help="scenario file to analyze")
    parser.add_option("-d", "--dest", metavar="DIR",
                      help="destination directory to put plot images into")
    parsed, leftover = parser.parse_args()

    if leftover:
        parser.error("invalid arguments")

    # required options in the order they are checked
    required = (
        ("scenario", "need a scenario file to analyze"),
        ("dest", "need a destination for plot files"),
    )
    for attr, message in required:
        if getattr(parsed, attr) is None:
            parser.error(message)

    return parsed

def main():
    """Command line entry point.

    Plots the disclosure statistics of each subject found in the scenario
    file into the destination directory and writes a ``scenario-info.md``
    file, rendered from :const:`SCENARIOTEMPLATE`, next to the plots.

    """
    opts = options()

    # the description is read from the scenario's ``.desc`` sibling file
    descpath = re.sub(r'\.yaml$', '.desc', opts.scenario)
    description = readfile(descpath)
    # indent continuation lines like the template's ``%(file)s`` placeholder
    source = readfile(opts.scenario).replace("\n", "\n    ")

    # only the functions of the first registered preprocessor get applied
    _ppname, ppfuncs = next(PREPROCESSORS.iteritems())

    sitcounts = {}
    for subject, sits in load(opts.scenario).iteritems():
        sitcounts[subject] = len(sits)
        fname = os.path.join(opts.dest,
                             "scenario-analysis.subject-%s" % subject)
        print("- %s" % fname)
        for ppf in ppfuncs:
            sits = ppf(sits)
        plot(DisclosureStats(sits), fname)

    nl = sitcounts.values()
    context = {
        'desc': description,
        'file': source,
        'subjects': len(sitcounts),
        'sitspersubject': (min(nl), stats.mean(nl), max(nl)),
    }

    writefile(os.path.join(opts.dest, "scenario-info.md"),
              SCENARIOTEMPLATE % context)

# run the command line interface when this module is executed as a script
if __name__ == '__main__':
    main()

# =============================================================================
# tests
# =============================================================================

# Scenario definition which the doctests in ``__doctests`` pass to ``load``.
_TESTSCENARIO = """
- subject: S1
  trigger: T1
  purpose: P1
  modalities: [M1]
  labels: [L1]

- repeat: 2
  persons: [S2]
  disclosure:
    - M1:a.1
    - M1:c

- repeat: 1
  persons: [S3]
  labels: [L2]
  disclosure:
    - M1:a
    - M1:c
    - M1:d

- repeat: 1
  persons: [S2, S3]
  disclosure:
    - M1:c

- repeat: 1
  persons: [S2, S3, S4]
  disclosure:
    - M1:c
    - M1:d

- repeat: 1
  persons: [S2, S3, S5]
  disclosure:
    - M1:a.1
    - M1:d

"""

def __doctests():
    """Doctests for :class:`DisclosureStats` based on ``_TESTSCENARIO``.

    The order mapping counts are percentages: of the 8 comparable situation
    pairs 1 is preserving, 2 are reversing, 0 are ignoring and 5 are loosing.

    >>> from diles.scenario import load
    >>> sitspersubject = load(_TESTSCENARIO)
    >>> s = DisclosureStats(sitspersubject['S1'])
    >>> s
    order mapping counts (preserving, reversing, ignoring, loosing): [12.5, 25.0, 0.0, 62.5]
    degree of hierarchy of disclosures: 2
    disclosure usage counts: [1, 1, 1, 1, 2]
    number of disclosures used multiple times: 1
    number of disclosures used once: 4
    number of unique disclosure items: 4
    number of unique disclosures: 5
    poset width of disclosures: 3
    """