"""Scenario analysis and plotting (implements the script ``diles-analyze-scenario``). """ from __future__ import division from itertools import chain, combinations, izip, product, cycle, count import optparse import os import re from diles.scenario import load, PREPROCESSORS from diles import stats from diles.learn import Label from diles.util import cachedproperty, readfile, writefile OMCPRESERVING, OMCREVERSING, OMCIGNORING, OMCLOOSING = range(4) def _doh(udiscs): """Degree if hierarchy of a set of disclosures. Expresses a degree of how much a HML learner is likely to perform better than a ML learner. The _doh is the number of cases where an item of one disclosure contains an item of another disclosure. Examples: >>> _doh((["a"], ["b"], ["c"])) 0 >>> _doh((["a", "a.1"], ["b"], ["c"])) 0 >>> _doh((["a"], ["b", "a.1"], ["c"])) 1 >>> _doh((["a.1", "a.2"], ["b", "a.2"], ["c"])) 0 >>> _doh((["a.1", "a.2"], ["b", "a.2"], ["a", "c"])) 3 >>> _doh((["a", "c.c"], ["b", "a.a"], ["c"])) 2 """ tness = 0 for d1, d2 in combinations(udiscs, 2): for i1, i2 in product(d1, d2): if i1 != i2 and (i1.startswith(i2 + ".") or i2.startswith(i1 + ".")): tness +=1 return tness class DisclosureStats(tuple): """Statistics about a list of disclosure situations.""" def __new__(cls, obj): t = tuple.__new__(cls, obj) t._init() # pylint: disable-msg=W0212 return t def __str__(self): isprop = lambda x: type(getattr(DisclosureStats, x, None)) == property getdoc = lambda x: getattr(self.__class__, x).__doc__.split("\n")[0][:-1].lower() props = (x for x in dir(self) if isprop(x)) skey = lambda x: ("disc" in x, "group" in x, x) spvl = sorted(("%s: %s" % (getdoc(x), getattr(self, x)) for x in props), key=skey) return "\n".join(spvl) def __repr__(self): return self.__str__() def _init(self): self._discs = tuple(frozenset(x['disclosure']) for x in self) self._udiscs = set(self._discs) @cachedproperty def ud(self): """Number of unique disclosures.""" return len(self._udiscs) @cachedproperty def udi(self): """Number of unique disclosure items.""" return len(set(chain(*self._udiscs))) @cachedproperty def pw(self): """Poset width of disclosures.""" return stats.posetwidth(self._udiscs) @cachedproperty def duc(self): """Disclosure usage counts.""" return sorted(self._discs.count(d) for d in self._udiscs) @cachedproperty def duo(self): """Number of disclosures used once.""" return self.duc.count(1) @cachedproperty def dum(self): """Number of disclosures used multiple times.""" return len([x for x in self.duc if x > 1]) @cachedproperty def omc(self): """Order mapping counts (preserving, reversing, ignoring, loosing). Number of order mapping types in percent for disclosure situation pairs with unequal but comparable person groups and identical other context information, i.e. situations where only the person groups differs. Order mappings are defined as follows: - preserving: ``g1 > g2 and d1 > d2`` - reversing: ``g1 > g2 and d1 < d2`` - ignoring: ``g1 > g2 and d1 == d2`` - loosing: ``g1 > g2 and d1 || d2`` The order mapping counts in the returned tuple follow the index numbers given by :const:`OMCPRESERVING`, :const:`OMCREVERSING`, :const:`OMCIGNORING`, and :const:`OMCLOOSING`. """ odp, odr, odi, odl = 0, 0, 0, 0 for s1, s2 in combinations(self, 2): s1, s2 = s1.copy(), s2.copy() g1, g2 = frozenset(s1.pop('persons')), frozenset(s2.pop('persons')) d1, d2 = Label(s1.pop('disclosure')), Label(s2.pop('disclosure')) if s1 != s2: continue if g1 > g2: if d1 > d2: odp += 1 elif d1 < d2: odr += 1 elif d1 == d2: odi += 1 else: odl += 1 elif g1 < g2: if d1 < d2: odp += 1 elif d1 > d2: odr += 1 elif d1 == d2: odi += 1 else: odl += 1 counts = (odp, odr, odi, odl) n = sum(counts) return [x*100/n for x in counts] #return odp, odr, odi, odl @cachedproperty def doh(self): """Degree of hierarchy of disclosures.""" return _doh(self._udiscs) # ============================================================================= # plot # ============================================================================= def plot(dstats, fname): from matplotlib import pyplot as plt from diles.scenario.plot import COLORS LEGENDFONT = dict(size='small') plots = [] legends = [] xlabels = [] indices = count(0) plt.figure(1, figsize=(3.3,5)) def plotbar(bname, items, styles, stack=True, legend=True): def texbarname(s): s = s.replace(" ", r"\\").replace("_", " ") return r"\begin{flushright}%s\end{flushright}" % s width = 0.8 bottom = 0 ind = indices.next() for (name, val), (hatch, color) in izip(items, styles): bars = plt.bar([ind], [val], bottom=bottom, width=width, linewidth=0.7, color=COLORS[color], hatch=hatch, edgecolor=COLORS[color, 0.5]) #hatch=hatch, width=width, color=COLORS[color], #ecolor=COLORS[errcolor], edgecolor=COLORS[edgecolor]) if stack: bottom += val else: width *= 0.75 if legend: plots.append(bars[0]) legends.append(name) xlabels.append(texbarname(bname)) styles = product([None], ['visio-1', ('visio-2', 0.85), ('visio-2', 1.15), 'visio-4']) items = zip(['preserving', 'reversing', 'ignoring', 'loosing'], dstats.omc) plotbar(r"mapping types_in_\%", items, styles) legends = list(reversed(legends)) plots = list(reversed(plots)) styles = cycle([(None, ('visio-3', 1.2))]) items = [('duc', x) for x in dstats.duc] plotbar("usage counts", items, styles, legend=False) styles = [(None, ('visio-5', 1.0))] items = [('ud', dstats.ud)] plotbar("unique disclosures", items, styles, stack=False, legend=False) styles = [(None, ('visio-5', 1.4))] items = [('pw', dstats.pw)] plotbar("poset width", items, styles, stack=False, legend=False) plt.yticks(range(0, plt.ylim()[1]+1,10)) plt.xticks([i + 0.8/2 for i in range(len(xlabels))], xlabels, rotation=90) plt.title("Disclosure analysis") plt.legend(plots, legends, loc='upper right', prop=LEGENDFONT) plt.axes().yaxis.grid(color='lightgrey', alpha=0.5, zorder=-1, linestyle="solid") plt.axes().set_axisbelow(True) # save plt.savefig(fname + ".png", bbox_inches='tight') plt.savefig(fname + ".pdf", bbox_inches='tight') plt.close() # ============================================================================= # info template # ============================================================================= SCENARIOTEMPLATE = """ %(desc)s Statistics ---------- - number of subjects: `%(subjects)s` - situations per subject (min, mean, max): `%(sitspersubject)s` Scenario File ------------- %(file)s """ # ============================================================================= # command line interface # ============================================================================= def options(): op = optparse.OptionParser("%prog -s FILE -d DIR") op.add_option("-s", "--scenario", metavar="FILE", help="scenario file to analyze") op.add_option("-d", "--dest", metavar="DIR", help="destination directory to put plot images into") opts, args = op.parse_args() if args: op.error("invalid arguments") if opts.scenario is None: op.error("need a scenario file to analyze") if opts.dest is None: op.error("need a destination for plot files") return opts def main(): opts = options() sdesc = readfile(re.sub(r'\.yaml$', '.desc', opts.scenario)) scode = readfile(opts.scenario) scode = "\n ".join(scode.split("\n")) nsitspersubject = {} _ppname, ppfuncs = PREPROCESSORS.iteritems().next() for subject, sits in load(opts.scenario).iteritems(): nsitspersubject[subject] = len(sits) fname = "scenario-analysis.subject-%s" % subject fname = os.path.join(opts.dest, fname) print "- %s" % fname ppsits = sits for ppf in ppfuncs: ppsits = ppf(ppsits) plot(DisclosureStats(ppsits), fname) nl = nsitspersubject.values() nsits = min(nl), stats.mean(nl), max(nl) context = dict(desc=sdesc, file=scode, subjects=len(nsitspersubject), sitspersubject=nsits) md = SCENARIOTEMPLATE % context writefile(os.path.join(opts.dest, "scenario-info.md"), md) if __name__ == '__main__': main() # ============================================================================= # tests # ============================================================================= _TESTSCENARIO = """ - subject: S1 trigger: T1 purpose: P1 modalities: [M1] labels: [L1] - repeat: 2 persons: [S2] disclosure: - M1:a.1 - M1:c - repeat: 1 persons: [S3] labels: [L2] disclosure: - M1:a - M1:c - M1:d - repeat: 1 persons: [S2, S3] disclosure: - M1:c - repeat: 1 persons: [S2, S3, S4] disclosure: - M1:c - M1:d - repeat: 1 persons: [S2, S3, S5] disclosure: - M1:a.1 - M1:d """ def __doctests(): """ >>> from diles.scenario import load >>> sitspersubject = load(_TESTSCENARIO) >>> s = DisclosureStats(sitspersubject['S1']) >>> s order mapping counts (preserving, reversing, ignoring, loosing): (1, 2, 0, 5) degree of hierarchy of disclosures: 2 disclosure usage counts: [1, 1, 1, 1, 2] number of disclosures used multiple times: 1 number of disclosures used once: 4 number of unique disclosure items: 4 number of unique disclosures: 5 poset width of disclosures: 3 """