"""Command-line tool for inspecting results stored in a shelve database.

Supported actions (see `options`): list, query, dump, analyze, graph, plot
and delete results.  The code is written to run on both Python 2 and
Python 3.
"""

import colorsys
import shelve
import optparse
from os.path import join

from dihabs.gtools import gbins, gset, gsize
from dihabs.util import meanstdv


class colmap(dict):
    """
    Dictionary for mapping color names to `#`-prefixed RGB hex values.
    When accessing an item, the key may be a tuple of the color name and a
    lightness scale parameter (a float).  The base of such a tuple may itself
    be a scaled key; scaled values are cached in the dictionary.

    >>> cm = colmap(green="#8ae234", blue="#1101d0")
    >>> cm['green']
    '#8ae234'
    >>> cm['green', 1.2]
    '#a6e965'
    >>> cm['green', 0.8]
    '#6ec31c'
    >>> cm[('green', 0.8), 0.8] == cm['green', 0.8 * 0.8]
    True
    >>> colmap._lscale("#aaaaaa", 1.2)
    '#cccccc'
    """

    @staticmethod
    def _lscale(rgb, adjust):
        """Return hex color `rgb` with its HLS lightness multiplied by `adjust`.

        The resulting lightness is clamped to the range [0, 1].
        """
        rgb = rgb.lstrip("#")
        channels = [int(rgb[2 * i:2 * i + 2], 16) / 255.0 for i in range(3)]
        h, l, s = colorsys.rgb_to_hls(*channels)
        # clamp, otherwise a negative factor would yield invalid hex digits
        l = min(1.0, max(0.0, l * adjust))
        scaled = [int(round(x * 255)) for x in colorsys.hls_to_rgb(h, l, s)]
        return "#%s" % "".join(["%02x" % x for x in scaled])

    def __getitem__(self, col):
        """Return the color for `col`, a name or a `(base, lightness)` tuple.

        Raises KeyError for unknown names and for tuples that are not pairs.
        """
        try:
            return super(colmap, self).__getitem__(col)
        except KeyError:
            pass
        if not isinstance(col, tuple) or len(col) != 2:
            raise KeyError(col)
        basecol, lightness = col
        # Resolve the base through self[...] so that a nested scaled key
        # works even if the inner key has not been computed and cached yet.
        val = colmap._lscale(self[basecol], lightness)
        self[col] = val
        return val


_COLORS = colmap({
    'green': "#8ae234",
    'red1': '#cc0000',
    'red2': '#5b0000',
    'orange': '#f57900',
    'blue': '#5a8ec7',
    'yellow': '#fce94f',
    'purple': '#6d4b73',
    'visio-1': "#ea9651",  # orange
    'visio-2': "#b3c283",  # green
    'visio-3': "#6b9bc7",  # blue-light
    'visio-4': "#4e66b2",  # blue-dark
    'visio-5': "#8976ac",  # purple
    'black': "#000000",
    'gray': "#888888",
})


def _results(rdb):
    """Return all `(usid, result)` items of shelve file `rdb`, sorted by the
    results' 'start' values."""
    rd = shelve.open(rdb, protocol=2, flag='r')
    try:
        items = list(rd.items())
    finally:
        rd.close()  # close the shelve even if reading fails
    # sorted() is stable, so this orders exactly like the former cmp function
    return sorted(items, key=lambda item: item[1]['start'])


def _result(rdb, n=-1, usid=""):
    """Return a single `(usid, result)` item from shelve file `rdb`.

    If `n` is non-negative, the item at position `n` of the start-sorted
    result list is returned and `usid` is ignored.  Otherwise the item whose
    usid starts with `usid` is returned.

    Raises LookupError if no usid or more than one usid matches, and
    IndexError if `n` is out of bounds.
    """
    rl = _results(rdb)
    if n < 0:
        n = None
        for i, item in enumerate(rl):
            if item[0].startswith(usid):
                if n is not None:
                    raise LookupError("usid %s is not unique" % usid)
                n = i
    if n is None:
        raise LookupError("no result with usid %s" % usid)
    if n >= len(rl):
        raise IndexError("result number is out of bounds")
    return rl[n]


def delete(rdb, n=-1, usid=""):
    """Delete the result selected by `n` or `usid` (see `_result`) from `rdb`."""
    usid, _ = _result(rdb, n, usid)
    rd = shelve.open(rdb, protocol=2, flag='c')
    try:
        del rd[usid]
    finally:
        rd.close()


def query(rdb, xtests, field):
    """Print the value of a dotted `field` path for every result in `rdb`.

    Each path element is used as a dict key, or as an integer index if the
    current value is not a dict.  Results flagged as 'test' are skipped if
    `xtests` is true.  None is printed if a dict key is missing.
    """
    elems = field.split(".")
    for usid, result in _results(rdb):
        if xtests and result.get('test', False):
            continue
        x = result
        for elem in elems:
            if isinstance(x, dict):
                x = x.get(elem, None)
            else:  # numbered sequence
                x = x[int(elem)]
            if x is None:
                break
        print(x)


def dump(usid, result):
    """Print `usid` and all items of `result`, abbreviating long values."""
    def highl(s):
        # ANSI escape sequence: bold red
        return "\033[1;31m%s\033[1;m" % s

    print("%s %s" % (highl("usid:"), usid))
    for k, v in result.items():
        text = str(v)
        if len(text) > 70:
            v = "%s .." % text[:68]
        print("%s: %s" % (highl(k), v))


def listr(rdb, xtests):
    """Print one line per result in `rdb`: number, usid, start, an MD5 hash
    of the interview's repr, and the interview name.

    Results flagged as 'test' are skipped if `xtests` is true; the printed
    numbers still refer to positions in the full list.
    """
    # imported locally so that the other functions do not depend on hashlib
    import hashlib
    for i, (usid, result) in enumerate(_results(rdb)):
        if xtests and result.get('test', False):
            continue
        ivname = result['interview']['name']
        ivrepr = repr(result['interview'])
        if not isinstance(ivrepr, bytes):
            ivrepr = ivrepr.encode("utf-8")  # Python 3: md5 needs bytes
        ivhash = hashlib.md5(ivrepr).hexdigest()
        print("%3d: %s (%s, %s, %s)" % (i, usid, result['start'], ivhash, ivname))


def graph(result, fprefix, itype=-1):
    """Draw graph for a specific result and information type.

    Writes `<prefix>.png/.pdf` (full graph) and `<prefix>-simple.png/.pdf`
    (simple graph), where `<prefix>` is `fprefix` extended by "-all" (for
    `itype` -1) or by the lower-cased name of the information type.
    """
    import pygraphviz as pgv

    def treduce(rels):
        """Transitive reduction of a list of relations."""
        reduced = rels[:]
        for a, b in rels:
            for c, d in rels:
                if b == c:
                    obsolete = (a, d)
                    if obsolete in reduced:
                        reduced.remove(obsolete)
        return reduced

    def label(group):
        # label for nodes
        return gbins(group, result['npersons'])

    CMATCH, CCON, CIGN, CNONE = [_COLORS[c] for c in
                                 ('visio-2', 'visio-1', 'visio-4', 'visio-5')]

    ### file name prefix according to info type ###
    if itype == -1:
        fprefix = "%s-%s" % (fprefix, "all")
    else:
        name = result['interview']['itypes'][itype]['name'].lower()
        fprefix = "%s-%s" % (fprefix, name)

    ### get and prune relations ###
    stats = analyze_per_type(result, itype)
    grels = stats["_group relations"]
    mrels = stats["_matching disclosure relations"]
    xrels = stats["_contradicting disclosure relations"]
    irels = stats['_ignoring disclosure relations']
    nrels = stats["_unmatching disclosure relations"]
    grels = treduce(grels)
    # for simple graph; must be built before nrels/xrels are reduced
    urels = treduce(list(set(nrels + xrels)))
    mrels = treduce(mrels)
    xrels = treduce(xrels)
    irels = treduce(irels)
    nrels = treduce(nrels)

    ### draw graph ###
    # full graph showing matching and contradicting and unmatching relations
    fgraph = pgv.AGraph(directed=True)
    for rels, style, color in ((mrels, "solid,bold", CMATCH),
                               (xrels, "solid,bold", CCON),
                               (irels, "solid,bold", CIGN),
                               (nrels, "dashed,bold", CNONE)):
        for a, b in rels:
            fgraph.add_edge(label(a), label(b), style=style, color=color)

    # simple graph showing matching and (contradicting or unmatching) relations
    sgraph = pgv.AGraph(directed=True)
    for a, b in mrels:
        sgraph.add_edge(label(a), label(b), style="solid,bold", color=CMATCH)
    for a, b in urels:
        sgraph.add_edge(label(a), label(b), style="dotted,bold", color=CCON)

    nodes = set()
    for a, b in grels:
        nodes.add(a)
        nodes.add(b)
    for size in range(1, 30):  # arrange nodes with same size on same level
        lnodes = [group for group in nodes if gsize(group) == size]
        if lnodes:
            fgraph.add_subgraph([label(group) for group in lnodes], rank="same")
            sgraph.add_subgraph([label(group) for group in lnodes], rank="same")

    args = '-Gsplines=true -Gratio=0.7'  # '-Granksep=1'
    fgraph.draw('%s.png' % fprefix, prog="dot", args=args)
    fgraph.draw('%s.pdf' % fprefix, prog="dot", args=args)
    sgraph.draw('%s-simple.png' % fprefix, prog="dot", args=args)
    sgraph.draw('%s-simple.pdf' % fprefix, prog="dot", args=args)


def _poset_width(poset):
    """Calculate width of given poset.

    The elements are converted with `gset` and ordered by `>` on the
    converted values.  The width is the number of elements for which no
    augmenting path can be found in the comparability relation (this looks
    like the usual chain cover computation via bipartite matching).

    >>> _poset_width((0,))
    1
    >>> _poset_width((0,1,2,3))
    2
    >>> _poset_width((0,1,3))
    1
    >>> _poset_width((2,3,4,8))
    3
    >>> _poset_width((1,2,4,8))
    4
    >>> _poset_width((0,1,2,4,8))
    4
    >>> _poset_width((0,1,2,3,4,5,13))
    3
    >>> _poset_width((0,1,2,3,4,5,8,13))
    4
    >>> _poset_width((0,8,13,4,5,7))
    2
    >>> _poset_width((1,2,4,3,12,15))
    3
    """
    def augment(i):
        # try to find an augmenting path starting at element i
        if i < 0:
            return True
        if visited[i]:
            return False
        visited[i] = True
        for j, greater in enumerate(gt[i]):
            if greater and augment(matched[j]):
                matched[j] = i
                return True
        return False

    elements = [gset(s) for s in poset]
    size = len(elements)
    gt = [[a > b for b in elements] for a in elements]
    matched = [-1] * size  # matched[j]: element currently matched to j
    visited = [False] * size
    width = 0
    for i in range(size):
        for j in range(size):
            visited[j] = False
        if not augment(i):
            width += 1
    return width


class _AdditionOrderedDict(dict):
    """A dict providing items, values and keys ordered by initial addition."""

    def __init__(self):
        super(_AdditionOrderedDict, self).__init__()
        self.skeys = []  # keys in order of their first assignment

    def __setitem__(self, key, value):
        super(_AdditionOrderedDict, self).__setitem__(key, value)
        if key not in self.skeys:
            self.skeys.append(key)

    def keys(self):
        return self.skeys

    def items(self):
        return [(k, self[k]) for k in self.skeys]

    def values(self):
        return [self[k] for k in self.skeys]


def analyze_per_type(result, n):
    """Analyze a result for itype number `n` (-1 means all types together).

    Returns an `_AdditionOrderedDict` of statistics.  Keys starting with an
    underscore hold the relation lists themselves rather than numbers.

    Side effect: if the first disclosure has no 'end' entry, 'start' and
    'end' entries are derived from the 'time' entries and stored in
    `result['disclosures']`.
    """
    stats = _AdditionOrderedDict()
    if result['disclosures']:
        if n == -1:
            stats['number of items'] = sum(result['nvalues'])
        else:
            stats['number of items'] = result['nvalues'][n]
    answers = []
    for disclosure in result['disclosures']:
        if n == -1:
            values = disclosure['values']
        else:
            values = disclosure['values-per-type'][n]
        answers.append((disclosure['group'], values))
    disclosures = [a[1] for a in answers]

    ### time stats ###
    # normalize old results w/o end/start timestamps
    # NOTE(review): this needs at least two disclosures, otherwise `dlast`
    # is never assigned -- confirm that such results cannot occur.
    if 'end' not in result['disclosures'][0]:
        for i, disclosure in enumerate(result['disclosures'][1:]):
            disclosure['end'] = disclosure['time']
            disclosure['start'] = result['disclosures'][i]['time']
            # the value of the final iteration is used for the first entry
            dlast = disclosure['end'] - disclosure['start']
        first = result['disclosures'][0]
        first['end'] = first['time']
        first['start'] = first['end'] - dlast
    ddisclosures = [(d['end'] - d['start']).seconds
                    for d in result['disclosures']]
    stats['duration'] = (result['disclosures'][-1]['end']
                         - result['start']).seconds
    stats['duration preparation'] = stats['duration'] - sum(ddisclosures)
    stats['duration disclosures'] = sum(ddisclosures)
    stats['duration disclosures min'] = min(ddisclosures)
    stats['duration disclosures max'] = max(ddisclosures)
    mean, std = meanstdv(ddisclosures)
    stats['duration disclosures avg'] = mean
    stats['duration disclosures std'] = std

    ### disclosures in general ###
    stats['unique disclosures'] = len(set(disclosures))
    stats['conflicting disclosures for duplicate groups'] = 0
    seen = {}
    # iterate backwards so that popping does not shift pending indexes;
    # of several answers for one group the last one is kept
    for i, (group, disclosure) in reversed(list(enumerate(answers))):
        if group in seen:
            if seen[group] != disclosure:
                stats['conflicting disclosures for duplicate groups'] += 1
            answers.pop(i)  # remove group duplicates
        else:
            seen[group] = disclosure
    stats['disclosures used once'] = 0
    stats['disclosures used repeatedly'] = 0
    for d in set(disclosures):
        if disclosures.count(d) > 1:
            stats['disclosures used repeatedly'] += 1
        else:
            stats['disclosures used once'] += 1

    ### disclosure relations ###
    # NOTE(review): an ordered pair (x, y) with y > x is counted as
    # "uncomparable" here -- confirm that this is intended.
    stats['comparable disclosure pairs'] = 0
    stats['uncomparable disclosure pairs'] = 0
    for x in set(disclosures):
        for y in set(disclosures):
            if gset(x) > gset(y):
                stats['comparable disclosure pairs'] += 1
            elif x != y:
                stats['uncomparable disclosure pairs'] += 1
    stats['disclosure poset width'] = _poset_width(set(disclosures))

    ### matchings between group and disclosure relations
    grels, mrels, xrels, irels, nrels = [], [], [], [], []
    for x in answers:
        for y in answers:
            if gset(x[0]) > gset(y[0]):
                rel = (x[0], y[0])
                grels.append(rel)
                if gset(x[1]) < gset(y[1]):
                    mrels.append(rel)
                elif gset(x[1]) > gset(y[1]):
                    xrels.append(rel)
                elif gset(x[1]) == gset(y[1]):
                    irels.append(rel)
                else:
                    nrels.append(rel)
    stats['_group relations'] = grels
    stats['_matching disclosure relations'] = mrels
    stats['_contradicting disclosure relations'] = xrels
    stats['_ignoring disclosure relations'] = irels
    stats['_unmatching disclosure relations'] = nrels
    stats['number of group relations'] = len(grels)
    stats['number of matching disclosure relations'] = len(mrels)
    stats['number of contradicting disclosure relations'] = len(xrels)
    stats['number of ignoring disclosure relations'] = len(irels)
    stats['number of unmatching disclosure relations'] = len(nrels)
    return stats


def analyze(result):
    """Print group statistics and the per-type statistics of `result`."""
    groups = result['groups']
    print("number of groups: %d" % len(set(groups)))
    print("duplicate groups: %d" % (len(groups) - len(set(groups))))
    print("group poset width: %s" % _poset_width(groups))
    print("per type:")
    itypes = result['interview']['itypes']
    for i in [-1] + list(range(len(itypes))):
        itype = "All" if i == -1 else itypes[i]['name']
        print("- %s" % itype)
        stats = analyze_per_type(result, i)
        for key, value in stats.items():
            if not key.startswith("_"):  # underscore keys hold raw lists
                print(" %s: %s" % (key, value))


def plotit(rdb, fprefix, xtests):
    """Write the stacked bar plots `<fprefix>-items.pdf` and
    `<fprefix>-relations.pdf` for the results in `rdb`.

    Results flagged as 'test' are left out if `xtests` is true.
    Raises IndexError if there is no result to plot.
    """
    import numpy
    import matplotlib.pyplot as plt

    results = [r for _, r in _results(rdb)
               if not (xtests and r.get("test", False))]
    if not results:
        raise IndexError("no results to plot")
    nresults = len(results)
    itypes = results[0]['interview']['itypes']
    bwidth = 0.8 / nresults

    # type_stats[t][r]: statistics of result r for information type t,
    # computed once and shared by both plots
    type_stats = [[analyze_per_type(result, t) for result in results]
                  for t in range(len(itypes))]

    def stacked_bars(position, levels, colors):
        """Draw the stacked bars of one x position, one bar per result.

        Returns the bar containers (one per level) and the stack heights.
        """
        pind = numpy.arange(position, position + 1, bwidth)[:nresults]
        bottom = [0] * nresults
        containers = []
        for j, level in enumerate(levels):
            # align='edge' keeps the bars left-aligned at `pind`, which the
            # tick offsets below rely on
            containers.append(plt.bar(pind, level, bwidth, color=colors[j],
                                      bottom=bottom, align='edge'))
            bottom = [a + b for a, b in zip(bottom, level)]
        return containers, bottom

    # === items counts and stuff ==============================================
    cols = [_COLORS[c] for c in ('green', 'red2', 'blue', 'yellow', 'purple')]
    params = ('number of items', 'unique disclosures', 'disclosure poset width')
    ind = numpy.arange(len(params))
    ymax = 0
    plots = []  # plot groups per param
    for i, param in enumerate(params):
        levels = [[s[param] for s in per_result] for per_result in type_stats]
        rplots, heights = stacked_bars(i, levels, cols)
        ymax = max(ymax, max(heights))
        plots.append(rplots)
    plt.xticks(ind + 0.45, [p.capitalize() for p in params], rotation=0)
    plt.yticks(numpy.arange(0, ymax + 6, 5))
    barparts = [plots[0][i][0] for i in range(len(itypes))]
    legends = [itype.get('plotname', itype['name']) for itype in itypes]
    plt.legend(barparts[::-1], legends[::-1])
    plt.savefig("%s-items.pdf" % fprefix, bbox_inches='tight')
    plt.close()

    # === relation matching ===================================================
    cols = [_COLORS[c] for c in
            ['visio-1', ('visio-2', 0.85), ('visio-2', 1.15), 'visio-4']]
    params = ('number of matching disclosure relations',
              'number of unmatching disclosure relations',
              'number of contradicting disclosure relations')
    params_plot = ('order-reversing disclosure decision pairs',
                   'order-loosing disclosure decision pairs',
                   'order-preserving disclosure decision pairs')
    ind = numpy.arange(len(itypes))
    ymax = 0
    plots = []  # plot groups per information type
    for i in range(len(itypes)):
        levels = [[s[param] for s in type_stats[i]] for param in params]
        rplots, heights = stacked_bars(i, levels, cols)
        ymax = max(ymax, max(heights))
        plots.append(rplots)
    pitypes = [x.get('plotname', x['name']) for x in itypes]
    plt.xticks(ind + 0.45, [p.capitalize() for p in pitypes], rotation=0)
    plt.yticks(numpy.arange(0, ymax + 16, 5))
    barparts = [plots[0][i][0] for i in range(len(params))]
    legends = list(params_plot)
    plt.legend(barparts[::-1], legends[::-1])
    plt.savefig("%s-relations.pdf" % fprefix, bbox_inches='tight')
    plt.close()


def _occurrences(values, upper):
    """Return how often each of 1..`upper` occurs in the list `values`."""
    return [values.count(k) for k in range(1, upper + 1)]


def plotit2(rdb, fprefix, xtests):
    """Write per-information-type plots over all results in `rdb`.

    For every information type, files `<fprefix>-ud-*.pdf` (unique
    disclosures), `-pw-*.pdf` (poset widths), `-om1-*.pdf` (dominating order
    mappings) and `-om2-*.pdf` (order mapping counts as box plots) are
    written.  Results flagged as 'test' are left out if `xtests` is true.
    """
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('text', usetex=True)
    rc('font', family='serif')

    def nice_axes():
        # light horizontal grid lines drawn below the data
        ax = plt.gca()
        ax.yaxis.grid(color='lightgrey', alpha=0.5, zorder=-1,
                      linestyle="solid")
        ax.set_axisbelow(True)

    colnames = ('visio-1', 'visio-2', 'visio-3', 'visio-5', 'visio-4')
    cols = [_COLORS[c] for c in colnames]
    omtypes = ("reversing", "preserving", "ignoring", "loosing")
    omkeys = ('number of matching disclosure relations',
              'number of contradicting disclosure relations',
              'number of ignoring disclosure relations',
              'number of unmatching disclosure relations')
    results = [r for _, r in _results(rdb)
               if not (xtests and r.get("test", False))]
    print("%d participants (all)" % len(results))
    itypes = results[0]['interview']['itypes']

    # mapping information types to lists of result stats (one list element
    # for one participant)
    stats = {}
    print("analyze results")
    for j, itype in enumerate(itypes):
        # NOTE(review): information type number 2 is always left out; the
        # reason is not visible in this file.
        if j == 2:
            continue
        name = itype.get('plotname', itype['name'])
        stats[name] = []
        for result in results:
            type_stats = analyze_per_type(result, j)
            # drop results with a very short average disclosure duration
            if type_stats['duration disclosures avg'] < 25:
                continue
            stats[name].append(type_stats)
    nparticipants = len(list(stats.values())[0])
    print("%d participants (filtered)" % nparticipants)
    print("plot stats")

    #### get a common maximum for occurrences of unique disclosures ###
    udxmax = len(results[0]['disclosures'])
    udymax = 0
    for itype in stats:
        ud = [s['unique disclosures'] for s in stats[itype]]
        udymax = max(udymax, max(_occurrences(ud, udxmax)))

    #### get a common maximum for occurrences of poset widths ####
    pwxmax = len(results[0]['disclosures'])
    pwymax = 0
    for itype in stats:
        pw = [s['disclosure poset width'] for s in stats[itype]]
        pwymax = max(pwymax, max(_occurrences(pw, pwxmax)))

    for i, itype in enumerate(stats):
        print("plots for information type '%s'" % itype.lower())

        #### unique disclosures ###
        ud = [s['unique disclosures'] for s in stats[itype]]
        udx = list(range(1, udxmax + 1))
        udy = _occurrences(ud, udxmax)
        plt.figure(1, figsize=(2.7, 3.3))
        plt.title(itype)
        plt.xlabel("unique disclosures")
        plt.ylabel(r"participants")
        plt.plot(udx, udy, color=cols[i], linestyle='dotted', marker='o')
        nice_axes()
        plt.ylim(-0.1 * udymax, udymax * 1.1)
        plt.xlim(0.01, udxmax + 0.99)
        plt.savefig("%s-ud-%s.pdf" % (fprefix, itype.lower()),
                    bbox_inches='tight')
        plt.close()

        #### poset width ###
        pw = [s['disclosure poset width'] for s in stats[itype]]
        pwy = _occurrences(pw, pwxmax)
        xmax = 6  # adjust this if necessary
        pwx = list(range(1, xmax + 1))
        # all widths of xmax and more are summed up in the last position
        pwy = pwy[:xmax - 1] + [sum(pwy[xmax - 1:])]
        plt.figure(1, figsize=(2.7, 3.3))
        plt.title(itype)
        plt.xlabel("poset width")
        plt.ylabel(r"participants")
        plt.plot(pwx, pwy, color=cols[i], linestyle='dotted', marker='o')
        nice_axes()
        plt.xticks(pwx, list(range(1, xmax)) + [r'$\ast$'])
        plt.ylim(-0.1 * pwymax, pwymax * 1.1)
        plt.xlim(0.01, xmax + 0.99)
        plt.savefig("%s-pw-%s.pdf" % (fprefix, itype.lower()),
                    bbox_inches='tight')
        plt.close()

        #### order mapping major occurrences ###
        # this illustrates if certain order mapping types occur mainly
        plt.figure(1, figsize=(2.7, 3.8))
        plt.title(itype)
        width = 0.6
        xvalues = list(range(4))
        plt.xticks([x + width / 2.0 for x in xvalues], omtypes, rotation=90)
        plt.ylabel(r"participants")
        rmax = stats[itype][0]['number of group relations']
        rows = []  # one row of participant counts per majority threshold
        legendrefs, legendnames = [], []
        for maj in (0.5, 0.66, 0.75):
            yvalues = [0] * 4
            for a in stats[itype]:
                # count the first mapping type exceeding the threshold
                for pos, key in enumerate(omkeys):
                    if a[key] > maj * rmax:
                        yvalues[pos] += 1
                        break
            rows.append(yvalues)
            legendnames.append(r"$> %d/%d$" % (int(maj * rmax), rmax))
        edgecolor = _COLORS[colnames[i], 0.25]
        for row, lightness in zip(rows, [1.4, 1.2, 1]):
            color = _COLORS[colnames[i], lightness]
            # align='edge' keeps the bars left-aligned at `xvalues`, which
            # the tick positions above rely on
            drawn = plt.bar(xvalues, row, color=color, width=width,
                            edgecolor=edgecolor, align='edge')
            legendrefs.append(drawn[0])
        plt.legend(legendrefs, legendnames, loc='upper left',
                   prop={'size': 'medium'}, title="mapping counts")
        nice_axes()
        plt.xlim(-0.5, 4)
        plt.ylim(0, nparticipants)
        plt.savefig("%s-om1-%s.pdf" % (fprefix, itype.lower()),
                    bbox_inches='tight')
        plt.close()

        #### order mapping occurrence counts as boxplots ###
        plt.figure(1, figsize=(2.7, 3.3))
        plt.title(itype)
        plt.xticks(list(range(4)), omtypes, rotation=90)
        plt.ylabel(r"mapping occurrences")
        rmax = stats[itype][0]['number of group relations']
        yvalues = [[a[key] for a in stats[itype]] for key in omkeys]
        bplots = plt.boxplot(yvalues, sym="+", notch=0)
        dark = _COLORS[colnames[i], 0.5]
        plt.setp(bplots['boxes'], color=dark)
        plt.setp(bplots['caps'], color=dark)
        plt.setp(bplots['whiskers'], color=dark, linestyle="solid")
        plt.setp(bplots['medians'], color=dark, solid_capstyle='butt',
                 marker='')
        plt.setp(bplots['fliers'], color=_COLORS[colnames[i], 1])
        nice_axes()
        plt.xlim(0.5, 4.5)
        plt.ylim(-0.1 * rmax, rmax * 1.1)
        plt.savefig("%s-om2-%s.pdf" % (fprefix, itype.lower()),
                    bbox_inches='tight')
        plt.close()


def options():
    """Parse and validate the command line; return the option values.

    Exits via `OptionParser.error` if the result file, the action, the
    result selection or the destination directory is missing.
    """
    op = optparse.OptionParser("%prog [RESULT] [ACTION]")

    og = optparse.OptionGroup(op, "Result Selection")
    og.add_option("-r", "--results", help="use results from FILE",
                  metavar="FILE")
    og.add_option("-n", "--num", type="int", default=-1,
                  help="use result number NUM", metavar="NUM")
    og.add_option("-u", "--usid", default="",
                  help="use result with given USID", metavar="USID")
    og.add_option("-t", "--tests", action="store_true", default=False,
                  help="include test results in actions on multiple results")
    op.add_option_group(og)

    og = optparse.OptionGroup(op, "Actions")
    og.add_option("-l", "--list", action="store_true", default=False,
                  help="list all results")
    og.add_option("-q", "--query", metavar="FIELD",
                  help="show value of a specific result field")
    og.add_option("-d", "--dump", action="store_true", default=False,
                  help="dump a specific result")
    og.add_option("-g", "--graph", action="store_true", default=False,
                  help="generate relation graphs for a specific result")
    og.add_option("-a", "--analyze", action="store_true", default=False,
                  help="analyze a specific result")
    og.add_option("", "--plot", action="store_true", default=False,
                  help="generate plots for all results")
    og.add_option("", "--plot2", action="store_true", default=False,
                  help="calculate other plots over all results")
    og.add_option("", "--delete", action="store_true", default=False,
                  help="delete a specific result")
    op.add_option_group(og)

    og = optparse.OptionGroup(op, "Miscellaneous")
    # add to the group (not to the parser) so that the option is listed
    # under "Miscellaneous" in the help output
    og.add_option("-D", "--dest", metavar="DIR",
                  help="destination directory for graphs and plots")
    op.add_option_group(og)

    opts, _ = op.parse_args()
    if not opts.results:
        op.error("need a result file")
    if not (opts.dump or opts.analyze or opts.graph or opts.list or
            opts.query or opts.delete or opts.plot or opts.plot2):
        op.error("need to know what to do")
    multi = opts.list or opts.plot or opts.query or opts.plot2
    if opts.num < 0 and not opts.usid and not multi:
        op.error("need either a result number or USID")
    if (opts.graph or opts.plot or opts.plot2) and not opts.dest:
        op.error("need a destination directory")
    return opts


def main():
    """Run the action selected on the command line."""
    opts = options()
    xtests = not opts.tests
    if opts.list:
        listr(opts.results, xtests)
        return
    if opts.query:
        query(opts.results, xtests, opts.query)
        return
    if opts.plot:
        plotit(opts.results, join(opts.dest, "plot"), xtests)
        return
    if opts.plot2:
        plotit2(opts.results, join(opts.dest, "plot"), xtests)
        return
    usid, result = _result(opts.results, n=opts.num, usid=opts.usid)
    if opts.dump:
        dump(usid, result)
    elif opts.graph:
        tstamp = result['start'].strftime("%Y%m%d%H%M%S")
        for itype in [-1] + list(range(len(result['interview']['itypes']))):
            graph(result, join(opts.dest, "graph-%s" % tstamp), itype)
    elif opts.analyze:
        analyze(result)
    elif opts.delete:
        delete(opts.results, n=opts.num, usid=opts.usid)
    else:
        raise RuntimeError("bug")


if __name__ == '__main__':
    main()