from IPython.display import HTML, Markdown
# Build an HTML snippet made of a script and a button. The script (which uses
# jQuery's `$`) calls code_toggle() once the document is ready, hiding every
# `div.input` element; the button calls code_toggle() again on each click,
# alternately showing and hiding those elements.
# NOTE(review): the HTML object is only created here, not explicitly displayed;
# presumably this relies on being the last expression of a notebook cell -- confirm.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<input type="button" value="Clicca per vedere/nascondere il codice Python" onclick="code_toggle()">''')
import pandas as pd
import matplotlib.pyplot as plt
# Apply matplotlib's 'ggplot' style to the figures created after this point.
plt.style.use('ggplot')
# IPython magic (not plain Python syntax): selects the inline matplotlib backend.
%matplotlib inline
# Load the questionnaire export: every column is read as a categorical and
# the 'Timestamp' column is parsed as a date.
survey = pd.read_csv(
    "Questionario Bebras 2016 (Responses) - Form Responses 1.csv",
    dtype='category',
    parse_dates=['Timestamp'],
    infer_datetime_format=True)

# Remember the original headers (all but the first column) before the columns
# are renamed to short identifiers below.
questions = list(survey.columns[1:])

survey.columns = [
    'time', 'firsttime', 'how', 'others', 'othersdone', 'n', 'cat',
    'opinion', 'colleagues', 'teachhints', 'teach', 'activities',
    'subject', 'grade', 'curriculum', 'currquiz', 'reuse',
    'commentsorg', 'commentssw', 'comments', 'again',
]

# Trim and lower-case the subjects, then store them as a categorical again.
survey['subject'] = survey['subject'].str.strip().str.lower().astype('category')

# These columns are split on commas into lists of strings.
for _multi_column in ('others', 'othersdone', 'grade', 'reuse'):
    survey[_multi_column] = survey[_multi_column].str.split(',')

# Keep an independent copy of the rows whose 'n' answer compares equal to
# "does not start with 'Nessuna'".
participants = survey[survey['n'].str.startswith('Nessuna') == False].copy()

# Denominator used for the response-rate percentage.
POPULATION = 670
from collections import OrderedDict
# School-level key -> Italian display name; insertion order is the order in
# which the levels are iterated elsewhere.
SCHOOLS = OrderedDict()
SCHOOLS['primary'] = 'Primaria'
SCHOOLS['secondary1'] = 'Secondaria di primo grado'
SCHOOLS['secondary2'] = 'Secondaria di secondo grado'

# One flag column per school level, derived from the category names found in
# the 'cat' answer: KILO -> primary, MEGA/GIGA -> secondary1,
# TERA/PETA -> secondary2.
participants['primary'] = participants['cat'].str.contains('KILO')
participants['secondary1'] = (
    participants['cat'].str.contains('MEGA')
    | participants['cat'].str.contains('GIGA')
)
participants['secondary2'] = (
    participants['cat'].str.contains('TERA')
    | participants['cat'].str.contains('PETA')
)
def get_question(q):
    """Return the original question text for the short column name `q`.

    The text is looked up in the module-level `questions` list, which is
    offset by one with respect to `participants.columns` (the first column
    has no entry in `questions`).

    Raises ValueError when `q` is not a column of `participants` or when it
    is the first column; IndexError when `q` is a column beyond the ones
    that have an entry in `questions`.
    """
    position = list(participants.columns).index(q) - 1
    if position < 0:
        # Without this guard the index -1 would silently return the text of
        # the *last* question.
        raise ValueError('column {!r} has no associated question'.format(q))
    text = questions[position]
    # Only byte strings need decoding; text that is already unicode would
    # fail (Python 2) or has no .decode at all (Python 3).
    return text.decode('utf8') if isinstance(text, bytes) else text
# Numbered list of the questions, each followed by its short column name.
s = ''.join(
    '{}. {} (`{}`)\n'.format(number, text, survey.columns[number])
    for number, text in enumerate(questions, 1)
)
Markdown(s)

# Append the meaning of the 1-5 scale to the two rating questions (done after
# the list above has been built, so the list shows the unmodified texts).
questions[list(participants.columns).index('opinion')-1] += ' (1=Pessima, 5=Ottima)'
questions[list(participants.columns).index('again')-1] += " (1=Non credo, 5=Senz'altro)"

# Headline: number of responses, number of participants and response rate...
s = u"Hanno risposto in **{}**, di cui **{}** partecipanti all'edizione 2016 (su {}, **{:.2f}%**):".format(
    len(survey),
    len(participants),
    POPULATION,
    100*len(participants)/float(POPULATION))
# ...followed by one clause per school level with the number of flagged rows.
s += u''.join(
    u" {} con squadre della scuola {},".format(
        participants[participants[level]][level].count(), SCHOOLS[level])
    for level in SCHOOLS
)
# Replace the last character (the trailing comma) with the closing sentence.
s = s[:-1] + u". (Alcuni insegnanti hanno squadre in più gradi scolastici.)"
Markdown(s)
def drawpie(q, size=(8,8), maxcat=5, maxlabel=20):
    """Draw a 2x2 grid of pies for column `q`: one per school level plus a total.

    q        -- short column name of the question to plot.
    size     -- figure size passed to `plt.figure`.
    maxcat   -- maximum number of wedges per pie (the most frequent answers).
    maxlabel -- wedge labels are truncated to this many characters.

    Each wedge is the share of an answer over all the answers given in that
    group, so a pie may be incomplete when answers are cut off by `maxcat`.
    """
    def _as_text(value):
        # Only byte strings need decoding; unicode text is used as it is.
        return value.decode('utf8') if isinstance(value, bytes) else value

    def _pie(ax, frame, palette):
        counts = frame.groupby([q])[q].count()
        total = counts.sum()
        if total == 0:
            # Nobody in this group answered: leave the titled axes empty
            # instead of dividing by zero.
            return
        top = counts.sort_values(ascending=False)[:maxcat]
        # Labels must come from the very series being plotted: the ranking of
        # the answers in one group can differ from the overall ranking, and
        # labels taken from the latter would end up on the wrong wedges.
        labels = [_as_text(answer)[:maxlabel] for answer in top.index]
        ax.pie(top / float(total), labels=labels, autopct='%.1f', colors=palette)

    plt.figure(figsize=size)
    plt.suptitle(get_question(q))
    colors = ['cyan', 'pink', 'lightgreen', 'yellow', 'orange']
    colors += colors[1:]
    tcolors = ['SteelBlue','Tomato', 'SeaGreen', 'Wheat', 'SandyBrown']
    tcolors += tcolors[1:]

    for i, t in enumerate(SCHOOLS):
        ax = plt.subplot(2,2,i+1)
        ax.set_aspect('equal')
        ax.set_title(SCHOOLS[t])
        _pie(ax, participants[participants[t]], colors)

    # The fourth cell shows all participants, drawn with a distinct palette.
    ax = plt.subplot(2,2,4)
    ax.set_aspect('equal')
    ax.set_title('Totale')
    _pie(ax, participants, tcolors)
def drawpies(qq, school, size=(10,10), maxcat=5, maxlabel=20):
    """Draw one pie per question in `qq`, stacked vertically, for one school level.

    qq       -- list of short column names to plot.
    school   -- key of `SCHOOLS`; also the name of the flag column used to
                select the rows.
    size     -- figure size passed to `plt.figure`.
    maxcat   -- maximum number of wedges per pie (the most frequent answers).
    maxlabel -- wedge labels are truncated to this many characters.
    """
    def _as_text(value):
        # Only byte strings need decoding; unicode text is used as it is.
        return value.decode('utf8') if isinstance(value, bytes) else value

    plt.figure(figsize=size)
    colors = ['cyan', 'pink', 'lightgreen', 'yellow', 'orange']
    colors += colors[1:]

    # The row selection does not depend on the question: do it once.
    subset = participants[participants[school]]

    for i, q in enumerate(qq):
        ax = plt.subplot(len(qq),1,i+1)
        ax.set_aspect('equal')
        ax.set_title('Scuola ' + SCHOOLS[school].upper() + ': ' + get_question(q))

        counts = subset.groupby([q])[q].count()
        total = counts.sum()
        if total == 0:
            # No answers for this question: keep the titled axes, skip the pie.
            continue
        top = counts.sort_values(ascending=False)[:maxcat]
        # Take the labels from the plotted series itself: the ranking within
        # this school can differ from the ranking over all participants, which
        # would attach labels to the wrong wedges.
        labels = [_as_text(answer)[:maxlabel] for answer in top.index]
        ax.pie(top / float(total), labels=labels, autopct='%.1f', colors=colors)
    plt.tight_layout()
from collections import Counter
def drawbar(q, k=None, maxlabel=50):
    """Draw a horizontal bar chart counting the items of the list-valued column `q`.

    q        -- short column name; cells that are lists contribute one count
                per item, any other cell (e.g. a missing answer) is skipped.
    k        -- optional flag column name; when given, only the rows where
                that flag is set are counted.
    maxlabel -- bar labels are truncated to this many characters.
    """
    population = participants[participants[k]][q] if k else participants[q]

    count = Counter()
    for answers in population:
        if not isinstance(answers, list):
            continue
        for item in answers:
            item = item.strip()
            if isinstance(item, bytes):
                # Only byte strings need decoding; unicode text is kept as it is.
                item = item.decode('utf8')
            count[item] += 1

    # Open a dedicated figure, as the pie helpers do, so that the bars are
    # not drawn over whatever axes happen to be current.
    plt.figure()
    plt.title(get_question(q))
    if not count:
        # Nothing to plot; unpacking an empty tally below would fail.
        return
    labels, values = zip(*count.items())
    plt.barh(range(len(values)), values,
             tick_label=[label[:maxlabel] for label in labels])
def listlong(q, k):
    """Return a Markdown bullet list of the free-text answers to `q` for school `k`.

    q -- short column name of a free-text question.
    k -- key of `SCHOOLS`; also the name of the flag column used to select
         the rows.

    Answers are stripped of surrounding whitespace and of trailing full
    stops, then capitalized; answers that become equal are listed once with
    the number of times they were given.
    """
    answered = participants[participants[k] & participants[q].notnull()][q]

    # Tally in a single pass instead of building and adding a one-element
    # Counter for every answer.
    count = Counter()
    for answer in answered:
        text = answer.strip().rstrip('.')
        if isinstance(text, bytes):
            # Only byte strings need decoding; unicode text is kept as it is.
            text = text.decode('utf8')
        count[text.capitalize()] += 1

    parts = [u"\n\n*{}* (Scuola {})".format(get_question(q), SCHOOLS[k].upper())]
    for text in count:
        parts.append(u'\n* ' + text)
        if count[text] > 1:
            parts.append(' *(risposta data {} volte)*'.format(count[text]))
    return Markdown(u''.join(parts))
# Bar charts over all participants.
for _column in ('others', 'othersdone'):
    drawbar(_column)

# The same sequence of charts and answer lists is produced for each school
# level in turn. The values returned by listlong() are not used here.
_PIE_QUESTIONS = ['firsttime','how', 'n', 'opinion', 'colleagues', 'teach',
                  'activities', 'subject', 'again']
for SECTION in ('primary', 'secondary1', 'secondary2'):
    drawpies(_PIE_QUESTIONS, SECTION, size=(30,60), maxlabel=50)
    listlong('teachhints', SECTION)
    drawbar('grade', SECTION)
    for _column in ('curriculum', 'currquiz'):
        listlong(_column, SECTION)
    drawbar('reuse', SECTION)
    for _column in ('commentsorg', 'commentssw', 'comments'):
        listlong(_column, SECTION)

# Per-question comparison across school levels: (column, drawpie options).
_PIE_CHARTS = [
    ('firsttime', {}),
    ('how', {'size': (15,15), 'maxcat': 7}),
    ('n', {'maxcat': 2}),
    ('opinion', {'maxcat': 6}),
    ('colleagues', {'maxlabel': 22}),
    ('teach', {'size': (15,15)}),
    ('activities', {'size': (20,20), 'maxlabel': 40}),
    ('subject', {'size': (15,15)}),
    ('again', {'size': (10,10)}),
]
for _column, _options in _PIE_CHARTS:
    drawpie(_column, **_options)