from IPython.display import HTML, Markdown
# Inject a small jQuery script plus a button into the notebook output.
# `code_toggle` hides or shows every `div.input` element (the code cells) and
# flips the `code_show` flag. It is also registered to run once when the
# document is ready, so the code starts hidden (`code_show` begins as true).
# The button label is user-facing Italian text meaning
# "Click to show/hide the Python code".
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<input type="button" value="Clicca per vedere/nascondere il codice Python" onclick="code_toggle()">''')
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
survey = pd.read_csv("Questionario Bebras 2016 (Responses) - Form Responses 1.csv",
dtype='category',
parse_dates=['Timestamp'], infer_datetime_format=True)
questions = list(survey.columns[1:][:])
survey.columns = ['time', 'firsttime', 'how', 'others', 'othersdone', 'n', 'cat',
'opinion', 'colleagues', 'teachhints', 'teach', 'activities', 'subject', 'grade',
'curriculum', 'currquiz', 'reuse', 'commentsorg', 'commentssw', 'comments', 'again']
survey['subject'] = survey['subject'].str.strip().str.lower().astype('category')
survey['others'] = survey['others'].str.split(',')
survey['othersdone'] = survey['othersdone'].str.split(',')
survey['grade'] = survey['grade'].str.split(',')
survey['reuse'] = survey['reuse'].str.split(',')
participants = survey[survey['n'].str.startswith('Nessuna') == False].copy()
POPULATION = 670
from collections import OrderedDict
# Display name of each school level, keyed by the boolean column of
# `participants` that flags respondents with teams at that level.
SCHOOLS = OrderedDict([
    ('primary', 'Primaria'),
    ('secondary1', 'Secondaria di primo grado'),
    ('secondary2', 'Secondaria di secondo grado'),
])

# Flag each respondent by the category names found in the 'cat' answer.
# na=False makes a missing 'cat' count as "no team at this level"; without it
# the flag would be NaN, and these columns are used as boolean masks
# (participants[participants[k]]) throughout the rest of the analysis.
participants['primary'] = participants['cat'].str.contains('KILO', na=False)
participants['secondary1'] = participants['cat'].str.contains('MEGA|GIGA', na=False)
participants['secondary2'] = participants['cat'].str.contains('TERA|PETA', na=False)
def get_question(q):
    """Return the full question text for the short column name *q*.

    ``questions`` holds the original questionnaire headers without the
    leading timestamp column, so the text for the column at position ``i``
    of ``participants`` is ``questions[i - 1]``.

    The result is always text: byte strings (as produced by Python 2) are
    decoded from UTF-8, while values that are already text are returned
    unchanged instead of being decoded a second time.

    Raises:
        ValueError: if *q* is not a column of ``participants``.
    """
    text = questions[list(participants.columns).index(q) - 1]
    if isinstance(text, bytes):
        text = text.decode('utf8')
    return text
# Numbered list of the questions, each followed by the short column name
# used to refer to it in the code.
numbered = []
for i, q in enumerate(questions):
    numbered.append('{}. {} (`{}`)\n'.format(i+1, q, survey.columns[i+1]))
s = ''.join(numbered)
Markdown(s)

# Append the meaning of the 1-5 scale to the two rating questions.
for _name, _scale in (('opinion', ' (1=Pessima, 5=Ottima)'),
                      ('again', " (1=Non credo, 5=Senz'altro)")):
    questions[list(participants.columns).index(_name)-1] += _scale

# Response summary: total answers, participants and response rate, then the
# number of respondents flagged for each school level.
pieces = [
    u"Hanno risposto in **{}**, di cui **{}** partecipanti all'edizione 2016 (su {}, **{:.2f}%**):".format(
        len(survey),
        len(participants),
        POPULATION,
        100*len(participants)/float(POPULATION))
]
for k in SCHOOLS:
    n = participants[participants[k]][k].count()
    pieces.append(u" {} con squadre della scuola {},".format(n, SCHOOLS[k]))
# Drop the final character (the trailing comma) before closing the sentence.
s = u"".join(pieces)[:-1] + u". (Alcuni insegnanti hanno squadre in più gradi scolastici.)"
Markdown(s)
def drawpie(q, size=(8,8), maxcat=5, maxlabel=20):
    """Draw a 2x2 grid of pie charts for question *q*.

    The first three slots hold one pie per school level in ``SCHOOLS``; the
    fourth holds the pie for all participants ('Totale').

    Each pie shows the *maxcat* most frequent answers within its own group.
    Every slice is the answer's share of all answers to *q* in that group,
    so the displayed shares need not add up to 100% when answers are cut
    off by *maxcat*.

    Labels are taken from each pie's own ranking. Previously a single label
    list, ranked over all participants, was attached to every pie, which
    mislabelled slices whenever a school's ranking differed from the
    overall one.

    Args:
        q: short column name of the question to plot.
        size: figure size passed to ``plt.figure``.
        maxcat: maximum number of answers shown per pie.
        maxlabel: labels are truncated to this many characters.
    """
    def as_text(value):
        # Answers are byte strings on Python 2 and already text on Python 3.
        return value.decode('utf8') if isinstance(value, bytes) else value

    def pie(ax, frame, title, palette):
        # One pie for the rows of `frame`, labelled from its own counts.
        ax.set_aspect('equal')
        ax.set_title(title)
        counts = frame.groupby([q])[q].count()
        total = counts.sum()
        if total == 0:
            # Nobody in this group answered: leave the titled axes empty
            # rather than dividing by zero.
            return
        top = counts.sort_values(ascending=False)[:maxcat]
        ax.pie(top / float(total),
               labels=[as_text(c)[:maxlabel] for c in top.index],
               autopct='%.1f', colors=palette)

    plt.figure(figsize=size)
    plt.suptitle(get_question(q))

    # Palettes are extended so that up to nine slices get a colour.
    colors = ['cyan', 'pink', 'lightgreen', 'yellow', 'orange']
    colors += colors[1:]
    tcolors = ['SteelBlue','Tomato', 'SeaGreen', 'Wheat', 'SandyBrown']
    tcolors += tcolors[1:]

    for i, t in enumerate(SCHOOLS):
        pie(plt.subplot(2,2,i+1), participants[participants[t]], SCHOOLS[t], colors)
    pie(plt.subplot(2,2,4), participants, 'Totale', tcolors)
def drawpies(qq, school, size=(10,10), maxcat=5, maxlabel=20):
    """Draw one pie chart per question in *qq* for a single school level.

    The pies are stacked vertically in one figure. Each pie shows the
    *maxcat* most frequent answers given by respondents flagged for
    *school*; every slice is the answer's share of all answers to that
    question within the school.

    Labels are taken from the school's own ranking. Previously they came
    from the ranking over all participants, which mislabelled slices
    whenever the two rankings differed.

    Args:
        qq: list of short column names of the questions to plot.
        school: key of ``SCHOOLS`` (also a boolean column of ``participants``).
        size: figure size passed to ``plt.figure``.
        maxcat: maximum number of answers shown per pie.
        maxlabel: labels are truncated to this many characters.
    """
    def as_text(value):
        # Answers are byte strings on Python 2 and already text on Python 3.
        return value.decode('utf8') if isinstance(value, bytes) else value

    plt.figure(figsize=size)
    # Palette extended so that up to nine slices get a colour.
    colors = ['cyan', 'pink', 'lightgreen', 'yellow', 'orange']
    colors += colors[1:]

    in_school = participants[participants[school]]
    for i, q in enumerate(qq):
        ax = plt.subplot(len(qq),1,i+1)
        ax.set_aspect('equal')
        ax.set_title('Scuola ' + SCHOOLS[school].upper() + ': ' + get_question(q))
        counts = in_school.groupby([q])[q].count()
        total = counts.sum()
        if total == 0:
            # No answers for this question in this school: leave the titled
            # axes empty rather than dividing by zero.
            continue
        top = counts.sort_values(ascending=False)[:maxcat]
        ax.pie(top / float(total),
               labels=[as_text(c)[:maxlabel] for c in top.index],
               autopct='%.1f', colors=colors)
    plt.tight_layout()
from collections import Counter
def drawbar(q, k=None, maxlabel=50):
    """Draw a horizontal bar chart counting the options chosen for *q*.

    *q* must be a column whose cells are lists of chosen options; cells that
    are not lists (for example missing answers) are skipped. Each option is
    stripped of surrounding whitespace before being counted.

    Args:
        q: short column name of a list-valued question.
        k: optional boolean column of ``participants``; when given, only the
            rows where it is true are counted.
        maxlabel: tick labels are truncated to this many characters.
    """
    def as_text(value):
        # Answers are byte strings on Python 2 and already text on Python 3.
        return value.decode('utf8') if isinstance(value, bytes) else value

    population = participants[participants[k]][q] if k else participants[q]
    count = Counter()
    for answers in population:
        if isinstance(answers, list):
            count.update(as_text(a.strip()) for a in answers)

    plt.title(get_question(q))
    if not count:
        # Nothing was selected: there are no bars to draw.
        return
    options, totals = zip(*count.items())
    plt.barh(range(len(totals)), totals,
             tick_label=[o[:maxlabel] for o in options])
def listlong(q, k):
    """Return a Markdown bullet list of the free-text answers to *q* for school *k*.

    Only respondents flagged for school *k* who gave an answer are included.
    Answers are stripped of surrounding whitespace and of trailing full
    stops, then capitalised, so that trivially different spellings are merged;
    an answer given more than once is listed once with its repeat count.

    Args:
        q: short column name of a free-text question.
        k: key of ``SCHOOLS`` (also a boolean column of ``participants``).

    Returns:
        An ``IPython.display.Markdown`` object with the question as heading
        followed by one bullet per distinct answer.
    """
    def as_text(value):
        # Answers are byte strings on Python 2 and already text on Python 3.
        return value.decode('utf8') if isinstance(value, bytes) else value

    count = Counter()
    answered = participants[participants[k] & participants[q].notnull()][q]
    for h in answered:
        count[as_text(h.strip().rstrip('.')).capitalize()] += 1

    lines = [u"\n\n*{}* (Scuola {})".format(get_question(q), SCHOOLS[k].upper())]
    for answer, times in count.items():
        repeated = ' *(risposta data {} volte)*'.format(times) if times > 1 else ''
        lines.append(u'\n* ' + answer + repeated)
    return Markdown(u''.join(lines))
# --- Multiple-choice questions, counted over all participants ---
drawbar('others')
drawbar('othersdone')

# --- Per-school report: primary school ---
# Each section below runs the same sequence for one school level: pies for
# the single-choice questions, bar charts for the list-valued questions
# ('grade', 'reuse') and bullet lists for the free-text questions.
SECTION='primary'
drawpies(['firsttime','how', 'n', 'opinion', 'colleagues', 'teach', 'activities', 'subject', 'again'], SECTION,
size=(30,60), maxlabel=50)
listlong('teachhints', SECTION)
drawbar('grade', SECTION)
listlong('curriculum', SECTION)
listlong('currquiz', SECTION)
drawbar('reuse', SECTION)
listlong('commentsorg', SECTION)
listlong('commentssw', SECTION)
listlong('comments', SECTION)

# --- Per-school report: lower secondary school ---
SECTION='secondary1'
drawpies(['firsttime','how', 'n', 'opinion', 'colleagues', 'teach', 'activities', 'subject', 'again'], SECTION,
size=(30,60), maxlabel=50)
listlong('teachhints', SECTION)
drawbar('grade', SECTION)
listlong('curriculum', SECTION)
listlong('currquiz', SECTION)
drawbar('reuse', SECTION)
listlong('commentsorg', SECTION)
listlong('commentssw', SECTION)
listlong('comments', SECTION)

# --- Per-school report: upper secondary school ---
SECTION='secondary2'
drawpies(['firsttime','how', 'n', 'opinion', 'colleagues', 'teach', 'activities', 'subject', 'again'], SECTION,
size=(30,60), maxlabel=50)
listlong('teachhints', SECTION)
drawbar('grade', SECTION)
listlong('curriculum', SECTION)
listlong('currquiz', SECTION)
drawbar('reuse', SECTION)
listlong('commentsorg', SECTION)
listlong('commentssw', SECTION)
listlong('comments', SECTION)

# --- Comparison across school levels: one 2x2 pie grid per question ---
# Figure size, number of slices and label length are tuned per question.
drawpie('firsttime')
drawpie('how',size=(15,15),maxcat=7)
drawpie('n', maxcat=2)
drawpie('opinion', maxcat=6)
drawpie('colleagues', maxlabel=22)
drawpie('teach', size=(15,15))
drawpie('activities', size=(20,20), maxlabel=40)
drawpie('subject', size=(15,15))
drawpie('again', size=(10,10))