Statistiche sulla partecipazione al Bebras italiano 2021/22

In [1]:
from IPython.display import HTML, Markdown

# Emit a small jQuery script plus a button that shows/hides the notebook's
# code-input areas ('div.input'). The toggle is also registered to run once
# when the document is ready, so the code starts out hidden.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<input type="button" value="Clicca per vedere/nascondere il codice Python" onclick="code_toggle()">''')
Out[1]:
In [2]:
import warnings
# Report each distinct warning only the first time it is raised.
warnings.filterwarnings('once')
# Alternative kept for reference: silence every warning.
#warnings.filterwarnings('ignore')
In [3]:
%matplotlib inline
import pandas as pd
import urllib.request
from IPython.display import display, Markdown
import matplotlib.pyplot as plt

# Notebook-wide presentation settings: never truncate the rows of a displayed
# frame/series, and draw every chart with the 'ggplot' style sheet.
pd.set_option('display.max_rows', None)
plt.style.use('ggplot')
In [4]:
# School registry used by infer_school_type(): it looks up a school code in
# column 'i_code' and reads the corresponding school type from column 'i_type'.
# NOTE(review): presumably the official MIUR school list - confirm the provenance of the archive.
miur = pd.read_csv('bebras_school_list.zip', low_memory=False)

def norm_region(r):
    """Normalize the name of a region. It also corrects wrong names.

    Parameters
    ----------
    r : str or None
        Region name as stored in the teacher record. Leading/trailing blanks
        and letter case are ignored.

    Returns
    -------
    str or None
        The canonical upper-case region name; 'ESTERO' for the known foreign
        entries; None for entries known to be meaningless and for missing
        values (None, NaN or any other non-string).
    """
    # Missing values show up as None/NaN when this function is mapped over a
    # pandas column: treat them as "no region" instead of failing on .strip().
    if not isinstance(r, str):
        return None

    r = r.strip().upper()
    if r == 'FVG' or r.startswith('FRIULI'):
        return 'FRIULI-VENEZIA GIULIA'
    if r.startswith('EMILIA'):
        return 'EMILIA-ROMAGNA'
    if r.startswith(('TRENTINO', 'ALTO ADIGE')):
        return 'TRENTINO-ALTO ADIGE'
    if r.startswith('LOMB'):
        return 'LOMBARDIA'
    if r.startswith('VALLE') or 'AOSTA' in r:
        return "VALLE D'AOSTA"
    # Entries that carry no usable information about the region.
    if r in ('G6GG6Y', 'ITALIA'):
        return None
    # Values observed in the data that are mapped to "abroad".
    if r in ('ALBANIA', 'BAVIERA', 'SIERRA', 'DDDD', 'FRANCE', 'SVIZZERA'):
        return 'ESTERO'
    return r

def infer_school_type(k):
    """Infer the school level(s) of a teacher's school.

    Parameters
    ----------
    k : mapping (e.g. a DataFrame row)
        Must provide the string fields 'school_kind' (free-text description)
        and 'school_code' (school code, possibly empty).

    Returns
    -------
    str
        'E' (primary), 'M' (lower secondary), 'S' (upper secondary),
        'EM' (comprehensive institute), 'EMS' (generic 'STATALE' / 'C.D.38'
        entries) or, when nothing matches, the normalized description itself.

    Notes
    -----
    When the school code is found in the global ``miur`` registry, the type
    recorded there replaces the free-text description.
    """
    knorm = k['school_kind'].strip().upper()
    cnorm = k['school_code'].strip().upper()
    if cnorm:
        # Single registry lookup; ignore rows whose type is missing.
        official = miur.loc[miur['i_code'] == cnorm, 'i_type'].dropna()
        if len(official) > 0:
            knorm = str(official.iloc[0])

    if 'PRIMARIA' in knorm or 'INFANZIA' in knorm or 'ELEMENTARE' in knorm:
        return 'E'
    if ('PRIMO GRADO' in knorm
            or ('MEDIA' in knorm and 'SUP' not in knorm)
            or '1°' in knorm or ' I GRADO' in knorm or knorm == 'IC'):
        return 'M'
    if 'COMPRENSIVO' in knorm:
        return 'EM'
    if ('SECONDO GRADO' in knorm or '2°' in knorm or 'II GRADO' in knorm
            or 'LICEO' in knorm or 'ITI' in knorm or 'PROF' in knorm
            or 'IST TEC' in knorm or 'TECNICO' in knorm
            or 'MAGISTRALE' in knorm or 'SUPERIORE' in knorm):
        return 'S'
    # The original test `knorm == 'STATALE' or 'C.D.38'` was always true
    # (a non-empty string is truthy), so the fallback below was unreachable.
    if knorm in ('STATALE', 'C.D.38'):
        return 'EMS'
    return knorm

Insegnanti, stima delle squadre e alunni

In [5]:
from datetime import date
In [6]:
YEAR = 2021

# The API key is kept out of the notebook, in a local file (first line only).
with open('secret.key') as k:
    key = k.readline().strip()

# Download the teachers subscribed to this edition; the response is closed as
# soon as the payload has been read.
with urllib.request.urlopen(("https://bebras.it/api?key={}&view=teachers_edition"+
                             "&edition=bebras_{}&subscription=1").format(key, YEAR)) as r:
    payload = r.read().decode('utf-8')

# Cache the payload on disk. It is written as UTF-8 because that is the
# encoding pd.read_json assumes when reading it back.
with open("teachers.json", "w", encoding="utf-8") as tw:
    tw.write(payload)

teachers = pd.read_json("teachers.json", convert_axes=True)

teachers.index = range(len(teachers))
# The two timestamps are interpreted as seconds since the epoch.
teachers['confirm_time'] = pd.to_datetime(teachers['confirm_time'], unit='s')
teachers['enter_time'] = pd.to_datetime(teachers['enter_time'], unit='s')

teachers['school_code'] = teachers['school_code'].str.strip().str.upper()
teachers['school_type'] = teachers[['school_kind','school_code']].apply(infer_school_type, axis=1)
filled = len(teachers)
regteams = teachers['teams_active'].sum()

# The reported date never goes past the cut-off day of this edition.
cutoff = date(YEAR, 11, 8)
today = min(date.today(), cutoff)
s = """*{}:* **{:d}** insegnanti hanno confermato la partecipazione; 
ci sono **{:d}** squadre già registrate (>*{:d}* alunni).
"""
# The number of teams is also used as a lower bound for the number of pupils.
display(Markdown(s.format(today.isoformat(),
                          filled, regteams, regteams)))

# Daily snapshot, later used to plot the registration trend.
# NOTE(review): since `today` is clamped to the cut-off, this always runs (as in
# the original, whose `today <= cut-off` guard was always true): after the
# cut-off the last snapshot is overwritten at every execution - confirm this is intended.
with open("stats-" + today.isoformat() + ".txt", "w") as stat:
    stat.write(f"{filled:d} {regteams:d} {regteams:d}\n")

2021-11-08: 840 insegnanti hanno confermato la partecipazione; ci sono 34842 squadre già registrate (>34842 alunni).

In [7]:
# Identifiers of the past editions, passed verbatim as the `edition` parameter of
# the API and used as keys of `oldteachers` / `intersect`.
# NOTE(review): the first three are bare years, the later ones use the "bebras_YYYY"
# form - presumably this mirrors how the server names them; confirm.
oldeditions = (2015, 2016, 2017, "bebras_2018", "bebras_2019", "bebras_2020")
In [8]:
# Download (and cache on disk) the teacher list of every past edition.
oldteachers = {}
for y in oldeditions:
    # Close the HTTP response as soon as the payload has been read.
    with urllib.request.urlopen(("https://bebras.it/api?key={}&view=teachers_edition"+
                                 "&edition={}").format(key, y)) as r:
        payload = r.read().decode('utf-8')

    cache_file = "teachers{}.json".format(y)
    # Written as UTF-8 because that is the encoding pd.read_json assumes.
    with open(cache_file, "w", encoding="utf-8") as tw:
        tw.write(payload)

    # NOTE(review): the first three records are discarded - presumably test or
    # staff accounts; confirm and document the reason.
    oldteachers[y] = pd.read_json(cache_file, convert_axes=True)[3:]
In [9]:
# For every past edition: which of this year's teachers were already registered,
# and how the number of their active teams changed.
intersect = {}
s = """*{:d}* insegnanti hanno già partecipato all'edizione {} (**{:.0f}%** dei partecipanti di quell'edizione), 
il numero di squadre è aumentato in media di {:.1f} (deviazione standard {:.0f}).
"""
for y in oldeditions:
    previous = oldteachers[y]
    # After the merge, the `_x` columns refer to this year and the `_y` ones to edition `y`.
    merged = pd.merge(teachers, previous, on='id', how='inner')
    merged['deltateams'] = merged['teams_active_x'] - merged['teams_active_y']
    intersect[y] = merged

    returning = merged['id'].count()
    # The percentage is relative to the teachers who had at least one active team.
    base = len(previous[previous['teams_active'] > 0])
    share = 100*float(returning)/float(base)
    delta = merged['deltateams']
    display(Markdown(s.format(returning, str(y)[-4:], share, delta.mean(), delta.std())))

56 insegnanti hanno già partecipato all'edizione 2015 (23% dei partecipanti di quell'edizione), il numero di squadre è aumentato in media di 34.8 (deviazione standard 53).

121 insegnanti hanno già partecipato all'edizione 2016 (17% dei partecipanti di quell'edizione), il numero di squadre è aumentato in media di 35.2 (deviazione standard 64).

223 insegnanti hanno già partecipato all'edizione 2017 (21% dei partecipanti di quell'edizione), il numero di squadre è aumentato in media di 35.6 (deviazione standard 65).

289 insegnanti hanno già partecipato all'edizione 2018 (25% dei partecipanti di quell'edizione), il numero di squadre è aumentato in media di 32.4 (deviazione standard 59).

336 insegnanti hanno già partecipato all'edizione 2019 (37% dei partecipanti di quell'edizione), il numero di squadre è aumentato in media di 31.7 (deviazione standard 58).

344 insegnanti hanno già partecipato all'edizione 2020 (69% dei partecipanti di quell'edizione), il numero di squadre è aumentato in media di -0.6 (deviazione standard 73).

In [10]:
# Teachers registered this year who also took part in two given past editions.
# `intersect[...]` is built from this year's teachers, so the third edition in
# each label is YEAR (the labels used to carry a stale hard-coded "2020").
trintersec = pd.merge(intersect["bebras_2019"], oldteachers["bebras_2018"], on='id', how='inner')
print("Hanno partecipato nel 2018, 2019 e {}: {}".format(YEAR, len(trintersec)))
print("Hanno partecipato nel 2017, 2018 e {}: {}".format(
      YEAR, len(pd.merge(intersect["bebras_2018"], oldteachers[2017], on='id', how='inner'))))
Hanno partecipato nel 2018, 2019 e 2020: 243
Hanno partecipato nel 2017, 2018 e 2020: 196
In [11]:
# Start from this year's teacher ids and, one past edition at a time, keep only
# the ids that also appear in that edition.
all_intersect = teachers[['id']]
for e in oldeditions:
    all_intersect = pd.merge(all_intersect[['id']], oldteachers[e][['id']], on='id', how='inner')

print(f"Hanno partecipato a tutte le {len(oldeditions) + 1} edizioni: {len(all_intersect)} insegnanti.")
# To list them, merge `teachers` with `all_intersect` on 'id' (not displayed here:
# the result contains the teachers' names and schools).
Hanno partecipato a tutte le 7 edizioni: 33 insegnanti.
In [12]:
# Number of subscribed teachers per school code, considering only the teachers
# who gave a school code and confirmed after September 1st of the current edition.
# The date is derived from YEAR so that the cell keeps working in the next editions.
has_code = teachers['school_code'].str.strip() != ""
subscribed = teachers['subscription'] > 0
confirmed_this_year = teachers['confirm_time'].dt.date > date(YEAR, 9, 1)
institutes = teachers[has_code & subscribed & confirmed_this_year].groupby('school_code')['id'].count()

print("Totale istituti con codice meccanografico: {}; numero medio insegnanti per codice: {:.2f}".format(len(institutes), institutes.mean()))
Totale istituti con codice meccanografico: 410; numero medio insegnanti per codice: 1.70
In [13]:
import os
# Collect the daily snapshots ("stats-YYYY-MM-DD.txt") found in the notebook
# directory. Each file holds one line: "<teachers> <teams> <pupils>".
stats_rows = []
for f in os.listdir("."):
    if not (f.startswith("stats-") and os.path.isfile(f)):
        continue
    # The snapshot day comes from the file name, exactly as for the files in
    # "old/": the modification time is unreliable (copies, checkouts, and the
    # last snapshot being rewritten after the cut-off day).
    d = [int(x) for x in f.split('.')[0].split('-')[1:4]]
    with open(f, "r") as df:
        nn = [int(x) for x in df.readline().strip().split(" ")]
    # Key: time left until the cut-off day of this edition.
    stats_rows.append((date(YEAR, 11, 8) - date(*d), nn))
data = pd.DataFrame.from_dict(dict(stats_rows), orient="index",
                               columns=["insegnanti","squadre","alunni"]).sort_index(ascending=False)
# NOTE(review): the 'timedelta64[D]' conversion is accepted by older pandas only - confirm
# against the installed version.
data['giorni'] = (data.index * -1).astype('timedelta64[D]')
In [14]:
# Same collection as for the current edition, but for the snapshots of the
# previous edition stored under "old/"; days are counted back from
# November 11th of the previous year.
olddata = []
for folder, _subdirs, names in os.walk("old"):
    for fname in names:
        if not fname.startswith("stats-"):
            continue
        # "stats-YYYY-MM-DD.txt" -> [YYYY, MM, DD]
        d = [int(x) for x in fname.split('.')[0].split('-')[1:4]]
        with open(os.path.join(folder, fname), "r") as snapshot:
            first_line = snapshot.readline().strip()
        nn = [int(x) for x in first_line.split(" ")]
        olddata.append((date(YEAR-1,11,11) - date(*d), nn))
olddata = pd.DataFrame.from_dict(
    dict(olddata),
    orient="index",
    columns=["insegnanti","squadre","alunni"],
).sort_index(ascending=False)
olddata['giorni'] = (olddata.index * -1).astype('timedelta64[D]')
In [15]:
# Registration trend of this edition (solid) against the previous one (dashed),
# one panel for teams and one for teachers; x is the number of days to the cut-off.
fig, ax = plt.subplots(1, 2)
fig.set_size_inches(11, 5)

days_now = [-d.days for d in data.index]
days_before = [-d.days for d in olddata.index]

for panel, t in zip(ax, ('squadre', 'insegnanti')):
    current, previous = data[t], olddata[t]
    panel.plot(days_now, list(current), label=t + ' ' + str(YEAR))
    panel.plot(days_before, list(previous), '--', label=t + ' ' + str(YEAR-1) )
    panel.legend()
    panel.set_xlim([-50,0])
    # Relative change of the maximum with respect to the previous edition.
    delta = (current.max()-previous.max())/previous.max()
    panel.text(-.9*current.count(), .75*current.max(), '{:+.1f}%'.format(delta*100),
               color='red' if delta < 0 else 'green')

plt.show()
In [16]:
# Download the teams of this edition and cache them in teams.json.
# The HTTP response is closed as soon as the payload has been read, and the
# text is written in one call (writelines() on a string writes it character by character).
with urllib.request.urlopen(("https://bebras.it/api?key={}&view=teams"+
                             "&edition=bebras_{}").format(key, YEAR)) as r:
    payload = r.read().decode('utf-8')
with open("teams.json", "w") as tw:
    tw.write(payload)
In [17]:
import json
In [18]:
# Age categories, and the two team sizes allowed in each of them.
CATS = ('kilo', 'mega', 'giga', 'tera', 'peta')
SUBS = ('single', 'double')
# Every "<category>-<team size>" pair, grouped by category.
CATEGORIES = tuple(
    f'{c}-{s}'
    for c in CATS
    for s in SUBS
)
In [19]:
# Registered teams per category, compared with the previous edition, and the
# expected number of participants.
with open("teams.json") as t:
    teams = pd.DataFrame(json.load(t)['teams'])

# Teams of the previous edition, per category (hard-coded figures, in CATS order).
prev_col = f'teams_{YEAR-1}'
oldteams = pd.DataFrame({prev_col: [3120,6573,3321,3325,2363]},
                        index=pd.Index(CATS, name='class'))

# 'class' looks like "<category>-<team size>": its first four letters are the category.
teams['macro'] = teams['class'].str.slice(0,4)

tdata = teams.groupby('macro')['login'].count()

# A loop doing `oldteams.loc[i]['teams_<YEAR>'] = 1` was removed here: being a
# chained assignment it only modified a temporary row and never changed `oldteams`.

tdata = pd.concat([tdata, oldteams], axis=1)

tdata['Incremento %'] = 100*(tdata['login']-tdata[prev_col])/tdata[prev_col]
display(tdata)

total_now = tdata['login'].sum()
total_prev = tdata[prev_col].sum()
print("In totale {} squadre iscritte ({:+.2f}% rispetto alle squadre partecipanti nel {})".format(
    total_now,
    100*(total_now-total_prev)/total_prev, YEAR-1))
print("In totale {} squadre iscritte ({:+.2f}% rispetto alle squadre iscritte nel {})".format(
    total_now,
    100*(total_now-olddata['squadre'].max())/olddata['squadre'].max(), YEAR-1))
print(f"Nell'edizione {YEAR-1} le squadre avevano 1 componente, nel {YEAR} 1 o 2.")

# Expected participants: every team of a "-double" class counts as two pupils.
students = teams.groupby('class').count().apply(lambda x: 2*x if x.name.endswith('double') else x, axis=1)['login']

print('\nIl numero di partecipanti previsto per categoria:')
display(students)
print(f"Il numero totale di partecipanti previsto è {students.sum()}.")
login teams_2020 Incremento %
giga 6648 3321 100.180668
kilo 4265 3120 36.698718
mega 14460 6573 119.990872
peta 3817 2363 61.531951
tera 5653 3325 70.015038
In totale 34843 squadre iscritte (+86.31% rispetto alle squadre partecipanti nel 2020)
In totale 34843 squadre iscritte (+28.19% rispetto alle squadre iscritte nel 2020)
Nell'edizione 2020 le squadre avevano 1 componente, nel 2021 1 o 2.

Il numero di partecipanti previsto per categoria:
class
giga-double     4856
giga-single     4220
kilo-double     4522
kilo-single     2004
mega-double    10186
mega-single     9367
peta-double     3546
peta-single     2044
tera-double     4012
tera-single     3647
Name: login, dtype: int64
Il numero totale di partecipanti previsto è 48404.

La popolazione studentesca nazionale

Dati ISTAT sulla popolazione studentesca della scuola primaria e secondaria nel 2014 (fonte: http://dati.istat.it)

In [20]:
# Student population per region: (primary, lower secondary, upper secondary).
# 'ESTERO' (abroad) has no population figures.
_NAN = float('nan')
_population = {
    "PIEMONTE":              (191399, 117997, 168439),
    "VALLE D'AOSTA":         (5981, 3691, 5309),
    "LIGURIA":               (61566, 39213, 60184),
    "LOMBARDIA":             (468662, 283007, 381619),
    "TRENTINO-ALTO ADIGE":   (27028, 16890, 21836),
    "VENETO":                (232694, 142401, 204262),
    "FRIULI-VENEZIA GIULIA": (51830, 32143, 46949),
    "EMILIA-ROMAGNA":        (198417, 118460, 176968),
    "TOSCANA":               (161001, 98203, 152886),
    "UMBRIA":                (39181, 23488, 36946),
    "MARCHE":                (67996, 42095, 70602),
    "LAZIO":                 (268133, 161573, 249145),
    "ABRUZZO":               (57146, 35828, 58578),
    "MOLISE":                (12595, 8354, 14990),
    "CAMPANIA":              (317346, 204223, 326644),
    "PUGLIA":                (198662, 130675, 213545),
    "BASILICATA":            (25237, 17097, 30214),
    "CALABRIA":              (93277, 59624, 101208),
    "SICILIA":               (254023, 164520, 252730),
    "SARDEGNA":              (67379, 44105, 74003),
    "ESTERO":                (_NAN, _NAN, _NAN),
}
_levels = ('Primaria','Secondaria I grado','Secondaria II grado')

istat = pd.DataFrame.from_dict(_population, orient="index", columns=_levels)
# Row total; skipna=False keeps the total undefined where the figures are missing.
istat['totale'] = istat[list(_levels)].sum(axis=1, skipna=False)
with pd.option_context('display.float_format', '{:.0f}'.format):
    display(istat)
Primaria Secondaria I grado Secondaria II grado totale
PIEMONTE 191399 117997 168439 477835
VALLE D'AOSTA 5981 3691 5309 14981
LIGURIA 61566 39213 60184 160963
LOMBARDIA 468662 283007 381619 1133288
TRENTINO-ALTO ADIGE 27028 16890 21836 65754
VENETO 232694 142401 204262 579357
FRIULI-VENEZIA GIULIA 51830 32143 46949 130922
EMILIA-ROMAGNA 198417 118460 176968 493845
TOSCANA 161001 98203 152886 412090
UMBRIA 39181 23488 36946 99615
MARCHE 67996 42095 70602 180693
LAZIO 268133 161573 249145 678851
ABRUZZO 57146 35828 58578 151552
MOLISE 12595 8354 14990 35939
CAMPANIA 317346 204223 326644 848213
PUGLIA 198662 130675 213545 542882
BASILICATA 25237 17097 30214 72548
CALABRIA 93277 59624 101208 254109
SICILIA 254023 164520 252730 671273
SARDEGNA 67379 44105 74003 185487
ESTERO NaN NaN NaN NaN

Analisi delle gare

In [21]:
# Age categories and team sizes.
CATS = ('kilo', 'mega', 'giga', 'tera', 'peta')
SUBS = ('single', 'double')
# "<category>-<team size>" names grouped by category...
CATEGORIES = tuple(
    f'{c}-{s}'
    for c in CATS
    for s in SUBS
)
# ...and the same names grouped by team size (all the singles first, then the doubles).
CAT_FILES = tuple(
    name
    for s in SUBS
    for name in CATEGORIES
    if name.endswith('-' + s)
)
# School-level code -> readable name.
snames = dict(E='Primaria', M='Secondaria I grado', S='Secondaria II grado')
In [22]:
# Download the exam overview of each contest, unless a cached copy already exists.
# The server-side test id is 98 plus the position of the category in CAT_FILES.
for i, k in enumerate(CAT_FILES):
    target = f"overview-{k}.json"
    if os.path.exists(target):
        continue
    # Close the HTTP response as soon as the payload has been read, and write the
    # text in one call (writelines() on a string writes it character by character).
    with urllib.request.urlopen(f"https://bebras.it/api?key={key}&view=exams&test={98+i}&examdata=0&edition=bebras_{YEAR}&events=0") as r:
        payload = r.read().decode('utf-8')
    with open(target, "w") as tw:
        tw.write(payload)
In [23]:
import json

# Concatenate the 'exams' records of every cached overview file, in CATEGORIES order.
overview = []
for k in CATEGORIES:
    with open(f"overview-{k}.json", "r") as t:
        overview.extend(json.load(t)['exams'])
In [24]:
def _named_members(tt):
    """Number of members with a non-empty name in a team composition (0 when it is not a dict)."""
    if type(tt) != type({}):
        return 0
    return sum(1 for s in tt['members'] if s['name'] != '')


# One row per exam, keeping only the fields used by the analysis.
dfov = pd.DataFrame(overview)
_ordered_categories = pd.api.types.CategoricalDtype(categories = CATEGORIES, ordered=True)
gare = pd.DataFrame({
    'categoria': dfov['category'].str.lower().astype(_ordered_categories),
    'insegnante': dfov['teacher_id'].astype('int64'),
    'login': dfov['login'],
    'status': dfov['exam_valid_score'],
    'risultato': dfov['score'],
    'data': pd.to_datetime(dfov['time']),
    'studenti': dfov['team_composition'].map(_named_members),
})
In [25]:
# Teachers indexed by id, with the normalized region of their school; the region
# is then attached to every exam through the teacher id.
fid = teachers.set_index('id').assign(
    regione=lambda frame: frame['school_region'].map(norm_region)
)
gare = gare.join(fid[['regione']], on='insegnante')
In [26]:
# Keep only the exams whose 'status' (exam_valid_score) equals 1.
_is_valid = gare['status'] == 1
done = gare.loc[_is_valid]

Insegnanti partecipanti

In [27]:
# Number of distinct teachers with at least one valid exam.
done['insegnante'].nunique()
Out[27]:
751

Insegnanti per regione che hanno partecipato

In [28]:
# Distinct teachers per region.
teachers_by_region = done.groupby('regione')['insegnante'].nunique()
display(teachers_by_region)
regione
ABRUZZO                   12
BASILICATA                18
BUCAREST (ROMANIA)         1
CALABRIA                   4
CAMPANIA                  78
CROAZIA                    1
EMILIA-ROMAGNA            48
FRIULI-VENEZIA GIULIA     22
ILFOV                      1
LAZIO                     35
LIGURIA                   19
LOMBARDIA                235
MARCHE                     8
MOLISE                     3
PIEMONTE                  38
PUGLIA                    41
SARDEGNA                   6
SICILIA                   14
TOSCANA                   22
TRENTINO-ALTO ADIGE       11
UMBRIA                     7
VALLE D'AOSTA             38
VENETO                    89
Name: insegnante, dtype: int64

Insegnanti per categoria

In [29]:
# Distinct teachers per category.
teachers_by_category = done.groupby('categoria')['insegnante'].nunique()
display(teachers_by_category)
categoria
kilo-single    119
kilo-double    115
mega-single    234
mega-double    232
giga-single    172
giga-double    155
tera-single     76
tera-double     80
peta-single     60
peta-double     77
Name: insegnante, dtype: int64

Squadre per regione e categoria

In [30]:
# Number of teams (logins) with a valid exam, per region and category.
teams_by_region_category = done.groupby(['regione', 'categoria'])['login'].count()
with pd.option_context('display.float_format', '{:.0f}'.format):
    display(teams_by_region_category)
regione                categoria  
ABRUZZO                kilo-single       8
                       kilo-double      69
                       mega-single       2
                       mega-double      26
                       giga-single       0
                       giga-double      12
                       tera-single      67
                       tera-double       0
                       peta-single      57
                       peta-double      49
BASILICATA             kilo-single      10
                       kilo-double      31
                       mega-single     416
                       mega-double      23
                       giga-single     178
                       giga-double       8
                       tera-single      16
                       tera-double       0
                       peta-single       0
                       peta-double       0
BUCAREST (ROMANIA)     kilo-single       3
                       kilo-double       4
                       mega-single       0
                       mega-double      13
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
CALABRIA               kilo-single      57
                       kilo-double      16
                       mega-single       0
                       mega-double       0
                       giga-single       0
                       giga-double       0
                       tera-single      20
                       tera-double       0
                       peta-single       0
                       peta-double       0
CAMPANIA               kilo-single     297
                       kilo-double     130
                       mega-single     629
                       mega-double     324
                       giga-single     273
                       giga-double     143
                       tera-single     155
                       tera-double     102
                       peta-single     208
                       peta-double     147
CROAZIA                kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       2
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
EMILIA-ROMAGNA         kilo-single     140
                       kilo-double      23
                       mega-single     545
                       mega-double     299
                       giga-single     208
                       giga-double     146
                       tera-single     433
                       tera-double     101
                       peta-single     103
                       peta-double     162
FRIULI-VENEZIA GIULIA  kilo-single       3
                       kilo-double      36
                       mega-single      78
                       mega-double      10
                       giga-single      38
                       giga-double       8
                       tera-single     543
                       tera-double     109
                       peta-single     192
                       peta-double     105
ILFOV                  kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       0
                       giga-single       2
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
LAZIO                  kilo-single     207
                       kilo-double      97
                       mega-single     192
                       mega-double     248
                       giga-single     107
                       giga-double      75
                       tera-single      94
                       tera-double     137
                       peta-single      80
                       peta-double     227
LIGURIA                kilo-single      20
                       kilo-double      99
                       mega-single      12
                       mega-double     121
                       giga-single      20
                       giga-double      68
                       tera-single       0
                       tera-double      20
                       peta-single       0
                       peta-double      71
LOMBARDIA              kilo-single     309
                       kilo-double     820
                       mega-single    1633
                       mega-double    1176
                       giga-single     708
                       giga-double     598
                       tera-single     862
                       tera-double     603
                       peta-single     424
                       peta-double     162
MARCHE                 kilo-single      71
                       kilo-double       1
                       mega-single      29
                       mega-double       7
                       giga-single       9
                       giga-double      38
                       tera-single      51
                       tera-double      22
                       peta-single     134
                       peta-double      19
MOLISE                 kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       8
                       giga-single       0
                       giga-double       4
                       tera-single      23
                       tera-double       7
                       peta-single      41
                       peta-double       9
PIEMONTE               kilo-single      74
                       kilo-double      42
                       mega-single     208
                       mega-double     331
                       giga-single     116
                       giga-double     170
                       tera-single      16
                       tera-double     162
                       peta-single      81
                       peta-double     155
PUGLIA                 kilo-single     204
                       kilo-double     162
                       mega-single     765
                       mega-double     217
                       giga-single     279
                       giga-double      72
                       tera-single      35
                       tera-double      74
                       peta-single      57
                       peta-double      28
SARDEGNA               kilo-single       0
                       kilo-double       0
                       mega-single     239
                       mega-double      29
                       giga-single     106
                       giga-double       4
                       tera-single       0
                       tera-double       8
                       peta-single       0
                       peta-double       9
SICILIA                kilo-single       0
                       kilo-double       0
                       mega-single      46
                       mega-double      98
                       giga-single      16
                       giga-double      49
                       tera-single      57
                       tera-double      24
                       peta-single       0
                       peta-double      48
TOSCANA                kilo-single      26
                       kilo-double      47
                       mega-single     158
                       mega-double     244
                       giga-single     113
                       giga-double     107
                       tera-single       0
                       tera-double       1
                       peta-single       0
                       peta-double      11
TRENTINO-ALTO ADIGE    kilo-single      72
                       kilo-double       8
                       mega-single     330
                       mega-double     148
                       giga-single     147
                       giga-double      71
                       tera-single       3
                       tera-double      59
                       peta-single       4
                       peta-double      22
UMBRIA                 kilo-single       5
                       kilo-double       8
                       mega-single     179
                       mega-double       0
                       giga-single      81
                       giga-double       0
                       tera-single     184
                       tera-double       0
                       peta-single       0
                       peta-double       0
VALLE D'AOSTA          kilo-single      54
                       kilo-double      10
                       mega-single     596
                       mega-double     306
                       giga-single     279
                       giga-double     152
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
VENETO                 kilo-single      78
                       kilo-double     225
                       mega-single    1237
                       mega-double     651
                       giga-single     623
                       giga-double     316
                       tera-single     478
                       tera-double     174
                       peta-single     236
                       peta-double     187
Name: login, dtype: int64

Studenti per regione e categoria

In [31]:
# Number of named students in the teams with a valid exam, per region and category.
students_by_region_category = done.groupby(['regione', 'categoria'])['studenti'].sum()
with pd.option_context('display.float_format', '{:.0f}'.format):
    display(students_by_region_category)
regione                categoria  
ABRUZZO                kilo-single       7
                       kilo-double     134
                       mega-single       0
                       mega-double      30
                       giga-single       0
                       giga-double      12
                       tera-single      67
                       tera-double       0
                       peta-single      57
                       peta-double      95
BASILICATA             kilo-single       1
                       kilo-double      61
                       mega-single     210
                       mega-double       0
                       giga-single     100
                       giga-double       0
                       tera-single       5
                       tera-double       0
                       peta-single       0
                       peta-double       0
BUCAREST (ROMANIA)     kilo-single       3
                       kilo-double       8
                       mega-single       0
                       mega-double      13
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
CALABRIA               kilo-single      57
                       kilo-double      20
                       mega-single       0
                       mega-double       0
                       giga-single       0
                       giga-double       0
                       tera-single      19
                       tera-double       0
                       peta-single       0
                       peta-double       0
CAMPANIA               kilo-single     100
                       kilo-double     231
                       mega-single     508
                       mega-double     542
                       giga-single     204
                       giga-double     269
                       tera-single      22
                       tera-double     166
                       peta-single       2
                       peta-double     262
CROAZIA                kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       4
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
EMILIA-ROMAGNA         kilo-single      49
                       kilo-double      44
                       mega-single     230
                       mega-double     545
                       giga-single      73
                       giga-double     256
                       tera-single     322
                       tera-double     100
                       peta-single      31
                       peta-double     289
FRIULI-VENEZIA GIULIA  kilo-single       3
                       kilo-double      72
                       mega-single      77
                       mega-double      20
                       giga-single      38
                       giga-double      16
                       tera-single     384
                       tera-double     198
                       peta-single      97
                       peta-double     186
ILFOV                  kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       0
                       giga-single       2
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
LAZIO                  kilo-single     177
                       kilo-double     192
                       mega-single      12
                       mega-double     230
                       giga-single       3
                       giga-double      76
                       tera-single      17
                       tera-double     261
                       peta-single      50
                       peta-double     332
LIGURIA                kilo-single      20
                       kilo-double     182
                       mega-single      12
                       mega-double     141
                       giga-single      18
                       giga-double      51
                       tera-single       0
                       tera-double      18
                       peta-single       0
                       peta-double       0
LOMBARDIA              kilo-single     218
                       kilo-double    1302
                       mega-single     868
                       mega-double    1737
                       giga-single     413
                       giga-double     878
                       tera-single     540
                       tera-double     708
                       peta-single     359
                       peta-double     314
MARCHE                 kilo-single      51
                       kilo-double       0
                       mega-single       1
                       mega-double       0
                       giga-single       0
                       giga-double       0
                       tera-single      51
                       tera-double      43
                       peta-single     133
                       peta-double      36
MOLISE                 kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double      12
                       giga-single       0
                       giga-double       4
                       tera-single      21
                       tera-double      14
                       peta-single      20
                       peta-double      18
PIEMONTE               kilo-single      68
                       kilo-double      66
                       mega-single     205
                       mega-double     272
                       giga-single     116
                       giga-double     138
                       tera-single      16
                       tera-double     319
                       peta-single      79
                       peta-double     302
PUGLIA                 kilo-single     167
                       kilo-double     271
                       mega-single     675
                       mega-double     274
                       giga-single     229
                       giga-double     141
                       tera-single      33
                       tera-double     148
                       peta-single      57
                       peta-double      56
SARDEGNA               kilo-single       0
                       kilo-double       0
                       mega-single       8
                       mega-double      58
                       giga-single       0
                       giga-double       8
                       tera-single       0
                       tera-double      14
                       peta-single       0
                       peta-double      18
SICILIA                kilo-single       0
                       kilo-double       0
                       mega-single      44
                       mega-double     184
                       giga-single      15
                       giga-double      87
                       tera-single      57
                       tera-double      42
                       peta-single       0
                       peta-double      90
TOSCANA                kilo-single      10
                       kilo-double      82
                       mega-single     158
                       mega-double     272
                       giga-single     110
                       giga-double      24
                       tera-single       0
                       tera-double       2
                       peta-single       0
                       peta-double      21
TRENTINO-ALTO ADIGE    kilo-single       0
                       kilo-double       0
                       mega-single     237
                       mega-double      88
                       giga-single     131
                       giga-double      44
                       tera-single       3
                       tera-double     116
                       peta-single       4
                       peta-double      44
UMBRIA                 kilo-single       5
                       kilo-double       3
                       mega-single     136
                       mega-double       0
                       giga-single      60
                       giga-double       0
                       tera-single     184
                       tera-double       0
                       peta-single       0
                       peta-double       0
VALLE D'AOSTA          kilo-single      43
                       kilo-double       0
                       mega-single     254
                       mega-double     464
                       giga-single     116
                       giga-double     304
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
VENETO                 kilo-single      79
                       kilo-double     448
                       mega-single     740
                       mega-double     898
                       giga-single     358
                       giga-double     447
                       tera-single     478
                       tera-double     213
                       peta-single     223
                       peta-double     323
Name: studenti, dtype: int64

Cartografia ISTAT 2011 (fonte: http://www.istat.it/it/archivio/24613), convertita con il comando:

ogr2ogr -f GeoJSON -s_srs reg2011_g.prj -t_srs EPSG:4326 it.json reg2011_g.shp

(fonte: https://gist.github.com/riccardoscalco/6029355)

In [32]:
import geopandas as gpd
%matplotlib inline

# Regional boundaries, read from the GeoJSON file generated with ogr2ogr.
it = gpd.read_file("it.json")

# Map panels: the overall total first, then one entry per value of `snames`.
TYPES = ['totale', *snames.values()]

# Build each grouping once and derive both aggregates from it.
by_region = done.groupby(['regione'])
by_region_and_category = done.groupby(['regione', 'categoria'])

# Non-null row counts per group (read below through the 'login' column).
dreg = by_region.count()
dregk = by_region_and_category.count()

# Column sums per group (read below through the 'studenti' column).
sreg = by_region.sum()
sregk = by_region_and_category.sum()


def get_data_with_default(geo, i, t, ddata, sdata, jj, labeld='login', labels='studenti'):
    """Store the team and student totals for row `i` of `geo`, defaulting to 0.

    Two cells of `geo` are written in place:

    * ``'squadre ' + t``  -- sum over the keys in `jj` of ``ddata.loc[j, labeld]``
    * ``'studenti ' + t`` -- sum over the keys in `jj` of ``sdata.loc[j, labels]``

    Only strictly positive values are added (NaN and non-positive values
    count as 0).  A key or column that is absent from the aggregated table
    contributes 0 *for that key only*: the remaining keys in `jj` are still
    summed.  Previously the first missing key aborted the whole sum, silently
    discarding every key listed after it.

    Parameters
    ----------
    geo : DataFrame-like indexed with ``.loc``; modified in place.
    i : row label of `geo` to fill.
    t : suffix appended to the ``'squadre '`` / ``'studenti '`` column names.
    ddata, sdata : tables providing the team and student values respectively.
    jj : iterable of row labels (tuples for a MultiIndex) to sum over.
    labeld, labels : column read from `ddata` and from `sdata`.
    """
    def _positive_total(data, label):
        # Sum the strictly positive values found at (key, label) for each key.
        total = 0
        for j in jj:
            try:
                value = data.loc[j, label]
            except KeyError:
                # No data for this key: it counts as 0, keep going.
                continue
            if value > 0:
                total += value
        return total

    geo.loc[i, 'squadre' + ' ' + t] = _positive_total(ddata, labeld)
    geo.loc[i, 'studenti' + ' ' + t] = _positive_total(sdata, labels)

        
# Category labels have the form "<size>-<mode>".  Entry k of SIZE_PREFIXES
# lists the sizes aggregated into the map panel TYPES[k + 1].
SIZE_PREFIXES = (('kilo',), ('mega', 'giga'), ('tera', 'peta'))
MODES = ('single', 'double')

for row_id, region in it.iterrows():
    # Regions are paired with the istat table on the first five characters of
    # their name; the first istat entry that matches wins.
    cname = next(
        (name for name in istat.index if region['NOME_REG'][0:5] == name[0:5]),
        None,
    )
    if cname is None:
        continue

    # Adopt the istat spelling of the region name.
    it.loc[row_id, 'NOME_REG'] = cname

    # Teams and students: the overall figure first, then one per panel.
    get_data_with_default(it, row_id, TYPES[0], dreg, sreg, [cname])
    for k, prefixes in enumerate(SIZE_PREFIXES, start=1):
        keys = [(cname, '{}-{}'.format(size, mode))
                for size in prefixes
                for mode in MODES]
        get_data_with_default(it, row_id, TYPES[k], dregk, sregk, keys)

    # Reference population for each panel, read from the istat table.
    population_columns = ('totale', snames['E'], snames['M'], snames['S'])
    for k, column in enumerate(population_columns):
        it.loc[row_id, 'popolazione ' + TYPES[k]] = istat.loc[cname, column]

# Coverage = students per thousand inhabitants of the reference population.
for kind in TYPES:
    students = it['studenti ' + kind]
    population = it['popolazione ' + kind]
    it['copertura ' + kind] = 1000 * students / population

# One choropleth per entry of TYPES, laid out row by row on a 2x2 grid.
fig, ax = plt.subplots(2, 2, figsize=(15, 11))
choropleth_style = dict(cmap='YlOrRd', scheme='quantiles', legend=True)

for idx, kind in enumerate(TYPES):
    row, col = divmod(idx, 2)
    panel = ax[row][col]
    panel.set_aspect("equal")
    panel.set_axis_off()
    panel.set_title("Studenti ogni mille ({})".format(kind))
    it.plot(ax=panel, column='copertura ' + kind, **choropleth_style)

# Save the figure to disk before showing it inline.
fig.savefig('italia.png')
plt.show()

Il Bebras nel mondo (dati 2020)

In [33]:
# World geometries, indexed by country name so that they can be matched with
# the labels of the participation table.
w = gpd.read_file("world.json")
w = w.set_index("name")

# Participation table: one row per country ('bebras' and 'oecd' columns are
# read below).  read_json already returns a DataFrame, no wrapping is needed.
with open("wbebras.json", "r") as json_file:
    wbebras = pd.read_json(json_file, convert_axes=True, orient='index')

# Participants per thousand of the 'oecd' figure.
wbebras['copertura'] = 1000 * wbebras["bebras"] / wbebras["oecd"]

# Copy the figures onto the map table, one country at a time.  A country that
# cannot be copied is reported by printing its name and then skipped; only
# ordinary errors are caught, so interrupting the kernel still works.
for i in wbebras.index:
    try:
        for column in ("bebras", "oecd", "copertura"):
            w.loc[i, column] = wbebras.loc[i, column]
    except Exception:
        print(i)

# Two stacked maps: absolute participation on top, coverage below.
fig, (ax_participants, ax_coverage) = plt.subplots(2, 1, figsize=(20, 20))

ax_coverage.set_aspect("equal")
ax_coverage.set_axis_off()
ax_coverage.set_title("Partecipanti ogni 1000 studenti (dati OECD 2018)")
# dropna() without arguments discards rows with a missing value in any column.
w.dropna().plot(ax=ax_coverage, column='copertura', cmap='Blues', scheme='quantiles', legend=True)

ax_participants.set_aspect("equal")
ax_participants.set_axis_off()
ax_participants.set_title("Partecipanti Bebras 2020")
# Here only rows without a 'bebras' figure are discarded.
p = w.dropna(subset=["bebras"]).plot(ax=ax_participants, column='bebras', cmap='YlOrRd',
                                     scheme='quantiles', legend=True)
plt.show()

Numeri assoluti

In [34]:
# Countries ranked by number of participants, largest first.
ranking_columns = ["bebras", "oecd", "copertura"]
world_ranking = wbebras.sort_values("bebras", ascending=False)
display(world_ranking[ranking_columns])
bebras oecd copertura
France 523598 10412016.0 50.287860
Germany 381580 9935909.0 38.404136
United Kingdom 240803 11066193.0 21.760239
Belarus 166038 NaN NaN
Taiwan 159039 NaN NaN
India 103114 NaN NaN
South Korea 63897 5613337.0 11.383069
Czech Republic 61788 1370636.0 45.079802
Slovakia 55064 671408.0 82.012725
Australia 54920 4627837.0 11.867315
Ukraine 49317 NaN NaN
United States of America 47470 49829312.0 0.952652
Turkey 40431 16384160.0 2.467688
China 39869 NaN NaN
Croatia 36491 NaN NaN
Lithuania 32107 349619.0 91.834254
Switzerland 30994 1123542.0 27.585974
Hungary 29341 1182406.0 24.814658
Republic of Serbia 28187 NaN NaN
Uzbekistan 26335 NaN NaN
Vietnam 24787 NaN NaN
Austria 19741 1026408.0 19.233092
North Macedonia 19263 NaN NaN
Slovenia 18955 276126.0 68.646198
Netherlands 18245 2807504.0 6.498655
Portugal 17496 1388754.0 12.598344
Indonesia 16186 NaN NaN
Algeria 16184 NaN NaN
Canada 15462 5061204.0 3.055004
Latvia 14896 239000.0 62.326360
Thailand 14776 NaN NaN
Italy 14519 7501201.0 1.935557
South Africa 12359 NaN NaN
Poland 10074 4669836.0 2.157249
Ireland 9533 1056064.0 9.026915
Russia 8756 16217168.0 0.539922
Romania 7194 NaN NaN
Saudi Arabia 6527 NaN NaN
Iran 5371 NaN NaN
Bosnia and Herzegovina 5167 NaN NaN
Japan 4554 13500120.0 0.337330
Finland 4307 915321.0 4.705453
Uruguay 3593 NaN NaN
Belgium 3369 2000570.0 1.684020
Estonia 3354 172124.0 19.485952
Pakistan 3336 NaN NaN
New Zealand 2614 878178.0 2.976617
Iceland 2504 67592.0 37.045804
Egypt 2300 NaN NaN
Spain 2160 6414465.0 0.336739
Cyprus 1085 NaN NaN
Bulgaria 244 NaN NaN
Philippines 213 NaN NaN
Syria 163 NaN NaN
Niger 119 NaN NaN
Sweden 0 1828036.0 0.000000
Kazakhstan 0 NaN NaN
Singapore 0 NaN NaN
In [35]:
# Sum of the 'bebras' column over every country in the table.
total_participants = wbebras['bebras'].sum()
print("In totale nel mondo {} partecipanti".format(total_participants))
In totale nel mondo 2479789 partecipanti