Statistiche sulla partecipazione al Bebras italiano 2021/22¶

from IPython.display import HTML, Markdown

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<input type="button" value="Clicca per vedere/nascondere il codice Python" onclick="code_toggle()">''')

import warnings
warnings.filterwarnings('once')
#warnings.filterwarnings('ignore')

%matplotlib inline
import pandas as pd
import urllib.request
from IPython.display import display, Markdown
import matplotlib.pyplot as plt

pd.options.display.max_rows = None
plt.style.use('ggplot')

miur = pd.read_csv('bebras_school_list.zip', low_memory=False)

def norm_region(r):
    """Normalize the name of a region. It also corrects wrong names."""
    r = r.strip().upper()
    if r == 'FVG' or r.startswith('FRIULI'):
        return 'FRIULI-VENEZIA GIULIA'
    if r.startswith('EMILIA'):
        return 'EMILIA-ROMAGNA'
    if r.startswith('TRENTINO') or r.startswith('ALTO ADIGE'):
        return 'TRENTINO-ALTO ADIGE'
    if r.startswith('LOMB'):
        return 'LOMBARDIA'
    if r.startswith('VALLE') or 'AOSTA' in r:
        return "VALLE D'AOSTA"
    if r == 'G6GG6Y' or r == 'ITALIA':
        return None
    if r == 'ALBANIA' or r == 'BAVIERA' or r == 'SIERRA' or r == 'DDDD' or r == 'FRANCE' or r == 'SVIZZERA':
        return 'ESTERO'
    else:
        return r

def infer_school_type(k):
    knorm = k['school_kind'].strip().upper()
    cnorm = k['school_code'].strip().upper()
    if cnorm and miur[miur['i_code'] == cnorm]['i_type'].count() > 0:
        knorm = str(miur[miur['i_code'] == cnorm]['i_type'].iloc[0])
    if 'PRIMARIA' in knorm or 'INFANZIA' in knorm or 'ELEMENTARE' in knorm:
        return 'E'
    if 'PRIMO GRADO' in knorm or ('MEDIA' in knorm and (not 'SUP' in knorm))\
    or '1°' in knorm or ' I GRADO' in knorm or knorm == 'IC':
        return 'M'
    if 'COMPRENSIVO' in knorm:
        return 'EM'
    if 'SECONDO GRADO' in knorm or '2°' in knorm  or 'II GRADO' in knorm \
    or 'LICEO' in knorm or 'ITI' in knorm or 'PROF' in knorm or 'IST TEC' in knorm \
    or 'TECNICO' in knorm or 'MAGISTRALE' in knorm or 'SUPERIORE' in knorm:
        return 'S'
    if knorm == 'STATALE' or 'C.D.38':
        return 'EMS'
    else:
        return knorm

Insegnanti, stima delle squadre e alunni¶

from datetime import date

YEAR=2021

with open('secret.key') as k:
    key = k.readline().strip()

r = urllib.request.urlopen(("https://bebras.it/api?key={}&view=teachers_edition"+
                           "&edition=bebras_{}&subscription=1").format(key, YEAR))

with open("teachers.json", "w") as tw:
     tw.writelines(r.read().decode('utf-8'))        

teachers = pd.DataFrame(pd.read_json("teachers.json", convert_axes=True))

teachers.index = range(len(teachers))
teachers['confirm_time'] = pd.to_datetime(teachers['confirm_time'], unit='s')
teachers['enter_time'] = pd.to_datetime(teachers['enter_time'], unit='s')

teachers['school_code'] = teachers['school_code'].str.strip().str.upper()
teachers['school_type'] = teachers[['school_kind','school_code']].apply(infer_school_type, axis=1)
filled = len(teachers)
regteams = teachers['teams_active'].sum()

today = date.today()
if today > date(YEAR,11,8):
    today = date(YEAR,11,8)
s = """*{}:* **{:d}** insegnanti hanno confermato la partecipazione; 
ci sono **{:d}** squadre già registrate (>*{:d}* alunni).
"""
display(Markdown(s.format(str(today)[:19], 
                          filled, regteams, regteams)))

if today <= date(YEAR,11,8):
    isotoday = today.isoformat()[:10]
    with open("stats-" + isotoday + ".txt", "w") as stat:
        stat.write(f"{filled:d} {regteams:d} {regteams:d}\n")

oldeditions = (2015, 2016, 2017, "bebras_2018", "bebras_2019", "bebras_2020")

oldteachers = {}
for y in oldeditions:
    r = urllib.request.urlopen(("https://bebras.it/api?key={}&view=teachers_edition"+
                                "&edition={}").format(key, y))
    with open("teachers{}.json".format(y), "w") as tw:
        tw.writelines(r.read().decode('utf-8'))

    oldteachers[y] = pd.DataFrame(pd.read_json("teachers{}.json".format(y), convert_axes=True))[3:]
    #oldtteachers[y]['school_type'] = oldteachers[['school_kind','school_code']].apply(infer_school_type, axis=1)

intersect = {}
for y in oldeditions:
    intersect[y] = pd.merge(teachers, oldteachers[y], on='id', how='inner')
    intersect[y]['deltateams'] = intersect[y]['teams_active_x'] - intersect[y]['teams_active_y']
    returning = intersect[y]['id'].count()
    base = len(oldteachers[y][oldteachers[y]['teams_active'] > 0])
    s = """*{:d}* insegnanti hanno già partecipato all'edizione {} (**{:.0f}%** dei partecipanti di quell'edizione), 
il numero di squadre è aumentato in media di {:.1f} (deviazione standard {:.0f}).
"""
    display(Markdown(s.format(returning, str(y)[-4:], 
                              100*float(returning)/float(base),
                              intersect[y]['deltateams'].mean(), intersect[y]['deltateams'].std() 
                             )))

trintersec = pd.merge(intersect["bebras_2019"], oldteachers["bebras_2018"], on='id', how='inner') 
print("Hanno partecipato nel 2018, 2019 e 2020: {}".format(len(trintersec)))
print("Hanno partecipato nel 2017, 2018 e 2020: {}".format(
      len(pd.merge(intersect["bebras_2018"], oldteachers[2017], on='id', how='inner'))))

Hanno partecipato nel 2018, 2019 e 2020: 243
Hanno partecipato nel 2017, 2018 e 2020: 196

all_intersect = pd.merge(teachers['id'], oldteachers[oldeditions[0]]['id'], on='id', how='inner')

for e in oldeditions[1:]:
    all_intersect = pd.merge(all_intersect['id'], oldteachers[e]['id'], on='id', how='inner')

    
print(f"Hanno partecipato a tutte le {len(oldeditions) + 1} edizioni: {len(all_intersect)} insegnanti.")
#display(pd.merge(teachers, all_intersect, on='id', how='inner')[['firstname', 'name', 'school_name', 'school_city', 'school_kind']])

Hanno partecipato a tutte le 7 edizioni: 33 insegnanti.

institutes = teachers[(teachers['school_code'].str.strip() != "") 
                      & (teachers['subscription'] > 0) 
                      & (teachers['confirm_time'].dt.date > date(2021,9,1))].groupby('school_code')['id'].count()

print("Totale istituti con codice meccanografico: {}; numero medio insegnanti per codice: {:.2f}".format(len(institutes), institutes.mean()))

Totale istituti con codice meccanografico: 410; numero medio insegnanti per codice: 1.70

import os
data = []
for path, dirs, files in os.walk("."):
    for f in files:
        if path == '.' and f.startswith("stats-"):
            d = [int(x) for x in f.split('.')[0].split('-')[1:4]]
            with open(f,"r") as df:
                nn = [int(x) for x in df.readline().strip().split(" ")]
                dd = date(YEAR, 11, 8) - date.fromtimestamp(os.stat(f).st_mtime)
                data.append((dd, nn))
data = pd.DataFrame.from_dict(dict(data), orient="index", 
                               columns=["insegnanti","squadre","alunni"]).sort_index(ascending=False)
data['giorni'] = (data.index * -1).astype('timedelta64[D]')

olddata = []
for path, dirs, files in os.walk("old"):
    for f in files:
        if f.startswith("stats-"):
            d = [int(x) for x in f.split('.')[0].split('-')[1:4]]
            with open(path + "/" + f,"r") as df:
                nn = [int(x) for x in df.readline().strip().split(" ")]
                olddata.append((date(YEAR-1,11,11) - date(*d), nn))
olddata = pd.DataFrame.from_dict(dict(olddata), orient="index", 
                                  columns=["insegnanti","squadre","alunni"]).sort_index(ascending=False)
olddata['giorni'] = (olddata.index * -1).astype('timedelta64[D]')

fig, ax = plt.subplots(1,2)
fig.set_size_inches(11,5)

for i, t in enumerate(['squadre', 'insegnanti']):
    ax[i].plot([-d.days for d in data.index], list(data[t]), label=t + ' ' + str(YEAR))
    ax[i].plot([-d.days for d in olddata.index], list(olddata[t]), '--', label=t + ' ' + str(YEAR-1) )
    ax[i].legend()
    ax[i].set_xlim([-50,0])
    delta = (data[t].max()-olddata[t].max())/olddata[t].max()
    ax[i].text(-.9*data[t].count(), .75*data[t].max(), '{:+.1f}%'.format(delta*100), color='red' if delta < 0 else 'green')

plt.show()

r = urllib.request.urlopen(("https://bebras.it/api?key={}&view=teams"+
                           "&edition=bebras_{}").format(key, YEAR))
with open("teams.json", "w") as tw:
     tw.writelines(r.read().decode('utf-8'))

import json

CATS = ('kilo','mega','giga','tera','peta')
SUBS = ('single', 'double')
CATEGORIES = tuple(f'{c}-{s}' for c in CATS for s in SUBS)

with open("teams.json") as t:
    teams = pd.DataFrame(json.load(t)['teams'])

oldteams = pd.DataFrame({'class': CATS,f'teams_{YEAR-1}':[3120,6573,3321,3325,2363]})
oldteams.index = oldteams['class']
del oldteams['class']


teams['macro'] = teams['class'].str.slice(0,4)
    
tdata = teams.groupby('macro').count()['login'].copy()

for i in oldteams.index:
    oldteams.loc[i]['teams_' + str(YEAR)] = 1

tdata = pd.concat([tdata, oldteams],axis=1)


tdata['Incremento %'] = 100*(tdata['login']-tdata[f'teams_{YEAR-1}'])/tdata[f'teams_{YEAR-1}']
display(tdata)
print("In totale {} squadre iscritte ({:+.2f}% rispetto alle squadre partecipanti nel {})".format(
    tdata['login'].sum(), 
    100*(tdata['login'].sum()-tdata[f'teams_{YEAR-1}'].sum())/tdata[f'teams_{YEAR-1}'].sum(), YEAR-1))
print("In totale {} squadre iscritte ({:+.2f}% rispetto alle squadre iscritte nel {})".format(
    tdata['login'].sum(), 
    100*(tdata['login'].sum()-olddata['squadre'].max())/olddata['squadre'].max(), YEAR-1))
print(f"Nell'edizione {YEAR-1} le squadre avevano 1 componente, nel {YEAR} 1 o 2.")

students = teams.groupby('class').count().apply(lambda x: 2*x if x.name.endswith('double') else x, axis=1)['login']

print('\nIl numero di partecipanti previsto per categoria:')
display(students)
print(f"Il numero totale di partecipanti previsto è {students.sum()}.")

In totale 34843 squadre iscritte (+86.31% rispetto alle squadre partecipanti nel 2020)
In totale 34843 squadre iscritte (+28.19% rispetto alle squadre iscritte nel 2020)
Nell'edizione 2020 le squadre avevano 1 componente, nel 2021 1 o 2.

Il numero di partecipanti previsto per categoria:

class
giga-double     4856
giga-single     4220
kilo-double     4522
kilo-single     2004
mega-double    10186
mega-single     9367
peta-double     3546
peta-single     2044
tera-double     4012
tera-single     3647
Name: login, dtype: int64

Il numero totale di partecipanti previsto è 48404.

La popolazione studentesca nazionale¶

Dati ISTAT della popolazione studentesca scuola primaria e secondaria nel 2014 (fonte: http://dati.istat.it)

istat = pd.DataFrame.from_dict(
    dict([
    ("PIEMONTE",              (191399, 117997, 168439)),
    ("VALLE D'AOSTA",         (  5981,   3691,   5309)),
    ("LIGURIA",               ( 61566,  39213,  60184)),
    ("LOMBARDIA",             (468662, 283007, 381619)),
    ("TRENTINO-ALTO ADIGE",   ( 27028,  16890,  21836)),
    ("VENETO",                (232694, 142401, 204262)),
    ("FRIULI-VENEZIA GIULIA", ( 51830,  32143,  46949)),
    ("EMILIA-ROMAGNA",        (198417, 118460, 176968)),
    ("TOSCANA",               (161001,  98203, 152886)),
    ("UMBRIA",                ( 39181,  23488,  36946)),
    ("MARCHE",                ( 67996,  42095,  70602)),
    ("LAZIO",                 (268133, 161573, 249145)),
    ("ABRUZZO",               ( 57146,  35828,  58578)),
    ("MOLISE",                ( 12595,   8354,  14990)),
    ("CAMPANIA",              (317346, 204223, 326644)),
    ("PUGLIA",                (198662, 130675, 213545)),
    ("BASILICATA",            (25237,  17097,   30214)),
    ("CALABRIA",              (93277,  59624,  101208)),
    ("SICILIA",               (254023, 164520, 252730)),
    ("SARDEGNA",              (67379,  44105,   74003)),
    ("ESTERO",       (float('nan'), float('nan'), float('nan')))
    ]),
    orient = "index",
    columns = ('Primaria','Secondaria I grado','Secondaria II grado'))
istat['totale'] = istat['Primaria'] + istat['Secondaria I grado'] + istat['Secondaria II grado']
with pd.option_context('display.float_format', '{:.0f}'.format):
    display(istat)

Analisi delle gare¶

CATS = ('kilo','mega','giga','tera','peta')
SUBS = ('single', 'double')
CATEGORIES = tuple(f'{c}-{s}' for c in CATS for s in SUBS)
CAT_FILES = tuple(f'{c}-{s}' for s in SUBS for c in CATS)
snames = {'E': 'Primaria', 'M': 'Secondaria I grado', 'S': 'Secondaria II grado'}

for i, k in enumerate(CAT_FILES):
    if not os.path.exists(f"overview-{k}.json"):
        r = urllib.request.urlopen(f"https://bebras.it/api?key={key}&view=exams&test={98+i}&examdata=0&edition=bebras_{YEAR}&events=0")
        with open(f"overview-{k}.json", "w") as tw:
            tw.writelines(r.read().decode('utf-8'))

import json

overview = []
for k in CATEGORIES:
    with open(f"overview-{k}.json", "r") as t:
        j = json.load(t)
        overview += j['exams']

dfov = pd.DataFrame(overview)
gare = pd.DataFrame()
gare['categoria'] = dfov['category'].str.lower().astype(pd.api.types.CategoricalDtype(categories = CATEGORIES, ordered=True))
gare['insegnante'] = dfov['teacher_id'].astype('int64')
gare['login'] = dfov['login']
gare['status'] = dfov['exam_valid_score']
gare['risultato'] = dfov['score']
gare['data'] = pd.to_datetime(dfov['time'])
gare['studenti'] = dfov['team_composition'].map(lambda tt: 0 if type(tt) != type({}) else len([s for s in tt['members'] if s['name'] != '' ]))

fid = teachers.set_index('id')
fid['regione'] = fid['school_region'].map(norm_region)
gare = gare.join(fid[['regione']],on='insegnante')

done = gare[gare['status'] == 1]

Insegnanti partecipanti¶

len(done.groupby(['insegnante']))

751

Insegnanti per regione che hanno partecipato¶

display(done.groupby(['regione'])['insegnante'].nunique())

regione
ABRUZZO                   12
BASILICATA                18
BUCAREST (ROMANIA)         1
CALABRIA                   4
CAMPANIA                  78
CROAZIA                    1
EMILIA-ROMAGNA            48
FRIULI-VENEZIA GIULIA     22
ILFOV                      1
LAZIO                     35
LIGURIA                   19
LOMBARDIA                235
MARCHE                     8
MOLISE                     3
PIEMONTE                  38
PUGLIA                    41
SARDEGNA                   6
SICILIA                   14
TOSCANA                   22
TRENTINO-ALTO ADIGE       11
UMBRIA                     7
VALLE D'AOSTA             38
VENETO                    89
Name: insegnante, dtype: int64

Insegnanti per categoria¶

display(done.groupby(['categoria'])['insegnante'].nunique())

categoria
kilo-single    119
kilo-double    115
mega-single    234
mega-double    232
giga-single    172
giga-double    155
tera-single     76
tera-double     80
peta-single     60
peta-double     77
Name: insegnante, dtype: int64

Squadre per categoria¶

with pd.option_context('display.float_format', '{:.0f}'.format):
    display(done.groupby(['regione', 'categoria'])['login'].count())

regione                categoria  
ABRUZZO                kilo-single       8
                       kilo-double      69
                       mega-single       2
                       mega-double      26
                       giga-single       0
                       giga-double      12
                       tera-single      67
                       tera-double       0
                       peta-single      57
                       peta-double      49
BASILICATA             kilo-single      10
                       kilo-double      31
                       mega-single     416
                       mega-double      23
                       giga-single     178
                       giga-double       8
                       tera-single      16
                       tera-double       0
                       peta-single       0
                       peta-double       0
BUCAREST (ROMANIA)     kilo-single       3
                       kilo-double       4
                       mega-single       0
                       mega-double      13
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
CALABRIA               kilo-single      57
                       kilo-double      16
                       mega-single       0
                       mega-double       0
                       giga-single       0
                       giga-double       0
                       tera-single      20
                       tera-double       0
                       peta-single       0
                       peta-double       0
CAMPANIA               kilo-single     297
                       kilo-double     130
                       mega-single     629
                       mega-double     324
                       giga-single     273
                       giga-double     143
                       tera-single     155
                       tera-double     102
                       peta-single     208
                       peta-double     147
CROAZIA                kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       2
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
EMILIA-ROMAGNA         kilo-single     140
                       kilo-double      23
                       mega-single     545
                       mega-double     299
                       giga-single     208
                       giga-double     146
                       tera-single     433
                       tera-double     101
                       peta-single     103
                       peta-double     162
FRIULI-VENEZIA GIULIA  kilo-single       3
                       kilo-double      36
                       mega-single      78
                       mega-double      10
                       giga-single      38
                       giga-double       8
                       tera-single     543
                       tera-double     109
                       peta-single     192
                       peta-double     105
ILFOV                  kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       0
                       giga-single       2
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
LAZIO                  kilo-single     207
                       kilo-double      97
                       mega-single     192
                       mega-double     248
                       giga-single     107
                       giga-double      75
                       tera-single      94
                       tera-double     137
                       peta-single      80
                       peta-double     227
LIGURIA                kilo-single      20
                       kilo-double      99
                       mega-single      12
                       mega-double     121
                       giga-single      20
                       giga-double      68
                       tera-single       0
                       tera-double      20
                       peta-single       0
                       peta-double      71
LOMBARDIA              kilo-single     309
                       kilo-double     820
                       mega-single    1633
                       mega-double    1176
                       giga-single     708
                       giga-double     598
                       tera-single     862
                       tera-double     603
                       peta-single     424
                       peta-double     162
MARCHE                 kilo-single      71
                       kilo-double       1
                       mega-single      29
                       mega-double       7
                       giga-single       9
                       giga-double      38
                       tera-single      51
                       tera-double      22
                       peta-single     134
                       peta-double      19
MOLISE                 kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       8
                       giga-single       0
                       giga-double       4
                       tera-single      23
                       tera-double       7
                       peta-single      41
                       peta-double       9
PIEMONTE               kilo-single      74
                       kilo-double      42
                       mega-single     208
                       mega-double     331
                       giga-single     116
                       giga-double     170
                       tera-single      16
                       tera-double     162
                       peta-single      81
                       peta-double     155
PUGLIA                 kilo-single     204
                       kilo-double     162
                       mega-single     765
                       mega-double     217
                       giga-single     279
                       giga-double      72
                       tera-single      35
                       tera-double      74
                       peta-single      57
                       peta-double      28
SARDEGNA               kilo-single       0
                       kilo-double       0
                       mega-single     239
                       mega-double      29
                       giga-single     106
                       giga-double       4
                       tera-single       0
                       tera-double       8
                       peta-single       0
                       peta-double       9
SICILIA                kilo-single       0
                       kilo-double       0
                       mega-single      46
                       mega-double      98
                       giga-single      16
                       giga-double      49
                       tera-single      57
                       tera-double      24
                       peta-single       0
                       peta-double      48
TOSCANA                kilo-single      26
                       kilo-double      47
                       mega-single     158
                       mega-double     244
                       giga-single     113
                       giga-double     107
                       tera-single       0
                       tera-double       1
                       peta-single       0
                       peta-double      11
TRENTINO-ALTO ADIGE    kilo-single      72
                       kilo-double       8
                       mega-single     330
                       mega-double     148
                       giga-single     147
                       giga-double      71
                       tera-single       3
                       tera-double      59
                       peta-single       4
                       peta-double      22
UMBRIA                 kilo-single       5
                       kilo-double       8
                       mega-single     179
                       mega-double       0
                       giga-single      81
                       giga-double       0
                       tera-single     184
                       tera-double       0
                       peta-single       0
                       peta-double       0
VALLE D'AOSTA          kilo-single      54
                       kilo-double      10
                       mega-single     596
                       mega-double     306
                       giga-single     279
                       giga-double     152
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
VENETO                 kilo-single      78
                       kilo-double     225
                       mega-single    1237
                       mega-double     651
                       giga-single     623
                       giga-double     316
                       tera-single     478
                       tera-double     174
                       peta-single     236
                       peta-double     187
Name: login, dtype: int64

Studenti per categoria¶

with pd.option_context('display.float_format', '{:.0f}'.format):
    display(done.groupby(['regione', 'categoria'])['studenti'].sum())

regione                categoria  
ABRUZZO                kilo-single       7
                       kilo-double     134
                       mega-single       0
                       mega-double      30
                       giga-single       0
                       giga-double      12
                       tera-single      67
                       tera-double       0
                       peta-single      57
                       peta-double      95
BASILICATA             kilo-single       1
                       kilo-double      61
                       mega-single     210
                       mega-double       0
                       giga-single     100
                       giga-double       0
                       tera-single       5
                       tera-double       0
                       peta-single       0
                       peta-double       0
BUCAREST (ROMANIA)     kilo-single       3
                       kilo-double       8
                       mega-single       0
                       mega-double      13
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
CALABRIA               kilo-single      57
                       kilo-double      20
                       mega-single       0
                       mega-double       0
                       giga-single       0
                       giga-double       0
                       tera-single      19
                       tera-double       0
                       peta-single       0
                       peta-double       0
CAMPANIA               kilo-single     100
                       kilo-double     231
                       mega-single     508
                       mega-double     542
                       giga-single     204
                       giga-double     269
                       tera-single      22
                       tera-double     166
                       peta-single       2
                       peta-double     262
CROAZIA                kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       4
                       giga-single       0
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
EMILIA-ROMAGNA         kilo-single      49
                       kilo-double      44
                       mega-single     230
                       mega-double     545
                       giga-single      73
                       giga-double     256
                       tera-single     322
                       tera-double     100
                       peta-single      31
                       peta-double     289
FRIULI-VENEZIA GIULIA  kilo-single       3
                       kilo-double      72
                       mega-single      77
                       mega-double      20
                       giga-single      38
                       giga-double      16
                       tera-single     384
                       tera-double     198
                       peta-single      97
                       peta-double     186
ILFOV                  kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double       0
                       giga-single       2
                       giga-double       0
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
LAZIO                  kilo-single     177
                       kilo-double     192
                       mega-single      12
                       mega-double     230
                       giga-single       3
                       giga-double      76
                       tera-single      17
                       tera-double     261
                       peta-single      50
                       peta-double     332
LIGURIA                kilo-single      20
                       kilo-double     182
                       mega-single      12
                       mega-double     141
                       giga-single      18
                       giga-double      51
                       tera-single       0
                       tera-double      18
                       peta-single       0
                       peta-double       0
LOMBARDIA              kilo-single     218
                       kilo-double    1302
                       mega-single     868
                       mega-double    1737
                       giga-single     413
                       giga-double     878
                       tera-single     540
                       tera-double     708
                       peta-single     359
                       peta-double     314
MARCHE                 kilo-single      51
                       kilo-double       0
                       mega-single       1
                       mega-double       0
                       giga-single       0
                       giga-double       0
                       tera-single      51
                       tera-double      43
                       peta-single     133
                       peta-double      36
MOLISE                 kilo-single       0
                       kilo-double       0
                       mega-single       0
                       mega-double      12
                       giga-single       0
                       giga-double       4
                       tera-single      21
                       tera-double      14
                       peta-single      20
                       peta-double      18
PIEMONTE               kilo-single      68
                       kilo-double      66
                       mega-single     205
                       mega-double     272
                       giga-single     116
                       giga-double     138
                       tera-single      16
                       tera-double     319
                       peta-single      79
                       peta-double     302
PUGLIA                 kilo-single     167
                       kilo-double     271
                       mega-single     675
                       mega-double     274
                       giga-single     229
                       giga-double     141
                       tera-single      33
                       tera-double     148
                       peta-single      57
                       peta-double      56
SARDEGNA               kilo-single       0
                       kilo-double       0
                       mega-single       8
                       mega-double      58
                       giga-single       0
                       giga-double       8
                       tera-single       0
                       tera-double      14
                       peta-single       0
                       peta-double      18
SICILIA                kilo-single       0
                       kilo-double       0
                       mega-single      44
                       mega-double     184
                       giga-single      15
                       giga-double      87
                       tera-single      57
                       tera-double      42
                       peta-single       0
                       peta-double      90
TOSCANA                kilo-single      10
                       kilo-double      82
                       mega-single     158
                       mega-double     272
                       giga-single     110
                       giga-double      24
                       tera-single       0
                       tera-double       2
                       peta-single       0
                       peta-double      21
TRENTINO-ALTO ADIGE    kilo-single       0
                       kilo-double       0
                       mega-single     237
                       mega-double      88
                       giga-single     131
                       giga-double      44
                       tera-single       3
                       tera-double     116
                       peta-single       4
                       peta-double      44
UMBRIA                 kilo-single       5
                       kilo-double       3
                       mega-single     136
                       mega-double       0
                       giga-single      60
                       giga-double       0
                       tera-single     184
                       tera-double       0
                       peta-single       0
                       peta-double       0
VALLE D'AOSTA          kilo-single      43
                       kilo-double       0
                       mega-single     254
                       mega-double     464
                       giga-single     116
                       giga-double     304
                       tera-single       0
                       tera-double       0
                       peta-single       0
                       peta-double       0
VENETO                 kilo-single      79
                       kilo-double     448
                       mega-single     740
                       mega-double     898
                       giga-single     358
                       giga-double     447
                       tera-single     478
                       tera-double     213
                       peta-single     223
                       peta-double     323
Name: studenti, dtype: int64

Cartografia ISTAT 2011 (fonte: http://www.istat.it/it/archivio/24613), convertita con il comando:

ogr2ogr -f GeoJSON -s_srs reg2011_g.prj -t_srs EPSG:4326 it.json reg2011_g.shp

(fonte: https://gist.github.com/riccardoscalco/6029355)

import geopandas as gpd
%matplotlib inline

it = gpd.read_file("it.json")

TYPES = ['totale'] + list(snames.values())

dreg = done.groupby(['regione']).count()
dregk = done.groupby(['regione','categoria']).count()

sreg = done.groupby(['regione']).sum()
sregk = done.groupby(['regione','categoria']).sum()


def get_data_with_default(geo, i, t, ddata, sdata, jj, labeld='login', labels='studenti'):
    try:
        geo.loc[i, 'squadre' + ' ' + t] = 0
        for j in jj:
            geo.loc[i, 'squadre' + ' ' + t] += ddata.loc[j, labeld] if ddata.loc[j, labeld] > 0 else 0 
    except:
        geo.loc[i, 'squadre' + ' ' + t] += 0
    try:
        geo.loc[i, 'studenti' + ' ' + t] = 0
        for j in jj:
            geo.loc[i, 'studenti' + ' ' + t] += sdata.loc[j, labels] if sdata.loc[j, labels] > 0 else 0
    except:
        geo.loc[i, 'studenti' + ' ' + t] += 0

        
for i, r in it.iterrows():
    for cname in istat.index:
        if r['NOME_REG'][0:5] == cname[0:5]:
            it.loc[i, 'NOME_REG'] = cname
            get_data_with_default(it, i, TYPES[0], dreg, sreg, [cname])
            get_data_with_default(it, i, TYPES[1], dregk, sregk, [(cname, 'kilo-single'), (cname, 'kilo-double')])
            get_data_with_default(it, i, TYPES[2], dregk, sregk, [(cname, 'mega-single'), (cname, 'mega-double'),
                                                                  (cname, 'giga-single'), (cname, 'giga-double')])
            get_data_with_default(it, i, TYPES[3], dregk, sregk, [(cname, 'tera-single'), (cname, 'tera-double'),
                                                                  (cname, 'peta-single'), (cname, 'peta-double')])
                
            it.loc[i, 'popolazione ' + TYPES[0]] = istat.loc[cname, 'totale']
            it.loc[i, 'popolazione ' + TYPES[1]] = istat.loc[cname, snames['E']]
            it.loc[i, 'popolazione ' + TYPES[2]] = istat.loc[cname, snames['M']]
            it.loc[i, 'popolazione ' + TYPES[3]] = istat.loc[cname, snames['S']]
            break

for t in TYPES:
    it['copertura ' + t] = 1000 * it['studenti ' + t] / it['popolazione ' + t]

fig, ax = plt.subplots(2,2)
fig.set_size_inches(15,11)
for i, t in enumerate(TYPES):
    r = i // 2
    c = i % 2
    ax[r][c].set_aspect("equal")
    ax[r][c].set_axis_off()
    ax[r][c].set_title("Studenti ogni mille ({})".format(t))
    it.plot(ax=ax[r][c], column='copertura ' + t, cmap='YlOrRd', scheme='quantiles', legend=True)
    
fig.savefig('italia.png')    
plt.show()

Il Bebras nel mondo (dati 2020)¶

w = gpd.read_file("world.json")
w = w.set_index("name")

with open("wbebras.json", "r") as t:
    wbebras = pd.DataFrame(pd.read_json(t, convert_axes=True, orient='index'))

wbebras['copertura'] = 1000 * wbebras["bebras"] / wbebras["oecd"]    
    

for i in wbebras.index:
    try:
        w.loc[i, "bebras"] = wbebras.loc[i, "bebras"]
        w.loc[i, "oecd"]   = wbebras.loc[i, "oecd"]
        w.loc[i, "copertura"]   = wbebras.loc[i, "copertura"]
    except:
        print(i)

plt.figure(figsize=(20,20))
ax = plt.subplot(212)
ax.set_aspect("equal")
ax.set_axis_off()
ax.set_title("Partecipanti ogni 1000 studenti (dati OECD 2018)")       
w.dropna().plot(ax=ax,column='copertura', cmap='Blues', scheme='quantiles', legend=True)


ax = plt.subplot(211)
ax.set_aspect("equal")
ax.set_axis_off()
ax.set_title("Partecipanti Bebras 2020")       
p = w.dropna(subset=["bebras"]).plot(ax=ax,column='bebras', cmap='YlOrRd', scheme='quantiles', legend=True)
plt.show()

Numeri assoluti¶

display(wbebras.sort_values("bebras",ascending=False)[["bebras","oecd","copertura"]])

print("In totale nel mondo {} partecipanti".format(wbebras['bebras'].sum()))

In totale nel mondo 2479789 partecipanti

	bebras	oecd	copertura
France	523598	10412016.0	50.287860
Germany	381580	9935909.0	38.404136
United Kingdom	240803	11066193.0	21.760239
Belarus	166038	NaN	NaN
Taiwan	159039	NaN	NaN
India	103114	NaN	NaN
South Korea	63897	5613337.0	11.383069
Czech Republic	61788	1370636.0	45.079802
Slovakia	55064	671408.0	82.012725
Australia	54920	4627837.0	11.867315
Ukraine	49317	NaN	NaN
United States of America	47470	49829312.0	0.952652
Turkey	40431	16384160.0	2.467688
China	39869	NaN	NaN
Croatia	36491	NaN	NaN
Lithuania	32107	349619.0	91.834254
Switzerland	30994	1123542.0	27.585974
Hungary	29341	1182406.0	24.814658
Republic of Serbia	28187	NaN	NaN
Uzbekistan	26335	NaN	NaN
Vietnam	24787	NaN	NaN
Austria	19741	1026408.0	19.233092
North Macedonia	19263	NaN	NaN
Slovenia	18955	276126.0	68.646198
Netherlands	18245	2807504.0	6.498655
Portugal	17496	1388754.0	12.598344
Indonesia	16186	NaN	NaN
Algeria	16184	NaN	NaN
Canada	15462	5061204.0	3.055004
Latvia	14896	239000.0	62.326360
Thailand	14776	NaN	NaN
Italy	14519	7501201.0	1.935557
South Africa	12359	NaN	NaN
Poland	10074	4669836.0	2.157249
Ireland	9533	1056064.0	9.026915
Russia	8756	16217168.0	0.539922
Romania	7194	NaN	NaN
Saudi Arabia	6527	NaN	NaN
Iran	5371	NaN	NaN
Bosnia and Herzegovina	5167	NaN	NaN
Japan	4554	13500120.0	0.337330
Finland	4307	915321.0	4.705453
Uruguay	3593	NaN	NaN
Belgium	3369	2000570.0	1.684020
Estonia	3354	172124.0	19.485952
Pakistan	3336	NaN	NaN
New Zealand	2614	878178.0	2.976617
Iceland	2504	67592.0	37.045804
Egypt	2300	NaN	NaN
Spain	2160	6414465.0	0.336739
Cyprus	1085	NaN	NaN
Bulgaria	244	NaN	NaN
Philippines	213	NaN	NaN
Syria	163	NaN	NaN
Niger	119	NaN	NaN
Sweden	0	1828036.0	0.000000
Kazakhstan	0	NaN	NaN
Singapore	0	NaN	NaN

	login	teams_2020	Incremento %
giga	6648	3321	100.180668
kilo	4265	3120	36.698718
mega	14460	6573	119.990872
peta	3817	2363	61.531951
tera	5653	3325	70.015038

	Primaria	Secondaria I grado	Secondaria II grado	totale
PIEMONTE	191399	117997	168439	477835
VALLE D'AOSTA	5981	3691	5309	14981
LIGURIA	61566	39213	60184	160963
LOMBARDIA	468662	283007	381619	1133288
TRENTINO-ALTO ADIGE	27028	16890	21836	65754
VENETO	232694	142401	204262	579357
FRIULI-VENEZIA GIULIA	51830	32143	46949	130922
EMILIA-ROMAGNA	198417	118460	176968	493845
TOSCANA	161001	98203	152886	412090
UMBRIA	39181	23488	36946	99615
MARCHE	67996	42095	70602	180693
LAZIO	268133	161573	249145	678851
ABRUZZO	57146	35828	58578	151552
MOLISE	12595	8354	14990	35939
CAMPANIA	317346	204223	326644	848213
PUGLIA	198662	130675	213545	542882
BASILICATA	25237	17097	30214	72548
CALABRIA	93277	59624	101208	254109
SICILIA	254023	164520	252730	671273
SARDEGNA	67379	44105	74003	185487
ESTERO	NaN	NaN	NaN	NaN