import csv
import re
import unicodedata

# One line of the results table: a name made of letters, dots, spaces and
# hyphens (group 1), optionally followed by an integer grade (group 3).
_RESULT_LINE = re.compile(r"^(([^\W\d_]|\.| |-)+)([0-9]+)?")


def _strip_accents(text):
    """Return *text* with hyphens turned into spaces and diacritics removed."""
    decomposed = unicodedata.normalize('NFD', text.replace('-', ' '))
    # After NFD decomposition the accents are separate combining marks ('Mn').
    return u''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')


def capitalize_remove_father_name(nume):
    """Normalize a full name for matching against the keys of ``note``.

    Accents are stripped, hyphens become spaces, every word is capitalized
    and, when the name has more than two words and the second one contains
    a dot (an initial such as "I."), that second word is dropped.

    Args:
        nume: the raw name string.

    Returns:
        The normalized name, words separated by single spaces.
    """
    parts = _strip_accents(nume).split()
    if len(parts) > 2 and "." in parts[1]:
        parts = [parts[0]] + parts[2:]
    return " ".join(x.capitalize() for x in parts)


# Load the first-year grades: one dict per student, keyed by normalized name.
file = 'C:\\Users\\Roland\\Documents\\Statistici\\note_an1.csv'
note = {}
with open(file) as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # pop() removes the name column so the stored row holds only the
        # remaining columns; the name lives on as the dictionary key.
        key = _strip_accents(row.pop('Nume')).strip()
        note[key] = row
print(list(note.values())[10:20])

# Merge the "BD" grades from the six per-group HTML result pages.
# NOTE(review): get_nume() is defined further down in this file, so this loop
# only works once that definition has been executed (e.g. notebook cells run
# out of order); run top-to-bottom as a script it raises NameError.
for i in range(1, 7):
    file = 'C:\\Users\\Roland\\Documents\\Statistici\\Rezultate 22'+str(i)+'.htm'
    with open(file) as fisier:
        in_pre = False
        for line in fisier:
            lowered = line.lower()
            # NOTE(review): the tag literals were lost in the original text
            # (an empty string, which matches every line, and a literal
            # broken across lines). '<pre>' / '</pre>' are reconstructed from
            # the `in_pre` flag -- confirm against the .htm files.
            if '</pre>' in lowered:
                break
            if in_pre:
                reg = _RESULT_LINE.match(line)
                if reg:
                    raw_name = reg.group(1)
                    digits = reg.group(3)
                    nume = get_nume(capitalize_remove_father_name(raw_name))
                    # A line without a trailing number counts as grade 0.
                    nota = int(digits) if digits is not None else 0
                    if not nume:
                        # Unknown student: create a fresh entry and report it.
                        nume = capitalize_remove_father_name(raw_name)
                        note[nume] = {}
                        print(nume)
                    if 'BD' in note[nume]:
                        print("erreur BD",nume)
                    note[nume]['BD'] = nota
                    note[nume]['Grupa 2'] = '22'+str(i)
            # Checked last so the line carrying the opening tag is not parsed.
            if '<pre>' in lowered:
                in_pre = True

import difflib,re

def get_nume(nume):
    """Resolve a student name to the matching key of the global ``note`` dict.

    Lookup order:
      1. exact key match;
      2. the first key (in sorted order) whose space/hyphen-separated words
         are a superset or a proper subset of the words of ``nume``,
         accepted only if both names start with the same word;
      3. the closest key according to ``difflib`` with a 0.85 cutoff.

    Args:
        nume: the name to look up.

    Returns:
        An existing key of ``note``, or ``None`` when ``nume`` is empty or
        nothing matches.
    """
    if nume == '':
        return None
    if nume in note:
        return nume

    # Drop empty tokens produced by repeated separators so the set
    # comparisons below see the same kind of tokens on both sides.
    name_tokens = [t for t in re.split('[ -]', nume) if t]
    if name_tokens:
        wanted = set(name_tokens)
        for key in sorted(note):
            key_tokens = [t for t in re.split('[ -]', key) if t]
            if not key_tokens:
                # An empty key is a subset of everything and has no first
                # word to compare; it can never be a meaningful match.
                continue
            have = set(key_tokens)
            if wanted <= have or have < wanted:
                if key_tokens[0] == name_tokens[0]:
                    # Return the real key, not a re-joined copy that may
                    # differ from it in spacing.
                    return key
                # Only the first word-set match is considered; when its first
                # word differs, fall through to fuzzy matching.
                break

    closest = difflib.get_close_matches(nume, note.keys(), n=1, cutoff=0.85)
    return closest[0] if closest else None

# Merge the "PS" grades from ps.csv into `note`.
file = 'C:\\Users\\Roland\\Documents\\Statistici\\ps.csv'

with open(file) as fisier:
    reader = csv.DictReader(fisier)
    for row in reader:
        if row['Nume'] == '':
            continue
        nume = get_nume(capitalize_remove_father_name(row['Nume']))
        if not nume:
            # No matching student: report the raw name and skip the row.
            print('PS',row['Nume'])
            continue
        if 'PS' in note[nume]:
            # A second grade for the same student; the later one wins.
            print("erreur PS",nume,row['Nume'])
        try:
            nota = int(row.get('Nota'))
        except (TypeError, ValueError):
            # Missing or non-numeric grade counts as 0.
            nota = 0
        # Store the parsed integer, as the other grade loaders do, rather
        # than the raw CSV string.
        note[nume]['PS'] = nota

# Merge the "PLF" grades from plf.csv into `note`.
file = 'C:\\Users\\Roland\\Documents\\Statistici\\plf.csv'
with open(file) as fisier:
    reader = csv.DictReader(fisier)
    for row in reader:
        if row['Nume prenume'] == '':
            continue
        nume = get_nume(row['Nume prenume'])
        if not nume:
            # No matching student: report the raw name and skip the row.
            print('PLF',row['Nume prenume'])
            continue
        if 'PLF' in note[nume]:
            # A second grade for the same student; the later one wins.
            print("erreur PLF",nume,row['Nume prenume'])
        try:
            nota = int(row.get('Nota'))
        except (TypeError, ValueError):
            # Missing or non-numeric grade counts as 0. Only conversion
            # failures are swallowed, so Ctrl-C still interrupts the run.
            nota = 0
        note[nume]['PLF'] = nota

# Spot-check: show ten merged records from the middle of the table.
sample = list(note.values())
print(sample[30:40])

# List every student whose record has fewer than seven fields.
for student, grades in note.items():
    if len(grades) >= 7:
        continue
    print(student, grades)


# Write the merged table to grand.csv, one row per student.
file = 'C:\\Users\\Roland\\Documents\\Statistici\\grand.csv'
# newline='' lets the csv module control line endings; without it every row
# is followed by an empty line on Windows.
with open(file, "w", newline='') as fisier:
    header = ['Grupa', 'Grupa 2', 'Nota algebra', 'Nota AC', 'Nota FP', 'SdA', 'Geometrie', 'POO', 'BD', 'PLF', 'PS']
    writer = csv.DictWriter(fisier, fieldnames=header)
    writer.writeheader()
    for row in note:
        grades = note[row]
        # Every subject column (all but the two group columns) must hold
        # something int() accepts; missing, None or non-numeric become 0.
        for materie in header[2:]:
            try:
                int(grades[materie])
            except (KeyError, TypeError, ValueError):
                grades[materie] = 0
        writer.writerow(grades)

Grupa,Grupa 2,Nota algebra,Nota AC,Nota FP,SdA,Geometrie,POO,BD,PLF,PS

Gr. 217,224,6,5,8,7,9,8,8,7,5

Gr. 213,223,2,4,6,0,6,8,6,4,4

Gr. 211,221,9,10,10,10,9,10,10,10,10

Gr. 216,226,8,7,6,6,9,7,9,8,7