"""Merge several grade sheets into one per-student CSV.

The script reads ``note_an1.csv`` into the module-level ``note`` dictionary
(keyed by the accent-stripped student name), then adds three more columns:

* ``BD`` and ``Grupa 2`` parsed from the ``Rezultate 22<i>.htm`` pages,
* ``PS`` from ``ps.csv``,
* ``PLF`` from ``plf.csv``.

Names coming from the extra sources are matched against the names already
known through :func:`get_nume`.  The merged table is written to ``grand.csv``.
"""

import csv
import difflib
import re
import unicodedata

# Directory holding every input file and the generated output file.
DATA_DIR = 'C:\\Users\\Roland\\Documents\\Statistici\\'

# Student name -> {column name: value}.  Filled by main(), read by get_nume().
note = {}

# Separators used when a name is broken into tokens for matching.
_NAME_SEPARATORS = re.compile('[ -]')

# One result line: a name made of letters, dots, spaces and hyphens,
# optionally followed by a run of digits (the grade).
_RESULT_LINE = re.compile(r"^(([^\W\d_]|\.| |-)+)([0-9]+)?")


def _strip_accents(text):
    """Return *text* with hyphens turned into spaces and accents removed.

    The text is decomposed (NFD) and every combining mark (Unicode category
    ``Mn``) is dropped, leaving only the base characters.
    """
    decomposed = unicodedata.normalize('NFD', text.replace('-', ' '))
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')


def _to_grade(value):
    """Return ``int(value)``, or 0 when *value* cannot be read as an integer."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


def capitalize_remove_father_name(nume):
    """Normalise a student name.

    Accents are stripped, hyphens become spaces, and every word is
    capitalised.  When the name has more than two words and the second one
    contains a dot (an initial such as ``"I."``), that second word is dropped.

    Args:
        nume: the raw name.

    Returns:
        The cleaned name with words separated by single spaces.
    """
    parts = _strip_accents(nume).split()
    if len(parts) > 2 and "." in parts[1]:
        parts = [parts[0]] + parts[2:]
    return " ".join(part.capitalize() for part in parts)


def get_nume(nume):
    """Find the key of ``note`` that corresponds to *nume*.

    Lookup order:

    1. an exact key match;
    2. the first key (in sorted order) whose set of words contains, or is
       strictly contained in, the words of *nume* -- accepted only when both
       names start with the same word;
    3. the closest key according to ``difflib`` with a 0.85 cutoff.

    Args:
        nume: the name to look up.

    Returns:
        The matching key of ``note``, or ``None`` when nothing matches or
        *nume* holds no words.
    """
    if nume == '':
        return None
    if nume in note:
        return nume

    query_tokens = [tok for tok in _NAME_SEPARATORS.split(nume) if tok]
    if not query_tokens:
        # Only separators: there is nothing to compare against.
        return None
    query_set = set(query_tokens)

    for key in sorted(note):
        key_tokens = [tok for tok in _NAME_SEPARATORS.split(key) if tok]
        if not key_tokens:
            continue
        key_set = set(key_tokens)
        if query_set <= key_set or key_set < query_set:
            if key_tokens[0] == query_tokens[0]:
                # Return the real key, not a re-joined copy: a key with
                # doubled spaces would otherwise not be found in ``note``.
                return key
            # Only the first word-set match is considered; on a first-word
            # mismatch fall through to the fuzzy match below.
            break

    closest = difflib.get_close_matches(nume, note.keys(), n=1, cutoff=0.85)
    return closest[0] if closest else None


def _load_base_grades():
    """Fill ``note`` from note_an1.csv, keyed by the cleaned 'Nume' column."""
    with open(DATA_DIR + 'note_an1.csv', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            key = _strip_accents(row['Nume']).strip()
            # The name lives in the key; keep only the other columns.
            del row['Nume']
            note[key] = row
    print(list(note.values())[10:20])


def _record_bd_line(line, grupa):
    """Store the BD grade found on one result line, if the line holds one."""
    reg = _RESULT_LINE.match(line)
    if not reg:
        return
    gr = reg.groups()
    cleaned = capitalize_remove_father_name(gr[0])
    if not cleaned:
        # The pattern also matches lines made only of spaces/dots/hyphens;
        # skip them instead of registering a student with an empty name.
        return
    nume = get_nume(cleaned)
    # Group 3 is None when the line carries no digits after the name.
    nota = int(gr[2]) if gr[2] is not None else 0
    if not nume:
        # Unknown student: register under the cleaned name.
        nume = cleaned
        note[nume] = {}
        print(nume)
    if 'BD' in note[nume]:
        print("erreur BD", nume)
    note[nume]['BD'] = nota
    note[nume]['Grupa 2'] = grupa


def _merge_bd_results():
    """Read the six 'Rezultate 22<i>.htm' pages and record BD grades."""
    for i in range(1, 7):
        grupa = '22' + str(i)
        with open(DATA_DIR + 'Rezultate ' + grupa + '.htm') as fisier:
            in_pre = False
            for line in fisier:
                # NOTE(review): the two tag literals were lost in the copy of
                # this script that was reviewed (one was empty, the other was
                # split by a line break).  '<pre>' / '</pre>' are reconstructed
                # from the ``in_pre`` flag -- confirm against the real pages,
                # including the letter case of the tags.
                if '</pre>' in line:
                    break
                if in_pre:
                    _record_bd_line(line, grupa)
                if '<pre>' in line:
                    in_pre = True


def _merge_csv_grades(filename, name_column, materie, clean_name=None):
    """Copy the 'Nota' column of a CSV file into ``note`` under *materie*.

    Args:
        filename: file name inside ``DATA_DIR``.
        name_column: header of the column holding the student name.
        materie: key under which the grade is stored; also used as the
            label of the diagnostic messages.
        clean_name: optional callable applied to the raw name before lookup.
    """
    with open(DATA_DIR + filename, newline='') as fisier:
        for row in csv.DictReader(fisier):
            raw = row[name_column]
            if raw == '':
                continue
            nume = get_nume(clean_name(raw) if clean_name else raw)
            if not nume:
                # No known student matches this row; report and move on.
                print(materie, raw)
                continue
            if materie in note[nume]:
                print("erreur " + materie, nume, raw)
            note[nume][materie] = _to_grade(row['Nota'])


def _write_summary():
    """Write ``note`` to grand.csv, defaulting unusable grades to 0."""
    header = ['Grupa', 'Grupa 2', 'Nota algebra', 'Nota AC', 'Nota FP',
              'SdA', 'Geometrie', 'POO', 'BD', 'PLF', 'PS']
    # newline='' lets the csv module control row endings itself.
    with open(DATA_DIR + 'grand.csv', "w", newline='') as fisier:
        writer = csv.DictWriter(fisier, fieldnames=header)
        writer.writeheader()
        for grades in note.values():
            for materie in header[2:]:
                value = grades.get(materie, 0)
                try:
                    int(value)
                except (TypeError, ValueError):
                    # Missing, empty or non-numeric cell.
                    value = 0
                grades[materie] = value
            writer.writerow(grades)


def main():
    """Run the whole merge: load, enrich, report incomplete rows, write."""
    _load_base_grades()
    _merge_bd_results()
    _merge_csv_grades('ps.csv', 'Nume', 'PS', capitalize_remove_father_name)
    _merge_csv_grades('plf.csv', 'Nume prenume', 'PLF')
    print(list(note.values())[30:40])
    # List the students for whom fewer than seven columns are known.
    for key in note:
        if len(note[key]) < 7:
            print(key, note[key])
    _write_summary()


if __name__ == "__main__":
    main()

# Data rows that followed the script in the original listing -- presumably an
# excerpt of the generated grand.csv (TODO confirm):
#
# Grupa,Grupa 2,Nota algebra,Nota AC,Nota FP,SdA,Geometrie,POO,BD,PLF,PS
# Gr. 217,224,6,5,8,7,9,8,8,7,5
# Gr. 213,223,2,4,6,0,6,8,6,4,4
# Gr. 211,221,9,10,10,10,9,10,10,10,10
# Gr. 216,226,8,7,6,6,9,7,9,8,7