"""Parse HTML "ALARMDEPESCHE" messages into plain Python dictionaries."""

from glob import glob
from io import StringIO
from typing import Any, Dict, Iterator, Optional, Tuple

import pandas as pd
from numpy import nan  # noqa: F401  (no longer used here; kept so existing imports keep working)

# Module-level collectors; nothing in this file fills them, they are kept
# because other code may import them.
tt_list_alarmdepesche = []
tt_list_infodepesche = []
tt_list_einsatzprotokoll = []

# Marker that identifies a message this parser understands.
_MESSAGE_MARKER = 'ALARMDEPESCHE'
# Heading of the table whose rows are collected into a nested dict.
_TARGET_HEADING = 'Einsatzziel'
# Headings of the tables whose second column is collected as free text.
_TEXT_HEADINGS = ('Zusatztext zum Ort', 'Zusatztext zum Objekt')
# Column names of the resource table, in column order.
_RESOURCE_COLUMNS = (
    'Ressourcen',
    'Typ',
    'Organisation',
    'Status',
    'Alarm',
    'aus',
    'an',
    'Auftrag',
)


def _is_missing(value: Any) -> bool:
    """Return True when *value* is an empty table cell (NaN, None, NA)."""
    # An identity check against numpy.nan is not reliable: NaN values boxed
    # from float columns are distinct float objects.
    return value is None or bool(pd.isna(value))


def _labelled_rows(
    rows: Dict[Any, Dict[Any, Any]],
    heading: Optional[str] = None,
) -> Iterator[Tuple[str, Any]]:
    """Yield ``(label, value)`` for every row that has a usable first cell.

    Rows whose first cell is missing, or equal to *heading* (the table's own
    title row), are skipped. The value is the raw second cell and may itself
    be missing (NaN/None).
    """
    for row in rows.values():
        label = row.get(0)
        if _is_missing(label) or label == heading:
            continue
        yield str(label), row.get(1)


def _parse_title(rows: Dict[Any, Dict[Any, Any]], result: Dict[str, Any]) -> None:
    """Store the ``<key> >> <value>`` title found in the first table."""
    title = rows.get(0, {}).get(0)
    if not isinstance(title, str) or _MESSAGE_MARKER not in title:
        return
    # partition() tolerates a missing or repeated '>>' where a two-way
    # unpacking of split() would raise.
    key, _, value = title.partition('>>')
    result[key.strip()] = value.strip()


def _resource_entry(row: Dict[Any, Any]) -> Dict[str, Any]:
    """Map one row of the resource table to a dict, blanking empty cells."""
    entry = {}
    for index, name in enumerate(_RESOURCE_COLUMNS):
        value = row.get(index)
        entry[name] = "" if _is_missing(value) else value
    return entry


def _parse_section(rows: Dict[Any, Dict[Any, Any]], result: Dict[str, Any]) -> None:
    """Handle one of the tables after the third, keyed by its first cell."""
    first_row = rows.get(0, {})
    heading = first_row.get(0)

    if heading == _TARGET_HEADING:
        # Only the first table with this heading is used.
        if heading not in result:
            result[heading] = {
                label.strip(' :'): value
                for label, value in _labelled_rows(rows, heading)
            }
    elif heading in _TEXT_HEADINGS:
        # Only the first table with this heading is used; lines are joined
        # later by _finalize_text_block().
        if heading not in result:
            result[heading] = [value for _, value in _labelled_rows(rows, heading)]
    elif (
        heading == 'Ressourcen'
        and first_row.get(1) == 'Typ'
        and first_row.get(2) == 'Organisation'
    ):
        # The table titled 'Einsatzmittelliste' only announces this table and
        # is ignored; the data lives here. The column-header row is dropped.
        result['Einsatzmittelliste'] = [
            _resource_entry(row)
            for row in rows.values()
            if row.get(0) != "Ressourcen"
        ]


def _finalize_text_block(result: Dict[str, Any], key: str) -> None:
    """Join the collected lines of *key*, or drop it if it is only a ``.``."""
    if key not in result:
        return
    # Empty cells would break str.join(), so they are left out.
    lines = [str(line) for line in result[key] if not _is_missing(line)]
    if len(lines) == 1 and lines[0].strip() == ".":
        del result[key]
    else:
        result[key] = "\n".join(lines)


def parse_securecad_message(body_html: str) -> Optional[Dict[str, Any]]:
    """Extract the fields of an ALARMDEPESCHE message from its HTML body.

    The tables of the document are interpreted by position:

    * table 1 - title cell of the form ``<key> >> <value>``
    * table 2 - ignored
    * table 3 - label/value rows, stored as top-level keys
    * later tables - recognised by their first cell: ``Einsatzziel`` (nested
      dict), ``Zusatztext zum Ort`` / ``Zusatztext zum Objekt`` (text joined
      with newlines, removed when it consists of a single ``.``) and the
      resource table stored under ``Einsatzmittelliste`` (list of dicts).

    Args:
        body_html: HTML source of the message. Non-breaking spaces are
            replaced by regular spaces before parsing.

    Returns:
        A dict with the extracted fields plus ``"__HTML_BODY"`` (the
        normalised HTML), or ``None`` when the text does not contain
        ``ALARMDEPESCHE``.

    Raises:
        Errors raised by ``pandas.read_html`` (for instance when the document
        contains no table or no HTML parser backend is installed) are not
        caught here.
    """
    body_html = body_html.replace('\xa0', ' ')
    if _MESSAGE_MARKER not in body_html:
        return None

    result = {}
    # read_html() expects a path, URL or file-like object; passing the markup
    # itself as a plain string is deprecated, hence the StringIO wrapper.
    tables = pd.read_html(StringIO(body_html))
    for position, table in enumerate(tables, start=1):
        if position == 2:
            continue
        rows = table.to_dict(orient='index')
        if position == 1:
            _parse_title(rows, result)
        elif position == 3:
            for label, value in _labelled_rows(rows):
                result[label.strip(' :')] = value
        else:
            _parse_section(rows, result)

    # Cleanup: turn the collected text lines into their final form.
    for heading in _TEXT_HEADINGS:
        _finalize_text_block(result, heading)

    result["__HTML_BODY"] = body_html
    return result


def _main() -> None:
    """Parse every ``*.html`` file in the current working directory."""
    for path in glob('*.html'):
        # NOTE(review): the files are read with the platform default
        # encoding, as before - confirm which encoding they really use.
        with open(path, 'r') as handle:
            parse_securecad_message(handle.read())


if __name__ == "__main__":
    _main()