95 lines
4.9 KiB
Python
95 lines
4.9 KiB
Python
from glob import glob
|
|
import pandas as pd
|
|
from numpy import nan
|
|
|
|
# Module-level result lists, presumably one per message category (the names
# suggest alarm dispatch, info dispatch and operation log -- confirm with the
# callers). Nothing in the visible code appends to them.
tt_list_alarmdepesche = list()
tt_list_infodepesche = list()
tt_list_einsatzprotokoll = list()
|
|
# Column order of the resource table; the position in this tuple is the
# column index the value is read from.
_RESOURCE_FIELDS = (
    'Ressourcen',
    'Typ',
    'Organisation',
    'Status',
    'Alarm',
    'aus',
    'an',
    'Auftrag',
)

# Headings of the free-text tables that are collected line by line and
# joined during cleanup (same order as the cleanup is applied).
_FREE_TEXT_HEADINGS = ('Zusatztext zum Objekt', 'Zusatztext zum Ort')


def _is_missing(cell):
    """Return True when *cell* is a missing-value marker (NaN, None, NA)."""
    # Compare by value, not identity: pandas may hand out freshly boxed NaN
    # floats that are not the very same object as ``numpy.nan``.
    return pd.api.types.is_scalar(cell) and bool(pd.isna(cell))


def _is_text(cell, expected):
    """Return True when *cell* is a string equal to *expected*."""
    return isinstance(cell, str) and cell == expected


def _labelled_pairs(rows, skip_label=None):
    """Map the cleaned first cell of every labelled row to its second cell."""
    pairs = {}
    for row in rows.values():
        label = row.get(0)
        if _is_missing(label) or _is_text(label, skip_label):
            continue
        # Labels arrive as e.g. "Name :"; drop the trailing colon/padding.
        pairs[str(label).strip(' :')] = row.get(1)
    return pairs


def _text_lines(rows, heading):
    """Collect the second cell of every labelled row below *heading*."""
    lines = []
    for row in rows.values():
        label = row.get(0)
        if _is_missing(label) or _is_text(label, heading):
            continue
        text = row.get(1)
        # Skip empty cells so the later "\n".join cannot fail on NaN.
        if not _is_missing(text):
            lines.append(str(text))
    return lines


def _resource_rows(rows):
    """Turn every non-header row of the resource table into a field dict."""
    resources = []
    for row in rows.values():
        if _is_text(row.get(0), "Ressourcen"):
            continue  # header row
        entry = {}
        for column, field in enumerate(_RESOURCE_FIELDS):
            value = row.get(column)
            entry[field] = "" if _is_missing(value) else value
        resources.append(entry)
    return resources


def _finalize_free_text(record, key):
    """Join the collected lines under *key*, or drop a lone "." placeholder."""
    value = record.get(key)
    if isinstance(value, str):
        # The key may also have been filled with a plain string by the
        # label/value table; treat that as a single line.
        value = [value]
    if not isinstance(value, list):
        return
    if len(value) == 1 and value[0].strip() == ".":
        del record[key]
    else:
        record[key] = "\n".join(value)


def parse_securecad_message(body_html: str):
    """Extract the fields of an 'ALARMDEPESCHE' message from its HTML tables.

    Non-breaking spaces in *body_html* are replaced by plain spaces first.
    Tables are interpreted by their position in the document:

    * table 1: a header cell of the form ``"<key> >> <value>"`` that
      contains ``ALARMDEPESCHE``; stored as ``result[key] = value``. Only
      the first ``>>`` separates key and value; without a separator the
      value is an empty string.
    * table 2: ignored.
    * table 3: label/value rows, stored as ``result[label] = value`` (the
      value is stored as parsed and may be a missing-value marker).
    * later tables are recognised by the text of their first cell:
      ``Einsatzziel`` (nested dict of label/value rows), ``Zusatztext zum
      Ort`` / ``Zusatztext zum Objekt`` (text lines joined with newlines;
      a single "." line removes the key), and the table whose header row
      starts with ``Ressourcen, Typ, Organisation`` (stored as a list of
      dicts under ``Einsatzmittelliste``). For the first three only the
      first occurrence is used; the resource list is overwritten by a later
      occurrence. Unrecognised tables are ignored.

    Args:
        body_html: HTML source of the message body.

    Returns:
        A dict with the extracted fields plus the normalised HTML under
        ``"__HTML_BODY"``, or ``None`` when *body_html* does not contain
        ``ALARMDEPESCHE``.

    Raises:
        Whatever ``pandas.read_html`` raises for unparsable input; per the
        pandas documentation it fails rather than returning an empty list
        when the document contains no table.
    """
    # Local import: newer pandas versions deprecate passing literal HTML to
    # read_html, so the text is wrapped in a file-like object.
    from io import StringIO

    body_html = body_html.replace('\xa0', ' ')
    if 'ALARMDEPESCHE' not in body_html:
        return None

    record = {}
    tables = pd.read_html(StringIO(body_html))  # one DataFrame per <table>

    for position, frame in enumerate(tables, start=1):
        rows = frame.to_dict('index')
        first_row = rows.get(0, {})
        heading = first_row.get(0)

        if position == 1:
            if isinstance(heading, str) and 'ALARMDEPESCHE' in heading:
                key, _, value = heading.partition('>>')
                record[key.strip()] = value.strip()
        elif position == 2:
            continue
        elif position == 3:
            record.update(_labelled_pairs(rows))
        elif _is_text(heading, 'Einsatzziel'):
            if 'Einsatzziel' not in record:
                record['Einsatzziel'] = _labelled_pairs(
                    rows, skip_label='Einsatzziel'
                )
        elif isinstance(heading, str) and heading in _FREE_TEXT_HEADINGS:
            if heading not in record:
                record[heading] = _text_lines(rows, heading)
        elif (
            _is_text(heading, 'Ressourcen')
            and _is_text(first_row.get(1), 'Typ')
            and _is_text(first_row.get(2), 'Organisation')
        ):
            # The table headed 'Einsatzmittelliste' only announces this
            # resource list and carries no data of its own.
            record['Einsatzmittelliste'] = _resource_rows(rows)

    # Cleanup of the free-text fields.
    for key in _FREE_TEXT_HEADINGS:
        _finalize_free_text(record, key)

    record["__HTML_BODY"] = body_html
    return record
|
|
|
|
if __name__ == "__main__":
    # Script mode: run the parser over every *.html file in the current
    # working directory. The parsed result is discarded.
    for html_path in glob('*.html'):
        with open(html_path, 'r') as handle:
            html_text = handle.read()
        parse_securecad_message(html_text)