# wnd_ils_alarmfax_parser/app/securecad_parser.py
# 2024-01-08 16:06:47 +00:00
#
# 95 lines
# 4.9 KiB
# Python

from glob import glob
import pandas as pd
from numpy import nan
# Module-level result lists. The visible code in this module never appends to
# or reads from them.
# NOTE(review): presumably collectors for parsed Alarmdepesche / Infodepesche /
# Einsatzprotokoll messages, judging by the names -- confirm against callers.
tt_list_alarmdepesche: list = []
tt_list_infodepesche: list = []
tt_list_einsatzprotokoll: list = []
# Column names of a resource-list row, in table column order (columns 0..7).
_RESOURCE_COLUMNS = (
    'Ressourcen', 'Typ', 'Organisation', 'Status',
    'Alarm', 'aus', 'an', 'Auftrag',
)

# Sections whose rows are collected as text lines and joined during clean-up.
_TEXT_SECTIONS = ('Zusatztext zum Objekt', 'Zusatztext zum Ort')


def _is_missing(value):
    """Return True when *value* is an empty table cell (``None`` or any NaN)."""
    # ``pd.isna`` compares by value, so it also catches NaN floats that are
    # not the very same object as ``numpy.nan``.
    return not isinstance(value, str) and bool(pd.isna(value))


def _cell(row, column, default=None):
    """Return ``row[column]``, or *default* if the cell is absent or NaN."""
    value = row.get(column, default)
    return default if _is_missing(value) else value


def _labelled_rows(rows, heading):
    """Yield ``(label, row)`` for rows with a non-empty label other than *heading*."""
    for row in rows.values():
        label = _cell(row, 0)
        if label is not None and label != heading:
            yield str(label), row


def parse_securecad_message(body_html: str):
    """Parse the HTML body of an ALARMDEPESCHE message into a dict.

    Non-breaking spaces are replaced by plain spaces first. If the text does
    not contain ``'ALARMDEPESCHE'`` nothing is parsed and ``None`` is returned.

    Tables are handled by their 1-based position in the document:

    * table 1: if its first cell contains ``'ALARMDEPESCHE'``, the cell is
      split at ``'>>'`` into one key/value pair;
    * table 2: ignored;
    * table 3: every row with a non-empty first cell becomes
      ``label -> second cell`` (label stripped of spaces and colons);
    * later tables are dispatched on their first cell: ``'Einsatzziel'``
      (nested dict), ``'Zusatztext zum Ort'`` / ``'Zusatztext zum Objekt'``
      (text lines joined with newlines) and a ``Ressourcen``/``Typ``/
      ``Organisation`` header row (list of dicts under
      ``'Einsatzmittelliste'``). Only the first occurrence of the first three
      sections is used; anything else is ignored.

    An additional-text section consisting of a single ``"."`` line is dropped.
    The cleaned HTML is stored under ``'__HTML_BODY'``.

    Args:
        body_html: HTML source of the message.

    Returns:
        The parsed dict, or ``None`` when the body is not an ALARMDEPESCHE.

    Raises:
        ValueError: propagated from ``pandas.read_html`` when the body
            contains no tables.
        ImportError: propagated from ``pandas.read_html`` when no HTML parser
            backend is installed.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    from io import StringIO

    body_html = body_html.replace('\xa0', ' ')
    if 'ALARMDEPESCHE' not in body_html:
        return None

    result = {}
    # Passing literal HTML to read_html is deprecated; hand it a file-like
    # object instead.
    tables = pd.read_html(StringIO(body_html))
    for position, table in enumerate(tables, start=1):
        rows = table.to_dict('index')
        first_row = rows.get(0, {})  # an empty table has no row 0
        heading = _cell(first_row, 0)

        if position == 1:
            if isinstance(heading, str) and 'ALARMDEPESCHE' in heading:
                # partition() tolerates zero or several '>>' separators,
                # where tuple-unpacking a split() would raise.
                key, _, value = heading.partition('>>')
                result[key.strip()] = value.strip()
        elif position == 2:
            continue
        elif position == 3:
            for row in rows.values():
                label = _cell(row, 0)
                if label is not None:
                    # The value is stored as-is (NaN for an empty cell).
                    result[str(label).strip(' :')] = row.get(1, nan)
        elif heading == 'Einsatzziel':
            if 'Einsatzziel' not in result:
                result['Einsatzziel'] = {
                    label.strip(' :'): row.get(1, nan)
                    for label, row in _labelled_rows(rows, 'Einsatzziel')
                }
        elif heading in _TEXT_SECTIONS:
            if heading not in result:
                # Empty cells become empty lines and numbers become text so
                # that the join below cannot fail.
                result[heading] = [
                    str(_cell(row, 1, ''))
                    for _label, row in _labelled_rows(rows, heading)
                ]
        elif heading == 'Einsatzmittelliste':
            # Marker table only; the resource list itself is recognised by
            # its header row in the branch below.
            continue
        elif (heading == 'Ressourcen'
              and _cell(first_row, 1) == 'Typ'
              and _cell(first_row, 2) == 'Organisation'):
            result['Einsatzmittelliste'] = [
                {
                    name: _cell(row, column, '')
                    for column, name in enumerate(_RESOURCE_COLUMNS)
                }
                for row in rows.values()
                if row.get(0) != 'Ressourcen'  # skip the header row(s)
            ]

    # Clean-up: drop placeholder-only text sections, flatten the others.
    for section in _TEXT_SECTIONS:
        lines = result.get(section)
        if not isinstance(lines, list):
            # Absent, or a plain value that table 3 stored under this key.
            continue
        if len(lines) == 1 and lines[0].strip() == '.':
            del result[section]
        else:
            result[section] = '\n'.join(lines)

    result['__HTML_BODY'] = body_html
    return result
if __name__ == "__main__":
    # Ad-hoc smoke run: feed every *.html file in the current working
    # directory through the parser. The parsed result is discarded.
    for html_path in glob('*.html'):
        with open(html_path) as html_file:
            html_text = html_file.read()
        parse_securecad_message(html_text)