From f90f5da9cf6ccce35771b0a6c3d74f261411e309 Mon Sep 17 00:00:00 2001 From: "Fred T. Hamster" Date: Wed, 15 Nov 2023 14:35:43 -0500 Subject: [PATCH] fix to handle windows encoded emails also. --- scripts/email/eml_to_txt.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/scripts/email/eml_to_txt.py b/scripts/email/eml_to_txt.py index 6c4eb1a3..c07746af 100644 --- a/scripts/email/eml_to_txt.py +++ b/scripts/email/eml_to_txt.py @@ -52,10 +52,25 @@ def pullout (m, key): return Text, Html, Files, 1 cp = m.get_content_type() if cp=="text/plain": - Text += m.get_payload(decode=True).decode("utf-8") + try: + Text += m.get_payload(decode=True).decode("utf-8") + except: + try: + Text += m.get_payload(decode=True).decode("cp437") + except: + print("failed to process text attachment with either utf-8 or cp437 code pages.") + exit(1) elif cp=="text/html": - soup = BeautifulSoup(m.get_payload(decode=True).decode("utf-8"), features="html.parser") - Html += soup.get_text('\n', strip=True) + try: + soup = BeautifulSoup(m.get_payload(decode=True).decode("utf-8"), features="html.parser") + Html += soup.get_text('\n', strip=True) + except: + try: + soup = BeautifulSoup(m.get_payload(decode=True).decode("cp437"), features="html.parser") + Html += soup.get_text('\n', strip=True) + except: + print("failed to process html attachment with either utf-8 or cp437 code pages.") + exit(1) else: cp = m.get("content-type") try: id = disgra(m.get("content-id")) -- 2.34.1