In this article, I will demonstrate the basics of parsing emails using Python’s built-in email module. It assumes that you have already retrieved an email via POP3 or IMAP, or that you already have an EML file.
import imaplib
from email.parser import Parser
from email.header import decode_header
import re
import datetime

imap_user = '<your-user-name>'

# If you are not using SSL, try `imaplib.IMAP4` instead.
imap_object = imaplib.IMAP4_SSL(host="imap.xxx.com", port=993)
try:
    imap_object.login(imap_user, '<password-or-authentication-code>')
    imap_object.select('INBOX')  # Select the inbox.

    typ, msg_ids = imap_object.search(None, 'ALL')
    # search() returns the message numbers as one space-separated bytes string.
    msg_list = msg_ids[0].split()
    int_mail_num = len(msg_list)
    print('There are %s emails in the inbox' % int_mail_num)
    if not msg_list:
        # Without this guard an empty inbox fails with a bare IndexError below.
        raise SystemExit('The inbox is empty; there is nothing to parse.')

    ids = msg_list[0]  # Select the first email.
    results, data = imap_object.fetch(ids, "(RFC822)")
    imap_object.close()  # Close the currently selected mailbox.
finally:
    # close() only deselects the mailbox; logout() is what actually ends the
    # session with the IMAP server, so run it even when a step above fails.
    imap_object.logout()

# The raw message arrives as bytes; the parsing code that follows works on text.
str_source = data[0][1].decode('UTF-8')

############### The content above isn't the key point of this article. It is just to ensure you can run this code snippet directly. #############
def _decode_header_text(raw):
    """Decode every RFC 2047 encoded word in *raw* and return a single str."""
    pieces = []
    for value, charset in decode_header(raw):
        if isinstance(value, bytes):
            # Chunks without a declared charset are treated as ASCII, and
            # undecodable bytes are replaced rather than aborting the script.
            value = value.decode(charset or 'ascii', errors='replace')
        pieces.append(value)
    return ''.join(pieces)


def _split_address(header_value):
    """Return (display_name, email_address) for one address header value.

    Only the first ``<...>`` address in the header is considered. A missing
    header yields two empty strings, and a bare address without angle
    brackets is returned as the address with an empty name.
    """
    if header_value is None:
        return '', ''
    address_match = re.search(r'(?<=<)[\s\S]*?(?=>)', header_value)
    if address_match is None:
        return '', header_value.strip()
    # The lookbehind places the match just after '<', so the display name is
    # everything that precedes that bracket.
    name_part = header_value[:address_match.start() - 1]
    name_match = re.search(r'(?<=")[\s\S]*?(?=")', name_part)
    raw_name = name_match.group() if name_match else name_part.strip()
    return _decode_header_text(raw_name), address_match.group()


# Read the source of the email from the string; returns an email.message object.
msg_email = Parser().parsestr(str_source)
# If you load the source from an EML file, try the message_from_file function
# of the email module. (I haven't tested it.)
# import email
# fp = open("xxx.eml","r")
# msg_email = email.message_from_file(fp)

str_from = msg_email["from"]  # Read the sender's information.
# You can also read the sender's (and the receiver's) information with
# email.utils.parseaddr:
# tuple_from = email.utils.parseaddr(str_from)
# str_from_name = tuple_from[0]
# str_from_address = tuple_from[1]
# I didn't use it because I found that it regards the sender's name as the
# email address when the sender's email address is empty in an email.
str_from_name, str_from_address = _split_address(str_from)
print(">>From: %s<%s>" % (str_from_name, str_from_address))

str_to = msg_email["to"]  # Read the receiver's information.
str_to_name, str_to_address = _split_address(str_to)
print(">>To: %s<%s>" % (str_to_name, str_to_address))

str_date = msg_email["date"]  # Read the date information.
from email.utils import parsedate_to_datetime

# Convert the Date header to a datetime. parsedate_to_datetime understands the
# RFC 5322 variants a fixed strptime format rejects (a 'GMT' zone name, a
# missing weekday, a trailing "(UTC)" comment, ...).
try:
    dtime_date = parsedate_to_datetime(str_date)
except (TypeError, ValueError):
    # Missing or unparsable Date header: fall back to showing the raw value.
    dtime_date = None
print('>>Time: %s' % (dtime_date if dtime_date is not None else str_date))

# Read the email's subject. A subject may consist of several encoded chunks,
# each with its own charset, so decode all of them and join the results.
str_subject = ''.join(
    value.decode(charset or 'ascii', errors='replace')
    if isinstance(value, bytes) else value
    for value, charset in decode_header(msg_email["subject"] or '')
)
print('>>Subject: %s' % str_subject)


def decode_mime(msg):
    """Print the text/plain and text/html bodies found in *msg*.

    Multipart messages are walked recursively, so the same function handles
    both a whole message and each individual MIME block. Other content
    types (attachments, media, etc.) are skipped.
    """
    if msg.is_multipart():
        # A multipart payload is a list of sub-messages; handle each in turn.
        for part in msg.get_payload():
            decode_mime(part)
        return

    str_content_type = msg.get_content_type()  # Data block's Content-Type.
    # Here I only demonstrate how to process plain text and HTML.
    # You can continue to google how to process other data types such as
    # attachments, media, etc.
    if str_content_type not in ('text/plain', 'text/html'):
        return

    # Data block's charset; None when the part does not declare one.
    str_charset = msg.get_content_charset(failobj=None)
    # Data block's message body, in bytes format.
    bytes_content = msg.get_payload(decode=True)
    try:
        str_content = bytes_content.decode(str_charset or 'utf-8', errors='replace')
    except LookupError:
        # The declared charset is unknown to Python; degrade gracefully.
        str_content = bytes_content.decode('utf-8', errors='replace')
    print('>>Message Body(%s): %s' % (str_content_type, str_content))


# Most emails contain both plain text and HTML format message bodies in MIME,
# so the parsing result of the message body may appear repetitive. You can
# handle it as needed.
# This article was originally published on Zhu's Blog https://azhu.site/posts/phrasing-emails-using-python-build-in-module/
# This code snippet only demonstrates the basic practice. You can handle
# further error tolerance and encapsulations yourself.
decode_mime(msg_email)