Parsing Emails Using Python's Built-in Email Module

In this article, I will demonstrate the basics of parsing emails using Python's built-in email module.
The example assumes that you have already received an email via POP or IMAP, or that you already have an EML file.
import datetime
import imaplib
import re
from email.header import decode_header
from email.header import make_header
from email.parser import Parser
from email.utils import parsedate_to_datetime
 
# Fetch the raw source of the first email in the inbox over IMAP.
# The result is left in ``str_source`` for the parsing code that follows.
imap_user = '<your-user-name>'
# If you are not using SSL, try `imaplib.IMAP4` instead.
# The ``with`` block logs out of the server on exit, even when a step fails;
# ``close()`` alone only closes the selected mailbox, not the connection.
with imaplib.IMAP4_SSL(host="imap.xxx.com", port=993) as imap_object:
    imap_object.login(imap_user, '<password-or-authentication-code>')
    imap_object.select('INBOX')  # Select the inbox
    typ, msg_ids = imap_object.search(None, 'ALL')

    # The search result is a single space-separated bytes string of message IDs.
    message_id_list = msg_ids[0].decode('utf-8').split()
    int_mail_num = len(message_id_list)
    print('There are %s emails in the inbox'%int_mail_num)
    if not message_id_list:
        # Nothing to fetch: stop with a clear message instead of an IndexError.
        raise SystemExit('The inbox is empty; there is no email to parse.')

    ids = message_id_list[0]  # Select the first email
    results, data = imap_object.fetch(ids, "(RFC822)")
    imap_object.close()  # Close the selected mailbox

# Raw messages may contain 8-bit bytes that are not valid UTF-8; replace them
# rather than crash, since the headers and MIME structure are ASCII anyway.
str_source = data[0][1].decode('utf-8', errors='replace')
############### The content above isn't the key point of this article. It is only there so that you can run this code snippet directly. #############

msg_email = Parser().parsestr(str_source) #Parse the email's raw source from the string; the result is an email.message.Message object.
#If you load the raw source from an EML file, try the message_from_file function of the email module. (The author has not tested it.)
#import email
#fp = open("xxx.eml","r")
#msg_email = email.message_from_file(fp)
 
def _split_address(header_value):
    """Split a From/To header value into ``(display_name, address)``.

    ``email.utils.parseaddr`` is deliberately not used: it reports the
    display name as the address when the address part is empty.

    Handles a missing header (``None``), quoted and unquoted display names,
    bare addresses without angle brackets, and RFC 2047 encoded names made
    of several chunks. Only the first ``<...>`` address is extracted.
    """
    header_value = header_value or ''  # msg["..."] is None when the header is absent
    bracket_match = re.search(r'(?<=<)[\s\S]*?(?=>)', header_value)
    if bracket_match:
        str_address = bracket_match.group()
        # Everything before the opening '<' is the (possibly quoted) name.
        raw_name = header_value[:bracket_match.start() - 1]
    elif '@' in header_value:
        # Bare address such as "user@example.com" with no display name.
        str_address, raw_name = header_value.strip(), ''
    else:
        # No address at all: keep the text as the name, not as the address.
        str_address, raw_name = '', header_value.strip()

    quoted_match = re.search(r'(?<=")[\s\S]*?(?=")', raw_name)
    if quoted_match:
        raw_name = quoted_match.group()
    # make_header joins every decoded chunk, not just the first one.
    str_name = str(make_header(decode_header(raw_name.strip())))
    return str_name, str_address


str_from = msg_email["from"] #Read the sender's information
str_from_name, str_from_address = _split_address(str_from)
print(">>From: %s<%s>"%(str_from_name,str_from_address))

str_to = msg_email["to"] #Read the receiver's information.
str_to_name, str_to_address = _split_address(str_to)
print(">>To: %s<%s>"%(str_to_name,str_to_address))
 
str_date = msg_email["date"] #Read the date information.
# parsedate_to_datetime understands the RFC 5322 date variants seen in real
# mail (optional weekday, zone names such as GMT, trailing "(UTC)" comments)
# and does not depend on the current locale the way strptime's %a/%b do.
try:
    dtime_date = parsedate_to_datetime(str_date)
except (TypeError, ValueError):
    # The Date header is missing or cannot be parsed.
    dtime_date = None
print('>>Time: %s'%dtime_date)
 
str_subject = msg_email["subject"] or '' #Read the email's subject ('' when the header is missing).
# A subject may consist of several chunks (plain text mixed with RFC 2047
# encoded words, possibly in different charsets); make_header decodes and
# joins all of them instead of looking at the first chunk only.
str_subject = str(make_header(decode_header(str_subject)))
print('>>Subject: %s'%str_subject)
 
def decode_mime(msg):
    """Print every text/plain and text/html body found in *msg*.

    Multipart messages are walked recursively, one part at a time. Other
    content types (attachments, media, ...) are skipped.

    Args:
        msg: An ``email.message.Message`` (the whole email or a single part).

    Returns:
        None. Each decoded body is written to standard output.
    """
    if msg.is_multipart():
        # A multipart payload is a list of sub-messages; handle each in turn.
        for part in msg.get_payload():
            decode_mime(part)
        return

    str_content_type = msg.get_content_type()
    if str_content_type not in ('text/plain', 'text/html'):
        return

    # decode=True undoes the Content-Transfer-Encoding (base64, quoted-printable).
    bytes_content = msg.get_payload(decode=True)
    if bytes_content is None:
        return

    # Parts without a declared charset would otherwise hit bytes.decode(None).
    str_charset = msg.get_content_charset(failobj=None) or 'utf-8'
    try:
        str_content = bytes_content.decode(str_charset)
    except (LookupError, UnicodeDecodeError):
        # Unknown codec name or bytes that do not match the declared charset.
        str_content = bytes_content.decode('utf-8', errors='replace')
    print('>>Message Body(%s): %s'%(str_content_type,str_content))
#Most emails carry both a plain-text and an HTML version of the message body in MIME, so the parsed message body may appear repetitive. You can handle it as needed.
# This article was originally published on Zhu's Blog https://azhu.site/posts/phrasing-emails-using-python-build-in-module/
#This code snippet only demonstrates the basic practice. You can add further error handling and encapsulation yourself.
decode_mime(msg_email)