基于python的邮件地址提取小程序

import sys

import os

import re

def analysis_file(path):
    """Extract the e-mail addresses found in one text file.

    Reads the file at ``path``, collects every distinct address matched by
    the pattern below and, when at least one was found, writes them to
    ``path + ".mail.txt"`` as a comma-terminated list
    (``a@x.com,b@y.org,``). Nothing is written when no address is found.

    Args:
        path: Path of the file to scan.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            written.
    """
    print("analysis file: %s." % path)

    # Undecodable bytes are replaced rather than raising, so a stray binary
    # or differently-encoded file does not stop the extraction.
    with open(path, "r", errors="replace") as fi:
        all_text = fi.read()

    # The first segment of the local part accepts digits and "_" as well as
    # letters; with letters only, "john_doe@example.com" was truncated to
    # "doe@example.com" and "user123@example.com" was missed entirely.
    re_mail = re.compile(
        r"([a-zA-Z0-9_-]+(?:\.[\w-]+)*@[\w-]+(?:\.[a-zA-Z-]+)+)"
    )
    mails = set(re_mail.findall(all_text))

    print("results: %d" % len(mails))

    if mails:
        # Sorted so the output is reproducible between runs; every address,
        # including the last one, is followed by a comma.
        with open(path + ".mail.txt", "wt") as fo:
            fo.write("".join(mail + "," for mail in sorted(mails)))

def analysis_dir(path):
    """Run ``analysis_file`` on every regular file directly inside ``path``.

    Sub-directories are not descended into, and entries whose name ends in
    ``.mail.txt`` are skipped.

    Args:
        path: Directory whose files should be scanned.
    """
    for name in os.listdir(path):
        # os.listdir returns bare names, so join with the directory before
        # testing or opening; a bare name would be resolved against the
        # current working directory instead. os.path.join also picks the
        # correct separator for the platform.
        full_path = os.path.join(path, name)
        if not os.path.isfile(full_path) or name.endswith(".mail.txt"):
            continue
        analysis_file(full_path)

def main():
    """Command-line entry point.

    Takes the target from ``sys.argv[1]``: a regular file is passed to
    ``analysis_file``; any other existing path is passed to
    ``analysis_dir``. Prints a message and returns when the argument is
    missing or the path does not exist.
    """
    print("analysis is working... ...")
    print("current direcotry: %s." % os.getcwd())

    arguments = sys.argv
    if len(arguments) < 2:
        print("set the directory to serach")
        return

    target = arguments[1]

    # A regular file is scanned on its own.
    if os.path.isfile(target):
        print("searching file: %s." % target)
        analysis_file(target)
        return

    # Not a regular file: it must at least exist to be scanned as a directory.
    if not os.path.exists(target):
        print("there isn't exist direcoty: %s" % target)
        return

    print("searching alll files in directory: %s." % target)
    analysis_dir(target)

# Start the scan only when this file is executed as a script.
if __name__ == "__main__":
    main()

这是我在实际过程中常碰到的问题:大量的邮件地址分散在若干文件中。本程序可以处理单个文件或目录,提取其中所有的邮件地址,并拼接成邮件地址列表,可以直接用于批量邮件的发送。