"""A simple Python web crawler that writes what it scrapes to a text file."""

import os


import requests


from bs4 import BeautifulSoup


# Fetch the HTML document.
def get_html(url, timeout=10):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; ``None`` waits indefinitely.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)
    # Decode the body as UTF-8 regardless of the encoding requests picked.
    response.encoding = 'utf-8'
    return response.text


# Extract the jokes.
def get_joke(html):
    """Collect the text of every ``<div class="content">`` found in *html*.

    Args:
        html: HTML source of the page, as a string.

    Returns:
        A single string holding all matches in document order. Each entry
        is prefixed with ``--------`` and its 1-based position. The result
        is an empty string when nothing matches.
    """
    soup = BeautifulSoup(html, 'lxml')
    blocks = soup.find_all("div", class_="content")
    # join() builds the result in one pass instead of repeated `+=`.
    return ''.join(
        "--------" + str(index) + block.get_text()
        for index, block in enumerate(blocks, start=1)
    )


# Write the jokes to a text file.
def writeJoke(joke):
    """Prompt for a new file name and save *joke* to that file.

    Keeps asking until the entered path does not already exist, so an
    existing file is never overwritten.

    Args:
        joke: Text to write.
    """
    while True:
        filename = input('文件名:')
        if not os.path.exists(filename):
            break
        # Path is already taken: report it and ask again.
        print("错误:'%s' 该文件已存在" % filename)

    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding; `with` closes the file even if the write fails.
    with open(filename, 'w', encoding='utf-8') as fobj:
        fobj.write(joke)
    print('写入成功!')


# Script driver: fetch the page, extract the joke text, and save it to a file
# chosen by the user.
# NOTE(review): these statements run whenever the module is executed or
# imported; there is no `if __name__ == "__main__":` guard.
url_joke = "https://www.qiushibaike.com"


html = get_html(url_joke)


joke = get_joke(html)


writeJoke(joke)


# Debug aid: uncomment to print the extracted text to the console.
# print(joke)