import re
import requests
import time
def main():
# 访问第三关,需要登录,登录的url
url_login = 'http://www.heibanke.com/accounts/login/?next=/lesson/crawler_ex02/'
# 登录成功后,访问第三关url
url = 'http://www.heibanke.com/lesson/crawler_ex02/'
session = requests.Session()
# 获取cookie
session.get(url_login)
# 获取csrftoken
token = session.cookies['csrftoken']
# 将用户名密码和csrftoken一起提交给登录页面
session.post(url_login, data={'csrfmiddlewaretoken': token, 'username': 'tianlegg', 'password': '123456'})
# 登录成功后,携带了token再来访问页面会看到第三关内容,和第二关一样,只不过每次提交时同样需要带着csrftoken,否则还是会报错
for psd in range(30):
print(f'test password {psd}')
session.get(url)
token = session.cookies['csrftoken']
r = session.post(url, data={'csrfmiddlewaretoken': token, 'username': 'aa', 'password': psd})
html = r.text
if '密码错误' not in html:
m = re.search('(?<=\<h3\>).*?(?=\</h3\>)', html)
print(m.group())
m = re.search('(\<).*?href="([^"]*?)".*?(\>下一关\</a\>)', html)
print(f'下一关 http://www.heibanke.com{m.group(2)}')
return
else:
time.sleep(1)
if __name__ == '__main__':
main()