Python crawler learning, part 3: using a User-Agent and proxy IPs

Using a User-Agent
Method 1: build a head dictionary first and pass it in as an argument

import urllib.request
import urllib.parse
import json

content = input("Enter the text to translate: ")

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

data = {}
data['i'] = content
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = '1520575049536'
data['sign'] = '4514c46c320493ba8c034eaa8d9decaf'
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_CLICKBUTTION'
data['typoResult'] = 'false'
data['ue'] = 'utf-8'
# Encode the form data as URL-encoded bytes before sending
data = urllib.parse.urlencode(data).encode('utf-8')

head = {}
head['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36'

# urllib.request.Request() can take three arguments; the headers dict goes in as the third one
req = urllib.request.Request(url, data, head)
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')

target = json.loads(html)
print("Translation result: %s" % target['translateResult'][0][0]['tgt'])

################################################################################

Method 2: create the Request first, then call add_header

import urllib.request
import urllib.parse
import json

content = input("Enter the text to translate: ")

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

data = {}
data['i'] = content
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = '1520575049536'
data['sign'] = '4514c46c320493ba8c034eaa8d9decaf'
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_CLICKBUTTION'
data['typoResult'] = 'false'
data['ue'] = 'utf-8'
data = urllib.parse.urlencode(data).encode('utf-8')

req = urllib.request.Request(url, data)
# Attach the User-Agent after the Request has been created
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3355.4 Safari/537.36')

response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')

target = json.loads(html)
print("Translation result: %s" % target['translateResult'][0][0]['tgt'])

###########################################################################################

Using a proxy IP

import urllib.request

def main():
    # URL to visit (a page that reports the IP the server sees)
    url = 'http://www.whatismyip.com.tw/'

    # Proxy IP
    proxy = {'http': '106.46.136.112:808'}

    # Create a ProxyHandler
    proxy_support = urllib.request.ProxyHandler(proxy)

    # Create an opener
    opener = urllib.request.build_opener(proxy_support)

    # Add a User-Agent
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]

    # Install the opener globally
    urllib.request.install_opener(opener)

    # Use the opener you just installed
    response = urllib.request.urlopen(url)

    # Read the response and decode it
    html = response.read().decode('utf-8')
    print(html)

if __name__ == '__main__':
    main()
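install_opener() replaces the global opener used by urlopen(), which affects every later request. If you only want the proxy for some requests, or want to rotate among several proxies, you can call the opener directly instead. A minimal sketch; the proxy addresses below are placeholders and would need to be replaced with working ones:

import random
import urllib.request

# Hypothetical proxy list; these addresses are placeholders only
ip_list = ['119.6.144.70:81', '111.1.36.9:80', '203.144.144.162:8080']

proxy = {'http': random.choice(ip_list)}   # pick one proxy at random
opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxy))
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]

# Use the opener directly instead of installing it globally
response = opener.open('http://www.whatismyip.com.tw/')
print(response.read().decode('utf-8'))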