import scrapy
from datetime import datetime
class BianSpider(scrapy.Spider):
    """Spider for the tech.163.com article data list.

    The start URL returns a JSONP document (``data_callback([...])``).
    ``parse`` unwraps it, prints a few fields of every entry and schedules a
    request for each entry's ``docurl``; ``show`` handles those article pages.
    """

    name = 'bian'
    start_urls = ['http://tech.163.com/special/00097UHL/tech_datalist.js?callback=data_callback']

    @staticmethod
    def _unwrap_jsonp(text, prefix='data_callback('):
        """Return the JSON payload wrapped inside a JSONP call.

        Args:
            text: Decoded response body, e.g. ``'data_callback([...])'``.
            prefix: Literal opening of the wrapper, including the ``(``.

        Returns:
            The text between the wrapper's parentheses. If *text* does not
            start with *prefix*, it is returned with only surrounding
            whitespace removed.
        """
        payload = text.strip()
        if not payload.startswith(prefix):
            return payload
        # Remove the literal prefix; str.strip(chars) would instead treat the
        # argument as a set of characters to drop from both ends.
        payload = payload[len(prefix):]
        # Tolerate a trailing ";" and whitespace after the closing parenthesis.
        payload = payload.rstrip().rstrip(';').rstrip()
        if payload.endswith(')'):
            payload = payload[:-1]
        return payload

    def parse(self, response):
        """Print fields of each list entry and follow its article URL.

        Args:
            response: Response for the JSONP data list; its body is decoded
                as GBK.

        Yields:
            scrapy.Request: One request per entry, handled by ``show``.

        Raises:
            ValueError: If the unwrapped payload is not valid JSON or an
                entry's ``time`` does not match ``%m/%d/%Y %H:%M:%S``.
            KeyError: If an entry lacks one of the fields read below.
        """
        import json

        payload = self._unwrap_jsonp(response.body.decode('gbk'))
        for entry in json.loads(payload):
            print(entry['title'])
            print(entry['label'])
            print(datetime.strptime(entry['time'], '%m/%d/%Y %H:%M:%S'))
            print(','.join(keyword['keyname'] for keyword in entry['keywords']))
            yield scrapy.Request(entry['docurl'], callback=self.show)

    def show(self, response):
        """Print the breadcrumb text and selected nodes of an article page.

        Args:
            response: Response for an article page requested by ``parse``.
        """
        crumb_texts = response.xpath("//div[@class='post_crumb']//text()").extract()
        print(' '.join(crumb_texts).strip())
        # NOTE(review): the XPath literal below is reproduced exactly as found.
        # It looks truncated ('//*[@>' is not a complete expression) and the
        # first of the two calls was missing its closing quote.
        # TODO: restore the intended selector(s).
        print(response.xpath('//*[@>').extract())
        print(response.xpath('//*[@>').extract())