Python Scrapy: rewriting the FilesPipeline (filespipe rewrite)

Main reason: the files need to be downloaded with their original names and extensions preserved, but Scrapy's built-in files pipeline has no option for this (by default it names files after a hash of the URL), so the FilesPipeline behaviour has to be redefined. The code below is adapted from other people's examples.

import time
from urllib import parse

from scrapy.pipelines.files import FilesPipeline


class FileRenamePipeline(FilesPipeline):
    """FilesPipeline that keeps the original file name and appends a millisecond timestamp."""

    def file_path(self, request, response=None, info=None, *, item=None):  # item is passed by Scrapy 2.4+
        print('_' * 100)  # debug marker so the rename step is visible in the log
        timest = str(int(time.time() * 1000))
        # The download URL carries the original name as ...;filename="xxx.ext"...,
        # so decode it twice and take the part between the quotes.
        name = parse.unquote(parse.unquote(request.url).split(';')[1]).split('"')[1]
        if '.' in name:
            base, ext = name.rsplit('.', 1)  # split on the last dot so dotted names keep their real extension
            file_name = base + '_' + timest + '.' + ext
        else:
            file_name = name + '_' + timest
        return 'full/' + file_name
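
For reference, the item class used further down (SpiderFileItem) only needs the two fields the FilesPipeline works with; a minimal sketch of what would sit in the project's items.py:

import scrapy

class SpiderFileItem(scrapy.Item):
    file_urls = scrapy.Field()  # URLs to download (read by the FilesPipeline)
    files = scrapy.Field()      # filled in by the FilesPipeline with the download results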
Note: custom_settings has no effect on a pipeline class; it belongs on the spider class, where it enables the renamed pipeline and points FILES_STORE at the download directory:

    custom_settings = {
        'ITEM_PIPELINES': {
            'spider_dataPlat.pipelines.FileRenamePipeline': 2,
        },
        'FILES_STORE': r'E:\下载',  # file download path (raw string so the backslash stays literal)
    }
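
If the pipeline should apply to the whole project rather than one spider, the equivalent entries go in the project's settings.py instead (a sketch assuming the same project name spider_dataPlat as above):

ITEM_PIPELINES = {
    'spider_dataPlat.pipelines.FileRenamePipeline': 2,
}
FILES_STORE = r'E:\下载'  # same download directory, applied project-wide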
In the spider's parse callback, the item handed to the pipeline is built like this (final_url and name come from the page being parsed):

        items = SpiderFileItem()
        items['file_urls'] = [final_url]     # the FilesPipeline downloads every URL in file_urls
        items['files'] = name.split('.')[0]  # note: the FilesPipeline overwrites 'files' with the download results
        yield items
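
A possible refinement not in the original code: since Scrapy 2.4, file_path also receives the item, so instead of re-parsing the URL the spider could put the desired name on the item and the pipeline could read it from there. A sketch, assuming a hypothetical file_name field is added to SpiderFileItem and filled in by the spider:

import time
from scrapy.pipelines.files import FilesPipeline

class FileRenameFromItemPipeline(FilesPipeline):
    def file_path(self, request, response=None, info=None, *, item=None):
        timest = str(int(time.time() * 1000))
        # prefer the name stored on the item; fall back to the last URL segment
        name = item.get('file_name') if item else None
        name = name or request.url.rsplit('/', 1)[-1]
        if '.' in name:
            base, ext = name.rsplit('.', 1)
            return 'full/' + base + '_' + timest + '.' + ext
        return 'full/' + name + '_' + timest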