python将文件夹下的所有csv文件存入mysql和oracle数据库 - iUpoint

python将文件夹下的所有csv文件存入mysql和oracle数据库

#oracle版

首先新建python脚本(d:/python/orclImport.py)

import os  #引入os包

if __name__ == \'__main__\':
    #遍历文件夹下所有的dmp文件,其中filename为所有文件、文件夹的名称。
    #因为我文件夹下确定都是dmp文件,所以无需进行特殊判断
    for filename in os.listdir("D:\dmp"):
        # 调用cmd窗口,并执行dmp的导入命令
        # 因为dmp文件数据量太大,我选择一条一条的去执行,就是如此,电脑已经打字都卡了
        os.system("imp orcl/123456@127.0.0.1/zrr file=\"D:\dmp\\"+filename+"\" full=y ignore=y")

在命令行执行 python d:/python/orclImport.py

# mysql版

import os
import pymysql
# import datetime, time
# from config import *


database_table_name = "stockdata"
infile_path = \'E:/学习资料/stock_data\'
outfile_path = \'E:/学习资料/stock_data1\'


# 删除首行的股票代码和空行
def delblankline(infile, outfile):
    infp = open(infile, "r")
    outfp = open(outfile, "w")
    lines = infp.readlines()
    for li in lines[1:]:
        if li.split():
            outfp.writelines(li)
    infp.close()
    outfp.close()

def csv2bigcsv(path, outfile):
    """1.删除股票代码+空行  2.合并所有文件数据"""
    # infp = open(infile, "r")
    outfp = open(outfile, "w")
    i = 1
    for filename in os.listdir(path):
        domain = os.path.abspath(path)  # 获取文件夹的路径,也可去掉
        filepath = os.path.join(domain, filename)  # 文件的完整路径
        infp = open(filepath, \'r\')  # 读取文件内容
        lines = infp.readlines()
        if i == 1: outfp.writelines(lines[3].split())
        for li in lines[5:]:
            if li.split():
                outfp.writelines(li)
        infp.close()
        i += 1
    outfp.close()

# 执行sql语句
def db_excute(sql):
    # local_infile = 1 执行load data infile
    db_info = {
        \'host\': \'127.0.0.1\',
        \'user\': \'root\',
        \'password\': \'root\',
        \'port\': 3306,
        \'database\': \'test\',
        \'charset\': \'utf8\',
        \'local_infile\': 1
    }
    db = pymysql.connect(**db_info)
    # db.set_charset(\'utf8\')
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        db.commit()
    except pymysql.Error as e:
        print(e)
    finally:
        db.close()


# 将单个csv文件插入数据库
def csv2mysql(file_path, database_table_name):
    \'\'\' LOAD DATA LOCAL INFILE "/root/test/infohash_20180603.txt"
        INTO TABLE xl_log_analysis.infohash_20180603
        FIELDS TERMINATED BY \',\'
        OPTIONALLY ENCLOSED BY \'"\' ESCAPED BY \'"\'
        LINES TERMINATED BY \'\r\n\'
        IGNORE 1 LINES ( infohash );\'\'\'
    \'\'\'LINES TERMINATED BY \\r\\n WIN 为\\r,LINUX 为\\n\'\'\'

    # " LOAD DATA LOCAL INFILE \'" + file_path + "\'"\
    #       + " INTO TABLE " + database_table_name \
    #       + " FIELDS TERMINATED BY \',\' " \
    #       + " OPTIONALLY ENCLOSED BY \'\"\' ESCAPED BY \'\"\' " \
    #       + " LINES TERMINATED BY \'\\r\\n\' " \
    #       + " IGNORE 5 LINES; "

    # 报1148错误,可以本地开启local_infile服务
    # set global local_infile = \'ON\';
    sql = \'\'\'LOAD DATA LOCAL INFILE \'{0}\' INTO TABLE {1} 
            CHARACTER SET UTF8 
            FIELDS TERMINATED BY \',\' 
            ENCLOSED BY \'\"\' ESCAPED BY \'\"\' 
            LINES TERMINATED BY \'\\r\\n\' 
            IGNORE 5 LINES (`symbol`, `date`, `opening_price`, `highest_price` , `lowest_price`, `closing_price`,  
            `change`, `trading_volume`, `turnover`, `circulation_market_value`, `total_value`, 
            `turnover_rate`, `post_resumption_price`, `report_type`, `report_time`, `PE_TTM`, 
            `PS_TTM`, `PCF_TTM`, `PB`, `ex_duplicate_price`, `tradable_share`, `total_stock_issue`, 
            `total_assets`, `current_asset`, `gross_liabilities`, `current_liabilities`, `total_operating_income`, 
            `operating_income`, `total_operating_costs`, `operating_costs`, `operating_profit`, `net_profits`, 
            `selling_expenses`, `administration_expenses`, `financial_expenses`, `net_cash_flow`, 
            `NOCF`, `NICF`, `NFCF`, `ROE_TTM`);\'\'\'.format(file_path, database_table_name)
    db_excute(sql)
    # print(sql)


# 将文件路径下的所有csv文件插入数据库
def to_mysql(infile_path, database_table_name):
    for filename in os.listdir(infile_path):
        print(\'正在上传:\', filename)
        # domain = os.path.abspath(infile_path)  # 获取文件夹的路径,也可去掉
        # file_path = os.path.join(domain, filename)  # 文件的完整路径
        file_path = infile_path + \'/\' + filename
        # print(\'file_path: \',file_path)
        csv2mysql(file_path, database_table_name)


if __name__ == \'__main__\':
    # delblankline("sh600000.csv", "sh600000_.csv")
    # csv2bigcsv(infile_path, "all-groups.csv")

    # 创建新表
    createtable_sql = \'\'\'CREATE TABLE IF NOT EXISTS `{0}` (
        `id` INT NOT NULL AUTO_INCREMENT COMMENT \'ID\',
        `symbol` VARCHAR ( 10 ) DEFAULT NULL COMMENT \'股票代码\',
        `date` DATE DEFAULT NULL COMMENT \'日期\',
        `opening_price` FLOAT DEFAULT NULL COMMENT \'开盘价\',
        `highest_price` FLOAT DEFAULT NULL COMMENT \'最高价\',
        `lowest_price` FLOAT DEFAULT NULL COMMENT \'最低价\',
        `closing_price` FLOAT DEFAULT NULL COMMENT \'收盘价\',
        `change` FLOAT DEFAULT NULL COMMENT \'涨跌幅\',
        `trading_volume` BIGINT DEFAULT NULL COMMENT \'交易量\',
        `turnover` BIGINT DEFAULT NULL COMMENT \'交易额\',
        `circulation_market_value` BIGINT DEFAULT NULL COMMENT \'流通市值\',
        `total_value` BIGINT DEFAULT NULL COMMENT \'总市值\',
        `turnover_rate` FLOAT DEFAULT NULL COMMENT \'换手率\',
        `post_resumption_price` FLOAT DEFAULT NULL COMMENT \'后复权价格\',
        `report_type` VARCHAR ( 64 ) DEFAULT NULL COMMENT \'报告类型\',
        `report_time` DATE DEFAULT NULL COMMENT \'报告时间\',
        # 市盈率PE 市净率PB 市销率PS 市现率PCF
        `PE_TTM` FLOAT DEFAULT NULL COMMENT \'市盈率TTM\',
        `PS_TTM` FLOAT DEFAULT NULL COMMENT \'市销率TTM\',
        `PCF_TTM` FLOAT DEFAULT NULL COMMENT \'市现率TTM\',
        `PB` FLOAT DEFAULT NULL COMMENT \'市净率\',
        `ex_duplicate_price` FLOAT DEFAULT NULL COMMENT \'前复权价格\',
        `tradable_share` BIGINT DEFAULT NULL COMMENT \'流通股本\',
        `total_stock_issue` BIGINT DEFAULT NULL COMMENT \'总股本\',
        `total_assets` BIGINT DEFAULT NULL COMMENT \'总资产\',
        `current_asset` BIGINT DEFAULT NULL COMMENT \'流动资产\',
        `gross_liabilities` BIGINT DEFAULT NULL COMMENT \'总负债\',
        `current_liabilities` BIGINT DEFAULT NULL COMMENT \'流动负债\',
        `total_operating_income` BIGINT DEFAULT NULL COMMENT \'营业总收入\',
        `operating_income` BIGINT DEFAULT NULL COMMENT \'营业收入\',
        `total_operating_costs` BIGINT DEFAULT NULL COMMENT \'营业总成本\',
        `operating_costs` BIGINT DEFAULT NULL COMMENT \'营业成本\',
        `operating_profit` BIGINT DEFAULT NULL COMMENT \'营业利润\',
        `net_profits` BIGINT DEFAULT NULL COMMENT \'净利润\',
        `selling_expenses` BIGINT DEFAULT NULL COMMENT \'销售费用\',
        `administration_expenses` BIGINT DEFAULT NULL COMMENT \'管理费用\',
        `financial_expenses` BIGINT DEFAULT NULL COMMENT \'财务费用\',
        `net_cash_flow` BIGINT DEFAULT NULL COMMENT \'净现金流\',
        `NOCF` BIGINT DEFAULT NULL COMMENT \'经营活动净现金流operating activities\',
        `NICF` BIGINT DEFAULT NULL COMMENT \'投资活动净现金流investment activities\',
        `NFCF` BIGINT DEFAULT NULL COMMENT \'筹资活动净现金流financing activities\',
        `ROE_TTM` BIGINT DEFAULT NULL COMMENT \'净资产收益率TTM Net asset yield\',
        PRIMARY KEY ( `id` )
        ) ENGINE = INNODB DEFAULT CHARSET = UTF8MB4;\'\'\'.format(database_table_name)
    db_excute(createtable_sql)  # 创建新表
    print(createtable_sql)
    to_mysql(infile_path, database_table_name)  # 导入infohash数据到mysql xl_log_analysis 表