python将文件夹下的所有csv文件存入mysql和oracle数据库 - iUpoint
python将文件夹下的所有csv文件存入mysql和oracle数据库
#oracle版
首先新建python脚本(d:/python/orclImport.py)
import os #引入os包 if __name__ == \'__main__\': #遍历文件夹下所有的dmp文件,其中filename为所有文件、文件夹的名称。 #因为我文件夹下确定都是dmp文件,所以无需进行特殊判断 for filename in os.listdir("D:\dmp"): # 调用cmd窗口,并执行dmp的导入命令 # 因为dmp文件数据量太大,我选择一条一条的去执行,就是如此,电脑已经打字都卡了 os.system("imp orcl/123456@127.0.0.1/zrr file=\"D:\dmp\\"+filename+"\" full=y ignore=y")
在命令行执行 python d:/python/orclImport.py
# mysql版
import os import pymysql # import datetime, time # from config import * database_table_name = "stockdata" infile_path = \'E:/学习资料/stock_data\' outfile_path = \'E:/学习资料/stock_data1\' # 删除首行的股票代码和空行 def delblankline(infile, outfile): infp = open(infile, "r") outfp = open(outfile, "w") lines = infp.readlines() for li in lines[1:]: if li.split(): outfp.writelines(li) infp.close() outfp.close() def csv2bigcsv(path, outfile): """1.删除股票代码+空行 2.合并所有文件数据""" # infp = open(infile, "r") outfp = open(outfile, "w") i = 1 for filename in os.listdir(path): domain = os.path.abspath(path) # 获取文件夹的路径,也可去掉 filepath = os.path.join(domain, filename) # 文件的完整路径 infp = open(filepath, \'r\') # 读取文件内容 lines = infp.readlines() if i == 1: outfp.writelines(lines[3].split()) for li in lines[5:]: if li.split(): outfp.writelines(li) infp.close() i += 1 outfp.close() # 执行sql语句 def db_excute(sql): # local_infile = 1 执行load data infile db_info = { \'host\': \'127.0.0.1\', \'user\': \'root\', \'password\': \'root\', \'port\': 3306, \'database\': \'test\', \'charset\': \'utf8\', \'local_infile\': 1 } db = pymysql.connect(**db_info) # db.set_charset(\'utf8\') cursor = db.cursor() try: cursor.execute(sql) db.commit() except pymysql.Error as e: print(e) finally: db.close() # 将单个csv文件插入数据库 def csv2mysql(file_path, database_table_name): \'\'\' LOAD DATA LOCAL INFILE "/root/test/infohash_20180603.txt" INTO TABLE xl_log_analysis.infohash_20180603 FIELDS TERMINATED BY \',\' OPTIONALLY ENCLOSED BY \'"\' ESCAPED BY \'"\' LINES TERMINATED BY \'\r\n\' IGNORE 1 LINES ( infohash );\'\'\' \'\'\'LINES TERMINATED BY \\r\\n WIN 为\\r,LINUX 为\\n\'\'\' # " LOAD DATA LOCAL INFILE \'" + file_path + "\'"\ # + " INTO TABLE " + database_table_name \ # + " FIELDS TERMINATED BY \',\' " \ # + " OPTIONALLY ENCLOSED BY \'\"\' ESCAPED BY \'\"\' " \ # + " LINES TERMINATED BY \'\\r\\n\' " \ # + " IGNORE 5 LINES; " # 报1148错误,可以本地开启local_infile服务 # set global local_infile = \'ON\'; sql = \'\'\'LOAD DATA LOCAL INFILE \'{0}\' INTO TABLE {1} CHARACTER SET UTF8 FIELDS TERMINATED BY \',\' ENCLOSED BY \'\"\' ESCAPED BY \'\"\' LINES TERMINATED BY \'\\r\\n\' IGNORE 5 LINES (`symbol`, `date`, `opening_price`, `highest_price` , `lowest_price`, `closing_price`, `change`, `trading_volume`, `turnover`, `circulation_market_value`, `total_value`, `turnover_rate`, `post_resumption_price`, `report_type`, `report_time`, `PE_TTM`, `PS_TTM`, `PCF_TTM`, `PB`, `ex_duplicate_price`, `tradable_share`, `total_stock_issue`, `total_assets`, `current_asset`, `gross_liabilities`, `current_liabilities`, `total_operating_income`, `operating_income`, `total_operating_costs`, `operating_costs`, `operating_profit`, `net_profits`, `selling_expenses`, `administration_expenses`, `financial_expenses`, `net_cash_flow`, `NOCF`, `NICF`, `NFCF`, `ROE_TTM`);\'\'\'.format(file_path, database_table_name) db_excute(sql) # print(sql) # 将文件路径下的所有csv文件插入数据库 def to_mysql(infile_path, database_table_name): for filename in os.listdir(infile_path): print(\'正在上传:\', filename) # domain = os.path.abspath(infile_path) # 获取文件夹的路径,也可去掉 # file_path = os.path.join(domain, filename) # 文件的完整路径 file_path = infile_path + \'/\' + filename # print(\'file_path: \',file_path) csv2mysql(file_path, database_table_name) if __name__ == \'__main__\': # delblankline("sh600000.csv", "sh600000_.csv") # csv2bigcsv(infile_path, "all-groups.csv") # 创建新表 createtable_sql = \'\'\'CREATE TABLE IF NOT EXISTS `{0}` ( `id` INT NOT NULL AUTO_INCREMENT COMMENT \'ID\', `symbol` VARCHAR ( 10 ) DEFAULT NULL COMMENT \'股票代码\', `date` DATE DEFAULT NULL COMMENT \'日期\', `opening_price` FLOAT DEFAULT NULL COMMENT \'开盘价\', `highest_price` FLOAT DEFAULT NULL COMMENT \'最高价\', `lowest_price` FLOAT DEFAULT NULL COMMENT \'最低价\', `closing_price` FLOAT DEFAULT NULL COMMENT \'收盘价\', `change` FLOAT DEFAULT NULL COMMENT \'涨跌幅\', `trading_volume` BIGINT DEFAULT NULL COMMENT \'交易量\', `turnover` BIGINT DEFAULT NULL COMMENT \'交易额\', `circulation_market_value` BIGINT DEFAULT NULL COMMENT \'流通市值\', `total_value` BIGINT DEFAULT NULL COMMENT \'总市值\', `turnover_rate` FLOAT DEFAULT NULL COMMENT \'换手率\', `post_resumption_price` FLOAT DEFAULT NULL COMMENT \'后复权价格\', `report_type` VARCHAR ( 64 ) DEFAULT NULL COMMENT \'报告类型\', `report_time` DATE DEFAULT NULL COMMENT \'报告时间\', # 市盈率PE 市净率PB 市销率PS 市现率PCF `PE_TTM` FLOAT DEFAULT NULL COMMENT \'市盈率TTM\', `PS_TTM` FLOAT DEFAULT NULL COMMENT \'市销率TTM\', `PCF_TTM` FLOAT DEFAULT NULL COMMENT \'市现率TTM\', `PB` FLOAT DEFAULT NULL COMMENT \'市净率\', `ex_duplicate_price` FLOAT DEFAULT NULL COMMENT \'前复权价格\', `tradable_share` BIGINT DEFAULT NULL COMMENT \'流通股本\', `total_stock_issue` BIGINT DEFAULT NULL COMMENT \'总股本\', `total_assets` BIGINT DEFAULT NULL COMMENT \'总资产\', `current_asset` BIGINT DEFAULT NULL COMMENT \'流动资产\', `gross_liabilities` BIGINT DEFAULT NULL COMMENT \'总负债\', `current_liabilities` BIGINT DEFAULT NULL COMMENT \'流动负债\', `total_operating_income` BIGINT DEFAULT NULL COMMENT \'营业总收入\', `operating_income` BIGINT DEFAULT NULL COMMENT \'营业收入\', `total_operating_costs` BIGINT DEFAULT NULL COMMENT \'营业总成本\', `operating_costs` BIGINT DEFAULT NULL COMMENT \'营业成本\', `operating_profit` BIGINT DEFAULT NULL COMMENT \'营业利润\', `net_profits` BIGINT DEFAULT NULL COMMENT \'净利润\', `selling_expenses` BIGINT DEFAULT NULL COMMENT \'销售费用\', `administration_expenses` BIGINT DEFAULT NULL COMMENT \'管理费用\', `financial_expenses` BIGINT DEFAULT NULL COMMENT \'财务费用\', `net_cash_flow` BIGINT DEFAULT NULL COMMENT \'净现金流\', `NOCF` BIGINT DEFAULT NULL COMMENT \'经营活动净现金流operating activities\', `NICF` BIGINT DEFAULT NULL COMMENT \'投资活动净现金流investment activities\', `NFCF` BIGINT DEFAULT NULL COMMENT \'筹资活动净现金流financing activities\', `ROE_TTM` BIGINT DEFAULT NULL COMMENT \'净资产收益率TTM Net asset yield\', PRIMARY KEY ( `id` ) ) ENGINE = INNODB DEFAULT CHARSET = UTF8MB4;\'\'\'.format(database_table_name) db_excute(createtable_sql) # 创建新表 print(createtable_sql) to_mysql(infile_path, database_table_name) # 导入infohash数据到mysql xl_log_analysis 表