python 调用hive查询实现类似存储过程

需求:把数据仓库中所有表的定义结构(列名和类型)导出并保存到一个新的文件中,保存后的内容类似下面的示例;对于按月份重复建立的表(表名中带年月,如 ods_mbportal_201407),只保留 7 月份的表即可。

****************ods_log_info*****************

lid string

uid string

mb_uid string

operation string

module string

result string

ts string

remark1 string

remark2 string

remark3 string

****************ods_mbportal_201407*****************

data_time_thread string

data_module string

data_operation string

data_result string

data_ipaddess string

day string

****************************************************************************

#!/usr/bin/env python

import sys

import re

from hive_service import ThriftHive

from hive_service.ttypes import HiveServerException

from thrift import Thrift

from thrift.transport import TSocket

from thrift.transport import TTransport

from thrift.protocol import TBinaryProtocol

# Dump the column definitions of the Hive tables into dw_struct.txt.
#
# Flow: connect to the Hive Thrift service on localhost:10000, list the
# tables with "show tables", run "desc <table>" for every table whose name
# does not match the month filter below, and write each result under a
# "****************<table>*****************" banner line.
#
# Thrift errors are caught at the bottom and only their message is printed.
# Syntax is kept compatible with Python 2.6+ (single-argument print(...),
# "except ... as ...", "with").
try:
    transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 10000))
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    client = ThriftHive.Client(protocol)

    transport.open()
    try:
        # Same text as the original triple-quoted query literal.
        client.execute("\n\nshow tables ")
        tables = client.fetchAll()

        # Matches names containing 2013/2014 followed by a two-digit field
        # whose last digit is not 7; such tables are skipped, so of the
        # monthly tables only the "...07" ones are described.
        # The original class was written "[0-6|8-9]"; inside [] the "|" is a
        # literal pipe, not alternation, so it is dropped here.
        skip_pattern = re.compile(r"201[34][01][0-68-9]")

        # "with" guarantees the output file is closed even if a query fails.
        with open(r"dw_struct.txt", "w") as fp:
            print("Excuting ZZZZZZZZZZ")
            for table in tables:
                if skip_pattern.search(table) is not None:
                    continue

                client.execute("desc " + table)
                columns = client.fetchAll()

                fp.write("****************" + table + "*****************\n")
                for column in columns:
                    # Rows starting with "col" are dropped -- presumably the
                    # "col_name ..." header row of the desc output (TODO
                    # confirm); note this also drops any real column whose
                    # name starts with "col".
                    if not column.startswith("col"):
                        fp.write(column + "\n")
                # NOTE(review): the original indentation was lost; this blank
                # separator is assumed to be written once per table.
                fp.write("\n")

        print("Excute OK")
    finally:
        # Release the connection on both the success and the failure path.
        transport.close()
except Thrift.TException as tx:
    print('%s' % (tx.message))