python实现Bencode解码方法

近期搞项目中遇到Bencode解码的问题,就用Py写了个Bencode解码的代码。作为笔记保存参考。

BEncoding是BitTorrent用于传输数据结构的编码方式,这种编码方式支持四种类型的数据:string, int, Dictionary<string, object>, List<object>,各自的编码规则如下:

    • string类型的编码格式为[length]:[string]。以字符串的长度开头,加一个冒号,并以字符串内容结束。示例:"abc" => 3:abc
    • int类型的编码格式为i[int]e。以i开头,加上数字,以e结尾。 示例:123 => i123e
    • List<object>类型的编码格式为l[object]e。以l开头,加上列表中各个元素的编码(元素的类型同样为BEncoding支持的类型),以e结尾。 示例:List<"abc", 123> => l3:abci123ee
    • Dictionary<string, object>类型的编码格式为d[Key-Value Pair]e。以d开头,加上字典中每个键值对的编码(先键后值),以e结尾。 示例:Dictionary<{"name":"create chen"},{"age":23}> => d4:name11:create chen3:agei23ee
# Parse a List<object>, e.g. l3:abci123ee => ["abc", "123"] (plus the position trailer).
def decode_list(str, p):
    """Decode the elements of a bencoded container whose opening marker is at ``p``.

    The character at ``str[p]`` itself is never inspected; decoding starts at
    the character after it and continues until the first closing ``e``.

    Args:
        str: The bencoded text. The name shadows the builtin but is kept so
            that existing keyword callers keep working.
        p: Index of the container's opening marker (``l`` or ``d``).

    Returns:
        A flat list of decoded elements followed by two trailing entries: the
        literal string ``"p"`` and the index just past the closing ``e``.
        Integers are returned as their decimal text, not converted to ``int``.
        A nested ``l`` marker is skipped, so its elements are flattened into
        the result and the first ``e`` encountered ends the parse.
        If ``p`` is already at or past the end of ``str``, an empty list is
        returned (no trailer).

    Raises:
        ValueError: If the input ends before the closing ``e``, a string's
            declared length runs past the end of the input, a ``:`` or ``e``
            delimiter is missing, or an element starts with an unsupported
            character.
    """
    text = str  # local alias so the builtin-shadowing name is not used below
    size = len(text)
    items = []
    while p < size:
        nxt = p + 1
        if nxt >= size:
            # Previously an IndexError: the closing "e" was never seen.
            raise ValueError("unterminated container: input ended at index %d" % p)
        marker = text[nxt]
        if marker.isdigit():
            # String element: <length>:<bytes>
            colon = text.index(":", nxt, size)
            length = int(text[nxt:colon])
            last = colon + length  # index of the string's final character
            if last >= size:
                # Previously returned a silently truncated list without the trailer.
                raise ValueError(
                    "string at index %d declares length %d past end of input"
                    % (nxt, length)
                )
            items.append(text[colon + 1:last + 1])
            p = last
        elif marker == "i":
            # Integer element: i<digits>e, kept as text.
            end = text.index("e", nxt, size)
            items.append(text[nxt + 1:end])
            p = end
        elif marker == "e":
            # End of container: report where the caller should resume.
            p = nxt + 1
            items.append("p")
            items.append(p)
            return items
        elif marker == "l":
            # Nested list marker is stepped over; its elements land in ``items``.
            p = nxt
        else:
            # Previously an infinite loop: no branch advanced the cursor.
            raise ValueError(
                "unsupported element marker %r at index %d" % (marker, nxt)
            )
    return items
 
# Parse a Dictionary<string, object>, e.g. d4:name11:create chen3:agei23ee => {"name": "create chen", "age": "23", ...}
def decode_dict(str, p):
    """Decode a bencoded dictionary whose opening marker is at index ``p``.

    The flat token list produced by ``decode_list`` is read as alternating
    key/value entries. Because that list ends with ``"p"`` followed by the
    resume index, the returned dict also carries a ``"p"`` entry holding the
    index just past the dictionary; callers are expected to read and remove it.

    Args:
        str: The bencoded text.
        p: Index of the dictionary's opening ``d``.

    Returns:
        A dict mapping each key token to the token that follows it. When a key
        repeats, the last value wins.

    Raises:
        IndexError: If the token list has an odd number of entries.
    """
    tokens = decode_list(str, p)
    mapping = {}
    # Walk the tokens two at a time: even positions are keys, odd are values.
    for pos in range(0, len(tokens), 2):
        mapping[tokens[pos]] = tokens[pos + 1]
    return mapping
 
def _append_lines(lines):
    """Append each string in ``lines`` to text.txt, one per line."""
    # ``with`` guarantees the handle is closed even if a write fails.
    with open("text.txt", "a+") as out:
        for line in lines:
            out.write(line + "\n")


data = "d4:name11:create chen3:agei23eel3:abci123eei23e3:age"
print(data)
# ``p`` is the cursor into ``data``: the index of the next top-level value.
l = len(data)
p = 0
while p < l:
    # Dictionary value.
    if data[p] == "d":
        data_dict = decode_dict(data, p)
        # decode_dict stores the resume index under the "p" key.
        p = int(data_dict["p"])
        del data_dict["p"]
        print(data_dict)
        _append_lines(k + ":" + str(v) for k, v in data_dict.items())
    # List value.
    elif data[p] == "l":
        data_list = decode_list(data, p)
        # The last two entries are the "p" marker and the resume index.
        p = int(data_list.pop())
        data_list.pop()
        print(data_list)
        _append_lines(data_list)
    # Integer value (kept as text for convenience, e.g. "123", not converted to int).
    elif data[p] == "i":
        end = data.index("e", p, l)
        data_int = data[p + 1:end]
        p = end + 1
        print(data_int)
        _append_lines([data_int])
    # String value: <length>:<bytes>.
    elif data[p].isdigit():
        sep = data.index(":", p, l)
        # Echoes the ":" separator; retained so console output is unchanged.
        print(data[sep])
        stop = int(data[p:sep]) + sep + 1
        data_str = data[sep + 1:stop]
        print(data_str)
        p = stop
        _append_lines([data_str])
    else:
        # Without this branch an unrecognised character never advances ``p``
        # and the loop spins forever.
        raise ValueError(
            "unexpected character {!r} at index {}".format(data[p], p)
        )
print("It's done")