# Python: batch-generate XML annotation files from connected-component bounding-box coordinates

#!/usr/bin/python
# -*- coding=utf-8 -*-
# author : Manuel
# date: 2019-05-15

from xml.etree import ElementTree as ET
import numpy as np
from skimage import data,filters,segmentation,measure,morphology,color
from scipy.misc import imread
import os
from os import getcwd


# Names of every entry in the 'ls' directory (resolved against the current
# working directory). The listing is taken once, when this module is loaded;
# main() iterates over it and keeps only the JPEG files.
IMAGES_LIST=os.listdir('ls')#contents of the image directory

# Connected-component segmentation; returns a list of bounding-box coordinates.
def connected_domain_position_get(image, min_area=1000, margin=10):
    """Return padded bounding boxes of the large connected regions in *image*.

    The image is binarised with Otsu's threshold, morphologically closed with
    a 3x3 square, stripped of every component that touches the image border,
    and then labelled. Each remaining region whose area is at least
    *min_area* contributes one box, enlarged by *margin* pixels on every side
    and clamped so that it never leaves the image.

    Args:
        image: Grayscale image array (main() passes the output of
            color.rgb2gray). The bounding boxes are unpacked as four values,
            so a two-dimensional array is expected.
        min_area: Regions with a smaller pixel area are ignored.
        margin: Padding, in pixels, added around each bounding box.

    Returns:
        A list of [left, upper, right, lower] lists, one per kept region,
        where left/right are column coordinates and upper/lower are row
        coordinates.
    """
    thresh = filters.threshold_otsu(image)  # automatic global threshold
    # Closing fills small dark gaps inside the bright foreground.
    bw = morphology.closing(image > thresh, morphology.square(3))
    # clear_border returns the cleaned array; its result must be used because
    # current scikit-image versions do not modify the input in place.
    cleared = segmentation.clear_border(bw)
    label_image = measure.label(cleared)  # label 0 is background

    height, width = image.shape[:2]
    coordinates_list = []
    for region in measure.regionprops(label_image):
        # Skip small regions.
        if region.area < min_area:
            continue
        minr, minc, maxr, maxc = region.bbox
        # Pad the box, but keep it inside the image so the annotation never
        # contains negative or out-of-range coordinates.
        coordinates_list.append([
            max(minc - margin, 0),
            max(minr - margin, 0),
            min(maxc + margin, width),
            min(maxr + margin, height),
        ])
    return coordinates_list

# Append one first-level <object> branch (a single bounding box) to the tree.
def create_object(root,xi,yi,xa,ya):
    """Add an <object> element describing one bounding box under *root*.

    Args:
        root: Parent element that receives the new <object> child.
        xi: Value written to <xmin>.
        yi: Value written to <ymin>.
        xa: Value written to <xmax>.
        ya: Value written to <ymax>.

    The four coordinates are formatted with '%s' because element text has to
    be a string. Nothing is returned; *root* is modified in place.
    """
    obj_node = ET.SubElement(root, 'object')

    # Second-level children whose text is the same for every object.
    constant_fields = (
        ('name', 'AreaMissing'),
        ('pose', 'Unspecified'),
        ('truncated', '0'),
        ('difficult', '0'),
    )
    for tag, text in constant_fields:
        child = ET.SubElement(obj_node, tag)
        child.text = text

    # <bndbox> holds the four corner coordinates in xmin/ymin/xmax/ymax order.
    box_node = ET.SubElement(obj_node, 'bndbox')
    for tag, value in (('xmin', xi), ('ymin', yi), ('xmax', xa), ('ymax', ya)):
        corner = ET.SubElement(box_node, tag)
        corner.text = '%s' % value

# Build the XML annotation tree for one image.
def create_tree(image_name, width=512, height=384, depth=3):
    """Create the <annotation> root for *image_name* and return it.

    The root is also stored in the module-level ``annotation`` variable, which
    is how existing callers pick it up.

    Args:
        image_name: File name of the image, e.g. 'photo.jpg'.
        width: Image width written to <size>/<width>.
        height: Image height written to <size>/<height>.
        depth: Channel count written to <size>/<depth>.

    Returns:
        The newly created <annotation> element, containing the folder,
        filename, path, source, size and segmented branches.
    """
    global annotation
    # Root of the tree.
    annotation = ET.Element('annotation')

    # First-level branch: folder that holds the images.
    folder = ET.SubElement(annotation, 'folder')
    folder.text = 'ls'

    # First-level branch: image name without its extension.
    # str.strip('.jpg') removes any of the characters '.', 'j', 'p', 'g' from
    # BOTH ends (turning 'photo.jpg' into 'hoto'), so split the extension off
    # properly instead.
    filename = ET.SubElement(annotation, 'filename')
    filename.text = os.path.splitext(image_name)[0]

    # First-level branch: path of the image under the current working directory.
    path = ET.SubElement(annotation, 'path')
    path.text = os.path.join(getcwd(), 'ls', image_name)

    # First-level branch: source, with its database child.
    source = ET.SubElement(annotation, 'source')
    database = ET.SubElement(source, 'database')
    database.text = 'Unknown'

    # First-level branch: image size. Element text must be a string.
    size = ET.SubElement(annotation, 'size')
    for tag, value in (('width', width), ('height', height), ('depth', depth)):
        ET.SubElement(size, tag).text = str(value)

    # First-level branch: segmented flag.
    segmented = ET.SubElement(annotation, 'segmented')
    segmented.text = '0'

    return annotation




def main():
    """Write one XML annotation file for every JPEG listed in IMAGES_LIST.

    Each image is read from './ls', converted to grayscale and segmented into
    connected regions; every returned bounding box becomes an <object> entry.
    The resulting tree is written to 'ls/<image name without extension>.xml'.
    A file is written even when no region is found.
    """
    for image_name in IMAGES_LIST:
        # Only process JPEG files.
        if not image_name.endswith('.jpg'):
            continue

        # Segment the image; the result has the form [[a,b,c,d],[e,f,g,h],...].
        image = color.rgb2gray(imread(os.path.join(r'./ls', image_name)))
        coordinates_list = connected_domain_position_get(image)

        # create_tree publishes the new root through the module-level
        # `annotation` variable, which is read below.
        create_tree(image_name)
        for left, upper, right, lower in coordinates_list:
            create_object(annotation, left, upper, right, lower)

        # str.strip('.jpg') strips a set of characters from both ends (for
        # example 'photo.jpg' -> 'hoto'), which produced wrong and potentially
        # colliding output names; remove just the extension instead.
        stem = os.path.splitext(image_name)[0]
        tree = ET.ElementTree(annotation)
        tree.write('ls/%s.xml' % stem)



if __name__ == '__main__':
    main()

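# Note: every value assigned as element text in the XML tree must be a string; otherwise writing the file raises an error.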