目标检测
深度学习和目标检测系列教程 18-300:关于yolo、voc格式标签转化问题
@author:runsen
PASCAL VOC(The PASCAL Visual Object Classes)是一个世界级的计算机视觉挑战赛,第一届比赛在2005年举办,此后每年举办一次,直到2012年举办最后一届。PASCAL 的全称是 Pattern Analysis, Statistical Modelling and Computational Learning,官方地址是 http://host.robots.ox.ac.uk/pascal/VOC/
一个典型的xml标签文件如下
<annotation>
  <folder>images</folder>
  <filename>003707.jpg</filename>
  <path>g:\sz210628\images\003707.jpg</path>
  <source>
    <database>unknown</database>
  </source>
  <size>
    <width>1920</width>
    <height>1080</height>
    <depth>3</depth>
  </size>
  <segmented>0</segmented>
  <object>
    <name>航空器头</name>
    <pose>unspecified</pose>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>827</xmin>
      <ymin>224</ymin>
      <xmax>916</xmax>
      <ymax>333</ymax>
    </bndbox>
  </object>
</annotation>

- folder:所属文件夹
- filename:文件名
- database:数据库名
- annotation:标记文件格式
- size:图像尺寸,width宽、height高,depth通道数目
- segmented:分割
- object:表示一个目标,其子标签包括 name(标签名)、pose(拍摄角度,取值有 front、rear、left、right 和 unspecified)、truncated(是否被截断,即图片中是否未包含完整目标)、difficult 和 bndbox
- difficult:检测难易程度,1表示难以检测、0表示否
- bndbox:目标所在的位置,用xmin、ymin、xmax、ymax来表示
yolo要求每张图片对应一个txt标注文件。
每行一个标注对象,分别是:class_id x y width height。
class_id: 从0到(classes-1)的整数。
x y width height: 相对于图像宽度和高度归一化后的浮点值,取值范围为 (0.0, 1.0]。例如:x = pixel_x / image_width,height = pixel_height / image_height
x y: 是标注对象的中心点。
例如:
0 0.750000 0.501111 0.315000 0.993333

下面脚本读取 Pascal VOC xml 文件,并将它们转换为 YOLO txt 文件。
import os
import xml.etree.ElementTree as ET

# Class names that may appear in the Pascal VOC XML annotations. The position
# of a name in this list is the class_id written to the YOLO txt file.
classes = ['person', 'rider', 'car', 'bus', 'truck', 'bike', 'motor',
           'tl_green', 'tl_red', 'tl_yellow', 'tl_none', 't_sign', 'train']


def convert(size, box):
    """Convert a VOC pixel box into a normalized YOLO box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels -- note that both x
            values come first.

    Returns:
        ``(x_center, y_center, width, height)``, each scaled by the
        corresponding image dimension.

    Raises:
        ZeroDivisionError: if the image width or height is 0.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    # NOTE(review): the "- 1" presumably compensates for 1-based VOC pixel
    # coordinates (same formula as darknet's voc_label.py) -- confirm before
    # changing it.
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def converting_annotation(ann_file, yolo_out_str, ann_dir="annotations"):
    """Convert Pascal VOC XML files into YOLO txt label files.

    One ``<stem>.txt`` is written per XML file; each line has the form
    ``class_id x_center y_center width height``. Objects whose name is not
    listed in ``classes`` are skipped. A file that cannot be read or parsed
    is reported on stdout and skipped, so one bad annotation does not stop
    the whole batch.

    Args:
        ann_file: iterable of XML file names located inside ``ann_dir``.
        yolo_out_str: output folder for the YOLO txt files (created when
            missing).
        ann_dir: folder that contains the XML files.
    """
    os.makedirs(yolo_out_str, exist_ok=True)
    for ann in ann_file:
        # splitext keeps dots inside the stem ("a.b.xml" -> "a.b.txt").
        txt_file = os.path.splitext(ann)[0] + '.txt'
        try:
            root = ET.parse(os.path.join(ann_dir, ann)).getroot()
            size = root.find('size')
            w = int(size.find('width').text)
            h = int(size.find('height').text)
            lines = []
            for obj in root.iter('object'):
                cls = obj.find('name').text
                if cls not in classes:
                    continue
                cls_id = classes.index(cls)
                xmlbox = obj.find('bndbox')
                b = tuple(float(xmlbox.find(tag).text)
                          for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
                data = convert((w, h), b)
                lines.append(
                    str(cls_id) + " " + " ".join(str(a) for a in data) + '\n')
        except (OSError, ET.ParseError, AttributeError, TypeError,
                ValueError, ZeroDivisionError) as e:
            # Missing tags surface as AttributeError/TypeError, bad numbers
            # as ValueError, a zero-sized image as ZeroDivisionError.
            print(ann, e)
            continue
        else:
            # Written only after a successful parse, so a broken XML file
            # does not leave a truncated label file behind.
            out_path = os.path.join(yolo_out_str, txt_file)
            with open(out_path, 'w', encoding="utf-8") as out_file:
                out_file.writelines(lines)


if __name__ == '__main__':
    ann_dir = "annotations"
    ann_file = sorted(
        name for name in os.listdir(ann_dir) if name.lower().endswith('.xml'))
    # "label" is the folder where the YOLO txt files are stored.
    converting_annotation(ann_file, "label", ann_dir=ann_dir)

# Article text that followed this script: the next script reads YOLO txt
# files and converts them into Pascal VOC XML files.
import csv
import os

# Folder that holds the images.
img_path = os.path.join("images", "trains")
# Folder that receives the generated XML files (original author's note here
# was "keep it blank"; its meaning is not clear from the script).
save_path = os.path.join("labels", "xml")
# Folder that holds the YOLO txt label files.
txt_folder = os.path.join("labels", "trains")

# Class names; a YOLO class_id is an index into this list.
labels = ['person', 'rider', 'car', 'bus', 'truck', 'bike', 'motor',
          'tl_green', 'tl_red', 'tl_yellow', 'tl_none', 't_sign', 'train']


def csvread(fn):
    """Read a space-separated YOLO label file.

    Args:
        fn: path of the txt file.

    Returns:
        A list of rows, each a list of non-empty string fields. Blank lines
        and the empty fields produced by repeated or trailing spaces are
        dropped so that every row starts with the class id.
    """
    rows = []
    with open(fn, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            fields = [field for field in row if field]
            if fields:
                rows.append(fields)
    return rows


def convert_label(txt_file):
    """Map a YOLO class id to its class name.

    Args:
        txt_file: the class id as a string or int (e.g. ``"10"``), or a full
            label row whose first field is the class id.

    Returns:
        The matching entry of ``labels``.

    Raises:
        ValueError: if the id is not an integer index into ``labels``.
    """
    class_id = txt_file[0] if isinstance(txt_file, (list, tuple)) else txt_file
    try:
        # The whole id is parsed; looking only at its first character would
        # map "10" to labels[1].
        index = int(class_id)
    except (TypeError, ValueError):
        raise ValueError("invalid class id: %r" % (class_id,)) from None
    if not 0 <= index < len(labels):
        raise ValueError("class id out of range: %r" % (class_id,))
    return labels[index]


def extract_coor(txt_file, img_width, img_height):
    """Turn one normalized YOLO row into pixel corner coordinates.

    Args:
        txt_file: one label row ``[class_id, x_center, y_center, width,
            height]`` with the box values relative to the image size.
        img_width: image width in pixels.
        img_height: image height in pixels.

    Returns:
        ``(x_min, x_max, y_min, y_max)`` as floats, in pixels.
    """
    x_rect_mid = float(txt_file[1])
    y_rect_mid = float(txt_file[2])
    width_rect = float(txt_file[3])
    height_rect = float(txt_file[4])
    x_min_rect = ((2 * x_rect_mid * img_width) - (width_rect * img_width)) / 2
    x_max_rect = ((2 * x_rect_mid * img_width) + (width_rect * img_width)) / 2
    y_min_rect = ((2 * y_rect_mid * img_height) - (height_rect * img_height)) / 2
    y_max_rect = ((2 * y_rect_mid * img_height) + (height_rect * img_height)) / 2
    return x_min_rect, x_max_rect, y_min_rect, y_max_rect


def _build_annotation(img_name, image_file, img_size, rows):
    """Build the ``<annotation>`` element tree for one image."""
    # Imported here so the pure helpers above stay importable without lxml.
    from lxml import etree

    img_width, img_height, img_depth = img_size
    root = etree.Element("annotation")
    folder_name = os.path.basename(os.path.normpath(img_path))
    etree.SubElement(root, "folder").text = folder_name
    etree.SubElement(root, "filename").text = img_name
    # <path> points at the image file itself, not just its folder.
    etree.SubElement(root, "path").text = image_file

    source = etree.SubElement(root, "source")
    etree.SubElement(source, "database").text = "unknown"

    size = etree.SubElement(root, "size")
    etree.SubElement(size, "width").text = "%d" % img_width
    etree.SubElement(size, "height").text = "%d" % img_height
    etree.SubElement(size, "depth").text = "%d" % img_depth

    etree.SubElement(root, "segmented").text = "0"

    for row in rows:
        x_min, x_max, y_min, y_max = extract_coor(row, img_width, img_height)
        obj = etree.SubElement(root, "object")
        etree.SubElement(obj, "name").text = convert_label(row[0])
        etree.SubElement(obj, "pose").text = "unspecified"
        etree.SubElement(obj, "truncated").text = "0"
        etree.SubElement(obj, "difficult").text = "0"
        bndbox = etree.SubElement(obj, "bndbox")
        # "%d" truncates the float pixel values to integers.
        etree.SubElement(bndbox, "xmin").text = "%d" % x_min
        etree.SubElement(bndbox, "ymin").text = "%d" % y_min
        etree.SubElement(bndbox, "xmax").text = "%d" % x_max
        etree.SubElement(bndbox, "ymax").text = "%d" % y_max
    return root


def main():
    """Write one Pascal VOC XML file per labelled image in ``img_path``."""
    from lxml import etree
    from PIL import Image

    os.makedirs(save_path, exist_ok=True)
    for img_name in sorted(os.listdir(img_path)):
        stem = os.path.splitext(img_name)[0]
        image_file = os.path.join(img_path, img_name)
        txt_path = os.path.join(txt_folder, stem + ".txt")
        if not os.path.isfile(txt_path):
            # Images without a label file are skipped instead of aborting.
            print("skip (no label file):", image_file)
            continue
        rows = csvread(txt_path)

        # Read the image information once and close the file afterwards.
        with Image.open(image_file) as im:
            img_width, img_height = im.size
            # Number of bands works for every format, unlike ".layers".
            img_depth = len(im.getbands())

        root = _build_annotation(
            img_name, image_file, (img_width, img_height, img_depth), rows)
        file_output = etree.tostring(root, pretty_print=True, encoding='utf-8')
        xml_dir = os.path.join(save_path, '%s.xml' % stem)
        print(xml_dir)
        with open(xml_dir, 'w', encoding="utf-8") as ff:
            ff.write(file_output.decode('utf-8'))


if __name__ == '__main__':
    main()

# Article text that followed this script: when working with a VOC dataset you
# often need to collect every class name that appears in its labels; the
# script for that is shown next.
import os
import xml.dom.minidom as xmldom

# Folder that holds the Pascal VOC XML annotation files.
annotation_path = "annotations"


def collect_labels(annotation_dir):
    """Collect every distinct class name used in a VOC annotation folder.

    Args:
        annotation_dir: folder containing the XML annotation files.

    Returns:
        A list of the distinct ``<name>`` values found in ``<object>``
        elements, in first-seen order. Files are visited in sorted order so
        the result is the same on every run.
    """
    found = []
    seen = set()
    for file_name in sorted(os.listdir(annotation_dir)):
        # Anything that is not an XML file would make the parser raise.
        if not file_name.lower().endswith(".xml"):
            continue
        dom = xmldom.parse(os.path.join(annotation_dir, file_name))
        # Root element of the document, then all <object> elements below it.
        for obj in dom.documentElement.getElementsByTagName("object"):
            names = obj.getElementsByTagName("name")
            # An empty <name/> has no text node to read.
            if not names or names[0].firstChild is None:
                continue
            label = names[0].firstChild.data
            if label not in seen:
                seen.add(label)
                found.append(label)
    return found


if __name__ == "__main__":
    labels = collect_labels(annotation_path)
    print(labels)

# Article text that followed this script: when training a YOLO model you need
# to count the objects of each class to find which labels have few samples --
# how should the resulting sample imbalance be handled?
下面代码是样本类别统计计算的代码。
import os
import xml.etree.ElementTree as ET
from collections import Counter


def parse_obj(xml_path, filename):
    """Read the object names from one Pascal VOC XML file.

    Args:
        xml_path: folder that contains the XML file (a trailing path
            separator is accepted).
        filename: name of the XML file inside ``xml_path``.

    Returns:
        A list with one ``{'name': <class name>}`` dict per ``<object>``
        element that is a direct child of the root.
    """
    tree = ET.parse(os.path.join(xml_path, filename))
    return [{'name': obj.find('name').text} for obj in tree.findall('object')]


def read_image(image_path, filename):
    """Return ``[width, height, area]`` in pixels for one image file.

    Args:
        image_path: folder that contains the image.
        filename: name of the image file inside ``image_path``.
    """
    # Imported here so the XML statistics work without Pillow installed.
    from PIL import Image

    with Image.open(os.path.join(image_path, filename)) as im:
        w, h = im.size
    return [w, h, w * h]


def count_objects(xml_path):
    """Count how many objects of each class a VOC annotation folder holds.

    Args:
        xml_path: folder containing the XML annotation files.

    Returns:
        A ``Counter`` mapping class name to number of objects, with keys in
        first-seen order (files are visited in sorted order).
    """
    counts = Counter()
    for filename in sorted(os.listdir(xml_path)):
        if not filename.lower().endswith('.xml'):
            continue
        # Every occurrence is added to the tally, not reset to 1.
        counts.update(obj['name'] for obj in parse_obj(xml_path, filename))
    return counts


if __name__ == '__main__':
    xml_path = 'annotations'
    num_objs = count_objects(xml_path)
    for name, total in num_objs.items():
        print('{}:{}个'.format(name, total))
    print('信息统计算完毕。')

# Article section heading that followed this script: "Summary".
以上是凯发ag旗舰厅登录网址下载为你收集整理的深度学习和目标检测系列教程 18-300:关于yolo、voc格式标签转化问题的全部内容,希望文章能够帮你解决所遇到的问题。
如果觉得凯发ag旗舰厅登录网址下载网站内容还不错,欢迎将凯发ag旗舰厅登录网址下载推荐给好友。
- 上一篇:
- 下一篇: