递归回溯涉及到的变量变化
def parse_xml_to_dict(self, xml):
    """Recursively convert an XML element into a nested dict, tracing each step.

    The ``print`` calls are intentional: they show how the local ``result``
    of every recursion level changes while the tree is walked. ``"bbb"`` is
    printed once for each element that has children, ``"aaa"`` once for each
    child visited, and ``result`` is printed right before and right after the
    child's value is stored.

    Args:
        self: Not used by the body; it is only forwarded to the recursive
            call so the two-parameter signature is honoured.
        xml: An element-like node that exposes ``tag`` and ``text``, supports
            ``len()`` (number of children) and iteration over its children.

    Returns:
        ``{xml.tag: xml.text}`` for an element without children, otherwise
        ``{xml.tag: {child_tag: child_value, ...}}``.
    """
    # Explicit length check: an element with no children is a leaf, so its
    # tag/text pair is returned directly.
    if len(xml) == 0:
        return {xml.tag: xml.text}

    # Each call gets its own fresh ``result``; the caller's dict is untouched.
    result = {}
    print("bbb")
    for child in xml:
        print("aaa")
        # Recurse into the child. ``self`` must be passed along, otherwise the
        # call is one positional argument short of the signature.
        child_result = parse_xml_to_dict(self, child)
        print(result)
        # NOTE: siblings that share a tag (e.g. several ``object`` elements)
        # overwrite each other here, so only the last one survives. To keep
        # all of them, collect that tag into a list instead:
        #     if child.tag not in result:
        #         result[child.tag] = []
        #     result[child.tag].append(child_result[child.tag])
        result[child.tag] = child_result[child.tag]
        print(result)
    return {xml.tag: result}
输入数据:
<annotation>
<folder>VOC2007</folder>
<filename>000001.jpg</filename>
<source>
<database>The VOC2007 Database</database>
<annotation>PASCAL VOC2007</annotation>
<image>flickr</image>
<flickrid>341012865</flickrid>
</source>
<owner>
<flickrid>Fried Camels</flickrid>
<name>Jinky the Fruit Bat</name>
</owner>
<size>
<width>353</width>
<height>500</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>dog</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>48</xmin>
<ymin>240</ymin>
<xmax>195</xmax>
<ymax>371</ymax>
</bndbox>
</object>
<object>
<name>person</name>
<pose>Left</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>8</xmin>
<ymin>12</ymin>
<xmax>352</xmax>
<ymax>498</ymax>
</bndbox>
</object>
</annotation>
输出如下:
bbb
aaa
{}
{'folder': 'VOC2007'}
aaa
{'folder': 'VOC2007'}
{'folder': 'VOC2007', 'filename': '000001.jpg'}
aaa
bbb
aaa
{}
{'database': 'The VOC2007 Database'}
aaa
{'database': 'The VOC2007 Database'}
{'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007'}
aaa
{'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007'}
{'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr'}
aaa
{'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr'}
{'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}
{'folder': 'VOC2007', 'filename': '000001.jpg'}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}}
aaa
bbb
aaa
{}
{'flickrid': 'Fried Camels'}
aaa
{'flickrid': 'Fried Camels'}
{'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}}
aaa
bbb
aaa
{}
{'width': '353'}
aaa
{'width': '353'}
{'width': '353', 'height': '500'}
aaa
{'width': '353', 'height': '500'}
{'width': '353', 'height': '500', 'depth': '3'}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}}
aaa
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}, 'segmented': '0'}
aaa
bbb
aaa
{}
{'name': 'dog'}
aaa
{'name': 'dog'}
{'name': 'dog', 'pose': 'Left'}
aaa
{'name': 'dog', 'pose': 'Left'}
{'name': 'dog', 'pose': 'Left', 'truncated': '1'}
aaa
{'name': 'dog', 'pose': 'Left', 'truncated': '1'}
{'name': 'dog', 'pose': 'Left', 'truncated': '1', 'difficult': '0'}
aaa
bbb
aaa
{}
{'xmin': '48'}
aaa
{'xmin': '48'}
{'xmin': '48', 'ymin': '240'}
aaa
{'xmin': '48', 'ymin': '240'}
{'xmin': '48', 'ymin': '240', 'xmax': '195'}
aaa
{'xmin': '48', 'ymin': '240', 'xmax': '195'}
{'xmin': '48', 'ymin': '240', 'xmax': '195', 'ymax': '371'}
{'name': 'dog', 'pose': 'Left', 'truncated': '1', 'difficult': '0'}
{'name': 'dog', 'pose': 'Left', 'truncated': '1', 'difficult': '0', 'bndbox': {'xmin': '48', 'ymin': '240', 'xmax': '195', 'ymax': '371'}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}, 'segmented': '0'}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': {'name': 'dog', 'pose': 'Left', 'truncated': '1', 'difficult': '0', 'bndbox': {'xmin': '48', 'ymin': '240', 'xmax': '195', 'ymax': '371'}}}
aaa
bbb
aaa
{}
{'name': 'person'}
aaa
{'name': 'person'}
{'name': 'person', 'pose': 'Left'}
aaa
{'name': 'person', 'pose': 'Left'}
{'name': 'person', 'pose': 'Left', 'truncated': '1'}
aaa
{'name': 'person', 'pose': 'Left', 'truncated': '1'}
{'name': 'person', 'pose': 'Left', 'truncated': '1', 'difficult': '0'}
aaa
bbb
aaa
{}
{'xmin': '8'}
aaa
{'xmin': '8'}
{'xmin': '8', 'ymin': '12'}
aaa
{'xmin': '8', 'ymin': '12'}
{'xmin': '8', 'ymin': '12', 'xmax': '352'}
aaa
{'xmin': '8', 'ymin': '12', 'xmax': '352'}
{'xmin': '8', 'ymin': '12', 'xmax': '352', 'ymax': '498'}
{'name': 'person', 'pose': 'Left', 'truncated': '1', 'difficult': '0'}
{'name': 'person', 'pose': 'Left', 'truncated': '1', 'difficult': '0', 'bndbox': {'xmin': '8', 'ymin': '12', 'xmax': '352', 'ymax': '498'}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': {'name': 'dog', 'pose': 'Left', 'truncated': '1', 'difficult': '0', 'bndbox': {'xmin': '48', 'ymin': '240', 'xmax': '195', 'ymax': '371'}}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': {'name': 'person', 'pose': 'Left', 'truncated': '1', 'difficult': '0', 'bndbox': {'xmin': '8', 'ymin': '12', 'xmax': '352', 'ymax': '498'}}}
{'folder': 'VOC2007', 'filename': '000001.jpg', 'source': {'database': 'The VOC2007 Database', 'annotation': 'PASCAL VOC2007', 'image': 'flickr', 'flickrid': '341012865'}, 'owner': {'flickrid': 'Fried Camels', 'name': 'Jinky the Fruit Bat'}, 'size': {'width': '353', 'height': '500', 'depth': '3'}, 'segmented': '0', 'object': {'name': 'person', 'pose': 'Left', 'truncated': '1', 'difficult': '0', 'bndbox': {'xmin': '8', 'ymin': '12', 'xmax': '352', 'ymax': '498'}}}
注意,for 循环相当于遍历同一层的节点,比如这里就是 <folder>、<filename>、<source>、<owner> 等。
当 <folder> 遍历完之后,由于它没有孩子节点,递归调用直接返回,也就是回溯。注意此时是回溯到 for 循环内部、print("aaa") 下面发起递归调用的那一行继续执行;接着把返回值写入 result,此时 result 就是 <folder> 遍历完之后的值。因为 result 是在 for 循环外面定义的,所以它会在同一层的各次循环之间不断累积,<filename> 也是同理。
当走到 <source> 的时候,它有子节点,这个时候就会递归调用函数本身,在新的这一层调用里 result 重新从空字典开始。当这一层的 for 循环执行完之后,最后有一个 return,它会把这一层的 result 返回到处理 <source> 的调用处,也就是把子节点的 result 作为返回值交给了父节点:
child_result = parse_xml_to_dict(child) # 递归遍历标签信息
child_result 里存的就是子节点那一层返回的 result(形式为 {标签名: result});同时,回到父节点这一层之后,result 又是父节点自己的 result,因为每一层递归调用都有各自独立的局部变量 result。