diff --git a/README.md b/README.md
index b97dda3..277ed06 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 # XmlToTxt
 ImageNet file xml format to [Darknet](https://github.com/pjreddie/darknet) text format.
+Works well with directories and subdirectories.
 
 ### Installation
 ```bash
@@ -41,3 +42,4 @@ Output text file.
 
 ### Motivation
 I used [Darknet](https://github.com/pjreddie/darknet) for real-time object detection and classification. Sometimes you need to collect your own training dataset for train your model. I collected training dataset images and fine awesome [tool](https://github.com/tzutalin/labelImg) for labeling images. But it generates xml files. So I needed to implement tool which translates from ImageNet xml format to Darknet text format.
+Also compatible with the latest [YOLOv5](https://github.com/ultralytics/yolov5) by Ultralytics.
diff --git a/objectmapper.py b/objectmapper.py
index 1d0bbc1..bfc01b1 100644
--- a/objectmapper.py
+++ b/objectmapper.py
@@ -1,5 +1,5 @@
 import logging
-
+import os
 import declxml as xml
 
 
@@ -25,14 +25,16 @@ def __init__(self):
             xml.string("filename")
         ])
 
-    def bind(self, xml_file_path):
-        return xml.parse_from_file(self.processor, xml_file_path=xml_file_path)
+    def bind(self, xml_file_path, xml_dir):
+        ann = xml.parse_from_file(self.processor, xml_file_path=os.path.join(xml_dir, xml_file_path))
+        ann.filename = xml_file_path
+        return ann
 
-    def bind_files(self, xml_file_paths):
+    def bind_files(self, xml_file_paths, xml_dir):
         result = []
         for xml_file_path in xml_file_paths:
             try:
-                result.append(self.bind(xml_file_path=xml_file_path))
+                result.append(self.bind(xml_file_path=xml_file_path, xml_dir=xml_dir))
             except Exception as e:
                 logging.error("%s", e.args)
         return result
diff --git a/out/image-0000001.txt b/out/subdir/image-0000001.txt
similarity index 100%
rename from out/image-0000001.txt
rename to out/subdir/image-0000001.txt
diff --git a/out/image-0000003.txt b/out/subdir/image-0000003.txt
similarity index 100%
rename from out/image-0000003.txt
rename to out/subdir/image-0000003.txt
diff --git a/reader.py b/reader.py
index 109f5c0..bc54647 100644
--- a/reader.py
+++ b/reader.py
@@ -7,9 +7,12 @@ def __init__(self, xml_dir):
 
     def get_xml_files(self):
         xml_filenames = []
-        for xml_filename in os.listdir(self.xml_dir):
-            if xml_filename.endswith(".xml"):
-                xml_filenames.append(os.path.join(self.xml_dir, xml_filename))
+        for root, subdirectories, files in os.walk(self.xml_dir):
+            for filename in files:
+                if filename.endswith(".xml"):
+                    file_path = os.path.join(root, filename)
+                    file_path = os.path.relpath(file_path, start=self.xml_dir)
+                    xml_filenames.append(file_path)
         return xml_filenames
 
     @staticmethod
diff --git a/transformer.py b/transformer.py
index 754846a..a9f1cea 100644
--- a/transformer.py
+++ b/transformer.py
@@ -14,12 +14,18 @@ def transform(self):
         xml_files = reader.get_xml_files()
         classes = reader.get_classes()
         object_mapper = ObjectMapper()
-        annotations = object_mapper.bind_files(xml_files)
+        annotations = object_mapper.bind_files(xml_files, xml_dir=self.xml_dir)
         self.write_to_txt(annotations, classes)
 
     def write_to_txt(self, annotations, classes):
         for annotation in annotations:
-            with open(os.path.join(self.out_dir, self.darknet_filename_format(annotation.filename)), "w+") as f:
+            output_path = os.path.join(self.out_dir, self.darknet_filename_format(annotation.filename))
+            # Create the output's parent directory on demand. exist_ok avoids a check-then-create
+            # race; the guard skips an empty dirname, for which os.makedirs("") would raise.
+            output_dir = os.path.dirname(output_path)
+            if output_dir:
+                os.makedirs(output_dir, exist_ok=True)
+            with open(output_path, "w+") as f:
                 f.write(self.to_darknet_format(annotation, classes))
 
     def to_darknet_format(self, annotation, classes):
diff --git a/xml/image-0000001.xml b/xml/subdir/image-0000001.xml
similarity index 100%
rename from xml/image-0000001.xml
rename to xml/subdir/image-0000001.xml
diff --git a/xml/image-0000003.xml b/xml/subdir/image-0000003.xml
similarity index 100%
rename from xml/image-0000003.xml
rename to xml/subdir/image-0000003.xml