Skip to content

Commit

Permalink
Fixed bug, refactored get_metadata, added and tested successfully has…
Browse files Browse the repository at this point in the history
…hing of files inside zip, linting, styling, mypy typing
  • Loading branch information
atomprobe-tc committed Dec 19, 2023
1 parent a065978 commit 97c7f8a
Show file tree
Hide file tree
Showing 11 changed files with 366 additions and 320 deletions.
6 changes: 3 additions & 3 deletions imgs.batch.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/ikz_robert/"
datasource="../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_imgs/axon/"

# comments is detector mode
examples="kit/FeMoOx_AntiA_04_1k5x_CN.tif"
Expand All @@ -9,9 +9,9 @@ examples="ikz_martin/ALN_baoh_021.tif" # T2
examples="ikz_robert/T3_image.tif"
examples="ikz_robert/ETD_image.tif" # ETD
examples="ikz_martin/NavCam_normal_vis_light_ccd.tif" # NavCam

examples="0c8nA_3deg_003_AplusB_test.tif ALN_baoh_021.tif T3_image.tif ETD_image.tif NavCam_normal_vis_light_ccd.tif"

examples="axon/20210426T224437.049Raw0.png" #axon
examples="ReductionOfFeOx.zip"

for example in $examples; do
echo $example
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def __init__(self):
self.tmp: Dict = {}
for entry in NX_IMAGE_REAL_SPACE_SET_HDF_PATH:
if entry.endswith("-field") is True:
self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dataset")
self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset")
elif entry.endswith("-attribute") is True:
self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attribute")
self.tmp[entry[0:len(entry) - len("-attribute")]] = NxObject(eqv_hdf="attribute")
else:
self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="group")
self.tmp[entry[0:len(entry) - len("-group")]] = NxObject(eqv_hdf="group")
2 changes: 1 addition & 1 deletion pynxtools/dataconverter/readers/em/concepts/nxs_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self,
if (unit is not None) and (unit == ""):
raise ValueError(f"Value for argument unit needs to be a non-empty string !")
if (dtype is not None) and isinstance(dtype, type) is False:
raise ValueError(f"Value of argument dtype must not be None " \
raise ValueError(f"Value of argument dtype must not be None "
f" and a valid, ideally a numpy datatype !")
# self.doc = None # docstring
self.name = name # name of the field
Expand Down
10 changes: 5 additions & 5 deletions pynxtools/dataconverter/readers/em/concepts/nxs_spectrum_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
# pylint: disable=no-member,too-few-public-methods


from typing import Dict
from typing import Dict, List

from pynxtools.dataconverter.readers.em.concepts.nxs_object import NxObject


NX_SPECTRUM_SET_HDF_PATH = []
NX_SPECTRUM_SET_HDF_PATH: List = []
# this one needs an update !


Expand All @@ -34,8 +34,8 @@ def __init__(self):
self.tmp: Dict = {}
for entry in NX_SPECTRUM_SET_HDF_PATH:
if entry.endswith("-field") is True:
self.tmp[entry[0:len(entry)-len("-field")]] = NxObject(eqv_hdf="dataset")
self.tmp[entry[0:len(entry) - len("-field")]] = NxObject(eqv_hdf="dataset")
elif entry.endswith("-attribute") is True:
self.tmp[entry[0:len(entry)-len("-attribute")]] = NxObject(eqv_hdf="attribute")
self.tmp[entry[0:len(entry) - len("-attribute")]] = NxObject(eqv_hdf="attribute")
else:
self.tmp[entry[0:len(entry)-len("-group")]] = NxObject(eqv_hdf="group")
self.tmp[entry[0:len(entry) - len("-group")]] = NxObject(eqv_hdf="group")
3 changes: 2 additions & 1 deletion pynxtools/dataconverter/readers/em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def read(self,
# print("Create NeXus default plottable data...")
# em_default_plot_generator(template, 1)

if True is False:
run_block = False
if run_block is True:
nxs_plt = NxEmDefaultPlotResolver()
# if nxs_mtex is the sub-parser
resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot(
Expand Down
4 changes: 2 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str):
for req in reqs:
if req not in fp[f"{src}/SPD"].attrs.keys(): # also check for shape
raise ValueError(f"Required attribute named {req} not found in {src}/SPD !")

nyxe = {"y": fp[f"{src}/SPD"].attrs["NumberOfLines"][0],
"x": fp[f"{src}/SPD"].attrs["NumberOfPoints"][0],
"e": fp[f"{src}/SPD"].attrs["NumberofChannels"][0]}
Expand All @@ -334,7 +334,7 @@ def parse_and_normalize_eds_spd(self, fp, src: str, ckey: str):
# thereby these EDAX energy count arrays are just some payload inside a set of compressed chunks
# without some extra logic to resolve the third (energy) dimension reading them can be super inefficient
# so let's read chunk-by-chunk to reuse chunk cache, hopefully...
chk_bnds = {"x": [], "y": []}
chk_bnds: Dict = {"x": [], "y": []}
chk_info = {"ny": nyxe["y"], "cy": fp[f"{src}/SPD"].chunks[0],
"nx": nyxe["x"], "cx": fp[f"{src}/SPD"].chunks[1]}
for dim in ["y", "x"]:
Expand Down
127 changes: 60 additions & 67 deletions pynxtools/dataconverter/readers/em/subparsers/image_png_protochips.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from typing import Dict
from PIL import Image
from zipfile import ZipFile
from collections import OrderedDict

from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips_concepts import \
get_protochips_variadic_concept
Expand All @@ -34,28 +33,9 @@
import variadic_path_to_specific_path
from pynxtools.dataconverter.readers.em.subparsers.image_png_protochips_modifier import \
get_nexus_value
from pynxtools.dataconverter.readers.em.subparsers.image_base import \
ImgsBaseParser


def flatten_xml_to_dict(xml_content) -> dict:
    """Flatten a nested dict/list structure (e.g. from xmltodict) into a
    single-level ordered mapping with dot-delimited composite keys.

    Nested dict keys are joined as ``parent.child``; list elements are
    addressed as ``parent.[index].child``.  Leaf values are kept as-is.
    """
    # https://codereview.stackexchange.com/a/21035
    # https://stackoverflow.com/questions/38852822/how-to-flatten-xml-file-in-python
    flat = OrderedDict()
    for key, value in xml_content.items():
        if isinstance(value, dict):
            # nested subtree: recurse, then prefix every child key
            for subkey, subvalue in flatten_xml_to_dict(value).items():
                flat[f"{key}.{subkey}"] = subvalue
        elif isinstance(value, list):
            # nested list: encode the element index into the key
            for idx, element in enumerate(value):
                for subkey, subvalue in flatten_xml_to_dict(element).items():
                    flat[f"{key}.[{idx}].{subkey}"] = subvalue
        else:
            # everything else (only leafs should remain)
            flat[key] = value
    return flat
from pynxtools.dataconverter.readers.em.subparsers.image_base import ImgsBaseParser
from pynxtools.dataconverter.readers.em.utils.xml_utils import flatten_xml_to_dict
from pynxtools.dataconverter.readers.shared.shared_utils import get_sha256_of_file_content


class ProtochipsPngSetSubParser(ImgsBaseParser):
Expand Down Expand Up @@ -99,8 +79,8 @@ def check_if_zipped_png_protochips(self):
try:
nparr = np.array(png)
self.png_info[file] = np.shape(nparr)
except:
raise ValueError(f"Loading image data in-place from {self.file_path}:{file} failed !")
except IOError:
print(f"Loading image data in-place from {self.file_path}:{file} failed !")
if method == "smart": # knowing where to hunt width and height in PNG metadata
# https://dev.exiv2.org/projects/exiv2/wiki/The_Metadata_in_PNG_files
magic = fp.read(8)
Expand All @@ -125,6 +105,53 @@ def check_if_zipped_png_protochips(self):
print("All tests passed successfully")
self.supported = True

def get_xml_metadata(self, file, fp):
    """Extract and flatten Protochips XML metadata embedded in one PNG of the zip.

    Reads the PNG from file handle *fp* (a member of the zip archive), looks for
    the "MicroscopeControlImage" XML chunk in the PNG info, flattens it to
    dot-delimited concept keys, and caches resolved key/value pairs in
    self.tmp["meta"][file].
    """
    try:
        fp.seek(0)  # fp may have been read already (e.g. by the shape check); rewind
        with Image.open(fp) as png:
            png.load()
            if "MicroscopeControlImage" in png.info.keys():
                meta = flatten_xml_to_dict(
                    xmltodict.parse(png.info["MicroscopeControlImage"]))
                # first phase analyse the collection of Protochips metadata concept instance symbols and reduce to unique concepts
                grpnm_lookup = {}
                for concept, value in meta.items():
                    # not every key is allowed to define a concept
                    # print(f"{concept}: {value}")
                    # NOTE(review): the '.' in this pattern are unescaped and match any
                    # character; works for keys like "Foo.[0].Bar" but is looser than
                    # r"\.\[[0-9]+\]\." — confirm intended
                    idxs = re.finditer(r".\[[0-9]+\].", concept)
                    # counting via sum(...) consumes the finditer iterator; only the
                    # count is needed here
                    if (sum(1 for _ in idxs) > 0):  # is_variadic
                        markers = [".Name", ".PositionerName"]
                        for marker in markers:
                            if concept.endswith(marker):
                                # strip the marker suffix: the remaining prefix names the group
                                grpnm_lookup[f"{concept[0:len(concept)-len(marker)]}"] = value
                    else:
                        grpnm_lookup[concept] = value
                # second phase, evaluate each concept instance symbol wrt to its prefix coming from the unique concept
                self.tmp["meta"][file] = {}
                for k, v in meta.items():
                    grpnms = None
                    idxs = re.finditer(r".\[[0-9]+\].", k)
                    if (sum(1 for _ in idxs) > 0):  # is variadic
                        # prefix up to and including the last "]" of the last index token
                        search_argument = k[0:k.rfind("].") + 1]
                        for parent_grpnm, child_grpnm in grpnm_lookup.items():
                            if parent_grpnm.startswith(search_argument):
                                grpnms = (parent_grpnm, child_grpnm)
                                break
                        if grpnms is not None:
                            if len(grpnms) == 2:
                                # positioner settings keep their trailing field name appended
                                if "PositionerSettings" in k and k.endswith(".PositionerName") is False:
                                    self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}"] = v
                                # plain ".Value" entries map directly onto the resolved group name
                                if k.endswith(".Value"):
                                    self.tmp["meta"][file][f"{grpnms[0]}.{grpnms[1]}"] = v
                    else:
                        self.tmp["meta"][file][f"{k}"] = v
                # TODO::simplify and check that metadata end up correctly in self.tmp["meta"][file]
    except ValueError:
        print(f"Flattening XML metadata content {self.file_path}:{file} failed !")

def get_file_hash(self, file, fp):
    """Compute and cache the SHA256 checksum of *file*'s content read from *fp*."""
    checksum = get_sha256_of_file_content(fp)
    self.tmp["meta"][file]["sha256"] = checksum

def parse_and_normalize(self):
"""Perform actual parsing filling cache self.tmp."""
if self.supported is True:
Expand All @@ -133,55 +160,21 @@ def parse_and_normalize(self):
with ZipFile(self.file_path) as zip_file_hdl:
for file in self.png_info.keys():
with zip_file_hdl.open(file) as fp:
try:
with Image.open(fp) as png:
png.load()
if "MicroscopeControlImage" in png.info.keys():
meta = flatten_xml_to_dict(
xmltodict.parse(png.info["MicroscopeControlImage"]))
# first phase analyse the collection of Protochips metadata concept instance symbols and reduce to unique concepts
self.tmp["meta"][file] = {}
for concept, value in meta.items():
# not every key is allowed to define a concept
# print(f"{concept}: {value}")
idxs = re.finditer(".\[[0-9]+\].", concept)
if (sum(1 for _ in idxs) > 0): # is_variadic
markers = [".Name", ".PositionerName"]
for marker in markers:
if concept.endswith(marker):
self.tmp["meta"][file][f"{concept[0:len(concept)-len(marker)]}"] = value
else:
self.tmp["meta"][file][concept] = value
# print(f"First phase of metadata parsing {self.file_path}:{file} successful")
# second phase, evaluate each concept instance symbol wrt to its prefix coming from the unique concept
for k, v in meta.items():
grpnms = None
idxs = re.finditer(".\[[0-9]+\].", k)
if (sum(1 for _ in idxs) > 0): # is variadic
search_argument = k[0:k.rfind("].")+1]
for parent_grpnm, child_grpnm in self.tmp["meta"][file].items():
if parent_grpnm.startswith(search_argument):
grpnms = (parent_grpnm, child_grpnm)
break
if grpnms is not None:
if len(grpnms) == 2:
if "PositionerSettings" in k and k.endswith(".PositionerName") is False:
print(f"vv: {grpnms[0]}.{grpnms[1]}{k[k.rfind('.') + 1:]}: {v}")
if k.endswith(".Value"):
print(f"vv: {grpnms[0]}.{grpnms[1]}: {v}")
else:
print(f"nv: {k}: {v}")
# TODO::simplify and check that metadata end up correctly in self.tmp["meta"][file]
except:
raise ValueError(f"Flattening XML metadata content {self.file_path}:{file} failed !")
self.get_xml_metadata(file, fp)
self.get_file_hash(file, fp)
# print(f"Debugging self.tmp.file.items {file}")
# for k, v in self.tmp["meta"][file].items():
# print(f"{k}: {v}")
print(f"{self.file_path} metadata within PNG collection processed "
f"successfully ({len(self.tmp['meta'].keys())} PNGs evaluated).")
else:
print(f"{self.file_path} is not a Protochips-specific "
f"PNG file that this parser can process !")

def process_into_template(self, template: dict) -> dict:
    """Copy cached metadata (and eventually image data) into the NeXus template.

    Only acts when the earlier format checks set self.supported; always
    returns the (possibly updated) template so calls can be chained.
    """
    if self.supported is True:
        self.process_event_data_em_metadata(template)
        # NOTE(review): the active call below and the commented duplicate look
        # like removed/added lines of a diff — confirm whether the data-
        # processing step is meant to be enabled or disabled here
        self.process_event_data_em_data(template)
        # self.process_event_data_em_data(template)
    return template

def process_event_data_em_metadata(self, template: dict) -> dict:
Expand Down
Loading

0 comments on commit 97c7f8a

Please sign in to comment.