From 5aea8945c580eaf9d7d50232eaf48ec884786c86 Mon Sep 17 00:00:00 2001 From: Benoit Coste Date: Fri, 29 Jan 2021 15:30:58 +0100 Subject: [PATCH] Handling missing duplicate points (#221) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Handling missing duplicate points ## What is a duplicate point ? When a child section starts with a point at the same coordinates as the parent section last point, we call this a 'duplicate point'. ℹ️ To consider a point as a duplicate, we only consider its spatial coordinates. The diameter is not taken into account. This means a duplicate is allowed to have any diameter value. ## Context: Some ASC files comes with duplicate points, some do not. So it seems that both format are valid and need to be supported. ## Specification The following specification has been created to exactly recreate NEURON's behavior: * When a duplicate point is **not** in the file (ie. a section does not start at the coordinate of its parent section last point), the duplicate is added. The diameter of the added point is the diameter of the point that comes right after it. * When a duplicate is already in the file, its coordinates and diameter value are used as they are. * When a single child section is merged with its parent section and the child section has a duplicate with a different diameter value, the diameter of the *parent* section is kept. --- src/readers/morphologyASC.cpp | 35 +++++---- tests/data/nested_single_children.asc | 11 +-- tests/test_2_neurolucida.py | 105 ++++++++++++++++++-------- 3 files changed, 100 insertions(+), 51 deletions(-) diff --git a/src/readers/morphologyASC.cpp b/src/readers/morphologyASC.cpp index a115f5236..acd729e6b 100644 --- a/src/readers/morphologyASC.cpp +++ b/src/readers/morphologyASC.cpp @@ -123,11 +123,10 @@ class NeurolucidaParser return_id = -1; } else { SectionType section_type = TokenSectionTypeMap.at(token); - insertLastPointParentSection(parent_id, properties); + insertLastPointParentSection(parent_id, properties, diameters); - // Condition to remove single point section that duplicate parent - // point See test_single_point_section_duplicate_parent for an - // example + // Condition to remove a single point section that is made of only the duplicate point + // See test_single_point_section_duplicate_parent for an example if (parent_id > -1 && properties._points.size() == 1) { return_id = parent_id; } else { @@ -151,33 +150,43 @@ class NeurolucidaParser /* Add the last point of parent section to the beginning of this section if not already present. + See https://github.com/BlueBrain/MorphIO/pull/221 + + The diameter is taken from the child section next point as does NEURON. + Here is the spec: + https://bbpteam.epfl.ch/project/issues/browse/NSETM-1178?focusedCommentId=135030&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-135030 + + In term of diameters, the result should look like the right picture of: + https://github.com/BlueBrain/NeuroM/issues/654#issuecomment-332864540 The idea is that these two structures should represent the same morphology: - (3 -8 0 2) and (3 -8 0 2) - (3 -10 0 2) (3 -10 0 2) + (3 -8 0 5) and (3 -8 0 5) + (3 -10 0 5) (3 -10 0 5) ( ( - (0 -10 0 2) (3 -10 0 2) <-- duplicate parent point - (-3 -10 0 2) (0 -10 0 2) + (0 -10 0 2) (3 -10 0 2) <-- duplicate parent point, note that the + (-3 -10 0 2) (0 -10 0 2) diameter is 2 and not 5 | (-3 -10 0 2) (6 -10 0 2) | - (9 -10 0 2) (3 -10 0 2) <-- duplicate parent point - ) (6 -10 0 2) + (9 -10 0 2) (3 -10 0 2) <-- duplicate parent point, note that the + ) (6 -10 0 2) diameter is 2 and not 5 (9 -10 0 2) ) */ - void insertLastPointParentSection(int32_t parentId, morphio::Property::PointLevel& properties) { + void insertLastPointParentSection(int32_t parentId, + morphio::Property::PointLevel& properties, + std::vector& diameters) { if (parentId < 0) // Discard root sections return; auto parent = nb_.section(static_cast(parentId)); auto lastParentPoint = parent->points()[parent->points().size() - 1]; - auto lastParentDiameter = parent->diameters()[parent->diameters().size() - 1]; + auto childSectionNextDiameter = diameters[0]; if (lastParentPoint == properties._points[0]) return; properties._points.insert(properties._points.begin(), lastParentPoint); - properties._diameters.insert(properties._diameters.begin(), lastParentDiameter); + properties._diameters.insert(properties._diameters.begin(), childSectionNextDiameter); } bool parse_neurite_section(int32_t parent_id, Token token) { diff --git a/tests/data/nested_single_children.asc b/tests/data/nested_single_children.asc index 97f760514..b709cdbd3 100644 --- a/tests/data/nested_single_children.asc +++ b/tests/data/nested_single_children.asc @@ -5,14 +5,15 @@ ( (Color Blue) (Axon) - (0 0 0 1) - (0 0 1 1) + (0 0 0 8) + (0 0 1 7) ( - (0 0 2 1) + (0 0 2 6) ( - (0 0 3 1) + (0 0 2 6) ; this duplicate should disapear in the merge + (0 0 3 5) ( - (0 0 4 1) + (0 0 4 4) ) ) ) diff --git a/tests/test_2_neurolucida.py b/tests/test_2_neurolucida.py index ba68d9bf8..8f371aadf 100644 --- a/tests/test_2_neurolucida.py +++ b/tests/test_2_neurolucida.py @@ -1,13 +1,13 @@ import os + import morphio import numpy as np -from nose.tools import eq_, ok_, assert_equal - -from numpy.testing import assert_array_equal -from nose import tools as nt from morphio import Morphology, RawDataError, SomaError, ostream_redirect +from nose import tools as nt +from nose.tools import assert_equal, eq_, ok_ +from numpy.testing import assert_array_equal -from . utils import tmp_asc_file, _test_asc_exception, captured_output, assert_substring +from .utils import _test_asc_exception, assert_substring, captured_output, tmp_asc_file _path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") @@ -135,39 +135,38 @@ def test_skip_header(): without_duplicate = ''' ((Dendrite) - (3 -4 0 2) - (3 -6 0 2) - (3 -8 0 2) - (3 -10 0 2) + (3 -4 0 5) + (3 -6 0 5) + (3 -8 0 5) + (3 -10 0 5) ( (0 -10 0 2) - (-3 -10 0 2) + (-3 -10 0 0.3) | (6 -10 0 2) - (9 -10 0 2) + (9 -10 0 0.3) ) ) ''' with_duplicate = ''' ((Dendrite) - (3 -4 0 2) - (3 -6 0 2) - (3 -8 0 2) - (3 -10 0 2) + (3 -4 0 5) + (3 -6 0 5) + (3 -8 0 5) + (3 -10 0 5) ( (3 -10 0 2) ; duplicate (0 -10 0 2) - (-3 -10 0 2) + (-3 -10 0 0.3) | (3 -10 0 2) ; duplicate (6 -10 0 2) - (9 -10 0 2) + (9 -10 0 0.3) ) ) ''' - def test_read_with_duplicates(): '''Section points are duplicated in the file what I think the @@ -190,11 +189,15 @@ def test_read_with_duplicates(): [[3, -10, 0], [0, -10, 0], [-3, -10, 0]]) + assert_array_equal(n.root_sections[0].children[0].diameters, + np.array([2, 2, 0.3], dtype=np.float32)) assert_array_equal(n.root_sections[0].children[1].points, [[3, -10, 0], [6, -10, 0], [9, -10, 0]]) + assert_array_equal(n.root_sections[0].children[1].diameters, + np.array([2, 2, 0.3], dtype=np.float32)) def test_read_without_duplicates(): @@ -210,6 +213,36 @@ def test_read_without_duplicates(): assert_array_equal(n_with_duplicate.root_sections[0].points, n_without_duplicate.root_sections[0].points) + assert_array_equal(n_with_duplicate.root_sections[0].children[0].diameters, + n_without_duplicate.root_sections[0].children[0].diameters) + + assert_array_equal(n_with_duplicate.root_sections[0].diameters, + n_without_duplicate.root_sections[0].diameters) + + +def test_explicit_duplicates_with_arbitrary_diameter(): + '''If the duplicate is already in the file with an arbitrary diameter, it should be preserved''' + with tmp_asc_file(''' + ((Dendrite) + (3 -4 0 5) + (3 -6 0 5) + (3 -8 0 5) + (3 -10 0 5) + ( + (3 -10 0 20) ; duplicate + (0 -10 0 2) + (-3 -10 0 0.3) + | + (3 -10 0 2) + (6 -10 0 2) + (9 -10 0 0.3) + ) + ) + ''') as tmp_file: + n = Morphology(tmp_file.name) + assert_array_equal(n.root_sections[0].children[0].diameters, + np.array([20, 2, 0.3], dtype=np.float32)) + def test_unfinished_file(): _test_asc_exception(''' @@ -235,14 +268,14 @@ def test_empty_sibling(): with captured_output() as (_, err): with ostream_redirect(stdout=True, stderr=True): with tmp_asc_file('''((Dendrite) - (3 -4 0 2) - (3 -6 0 2) - (3 -8 0 2) - (3 -10 0 2) + (3 -4 0 10) + (3 -6 0 9) + (3 -8 0 8) + (3 -10 0 7) ( - (3 -10 0 2) - (0 -10 0 2) - (-3 -10 0 2) + (3 -10 0 6) + (0 -10 0 5) + (-3 -10 0 4) | ; <-- empty sibling but still works ! ) ) @@ -262,13 +295,15 @@ def test_empty_sibling(): [0, -10, 0], [-3, -10, 0]], dtype=np.float32)) + assert_array_equal(n.root_sections[0].diameters, + np.array([10, 9, 8, 7, 5, 4], dtype=np.float32)) assert_equal(len(n.annotations), 1) annotation = n.annotations[0] assert_equal(annotation.type, morphio.AnnotationType.single_child) assert_equal(annotation.line_number, 6) assert_array_equal(annotation.points, [[3, -10, 0], [0, -10, 0], [-3, -10, 0]]) - assert_array_equal(annotation.diameters, [2, 2, 2]) + assert_array_equal(annotation.diameters, [6, 5, 4]) def test_nested_single_child(): with captured_output() as (_, err): @@ -280,8 +315,7 @@ def test_nested_single_child(): [0., 0., 2.], [0., 0., 3.], [0., 0., 4.]]) - - + assert_array_equal(n.root_sections[0].diameters, np.array([8, 7, 6, 5, 4], dtype=np.float32)) def test_section_single_point(): @@ -295,9 +329,9 @@ def test_section_single_point(): (3 -8 0 2) (3 -10 0 2) ( - (3 -10 2 4) ; single point section + (3 -10 2 4) ; single point section without duplicate | - (3 -10 0 2) ; normal section + (3 -10 0 2) ; normal section with duplicate (3 -10 1 2) (3 -10 2 2) ) @@ -309,6 +343,8 @@ def test_section_single_point(): assert_array_equal(n.root_sections[0].children[0].points, np.array([[3, -10, 0], [3, -10, 2]], dtype=np.float32)) + assert_array_equal(n.root_sections[0].children[0].diameters, + np.array([4, 4], dtype=np.float32)) assert_array_equal(n.root_sections[0].children[1].points, np.array([[3, -10, 0], @@ -325,9 +361,9 @@ def test_single_children(): (3 -8 0 2) (3 -10 0 2) ( - (3 -10 0 2) ; merged with parent section - (0 -10 0 2) ; merged with parent section - (-3 -10 0 2) ; merged with parent section + (3 -10 0 4) ; merged with parent section + (0 -10 0 4) ; merged with parent section + (-3 -10 0 3) ; merged with parent section ( (-5 -5 5 5) | @@ -356,6 +392,8 @@ def test_single_children(): [0, -10, 0], [-3, -10, 0]], dtype=np.float32)) + assert_array_equal(n.root_sections[0].diameters, + np.array([2, 2, 2, 2, 4, 3], dtype=np.float32)) assert_equal(len(n.root_sections[0].children), 2) @@ -396,6 +434,7 @@ def test_single_point_section_duplicate_parent(): assert_array_equal(neuron.root_sections[0].points, [[ 3., -4., 0.], [ 3., -10., 0.]]) + assert_array_equal(neuron.root_sections[0].diameters, np.array([2, 2], dtype=np.float32)) def test_single_point_section_duplicate_parent_complex():