From cf7308d5fb5b1ec3b34063e715cc0440541ddf8f Mon Sep 17 00:00:00 2001 From: Robert Langlois Date: Mon, 5 Apr 2021 08:17:19 -0700 Subject: [PATCH] Issue-259: Add simple functions with doc test examples (#261) * Issue-259: Add simple functions with doc test examples Initial port Try to debug new bindings Ensure errors are reported Try to work around issue Fix swig bug Most tests fixed Fix all tests Cherry pick over fixes * Fix bug * Fix warning and unit test * Fix doctest again --- docs/Doxyfile.in | 4 +- docs/src/changes.md | 7 + docs/src/python_binding.md | 8 + interop/logic/plot/plot_sample_qc.h | 2 +- .../metrics/corrected_intensity_metric.h | 32 +- interop/model/metrics/extraction_metric.h | 27 + interop/model/metrics/index_metric.h | 30 + src/ext/python/CMakeLists.txt | 19 +- src/ext/python/__init__.py.in | 3 + src/ext/python/__main__.py.in | 2 + src/ext/python/core.py | 1097 +++++++++++++++++ src/ext/swig/arrays/arrays_numpy_impl.i | 25 + src/ext/swig/run.i | 1 + src/ext/swig/run_metrics.i | 1 + src/ext/swig/summary.i | 1 + src/ext/swig/table.i | 14 + src/interop/logic/plot/plot_sample_qc.cpp | 2 +- .../csharp/metrics/ExtendedTileMetricsTest.cs | 2 +- src/tests/csharp/metrics/PerformanceTest.cs | 2 +- src/tests/csharp/run/RunInfoTest.cs | 2 +- src/tests/csharp/run/RunParametersTest.cs | 2 +- src/tests/python/CMakeLists.txt | 4 +- tools/package.bat | 6 +- tools/package.sh | 2 + tools/prereqs/docker-centos5-install.sh | 3 +- tools/prereqs/docker-centos7-install.sh | 7 - tools/prereqs/env_windows.bat | 2 +- .../pull_request/centos7_gcc-485-debug.sh | 6 + .../pull_request/centos7_gcc-485-ppc.sh | 23 + tools/teamcity/pull_request/msvc.bat | 55 + .../pull_request/msvc_2015_py27_dotnet.bat | 2 + .../pull_request/msvc_2017_py27_dotnet.bat | 2 + .../msvc_2017_py27_dotnetstandard.bat | 2 + .../pull_request/msvc_2017_py34_dotnet.bat | 2 + .../pull_request/msvc_2017_py35_dotnet.bat | 2 + .../pull_request/msvc_2017_py36_dotnet.bat | 2 + .../pull_request/msvc_2017_py37_dotnet.bat | 2 + .../pull_request/msvc_2017_py38_dotnet.bat | 2 + .../pull_request/msvc_2019_py27_dotnet.bat | 2 + 39 files changed, 1382 insertions(+), 27 deletions(-) create mode 100644 src/ext/python/core.py create mode 100644 tools/teamcity/pull_request/centos7_gcc-485-debug.sh create mode 100644 tools/teamcity/pull_request/centos7_gcc-485-ppc.sh create mode 100644 tools/teamcity/pull_request/msvc.bat create mode 100644 tools/teamcity/pull_request/msvc_2015_py27_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py27_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py27_dotnetstandard.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py34_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py35_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py36_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py37_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2017_py38_dotnet.bat create mode 100644 tools/teamcity/pull_request/msvc_2019_py27_dotnet.bat diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index eef33f936..0ceb8174b 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -793,7 +793,7 @@ INPUT_ENCODING = UTF-8 # *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, # *.vhdl, *.ucf, *.qsf, *.as and *.js. 
-FILE_PATTERNS = *.h *.cpp *.md *.cs +FILE_PATTERNS = *.h *.cpp *.md *.cs *.py # TODO: *.py # The RECURSIVE tag can be used to specify whether or not subdirectories should @@ -825,7 +825,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = cmake-build-* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the diff --git a/docs/src/changes.md b/docs/src/changes.md index 317691e22..ba982f172 100644 --- a/docs/src/changes.md +++ b/docs/src/changes.md @@ -1,5 +1,12 @@ # Changes {#changes} +## v1.1.22 + +Date | Description +---------- | ----------- +2021-03-24 | Issue-259: Add simple functions with doc test examples + + ## v1.1.21 Date | Description diff --git a/docs/src/python_binding.md b/docs/src/python_binding.md index 38b2def58..5944cbcb2 100644 --- a/docs/src/python_binding.md +++ b/docs/src/python_binding.md @@ -30,6 +30,14 @@ If you see this error: Then upgrade numpy and try again. +## New simplified interface + + from interop import * + ar = imaging("path/to/run_folder") + +See the new [interop.core](namespacecore.html) wrapper for the simplified interface. + + ## Tips & Tricks 1. To see which methods/fields are available: diff --git a/interop/logic/plot/plot_sample_qc.h b/interop/logic/plot/plot_sample_qc.h index 16883db96..d9071d3b5 100644 --- a/interop/logic/plot/plot_sample_qc.h +++ b/interop/logic/plot/plot_sample_qc.h @@ -27,7 +27,7 @@ namespace illumina { namespace interop { namespace logic { namespace plot void plot_sample_qc(model::metrics::run_metrics &metrics, const size_t lane, model::plot::plot_data &data) - INTEROP_THROW_SPEC((model::index_out_of_bounds_exception)); + INTEROP_THROW_SPEC((model::index_out_of_bounds_exception, std::bad_alloc)); }}}} diff --git a/interop/model/metrics/corrected_intensity_metric.h b/interop/model/metrics/corrected_intensity_metric.h index f41c3bb16..9d88a881c 100644 --- a/interop/model/metrics/corrected_intensity_metric.h +++ b/interop/model/metrics/corrected_intensity_metric.h @@ -213,6 +213,28 @@ namespace illumina { namespace interop { namespace model { namespace metrics INTEROP_ASSERT(m_corrected_int_called.size() == constants::NUM_OF_BASES); INTEROP_ASSERT(m_called_counts.size() == constants::NUM_OF_BASES_AND_NC); } + + /** Constructor + * + * @note Version 4 + * @param lane lane number + * @param tile tile number + * @param cycle cycle number + * @param called_count number of clusters called per base + */ + corrected_intensity_metric(const uint_t lane, + const uint_t tile, + const uint_t cycle, + const uint_array_t& called_count) : + metric_base::base_cycle_metric(lane, tile, cycle), + m_average_cycle_intensity(std::numeric_limits<ushort_t>::max()), + m_corrected_int_all(constants::NUM_OF_BASES, std::numeric_limits<ushort_t>::max()), + m_corrected_int_called(constants::NUM_OF_BASES, std::numeric_limits<float>::quiet_NaN()), + m_called_counts(called_count), + m_signal_to_noise(std::numeric_limits<float>::quiet_NaN()) + { + INTEROP_ASSERT(called_count.size() == static_cast<size_t>(constants::NUM_OF_BASES_AND_NC)); + } /** Constructor * * @note Version 4 * @param lane lane number * @param tile tile number * @param cycle cycle number * @param called_counts number of clusters called per base + * @param num_of_counts number of bases */
corrected_intensity_metric(const uint_t lane, const uint_t tile, const uint_t cycle, - const uint_array_t& called_counts) : + const ::uint32_t* called_counts, + const size_t num_of_counts, + const size_t /*dummy*/, /* dummy parameters work around swig bug */ + const size_t /*dummy*/) : metric_base::base_cycle_metric(lane, tile, cycle), m_average_cycle_intensity(std::numeric_limits<ushort_t>::max()), m_corrected_int_all(constants::NUM_OF_BASES, std::numeric_limits<ushort_t>::max()), m_corrected_int_called(constants::NUM_OF_BASES, std::numeric_limits<float>::quiet_NaN()), - m_called_counts(called_counts), + m_called_counts(called_counts, called_counts + std::min(num_of_counts, static_cast<size_t>(constants::NUM_OF_BASES_AND_NC))), m_signal_to_noise(std::numeric_limits<float>::quiet_NaN()) { - INTEROP_ASSERT(called_counts.size() == constants::NUM_OF_BASES_AND_NC); + INTEROP_ASSERT(num_of_counts == static_cast<size_t>(constants::NUM_OF_BASES_AND_NC)); } public: diff --git a/interop/model/metrics/extraction_metric.h b/interop/model/metrics/extraction_metric.h index a0bc26e94..5cc339367 100644 --- a/interop/model/metrics/extraction_metric.h +++ b/interop/model/metrics/extraction_metric.h @@ -262,6 +262,33 @@ namespace illumina { namespace interop { namespace model { namespace metrics { } + /** Constructor + * + * @note Version 3 + * @param lane lane number + * @param tile tile number + * @param cycle cycle number + * @param intensity_values 90th percentile of intensities for the given channel + * @param intensity_count number of channels + * @param focus_scores focus score for the given channel + * @param focus_count number of channels + */ + extraction_metric(const uint_t lane, + const uint_t tile, + const uint_t cycle, + const ::uint16_t* intensity_values, + const size_t intensity_count, + const float* focus_scores, + const size_t focus_count, + const size_t /*dummy*/) : + metric_base::base_cycle_metric(lane, tile, cycle), + m_date_time_csharp(0), + m_date_time(0), + m_max_intensity_values(intensity_values, intensity_values+intensity_count), + m_focus_scores(focus_scores, focus_scores+focus_count) + { + } + public: /** Setter * * diff --git a/interop/model/metrics/index_metric.h b/interop/model/metrics/index_metric.h index 8375c587e..c11870e27 100644 --- a/interop/model/metrics/index_metric.h +++ b/interop/model/metrics/index_metric.h @@ -358,6 +358,36 @@ namespace illumina { namespace interop { namespace model { namespace metrics static const char *prefix() { return "Index"; } + /** Percentage of PF clusters on the tile that have been demultiplexed + * + * Dependent on tile_metric + * + * @return percent of demultiplexed PF clusters + */ + float percent_demultiplexed(const std::string& sample_id) const + { + uint64_t total_demultiplexed_clusters = 0; + if (sample_id.empty()) + { + for (size_t index_array_counter = 0; index_array_counter < m_indices.size(); ++index_array_counter) + { + total_demultiplexed_clusters += m_indices[index_array_counter].cluster_count(); + } + } + else + { + for (size_t index_array_counter = 0; index_array_counter < m_indices.size(); ++index_array_counter) + { + if(m_indices[index_array_counter].sample_id() == sample_id) + { + total_demultiplexed_clusters = m_indices[index_array_counter].cluster_count(); + break; + } + } + } + return static_cast<float>(total_demultiplexed_clusters)/m_cluster_count_pf * 100; + } + private: index_array_t m_indices; float m_cluster_count; // Derived from tile metric diff --git a/src/ext/python/CMakeLists.txt b/src/ext/python/CMakeLists.txt index 1c700d7e6..b2b4fbef8 100644 ---
a/src/ext/python/CMakeLists.txt +++ b/src/ext/python/CMakeLists.txt @@ -146,7 +146,7 @@ string(REPLACE ";" "','" PY_GEN_LIST "${py_gens}") set(INTEROP_VERSION ${VERSION_SHORT}${VERSION_DEV}) configure_file(__init__.py.in interop/__init__.py @ONLY) configure_file(__main__.py.in interop/__main__.py @ONLY) -file(COPY ${CMAKE_SOURCE_DIR}/src/tests/python/CoreTests.py DESTINATION interop) + add_custom_command( TARGET python_lib POST_BUILD COMMAND ${CMAKE_COMMAND} @@ -155,8 +155,21 @@ add_custom_command( TARGET python_lib POST_BUILD -DCONFIG_INPUT_FILE=${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in -DCONFIG_OUTPUT_FILE=${CMAKE_CURRENT_BINARY_DIR}/setup.py -P ${CMAKE_SOURCE_DIR}/cmake/ConfigureFile.cmake + COMMAND ${CMAKE_COMMAND} + -DINTEROP_VERSION=${VERSION_SHORT}${VERSION_DEV} + -DINTEROP_LIB_LIST="${PY_GEN_LIST}" + -DCONFIG_INPUT_FILE=${CMAKE_CURRENT_SOURCE_DIR}/__main__.py.in + -DCONFIG_OUTPUT_FILE=${CMAKE_CURRENT_BINARY_DIR}/__main__.py + -P ${CMAKE_SOURCE_DIR}/cmake/ConfigureFile.cmake + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_SOURCE_DIR}/core.py + ${CMAKE_CURRENT_BINARY_DIR}/interop/core.py + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_SOURCE_DIR}/src/tests/python/CoreTests.py + ${CMAKE_CURRENT_BINARY_DIR}/interop/CoreTests.py ) + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DESTINATION share/illumina/interop/src FILES_MATCHING @@ -173,7 +186,7 @@ set(SWIG_GEN_PYTHON_SOURCE_FILES ${py_files} CACHE INTERNAL "Python scripts gene if(NOT PYTHONINTERP_FOUND) - message(WARNING "Cannot find Python Interpretor, cannot create wheel package") + message(WARNING "Cannot find Python Interpreter, cannot create wheel package") return() endif() @@ -193,4 +206,4 @@ if(NOT SKIP_PACKAGE_ALL_WHEEL) endif() -set(PYTHON_BUILD_AVAILABLE "Python ${PYTHON_VERSION_STRING}" CACHE INTERNAL "All dependencies are satisfied for the Java Build" FORCE) \ No newline at end of file +set(PYTHON_BUILD_AVAILABLE "Python ${PYTHON_VERSION_STRING}" CACHE INTERNAL "All dependencies are satisfied for the Java Build" FORCE) diff --git a/src/ext/python/__init__.py.in b/src/ext/python/__init__.py.in index 5317456c5..fe59b1ea4 100644 --- a/src/ext/python/__init__.py.in +++ b/src/ext/python/__init__.py.in @@ -7,3 +7,6 @@ __doc_url__ = "http://illumina.github.io/interop/index.html" __version__ = "@INTEROP_VERSION@" __maintainer__ = "Illumina, inc." __contact__ = "https://github.com/Illumina/interop/issues" + +from interop.core import * + diff --git a/src/ext/python/__main__.py.in b/src/ext/python/__main__.py.in index be5c8ce42..a843d5cae 100644 --- a/src/ext/python/__main__.py.in +++ b/src/ext/python/__main__.py.in @@ -1,4 +1,5 @@ import interop +import interop.core import unittest import argparse from interop import @PY_MOD_NAME_LIST@ @@ -16,6 +17,7 @@ def execute_from_commandline(): if param.test: testsuite = unittest.makeSuite(CoreTests) unittest.TextTestRunner(verbosity=1).run(testsuite) + interop.core._run_doctests() if __name__ == "__main__": execute_from_commandline(); diff --git a/src/ext/python/core.py b/src/ext/python/core.py new file mode 100644 index 000000000..8e1eb66e8 --- /dev/null +++ b/src/ext/python/core.py @@ -0,0 +1,1097 @@ +"""@package interop {#interop_core} +Core routines to simplify using the InterOp Library + +InterOp is built around a single data structure called a `run_metrics` object. This contains the full set of InterOps +along with the RunInfo.xml and some of the RunParameters.xml.
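+
+Beyond the tables below, the parsed RunInfo.xml is available directly from the object; a minimal sketch, assuming
+a hypothetical run folder path (see `read` below):
+>>> run_info = read("some/path/run_folder_name").run_info() # doctest: +SKIP
+>>> channel_names = run_info.channels() # doctest: +SKIP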
+ +A run metrics object can be read in as follows: +>>> from interop import read +>>> run_metrics = read("some/path/run_folder_name") # doctest: +SKIP + +Core routines take the run_metrics object and convert it into a table represented by a structured NumPy array. This can, +in turn, be converted to a pandas DataFrame or any other data structure. + +The core routines include the following: + +>>> from interop import index_summary +>>> index_summary(run_metrics_with_indexing) +array([(1, 0.4556, 507.7778, 260.3334, 768.1111, 1800., 2000.)], + dtype=[('Lane', '>> from interop import summary +>>> summary(run_metrics_example) +array([(0.36666667, 6.6666665, 0.)], + dtype=[('Error Rate', '>> from interop import indexing +>>> indexing(run_metrics_with_indexing) +array([(1., 1101., 'ATCACGAC-AAGGTTCA', '1', 4570., 900., 507.77777), + (1., 1101., 'ATCACGAC-GGGGGGGG', '2', 2343., 900., 260.33334)], + dtype=[('Lane', '>> from interop import imaging +>>> imaging(run_metrics_example) +rec.array([(1., 1101., 1., 1., 1., 0.1, 10., 10., 25. , 33.3, 33.3, 33.3, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 2., 1., 2., 0.2, 5., 15., 12.5, 42.9, 28.6, 28.6, 0., 5., 15., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 3., 1., 3., 0.3, 10., 10., 25. , 33.3, 50. , 16.7, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 4., 2., 1., 0.4, 10., 5., 25. , 16.7, 50. , 33.3, 0., 10., 5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 5., 3., 1., 0.5, 15., 5., 37.5, 20. , 40. , 40. , 0., 15., 5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.)], + dtype=[('Lane', '>> ar = index_summary("some/path/run_folder_name") # doctest: +SKIP + +The structured NumPy array can be converted to a Pandas DataFrame just so: + +>>> import pandas as pd # doctest: +SKIP +>>> df = pd.DataFrame(ar) # doctest: +SKIP + +For more information see the documentation around each function below. +""" + +import interop.py_interop_run_metrics as interop_metrics +import interop.py_interop_metrics as interop_metric_sets +import interop.py_interop_run as interop_run +import interop.py_interop_table as interop_table +import interop.py_interop_summary as interop_summary +import numpy as np +import os + +_summary_levels = ('Total', 'NonIndex', 'Read', 'Lane', 'Surface') +_index_summary_levels = ('Lane', 'Barcode') + + +def index_summary(run_metrics, level='Lane', columns=None, dtype='f4', **extra): + """ Index summary table + + >>> from interop import index_summary + >>> ar = index_summary("some/path/run_folder_name") # doctest: +SKIP + + >>> index_summary(run_metrics_with_indexing) + array([(1, 0.4556, 507.7778, 260.3334, 768.1111, 1800., 2000.)], + dtype=[('Lane', '>> index_summary(run_metrics_with_indexing, level='Barcode') + array([(1, 9140., 507.7778, 1., 'ATCACGAC', 'AAGGTTCA', 'TSCAIndexes', '1'), + (1, 4686., 260.3334, 2., 'ATCACGAC', 'GGGGGGGG', 'TSCAIndexes', '2')], + dtype=[('Lane', '>> index_summary(run_metrics_with_indexing, columns=['Total Fraction Mapped Reads']) + array([(1, 768.1111)], + dtype=[('Lane', '>> index_summary(run_metrics_with_indexing, columns=['Incorrect']) + Traceback (most recent call last): + ... 
ValueError: Column `Incorrect` not found in: ['Mapped Reads Cv', 'Max Mapped Reads', 'Min Mapped Reads', 'Total Fraction Mapped Reads', 'Total Pf Reads', 'Total Reads'] - column not consistent with level or misspelled + + >>> index_summary(run_metrics_with_indexing, level='Incorrect') + Traceback (most recent call last): + ... + ValueError: level=Incorrect not in ('Lane', 'Barcode') + + :param run_metrics: py_interop_run_metrics.run_metrics or string run folder path + :param level: level of the data to summarize, valid values include: 'Lane', 'Barcode' (Default: Lane) + :param columns: list of columns (valid values depend on the level) see `index_summary_columns` + :param dtype: data type for the array (Default: 'f4') + :param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder + :return: structured array with column names and dtype - np.array + """ + + if columns is None: + columns = index_summary_columns(level) + else: + if level not in _index_summary_levels: + raise ValueError("level={} not in {}".format(str(level), str(_index_summary_levels))) + + extra['valid_to_load'] = create_valid_to_load(('Index', )) + run_metrics = read(run_metrics, **extra) + if run_metrics.empty(): + return np.asarray([]) + + if not isinstance(run_metrics, interop_metrics.run_metrics): + raise ValueError("Expected interop.py_interop_run_metrics.run_metrics or str for `run_metrics`") + + if not isinstance(dtype, str): + dtype = np.dtype(dtype).str + summary_obj = interop_summary.index_flowcell_summary() + interop_summary.summarize_index_metrics(run_metrics, summary_obj) + + if isinstance(columns, str): + columns = (columns, ) + column_map = index_summary_columns(level, ret_dict=True) + for col in columns: + if col not in column_map: + raise ValueError("Column `{}` not found in: {} - column not consistent with level or misspelled".format( + col, str(sorted([k for k in column_map.keys()])))) + + def summarize(summary_object, column_list, extra_row=None): + + row_vals = [] + column_header = [] + if extra_row is None: + extra_row = [] + for column in column_list: + method_name = column_map[column] + val = getattr(summary_object, method_name)() + column_header.append(column) + row_vals.append(val) + return tuple(extra_row+row_vals), column_header + + extra_cols = [('Lane', np.uint16)] + if level == 'Lane': + data = [] + lane_count = summary_obj.size() + column_count = None + for lane_index in range(lane_count): + lane_summary = summary_obj.at(lane_index) + lane_number = lane_index+1 + row, header = summarize(lane_summary + , columns + , [lane_number]) + if column_count is None: + column_count = len(row) + else: + assert column_count == len(row) + data.append(row) + if len(data) == 0: + return np.asarray([]) + return np.asarray(data, dtype=extra_cols+[(col, dtype) for col in header]) + + data = [] + lane_count = summary_obj.size() + column_count = None + for lane_index in range(lane_count): + lane_summary = summary_obj.at(lane_index) + lane_number = lane_index+1 + sample_count = lane_summary.size() + for sample_index in range(sample_count): + sample_summary = lane_summary.at(sample_index) + row, header = summarize(sample_summary + , columns + , [lane_number]) + if column_count is None: + column_count = len(row) + else: + assert column_count == len(row) + data.append(row) + if len(data) == 0: + return np.asarray([]) + _str_types = ('Index1', 'Index2', 'Sample Id', 'Project Name') + return np.asarray(data, dtype=extra_cols+[(col, dtype if col not
in _str_types else np.object) for col in header]) + + +def index_summary_columns(level='Lane', ret_dict=False): + """ List the columns of the `index_summary` table + + >>> from interop import index_summary_columns + >>> index_summary_columns() + ('Mapped Reads Cv', 'Max Mapped Reads', 'Min Mapped Reads', 'Total Fraction Mapped Reads', 'Total Pf Reads', 'Total Reads') + + + >>> index_summary_columns('Barcode') + ('Cluster Count', 'Fraction Mapped', 'Id', 'Index1', 'Index2', 'Project Name', 'Sample Id') + + :param level: level of the data to summarize, valid values include: 'Lane', 'Barcode' (Default: Lane) + :param ret_dict: if true, return a dict mapping from column name to method name (Default: False) + :return: tuple of columns (or dictionary mapping column name to method depending on `ret_dict` parameter) + """ + + if level not in _index_summary_levels: + raise ValueError("level={} not in {}".format(str(level), str(_index_summary_levels))) + summary_obj = interop_summary.index_lane_summary() if level == 'Lane' else interop_summary.index_count_summary() + exclude_attrs = ('this', '_s', 'at', 'size', 'resize', 'lane', 'surface', 'cycle_state', 'clear', 'reserve', 'sort', 'push_back', 'set', 'add', 'update_fraction_mapped') + methods = tuple([v for v in dir(summary_obj) if not v.startswith('__') and v not in exclude_attrs]) + + def to_column_name(method): + return " ".join([v.capitalize().replace("Percent", "%").replace("Gt", ">=") for v in method.split('_')]) + + if ret_dict: + return dict([(to_column_name(v), v) for v in methods]) + return tuple([to_column_name(c) for c in methods]) + + +def summary(run_metrics, level='Total', columns=None, dtype='f4', ignore_missing_columns=True, **extra): + """ Generate a summary table with the given level, columns and dtype from a run_metrics object or run folder path + + Note that not all columns will be included if InterOp files are missing or purposely excluded using `valid_to_load`.
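+
+ Columns available at a given level can be discovered with `summary_columns` and passed back in through the
+ `columns` parameter; a hedged sketch (the run folder path is hypothetical):
+ >>> from interop import summary_columns
+ >>> lane_columns = summary_columns(level='Lane')
+ >>> ar = summary("some/path/run_folder_name", level='Lane', columns=lane_columns[:3]) # doctest: +SKIP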
+ + The following examples show the different levels at which one can summarize the data: + + - Total (Default) + - NonIndex + - Read + - Lane + - Surface + + >>> from interop import summary + >>> ar = summary("some/path/run_folder_name") # doctest: +SKIP + >>> ar = summary("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP + + + >>> summary(run_metrics_example) + array([(0.36666667, 6.6666665, 0.)], + dtype=[('Error Rate', '>> summary(run_metrics_example, 'Total') + array([(0.36666667, 6.6666665, 0.)], + dtype=[('Error Rate', '>> summary(run_metrics_example, 'NonIndex') + array([(0.2, 10., 0.)], + dtype=[('Error Rate', '>> summary(run_metrics_example, 'Read') + array([(1, 78, 0.2, 10., 0.), (2, 89, 0.4, 5., 0.), + (3, 89, 0.5, 5., 0.)], + dtype=[('ReadNumber', '>> summary(run_metrics_example, 'Lane') + array([(1, 78, 1, 0.2, 10., 0., 0., 0., 1.), + (2, 89, 1, 0.4, 5., 0., 0., 0., 1.), + (3, 89, 1, 0.5, 5., 0., 0., 0., 1.)], + dtype=[('ReadNumber', '>> summary(run_metrics_example, 'Surface') + array([], dtype=float64) + + We can select specific columns using the `columns` parameter + >>> summary(run_metrics_example, 'Total', columns=['First Cycle Intensity', 'Error Rate']) + array([(6.6666665, 0.36666667)], + dtype=[('First Cycle Intensity', '>> summary(run_metrics_example, 'Total', columns=['% Aligned', 'Error Rate']) + array([(0.36666667,)], dtype=[('Error Rate', '>> summary(run_metrics_example, 'Total', ignore_missing_columns=False, columns=['% Aligned', 'Error Rate']) + array([(nan, 0.36666667)], + dtype=[('% Aligned', '>> summary(run_metrics_example, 'Total', columns=['Incorrect']) + Traceback (most recent call last): + ... + ValueError: Column `Incorrect` not found in: ['Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G'] - column not consistent with level or misspelled + + + :param run_metrics: py_interop_run_metrics.run_metrics or string run folder path + :param level: level of the data to summarize, valid values include: 'Total', 'NonIndex', 'Read', 'Lane', 'Surface' (Default: Total) + :param columns: list of columns (valid values depend on the level) see `summary_columns` + :param dtype: data type for the array (Default: 'f4') + :param ignore_missing_columns: ignore missing columns, e.g.
those with NaN values (Default: True) + :param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder + :return: structured array with column names and dtype - np.array + """ + + if columns is None: + columns = summary_columns(level) + else: + if level not in _summary_levels: + raise ValueError("level={} not in {}".format(str(level), str(_summary_levels))) + + if isinstance(run_metrics, str): + if extra.get('valid_to_load', None) is None: + extra['valid_to_load'] = load_summary_metrics() + run_metrics = read(run_metrics, **extra) + if run_metrics.empty(): + return np.asarray([]) + + if not isinstance(run_metrics, interop_metrics.run_metrics): + raise ValueError("Expected interop.py_interop_run_metrics.run_metrics or str for `run_metrics`") + + run_summary = interop_summary.run_summary() + interop_summary.summarize_run_metrics(run_metrics, run_summary, False, False) + + if isinstance(columns, str): + columns = (columns, ) + column_map = summary_columns(level, ret_dict=True) + for col in columns: + if col not in column_map: + raise ValueError("Column `{}` not found in: {} - column not consistent with level or misspelled".format( + col, str(sorted([k for k in column_map.keys()])))) + if not isinstance(dtype, str): + dtype = np.dtype(dtype).str + + def summarize(summary_object, column_list, extra_row=None, ignore_missing=ignore_missing_columns): + row_vals = [] + column_header = [] + column_subset = [] + if extra_row is None: + extra_row = [] + for column in column_list: + method_name, subcols = column_map[column] + if callable(subcols): + subcols = subcols(run_metrics.run_info()) + val = getattr(summary_object, method_name)() + if hasattr(val, 'mean'): + val = val.mean() + if ignore_missing and not np.any(np.isfinite(val)): + continue + if len(subcols) > 1: + assert len(val) == len(subcols) + column_header.extend(subcols) + row_vals.extend(val) + else: + column_header.append(column) + row_vals.append(val) + column_subset.append(column) + return tuple(extra_row+row_vals), column_header, column_subset + + if level in ('Total', 'NonIndex'): + summary_obj = run_summary.total_summary() if level == 'Total' else run_summary.nonindex_summary() + row, header, _ = summarize(summary_obj, columns) + header = [(col, dtype) for col in header] + return np.asarray([row], dtype=header) + + extra_cols = [('ReadNumber', np.uint16), ('IsIndex', np.uint8)] + if level == 'Read': + data = [] + read_count = run_summary.size() + column_count = None + for read_index in range(read_count): + read_summary = run_summary.at(read_index) + read_info = read_summary.read() + read_number = read_info.number() + is_index = read_info.is_index() + read_summary = read_summary.summary() + row, header, columns_sel = summarize(read_summary + , columns + , [read_number, int(ord('Y' if is_index else 'N'))] + , ignore_missing_columns and column_count is None) + if column_count is None: + column_count = len(row) + columns = columns_sel + data.append(row) + return np.asarray(data, dtype=extra_cols+[(col, dtype) for col in header]) + + extra_cols += [('Lane', np.uint16)] + if level == 'Lane': + data = [] + read_count = run_summary.size() + column_count = None + for read_index in range(read_count): + read_summary = run_summary.at(read_index) + read_info = read_summary.read() + read_number = read_info.number() + is_index = read_info.is_index() + for lane_index in range(read_summary.size()): + lane_summary = read_summary.at(lane_index) + lane_number = lane_summary.lane() + row, header, columns_sel =
summarize(lane_summary + , columns + , [read_number, int(ord('Y' if is_index else 'N')), lane_number] + , ignore_missing_columns and column_count is None) + if column_count is None: + column_count = len(row) + columns = columns_sel + data.append(row) + return np.asarray(data, dtype=extra_cols+[(col, dtype) for col in header]) + + data = [] + read_count = run_summary.size() + column_count = None + extra_cols += [('Surface', np.uint16)] + for read_index in range(read_count): + read_summary = run_summary.at(read_index) + read_info = read_summary.read() + read_number = read_info.number() + is_index = read_info.is_index() + for lane_index in range(read_summary.size()): + lane_summary = read_summary.at(lane_index) + lane_number = lane_summary.lane() + for surface_index in range(lane_summary.size()): + surface_summary = lane_summary.at(surface_index) + surface_number = surface_summary.surface() + row, header, columns_sel = summarize(surface_summary + , columns + , [read_number, int(ord('Y' if is_index else 'N')), lane_number, surface_number] + , ignore_missing_columns and column_count is None) + if column_count is None: + column_count = len(row) + columns = columns_sel + data.append(row) + if len(data) == 0: + return np.asarray([]) + return np.asarray(data, dtype=extra_cols+[(col, dtype) for col in header]) + + +def load_summary_metrics(): + """ List of valid summary metrics to load + + >>> from interop import load_to_string_list + >>> from interop import load_summary_metrics + >>> load_to_string_list(load_summary_metrics()) + ['CorrectedInt', 'Error', 'Extraction', 'Q', 'Tile', 'QByLane', 'QCollapsed', 'EmpiricalPhasing', 'ExtendedTile'] + + :return: valid_to_load + """ + + valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0) + interop_metrics.list_summary_metrics_to_load(valid_to_load, interop_run.NovaSeq) + return valid_to_load + + +def summary_columns(level='Total', ret_dict=False): + """ Get a list of column names supported at each level of the summary table + + >>> from interop import summary_columns + + The default columns are for the Run/Read level + >>> summary_columns() + ('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G') + >>> summary_columns(level='Total') + ('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G') + >>> summary_columns(level='NonIndex') + ('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G') + >>> summary_columns(level='Read') + ('Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G') + + The lane/surface level give another set of columns for the summary table + >>> summary_columns(level='Lane') + ('Cluster Count', 'Cluster Count Pf', 'Density', 'Density Pf', 'Error Rate', 'Error Rate 100', 'Error Rate 35', 'Error Rate 50', 'Error Rate 75', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupied', '% Pf', 'Phasing', 'Phasing Offset', 'Phasing Slope', 'Prephasing', 'Prephasing Offset', 'Prephasing Slope', 'Projected Yield G', 'Reads', 'Reads Pf', 'Tile Count', 'Yield G') + >>> summary_columns(level='Surface') + ('Cluster Count', 'Cluster Count Pf', 'Density', 'Density Pf', 'Error Rate', 'Error Rate 100', 'Error Rate 35', 'Error Rate 50', 'Error Rate 75', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupied', '% Pf', 'Phasing', 'Phasing 
Offset', 'Phasing Slope', 'Prephasing', 'Prephasing Offset', 'Prephasing Slope', 'Projected Yield G', 'Reads', 'Reads Pf', 'Tile Count', 'Yield G') + + :param level: level of the data to summarize, valid values include: 'Total', 'NonIndex', 'Read', 'Lane', 'Surface' (Default: Total) + :param ret_dict: if true, return a dict mapping from column name to a (method name, sub-columns) pair (Default: False) + :return: tuple of column names (with `ret_dict`, a dict; sub-columns may be given as a function that takes the run_info as an argument) + """ + + if level not in _summary_levels: + raise ValueError("level={} not in {}".format(str(level), str(_summary_levels))) + if level == 'Lane' or level == 'Surface': + summary_obj = interop_summary.lane_summary() + else: + summary_obj = interop_summary.read_summary().summary() + exclude_attrs = ('this', '_s', 'at', 'size', 'resize', 'resize_stat', 'lane', 'surface', 'cycle_state') + methods = tuple([v for v in dir(summary_obj) if not v.startswith('__') and v not in exclude_attrs]) + + def to_column_name(method): + return " ".join([v.capitalize().replace("Percent", "%").replace("Gt", ">=") for v in method.split('_')]) + + def sub_cols(method): + + column_name = to_column_name(method) + if method == 'percent_base': + base_map = {interop_run.A: 'A', interop_run.C: 'C', interop_run.G: 'G', interop_run.T: 'T'} + return tuple([column_name+" "+base_map[i] for i in range(4)]) + if method == 'fwhm': + def fwhm_columns(run_info): + return tuple([column_name+" "+channel for channel in run_info.channels()]) + fwhm_columns.__name__ = 'fwhm_columns' + return fwhm_columns + if method == 'resynthesis': + def resynthesis_columns(run_info): + return tuple([column_name+" "+channel for channel in run_info.channels()]) + resynthesis_columns.__name__ = 'resynthesis_columns' + return resynthesis_columns + return tuple() + + if ret_dict: + return dict([(to_column_name(v), (v, sub_cols(v))) for v in methods]) + return tuple([to_column_name(c) for c in methods]) + + +def indexing(run_metrics, per_sample=True, dtype='f4', stype='O', **extra): + """ Convert InterOp run_metrics (or read run_metrics from disk) to a numpy structured array containing an + indexing table + + We can read an indexing table directly from a run folder. Note, this does not load all metrics, only those required + by the indexing table, e.g. IndexMetricsOut.bin + + >>> from interop import indexing + >>> ar = indexing("some/path/run_folder_name") # doctest: +SKIP + + Note that `valid_to_load` in `read` is ignored.
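+
+ The structured array converts to a pandas DataFrame the same way as the other tables; a hedged sketch
+ (assuming pandas is installed):
+ >>> import pandas as pd # doctest: +SKIP
+ >>> df = pd.DataFrame(indexing(run_metrics_with_indexing)) # doctest: +SKIP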
+ + + We can also convert a `run_metrics` object to an indexing table as follows + >>> ar = indexing(run_metrics_with_indexing) + >>> ar + array([(1., 1101., 'ATCACGAC-AAGGTTCA', '1', 4570., 900., 507.77777), + (1., 1101., 'ATCACGAC-GGGGGGGG', '2', 2343., 900., 260.33334)], + dtype=[('Lane', '>> ar = indexing(run_metrics_with_indexing, per_sample=False) + >>> ar + array([(1., 1101., 1000., 900., 768.11115)], + dtype=[('Lane', ' 0 else 1 + k = 0 + for i in range(index_metric_set.size()): + metric = index_metric_set.at(i) + if metric.read() != select_read: + continue + if per_sample: + for index_info in metric.indices(): + table[k] = (metric.lane() + , metric.tile() + , index_info.index_seq() + , index_info.sample_id() + , index_info.cluster_count() + , metric.cluster_count_pf() + , float(index_info.cluster_count()) / metric.cluster_count_pf() * 100.0 + ) + k += 1 + else: + table[i] = (metric.lane() + , metric.tile() + , metric.cluster_count() + , metric.cluster_count_pf() + , metric.percent_demultiplexed("") + ) + return table + + +def imaging(run_metrics, dtype='f4', **extra): + """ Convert InterOp run_metrics (or read run_metrics from disk) to a numpy structured array containing the imaging + table + + We can read an imaging table directly from a run folder. Note, this does not load all metrics, only those required + by the imaging table. See `load_imaging_metrics` for that list. + + Also note that loading only tile level metrics (e.g. metrics without cycles) will result in an empty table. This is + a limitation of the imaging table. + + >>> from interop import imaging + >>> from interop import load_imaging_metrics + >>> import interop.py_interop_run_metrics as interop_metrics + >>> import numpy as np + >>> ar = imaging("some/path/run_folder_name") # doctest: +SKIP + + The above function is equivalent to + >>> ar = imaging("some/path/run_folder_name", valid_to_load=load_imaging_metrics()) # doctest: +SKIP + + We can select a subset of metrics to include based on metric groups + >>> ar = imaging("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP + + See `read` below for more examples. + + The following example will rely on an existing run_metrics object (possibly created by the `read` function below). + + >>> ar = imaging(run_metrics_example) + >>> ar + rec.array([(1., 1101., 1., 1., 1., 0.1, 10., 10., 25. , 33.3, 33.3, 33.3, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 2., 1., 2., 0.2, 5., 15., 12.5, 42.9, 28.6, 28.6, 0., 5., 15., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 3., 1., 3., 0.3, 10., 10., 25. , 33.3, 50. , 16.7, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 4., 2., 1., 0.4, 10., 5., 25. , 16.7, 50. , 33.3, 0., 10., 5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 5., 3., 1., 0.5, 15., 5., 37.5, 20. , 40. , 40. , 0., 15., 5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.)], + dtype=[('Lane', '>> ar.dtype + dtype((numpy.record, [('Lane', '>> import pandas as pd # doctest: +SKIP + >>> df = pd.DataFrame(ar) # doctest: +SKIP + >>> df # doctest: +SKIP + Lane ... Tile Number + 0 1.0 ... 1.0 + 1 1.0 ... 1.0 + 2 1.0 ... 1.0 + 3 1.0 ... 1.0 + 4 1.0 ... 1.0 + + [5 rows x 27 columns] + + You can also change the dtype of the resulting data array table. + >>> imaging(run_metrics_example, dtype=np.float32) + rec.array([(1., 1101., 1., 1., 1., 0.1, 10., 10., 25. 
, 33.3, 33.3, 33.3, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 2., 1., 2., 0.2, 5., 15., 12.5, 42.9, 28.6, 28.6, 0., 5., 15., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 3., 1., 3., 0.3, 10., 10., 25. , 33.3, 50. , 16.7, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 4., 2., 1., 0.4, 10., 5., 25. , 16.7, 50. , 33.3, 0., 10., 5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.), + (1., 1101., 5., 3., 1., 0.5, 15., 5., 37.5, 20. , 40. , 40. , 0., 15., 5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.)], + dtype=[('Lane', '>> imaging(interop_metrics.run_metrics()) + array([], dtype=float64) + + Here is an example exception if an improper input is given + >>> imaging(None) + Traceback (most recent call last): + ... + ValueError: Expected interop.py_interop_run_metrics.run_metrics or str for `run_metrics` + + :param run_metrics: py_interop_run_metrics.run_metrics or str file path to a run folder + :param dtype: data type for the array (Default: 'f4') + :param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder + :return: structured array with column names and dtype - np.array + """ + + if isinstance(run_metrics, str): + if extra.get('valid_to_load', None) is None: + extra['valid_to_load'] = load_imaging_metrics() + run_metrics = read(run_metrics, **extra) + if not isinstance(run_metrics, interop_metrics.run_metrics): + raise ValueError("Expected interop.py_interop_run_metrics.run_metrics or str for `run_metrics`") + + if run_metrics.empty(): + return np.asarray([]) + + columns = interop_table.imaging_column_vector() + interop_table.create_imaging_table_columns(run_metrics, columns) + row_offsets = interop_table.map_id_offset() + interop_table.count_table_rows(run_metrics, row_offsets) + column_count = interop_table.count_table_columns(columns) + data = np.zeros((len(row_offsets), column_count), dtype=dtype) + interop_table.populate_imaging_table_data(run_metrics, columns, row_offsets, data.ravel()) + + headers = [] + for i in range(columns.size()): + column = columns[i] + if column.has_children(): + headers.extend([str(column.name()) + "/" + str(subname).strip() for subname in column.subcolumns()]) + else: + headers.append(str(column.name())) + + if not isinstance(dtype, str): + dtype = np.dtype(dtype).str + + return np.core.records.fromarrays(data.transpose() + , names=",".join(headers) + , formats=",".join([dtype for _ in headers])) + + +def read(run, valid_to_load=None, requires=None, search_paths=None, **extra): + """ Read InterOp metrics into a run_metrics object + + - A list of valid `valid_to_load` names can be obtained using `list_interop_files` + - If run is `interop.py_interop_run_metrics.run_metrics` then run is returned.
- If an InterOp file is missing from the `requires` list, then an empty run_metrics object is returned + + Read in all metrics from a run folder + >>> from interop import read + >>> metrics = read("some/path/run_folder_name") # doctest: +SKIP + + Read in only ErrorMetricsOut.bin in a run folder + >>> metrics = read("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP + + Read in ErrorMetricsOut.bin and ExtractionMetricsOut.bin but if ErrorMetricsOut.bin is missing return an empty run_metrics object + >>> metrics = read("some/path/run_folder_name", valid_to_load=['Error', 'Extraction'], requires=['Error']) # doctest: +SKIP + + Read in IndexMetricsOut.bin and search for it outside the run folder in `fastq/reports` + >>> metrics = read("some/path/run_folder_name", valid_to_load=['Index'], search_paths=['fastq/reports']) # doctest: +SKIP + + Read in a run folder that is not found + >>> metrics = read("some/non/existing/run_folder_name") + Traceback (most recent call last): + ... + interop.py_interop_run.xml_file_not_found_exception: cannot open file some/non/existing/run_folder_name/RunInfo.xml + + Read from a None object + >>> metrics = read(None) + Traceback (most recent call last): + ... + ValueError: invalid null reference in method 'run_metrics_read', argument 2 of type 'std::string const &' + + :param run: string path including name of run folder (or run_metrics object) + :param valid_to_load: list of strings containing InterOp metric names (Default: None, load everything) + :param requires: list of required metrics (Default: None, check nothing) + :param search_paths: list of paths to search when looking for `IndexMetricsOut.bin` (Default: None, do not search) + :return: interop.py_interop_run_metrics.run_metrics + """ + + if isinstance(run, interop_metrics.run_metrics): + return run + + if search_paths is None: + search_paths = (os.path.join('Analysis', '1', 'Data', 'Reports'),) + if isinstance(search_paths, str): + search_paths = [search_paths] + if valid_to_load is None: + valid_to_load = [] + if requires is None: + requires = [] + + run_metrics = interop_metrics.run_metrics() + valid_to_load = create_valid_to_load(valid_to_load) + if valid_to_load is not None: + run_metrics.read(run, valid_to_load) + else: + run_metrics.read(run) + + if (valid_to_load is None or 'Index' in load_to_string_list(valid_to_load)) and run_metrics.index_metric_set().empty(): + for path in search_paths: + filename = os.path.join(run, path, "IndexMetricsOut.bin") + if not os.path.exists(filename): + continue + if run_metrics.run_info().name() == "": + run_metrics.read_xml(run) + run_metrics = read_metric(filename, run_metrics=run_metrics, finalize=True) + break + for group in requires: + if run_metrics.is_group_empty(group): + return interop_metrics.run_metrics() + + return run_metrics + + +def read_metric(filename, run_metrics=None, finalize=False): + """ Read a specific metric from a file into a run_metrics object + + This function allows incremental reading of metric files from disk. The last call should set + `finalize=True`.
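+
+ A sketch of the incremental pattern across two InterOp files (the paths are hypothetical); only the last call
+ finalizes:
+ >>> metrics = read_metric("some/path/run_folder_name/InterOp/ErrorMetricsOut.bin") # doctest: +SKIP
+ >>> metrics = read_metric("some/path/run_folder_name/InterOp/ExtractionMetricsOut.bin", run_metrics=metrics, finalize=True) # doctest: +SKIP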
+ + Read in `ErrorMetricsOut.bin` into a run_metrics object and finalize since this is the only metric we plan to read + + >>> from interop import read_metric + >>> metrics = read_metric("some/path/run_folder_name/InterOp/ErrorMetricsOut.bin", finalize=True) # doctest: +SKIP + + :param filename: path to InterOp file + :param run_metrics: existing run_metrics object (Default None, one will be created) + :param finalize: if true, then call finalize_after_load (last call to `read_metric` should set finalize=True) + :return: interop.py_interop_run_metrics.run_metrics + """ + + if run_metrics is None: + run_metrics = interop_metrics.run_metrics() + metric_group = group_from_filename(filename) + data = np.fromfile(filename, dtype=np.uint8) + run_metrics.read_metrics_from_buffer(metric_group, data) + if finalize: + run_metrics.finalize_after_load() + return run_metrics + + +def create_valid_to_load(interop_prefixes): + """ Create list of metrics valid to load by the InterOp library + + A list of valid metric names can be obtained using `list_interop_files` + + >>> from interop import create_valid_to_load + >>> int(create_valid_to_load(['Extraction'])[0]) + 0 + >>> create_valid_to_load(0) + Traceback (most recent call last): + ... + TypeError: Parameter valid_to_load must be a collection of values + + :param interop_prefixes: list of strings containing InterOp metric names + :return: py_interop_run.uchar_vector + """ + + if not hasattr(interop_prefixes, '__len__'): + raise TypeError("Parameter valid_to_load must be a collection of values") + + if len(interop_prefixes) == 0: + return None + + if isinstance(interop_prefixes, interop_run.uchar_vector): + return interop_prefixes + + valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0) + enable_metrics(valid_to_load, interop_prefixes) + + return valid_to_load + + +def enable_metrics(valid_to_load, interop_prefixes): + """ Enable metrics in valid_to_load + + >>> from interop import enable_metrics, load_to_string_list + >>> import interop.py_interop_run as interop_run + >>> valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0) + >>> load_to_string_list(enable_metrics(valid_to_load, 'Extraction')) + ['Extraction'] + >>> load_to_string_list(enable_metrics(valid_to_load, ['Error', 'Q'])) + ['Error', 'Extraction', 'Q'] + + Nothing changes when passing in an empty list + >>> load_to_string_list(enable_metrics(valid_to_load, [])) + ['Error', 'Extraction', 'Q'] + + Here are some example exceptions when an improper parameter is given + + >>> enable_metrics(valid_to_load, None) + Traceback (most recent call last): + ... + TypeError: 'NoneType' object is not iterable + >>> enable_metrics(None, []) + Traceback (most recent call last): + ... + TypeError: Parameter valid_to_load must be of type interop.py_interop_run.uchar_vector + >>> enable_metrics("None", []) + Traceback (most recent call last): + ...
TypeError: Parameter valid_to_load must be of type interop.py_interop_run.uchar_vector + + + :param valid_to_load: interop_run.uchar_vector (boolean array) + :param interop_prefixes: list of metrics to enable + :return: interop_run.uchar_vector (It is updated in-place so the return can be ignored) + """ + if not isinstance(valid_to_load, interop_run.uchar_vector): + raise TypeError("Parameter valid_to_load must be of type interop.py_interop_run.uchar_vector") + + if isinstance(interop_prefixes, str): + interop_prefixes = [interop_prefixes] + + for metric_name in interop_prefixes: + group = interop_run.parse_metric_group(metric_name) + if group >= interop_run.MetricCount: + raise ValueError("Cannot parse metric file name: {}".format(metric_name)) + valid_to_load[group] = 1 + return valid_to_load + + +def load_to_string_list(valid_to_load): + """ Create a string list of names for each enabled metric in `valid_to_load` + + >>> from interop import create_valid_to_load, load_to_string_list + >>> import interop.py_interop_run as interop_run + >>> valid_to_load = create_valid_to_load('Extraction') + >>> load_to_string_list(valid_to_load) + ['Extraction'] + >>> valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 1) + >>> load_to_string_list(valid_to_load) + ['CorrectedInt', 'Error', 'Extraction', 'Image', 'Index', 'Q', 'Tile', 'QByLane', 'QCollapsed', 'EmpiricalPhasing', 'DynamicPhasing', 'ExtendedTile', 'SummaryRun'] + + :param valid_to_load: boolean buffer + :return: list of strings containing the name of each metric enabled in `valid_to_load` + """ + + if not isinstance(valid_to_load, interop_run.uchar_vector): + raise TypeError("Parameter valid_to_load must be of type interop.py_interop_run.uchar_vector") + + names = [] + for i in range(interop_run.MetricCount): + if valid_to_load[i] > 0: + names.append(interop_run.to_string_metric_group(i)) + return names + + +def group_from_filename(filename): + """ Get the metric group id from an InterOp filename path + + >>> from interop import group_from_filename + >>> import interop.py_interop_run as interop_run + >>> group_from_filename("some/path/run/InterOp/ExtractionMetricsOut.bin") + 2 + >>> interop_run.Extraction + 2 + + This group id can be used to load a metric from a binary buffer as in `interop.core.read_metric` + + :param filename: path to interop metric + :return: interop_run.metric_group + """ + + metric_name = os.path.basename(filename) + metric_name, ext = os.path.splitext(metric_name) + if ext != '.bin': + raise ValueError("InterOp file must have `bin` extension: {}".format(filename)) + if metric_name.endswith('Out'): + metric_name = metric_name[:-3] + if metric_name.endswith('Metrics'): + metric_name = metric_name[:-7] + else: + raise ValueError("InterOp file must have `Metrics.bin` or `MetricsOut.bin` suffix: {}".format(filename)) + group = interop_run.parse_metric_group(metric_name) + if group >= interop_run.MetricCount: + raise ValueError("Cannot identify InterOp metric from: {}".format(filename)) + return group + + +def load_imaging_metrics(): + """ List of valid imaging metrics to load + + >>> from interop import load_to_string_list + >>> from interop import load_imaging_metrics + >>> load_to_string_list(load_imaging_metrics()) + ['CorrectedInt', 'Error', 'Extraction', 'Image', 'Q', 'Tile', 'QByLane', 'QCollapsed', 'EmpiricalPhasing', 'DynamicPhasing', 'ExtendedTile'] + + :return: valid_to_load + """ + + valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0) +
interop_table.list_imaging_table_metrics_to_load(valid_to_load) + return valid_to_load + +######################################################################################################################## +# Functions and other code to support doc tests +######################################################################################################################## + + +def _run_info_example_fixture(): + """Fixture used for doctests""" + + run_name = "111111_UNKNOWN_1_XXYT" + run_info_version = 6 + run_date, instrument_name, run_number, flowcell_id = run_name.split('_') + lane_count = 1 + surface_count = 1 + swath_count = 1 + tile_count = 1 + sections_per_lane = 1 + lanes_per_section = 1 + naming_method = interop_run.FourDigit + tiles = ['1_1101'] + flowcell_layout = interop_run.flowcell_layout(lane_count + , surface_count + , swath_count + , tile_count + , sections_per_lane + , lanes_per_section + , tiles + , naming_method + , flowcell_id) + channels = ['green', 'blue'] + width = 7875 + height = 10500 + image_dimensions = interop_run.image_dimensions(width, height) + reads = interop_run.read_info_vector() + is_reverse_complement = False + first_cycle = 1 + for read_num, cycle_count, is_index in [(1, 3, False), (2, 1, True), (3, 1, True)]: + last_cycle = first_cycle + cycle_count + reads.push_back(interop_run.read_info(read_num, first_cycle, last_cycle, is_index, is_reverse_complement)) + first_cycle = last_cycle + run_info = interop_run.info(run_name + , run_date + , instrument_name + , int(run_number) + , run_info_version + , flowcell_layout + , channels + , image_dimensions + , reads) + return run_info + + +def _run_metrics_example_fixture(): + """Fixture used for doctests""" + + run_info = _run_info_example_fixture() + metrics = interop_metrics.run_metrics(run_info) + adapter_rate = np.nan + tile_num = 1101 + lane_num = 1 + + error_metric_set = metrics.error_metric_set() + for cycle, error_rate in enumerate([0.1, 0.2, 0.3, 0.4, 0.5]): + error_metric_set.insert(interop_metric_sets.error_metric(lane_num, tile_num, cycle+1, error_rate, adapter_rate)) + + corrected_int_metric_set = metrics.corrected_intensity_metric_set() + for cycle, call_counts in enumerate([[10, 10, 10, 10, 0], [5, 15, 10, 10, 0], [10, 10, 15, 5, 0], [10, 5, 15, 10, 0], [15, 5, 10, 10, 0]]): + call_counts = np.array(call_counts, dtype=np.uint32) + corrected_int_metric_set.insert(interop_metric_sets.corrected_intensity_metric( + lane_num, tile_num, cycle+1, call_counts, 0, 0)) # Dummy 0s are to work around swig bug + + extraction_metric_set = metrics.extraction_metric_set() + for cycle, call_counts in enumerate([[10, 10], [5, 15], [10, 10], [10, 5], [15, 5]]): + intensity_array = np.array(call_counts, dtype=np.uint16) + focus_array = np.array(call_counts, dtype=np.float32) + extraction_metric_set.insert(interop_metric_sets.extraction_metric( + lane_num, tile_num, cycle+1, intensity_array, focus_array, 0)) + return metrics + + +def _index_metrics_example_fixture(): + """Fixture used for doctests""" + + run_info = _run_info_example_fixture() + metrics = interop_metrics.run_metrics(run_info) + index_metric_set = metrics.index_metric_set() + + indices = interop_metric_sets.index_info_vector() + for barcode, sample_id, sample_proj, cluster_count in [ + ("ATCACGAC-AAGGTTCA", "1", "TSCAIndexes", 4570) + , ("ATCACGAC-GGGGGGGG", "2", "TSCAIndexes", 2343) + ]: + indices.push_back(interop_metric_sets.index_info(barcode, sample_id, sample_proj, cluster_count)) + tile_num = 1101 + lane_num = 1 + + for read_num in [2, 
3]: + index_metric_set.insert(interop_metric_sets.index_metric(lane_num, tile_num, read_num, indices)) + + tile_metric_set = metrics.tile_metric_set() + reads = interop_metric_sets.read_metric_vector() + cluster_density = 1000 + cluster_density_pf = 900 + cluster_count = 1000 + cluster_count_pf = 900 + tile_metric_set.insert(interop_metric_sets.tile_metric(lane_num + , tile_num + , cluster_density + , cluster_density_pf + , cluster_count + , cluster_count_pf + , reads)) + + metrics.finalize_after_load() + return metrics + + +# class RunFolderToDiskFixture(object): +# +# def __init__(self): +# +# self.run_folder = os.path.abspath("./210326_") +# run = _run_metrics_example_fixture() +# run.write +# +# def __del__(self): +# if self.run_folder is not None: +# import shutil +# shutil.rmtree(self.run_folder) + + +def _run_doctests(): + + import interop.core + import doctest + import sys + failure_count, test_count = doctest.testmod(interop.core + , optionflags=doctest.IGNORE_EXCEPTION_DETAIL + , globs=dict( + run_metrics_with_indexing=_index_metrics_example_fixture(), + run_metrics_example=_run_metrics_example_fixture() + )) + if failure_count > 0: + sys.exit(1) + + + +if __name__ == "__main__": + + _run_doctests() diff --git a/src/ext/swig/arrays/arrays_numpy_impl.i b/src/ext/swig/arrays/arrays_numpy_impl.i index ef96487f6..b3ba232db 100644 --- a/src/ext/swig/arrays/arrays_numpy_impl.i +++ b/src/ext/swig/arrays/arrays_numpy_impl.i @@ -12,4 +12,29 @@ import_array(); %apply (float* INPLACE_ARRAY1, int DIM1) {(float* buffer, size_t buffer_size)} %apply (unsigned int* INPLACE_ARRAY1, int DIM1) {(::uint32_t* id_buffer, size_t id_buffer_size)} %apply (float* INPLACE_ARRAY1, int DIM1) {(float* data_beg, const size_t n)} +%apply (float* INPLACE_ARRAY1, int DIM1) {(float* input, size_t input_size)} +%apply (float* INPLACE_ARRAY1, int DIM1) {(float* output, size_t output_size)} +%apply (float* INPLACE_ARRAY1, int DIM1) {(float* matrix, size_t matrix_size)} + +%apply (unsigned int* INPLACE_ARRAY1, int DIM1) {(const ::uint32_t* called_counts, const size_t num_of_counts)} +%apply (unsigned short* INPLACE_ARRAY1, int DIM1) {(const ::uint16_t* intensity_values, const size_t intensity_count)} +%apply (float* INPLACE_ARRAY1, int DIM1) {(const float* focus_scores, const size_t focus_count)} + +%apply (unsigned short* INPLACE_ARRAY1, int DIM1) {(const ::uint16_t* buffer, const size_t count)} +%apply (unsigned char* INPLACE_ARRAY1, int DIM1) {(const ::uint8_t* buffer, const size_t count)} +%apply (int* INPLACE_ARRAY1, int DIM1) {(const ::int32_t* buffer, const size_t num_of_counts)} +%apply (short* INPLACE_ARRAY1, int DIM1) {(const ::int16_t* buffer, const size_t count)} +%apply (char* INPLACE_ARRAY1, int DIM1) {(const ::int8_t* buffer, const size_t count)} +%apply (float* INPLACE_ARRAY1, int DIM1) {(const float* buffer, const size_t count)} +%apply (double* INPLACE_ARRAY1, int DIM1) {(const double* buffer, const size_t count)} + +#if defined(SWIGWORDSIZE64) +%apply (unsigned long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* data_int_beg, const size_t n)} +%apply (unsigned long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* buffer, const size_t count)} +%apply (long* INPLACE_ARRAY1, int DIM1) {(::int64_t* buffer, const size_t count)} +#else +%apply (unsigned long long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* data_int_beg, const size_t n)} +%apply (unsigned long long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* buffer, const size_t count)} +%apply (long long* INPLACE_ARRAY1, int DIM1) {(const ::int64_t* buffer, const size_t
diff --git a/src/ext/swig/arrays/arrays_numpy_impl.i b/src/ext/swig/arrays/arrays_numpy_impl.i
index ef96487f6..b3ba232db 100644
--- a/src/ext/swig/arrays/arrays_numpy_impl.i
+++ b/src/ext/swig/arrays/arrays_numpy_impl.i
@@ -12,4 +12,29 @@ import_array();
 %apply (float* INPLACE_ARRAY1, int DIM1) {(float* buffer, size_t buffer_size)}
 %apply (unsigned int* INPLACE_ARRAY1, int DIM1) {(::uint32_t* id_buffer, size_t id_buffer_size)}
 %apply (float* INPLACE_ARRAY1, int DIM1) {(float* data_beg, const size_t n)}
+%apply (float* INPLACE_ARRAY1, int DIM1) {(float* input, size_t input_size)}
+%apply (float* INPLACE_ARRAY1, int DIM1) {(float* output, size_t output_size)}
+%apply (float* INPLACE_ARRAY1, int DIM1) {(float* matrix, size_t matrix_size)}
+
+%apply (unsigned int* INPLACE_ARRAY1, int DIM1) {(const ::uint32_t* called_counts, const size_t num_of_counts)}
+%apply (unsigned short* INPLACE_ARRAY1, int DIM1) {(const ::uint16_t* intensity_values, const size_t intensity_count)}
+%apply (float* INPLACE_ARRAY1, int DIM1) {(const float* focus_scores, const size_t focus_count)}
+
+%apply (unsigned short* INPLACE_ARRAY1, int DIM1) {(const ::uint16_t* buffer, const size_t count)}
+%apply (unsigned char* INPLACE_ARRAY1, int DIM1) {(const ::uint8_t* buffer, const size_t count)}
+%apply (int* INPLACE_ARRAY1, int DIM1) {(const ::int32_t* buffer, const size_t num_of_counts)}
+%apply (short* INPLACE_ARRAY1, int DIM1) {(const ::int16_t* buffer, const size_t count)}
+%apply (char* INPLACE_ARRAY1, int DIM1) {(const ::int8_t* buffer, const size_t count)}
+%apply (float* INPLACE_ARRAY1, int DIM1) {(const float* buffer, const size_t count)}
+%apply (double* INPLACE_ARRAY1, int DIM1) {(const double* buffer, const size_t count)}
+
+#if defined(SWIGWORDSIZE64)
+%apply (unsigned long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* data_int_beg, const size_t n)}
+%apply (unsigned long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* buffer, const size_t count)}
+%apply (long* INPLACE_ARRAY1, int DIM1) {(::int64_t* buffer, const size_t count)}
+#else
+%apply (unsigned long long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* data_int_beg, const size_t n)}
+%apply (unsigned long long* INPLACE_ARRAY1, int DIM1) {(::uint64_t* buffer, const size_t count)}
+%apply (long long* INPLACE_ARRAY1, int DIM1) {(const ::int64_t* buffer, const size_t count)}
+#endif
diff --git a/src/ext/swig/run.i b/src/ext/swig/run.i
index 0930ac3c1..9fe2f4291 100644
--- a/src/ext/swig/run.i
+++ b/src/ext/swig/run.i
@@ -4,6 +4,7 @@
 %include
 %include
 %include
+%include "src/ext/swig/arrays/arrays_impl.i"
 %include "src/ext/swig/exceptions/exceptions_impl.i"
 %include "util/operator_overload.i"
diff --git a/src/ext/swig/run_metrics.i b/src/ext/swig/run_metrics.i
index c1bb4ad3e..48c8b8c91 100644
--- a/src/ext/swig/run_metrics.i
+++ b/src/ext/swig/run_metrics.i
@@ -76,6 +76,7 @@ RUN_METRICS_EXCEPTION_WRAPPER(WRAP_EXCEPTION)
 %{
+#include "interop/config.h"
 #include "interop/logic/metric/extraction_metric.h"
 #include "interop/logic/metric/q_metric.h"
 #include "interop/logic/utils/metric_type_ext.h"
diff --git a/src/ext/swig/summary.i b/src/ext/swig/summary.i
index d4da6ec6d..48448038f 100644
--- a/src/ext/swig/summary.i
+++ b/src/ext/swig/summary.i
@@ -55,6 +55,7 @@ RUN_METRICS_EXCEPTION_WRAPPER(WRAP_EXCEPTION_IMPORT)
 // Summary model
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 %{
+#include "interop/config.h"
 #include "interop/model/summary/cycle_state_summary.h"
 #include "interop/model/summary/stat_summary.h"
 #include "interop/model/summary/surface_summary.h"
diff --git a/src/ext/swig/table.i b/src/ext/swig/table.i
index fdacd3f95..7de7ed809 100644
--- a/src/ext/swig/table.i
+++ b/src/ext/swig/table.i
@@ -67,6 +67,7 @@ TABLE_EXCEPTION_WRAPPER(WRAP_EXCEPTION)
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 %{
+#include "interop/config.h"
 #include "interop/logic/table/create_imaging_table_columns.h"
 #include "interop/logic/table/create_imaging_table.h"
 %}
@@ -76,6 +77,7 @@ TABLE_EXCEPTION_WRAPPER(WRAP_EXCEPTION)
 %include "interop/model/table/table_exceptions.h"
 
+
 %template(imaging_column_vector) std::vector< illumina::interop::model::table::imaging_column >;
@@ -84,3 +86,15 @@ TABLE_EXCEPTION_WRAPPER(WRAP_EXCEPTION)
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 %include "interop/logic/table/create_imaging_table.h"
 %include "interop/logic/table/create_imaging_table_columns.h"
+
+%define WRAP_TABLE_ENUM(ENUM)
+%template(list_##ENUM) illumina::interop::constants::list_enum_names< illumina::interop::model::table:: ENUM >;
+%template(parse_##ENUM) illumina::interop::constants::parse< illumina::interop::model::table:: ENUM >;
+#if defined(SWIGPYTHON)
+%template(to_string_##ENUM) illumina::interop::constants::to_string< illumina::interop::model::table:: ENUM >;
+#else
+%template(to_string) illumina::interop::constants::to_string< illumina::interop::model::table:: ENUM >;
+#endif
+%enddef
+
+WRAP_TABLE_ENUM(column_id)
\ No newline at end of file
diff --git a/src/interop/logic/plot/plot_sample_qc.cpp b/src/interop/logic/plot/plot_sample_qc.cpp
index 7e86a99b4..23e3461b1 100644
--- a/src/interop/logic/plot/plot_sample_qc.cpp
+++ b/src/interop/logic/plot/plot_sample_qc.cpp
@@ -80,7 +80,7 @@ namespace illumina { namespace interop { namespace logic { namespace plot
     void plot_sample_qc(model::metrics::run_metrics &metrics,
                         const size_t lane,
                         model::plot::plot_data &data)
-    INTEROP_THROW_SPEC((model::index_out_of_bounds_exception))
+    INTEROP_THROW_SPEC((model::index_out_of_bounds_exception, std::bad_alloc))
     {
         typedef model::plot::series bar_series_t;
         data.clear();
diff --git a/src/tests/csharp/metrics/ExtendedTileMetricsTest.cs b/src/tests/csharp/metrics/ExtendedTileMetricsTest.cs
index 259173c75..2fdcf3b54 100644
--- a/src/tests/csharp/metrics/ExtendedTileMetricsTest.cs
+++ b/src/tests/csharp/metrics/ExtendedTileMetricsTest.cs
@@ -4,7 +4,7 @@
 using Illumina.InterOp.Metrics;
 using Illumina.InterOp.Comm;
 
-namespace illumina.interop.csharp.unittest
+namespace Illumina.InterOp.Interop.UnitTest
 {
     /// <summary>
     /// Confirm that the Extended Tile metrics InterOp works properly in C#
diff --git a/src/tests/csharp/metrics/PerformanceTest.cs b/src/tests/csharp/metrics/PerformanceTest.cs
index 8ded3f92e..9265760a3 100644
--- a/src/tests/csharp/metrics/PerformanceTest.cs
+++ b/src/tests/csharp/metrics/PerformanceTest.cs
@@ -4,7 +4,7 @@
 using Illumina.InterOp.Metrics;
 using Illumina.InterOp.RunMetrics;
 
-namespace Illumina.InterOp.UnitTest
+namespace Illumina.InterOp.Interop.UnitTest
 {
     /// <summary>
     /// Test the performance of tabulating a large number of extraction metrics
diff --git a/src/tests/csharp/run/RunInfoTest.cs b/src/tests/csharp/run/RunInfoTest.cs
index 7cc664e94..3b5615c33 100644
--- a/src/tests/csharp/run/RunInfoTest.cs
+++ b/src/tests/csharp/run/RunInfoTest.cs
@@ -4,7 +4,7 @@
 using System.Collections.Generic;
 using Illumina.InterOp.Run;
 
-namespace illumina.interop.csharp.unittest
+namespace Illumina.InterOp.Interop.UnitTest
 {
     /// <summary>
     /// Confirm that the run info XML parsing works properly in C#
diff --git a/src/tests/csharp/run/RunParametersTest.cs b/src/tests/csharp/run/RunParametersTest.cs
index 03e334dd3..684e1c28f 100644
--- a/src/tests/csharp/run/RunParametersTest.cs
+++ b/src/tests/csharp/run/RunParametersTest.cs
@@ -4,7 +4,7 @@
 using System.Collections.Generic;
 using Illumina.InterOp.Run;
 
-namespace illumina.interop.csharp.unittest
+namespace Illumina.InterOp.Interop.UnitTest
 {
     /// <summary>
     /// Confirm that the run info XML parsing works properly in C#
diff --git a/src/tests/python/CMakeLists.txt b/src/tests/python/CMakeLists.txt
index 5f9accbad..26e8c7469 100644
--- a/src/tests/python/CMakeLists.txt
+++ b/src/tests/python/CMakeLists.txt
@@ -24,12 +24,12 @@ if(NOT PYTHONINTERP_FOUND)
 endif()
 
 set(PYTHON_TEST_ENABLED ON PARENT_SCOPE)
+
 add_custom_target(check_python
         COMMENT "Running Python Unit tests"
         COMMAND ${CMAKE_COMMAND} -E copy_directory $ ${CMAKE_CURRENT_BINARY_DIR}/interop
-        COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/CoreTests.py ${CMAKE_CURRENT_BINARY_DIR}/interop
         COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/DepTests.py ${CMAKE_CURRENT_BINARY_DIR}/interop
-        COMMAND ${PYTHON_EXECUTABLE} interop/CoreTests.py
+        COMMAND ${PYTHON_EXECUTABLE} -m interop --test
         COMMAND ${PYTHON_EXECUTABLE} interop/DepTests.py
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
 )
diff --git a/tools/package.bat b/tools/package.bat
index 1e463c9d1..e4522ad88 100644
--- a/tools/package.bat
+++ b/tools/package.bat
@@ -37,7 +37,7 @@ set SUFFIX=']
 set python_version=
 
 if NOT "%1" == "" (
-set BUILD_TYPE=%1
+set BUILD_TYPE=%1%
 )
 if NOT '%2' == '' (
 set COMPILER=%2%
@@ -65,8 +65,12 @@ echo "Create environment: %python_version%"
 conda create -n py%python_version% python=%python_version% numpy wheel -y || echo "Environment exists"
 echo "Activate py%python_version%"
 call activate py%python_version%
+if %errorlevel% neq 0 exit /b %errorlevel%
+pip install --upgrade --force-reinstall numpy
+pip install pandas
 
 :SKIP_CONDA_UPDATE
+
 rem Clean build and dist directories
 if exist %BUILD_DIR% rd /s /q %BUILD_DIR%
 if exist %DIST_DIR% rd /s /q %DIST_DIR%
diff --git a/tools/package.sh b/tools/package.sh
index 28ef6ae50..13e61b190 100644
--- a/tools/package.sh
+++ b/tools/package.sh
@@ -154,6 +154,7 @@ if [ -z $PYTHON_VERSION ] && [ -e /opt/python ] ; then
         if [[ "$PYBUILD" == cp33* ]]; then
             continue
         fi
+        ${PYTHON_BIN}/python -m pip install pandas
         rm -fr ${BUILD_PATH}/src/ext/python/*
         run "Configure ${PYBUILD}" cmake $SOURCE_PATH -B${BUILD_PATH} -DPYTHON_EXECUTABLE=${PYTHON_BIN}/python ${CMAKE_EXTRA_FLAGS} -DSKIP_PACKAGE_ALL_WHEEL=ON -DPYTHON_WHEEL_PREFIX=${ARTIFACT_PATH}/tmp
@@ -205,6 +206,7 @@ if [ "$PYTHON_VERSION" != "" ] && [ "$PYTHON_VERSION" != "Disable" ] && [ "$PYTH
         fi
         conda install numpy -y --name py${python_version}
         conda install wheel -y --name py${python_version}
+        conda install pandas -y --name py${python_version}
 
 elif hash pyenv 2> /dev/null; then
     export PATH=$(pyenv root)/shims:${PATH}
diff --git a/tools/prereqs/docker-centos5-install.sh b/tools/prereqs/docker-centos5-install.sh
index ca148d44e..7153194c0 100644
--- a/tools/prereqs/docker-centos5-install.sh
+++ b/tools/prereqs/docker-centos5-install.sh
@@ -35,7 +35,6 @@
 JAVA_URL="https://download.oracle.com/otn/java/jdk/8u131-b11/d54c1d3a095b4ff2b66
 JAVA_URL="https://ussd.artifactory.illumina.com/list/generic-bioinformatics/BuildDeps/interop/jdk-8u131-linux-x64.rpm" # TODO use openjdk? yum install java-1.8.0-openjdk
 VALGRIND_URL="http://www.valgrind.org/downloads/valgrind-3.14.0.tar.bz2"
-DOXYGEN_URL="https://sourceforge.net/projects/doxygen/files/rel-1.8.10/doxygen-1.8.10.linux.bin.tar.gz"
 DOXYGEN_URL="https://sourceforge.net/projects/doxygen/files/rel-1.8.10/doxygen-1.8.10.linux.bin.tar.gz/download?use_mirror=managedway&r=&use_mirror=managedway#"
 WGET_URL="http://ftp.gnu.org/gnu/wget/wget-1.19.tar.gz"
 PROG_HOME=/opt
@@ -70,7 +69,7 @@ else
         if [[ "$PYBUILD" == cp33* ]]; then
             continue
         fi
-        "/opt/python/${PYBUILD}/bin/pip" install numpy
+        "/opt/python/${PYBUILD}/bin/pip" install numpy pandas
 done
 
 # Current version 1.7.0 of auditwheel fails when building a fake pure Python lib with shared libs in data
diff --git a/tools/prereqs/docker-centos7-install.sh b/tools/prereqs/docker-centos7-install.sh
index 001754b06..07a297373 100644
--- a/tools/prereqs/docker-centos7-install.sh
+++ b/tools/prereqs/docker-centos7-install.sh
@@ -43,12 +43,6 @@ yum install -y python-devel
 mkdir /opt/dotnet
 wget --no-check-certificate --quiet -O - ${DOTNET_URL} | tar --strip-components=1 -xz -C /opt/dotnet
 
-curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py"
-python get-pip.py
-
-pip install auditwheel==1.5.0
-pip install numpy
-
 if hash cmake 2> /dev/null; then
     echo "Found CMake"
 else
@@ -150,7 +144,6 @@
 which java
 which cmake
 which mono
-
 gcc --version
 swig -version
 java -version
diff --git a/tools/prereqs/env_windows.bat b/tools/prereqs/env_windows.bat
index a7b632a13..5d233a400 100644
--- a/tools/prereqs/env_windows.bat
+++ b/tools/prereqs/env_windows.bat
@@ -144,7 +144,7 @@ call conda create --no-default-packages -n py%python_version% python=%python_ver
 echo "Activate py%python_version%"
 call activate py%python_version%
 if %errorlevel% neq 0 exit /b %errorlevel%
-call conda install numpy wheel -y
+call conda install numpy wheel pandas -y
 if %errorlevel% neq 0 exit /b %errorlevel%
 
 :SKIP_CONDA_UPDATE
diff --git a/tools/teamcity/pull_request/centos7_gcc-485-debug.sh b/tools/teamcity/pull_request/centos7_gcc-485-debug.sh
new file mode 100644
index 000000000..75819387f
--- /dev/null
+++ b/tools/teamcity/pull_request/centos7_gcc-485-debug.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+build_number=$1
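+
+# Usage sketch (assumption: TeamCity passes the build counter as the first
+# positional argument, matching the msvc wrappers added below):
+#
+#   sh tools/teamcity/pull_request/centos7_gcc-485-debug.sh <build_number>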
+sh ./tools/package_docker.sh docker-bioinformatics.dockerhub.illumina.com/interop_centos7 "${build_number}" "-DENABLE_VERACODE=ON" Debug
+
+
diff --git a/tools/teamcity/pull_request/centos7_gcc-485-ppc.sh b/tools/teamcity/pull_request/centos7_gcc-485-ppc.sh
new file mode 100644
index 000000000..a4d0191c5
--- /dev/null
+++ b/tools/teamcity/pull_request/centos7_gcc-485-ppc.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+
+branch=$1
+build_num=$2
+docker_image="multiarch/qemu-user-static:register"
+
+docker ps 1>/dev/null 2>&1 || prefix=sudo;
+
+${prefix} docker pull ${docker_image}
+if [ "$?" != "0" ] ; then
+    yes | ${prefix} docker system prune -a
+fi
+
+${prefix} docker run --rm --privileged ${docker_image} --reset
+if [ "$?" != "0" ] ; then
+    exit 1
+fi
+
+sh ./tools/package_docker.sh docker-bioinformatics.dockerhub.illumina.com/ppc64le/interop_centos7 ${build_num} "-DDISABLE_DYNAMIC_ARRAY=ON -DENABLE_FLOAT_COMPRESSION=OFF -DENABLE_DOCS=OFF -DENABLE_SWIG=OFF -DBUILD_SAMPLE_SHEET_ONLY=ON -DENABLE_EXAMPLES=OFF -DENABLE_APPS=OFF"
+if [ "$?" != "0" ] ; then
+    exit 1
+fi
diff --git a/tools/teamcity/pull_request/msvc.bat b/tools/teamcity/pull_request/msvc.bat
new file mode 100644
index 000000000..0fc48b9b4
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc.bat
@@ -0,0 +1,55 @@
+set GENERATOR="Visual Studio 14 2015"
+set PY=
+set CS_BUILD="DotNet"
+set TARGET=bundle
+
+set build_number=%1%
+if NOT '%2' == '' (
+set GENERATOR=%2%
+)
+if NOT "%3" == "" (
+set CS_BUILD=%3%
+)
+if NOT "%4" == "" (
+set PY=%4%
+)
+if NOT "%5" == "" (
+set TARGET=%5%
+)
+
+if "%PY%" == "2.7" (
+set EXTRA="conda=4.5.3"
+)
+
+echo "Generator: %GENERATOR%"
+echo "CS_BUILD: %CS_BUILD%"
+echo "PY: %PY%"
+echo "EXTRA: %EXTRA%"
+
+
+echo "##teamcity[blockOpened name='Install Deps']"
+call tools\prereqs\env_windows.bat %PY% %EXTRA%
+if %errorlevel% neq 0 exit /b %errorlevel%
+echo "##teamcity[blockClosed name='Install Deps']"
+
+echo "##teamcity[blockOpened name='Disable Float Compression %PY%']"
+call tools\package.bat Release %GENERATOR% %TARGET% "-Ax64 -DBUILD_NUMBER=%build_number% -DENABLE_FLOAT_COMPRESSION=OFF -DCSBUILD_TOOL=%CS_BUILD%" %PY%
+set errorcode=%errorlevel%
+type %CD%\build\CMakeFiles\CMakeOutput.log
+if %errorcode% neq 0 exit /b %errorcode%
+echo "##teamcity[blockClosed name='Disable Float Compression %PY%']"
+
+echo "##teamcity[blockOpened name='Disable Dynamic Array %PY%']"
+call tools\package.bat Release %GENERATOR% %TARGET% "-Ax64 -DBUILD_NUMBER=%build_number% -DDISABLE_DYNAMIC_ARRAY=ON -DCSBUILD_TOOL=%CS_BUILD%" %PY%
+if %errorlevel% neq 0 exit /b %errorlevel%
+echo "##teamcity[blockClosed name='Disable Dynamic Array %PY%']"
+
+echo "##teamcity[blockOpened name='Python %PY%']"
+call tools\package.bat Release %GENERATOR% %TARGET% "-Ax64 -DBUILD_NUMBER=%build_number% -DCSBUILD_TOOL=%CS_BUILD%" %PY%
+if %errorlevel% neq 0 exit /b %errorlevel%
+echo "##teamcity[blockClosed name='Python %PY%']"
+
+echo "##teamcity[blockOpened name='RTA Settings %PY%']"
+call tools\package.bat Release %GENERATOR% %TARGET% "-Ax64 -DENABLE_BACKWARDS_COMPATIBILITY=OFF -DENABLE_EXAMPLES=OFF -DENABLE_DOCS=OFF -DENABLE_SWIG=OFF -DENABLE_TEST=OFF -DENABLE_APPS=OFF -DENABLE_PYTHON=OFF -DDISABLE_DYNAMIC_ARRAY=ON -DENABLE_FLOAT_COMPRESSION=OFF -DENABLE_PORTABLE=ON" %PY%
+if %errorlevel% neq 0 exit /b %errorlevel%
+echo "##teamcity[blockClosed name='RTA Settings %PY%']"
\ No newline at end of file
diff --git a/tools/teamcity/pull_request/msvc_2015_py27_dotnet.bat b/tools/teamcity/pull_request/msvc_2015_py27_dotnet.bat
new file mode 100644
index 000000000..9769541da
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2015_py27_dotnet.bat
@@ -0,0 +1,2 @@
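+rem Thin TeamCity wrapper; the intended call chain (assumed from the shared
+rem msvc.bat argument parsing above) is: build number, then generator, then
+rem C# build tool, then Python version, then optional build target.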
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 14 2015" DotNet 2.7
diff --git a/tools/teamcity/pull_request/msvc_2017_py27_dotnet.bat b/tools/teamcity/pull_request/msvc_2017_py27_dotnet.bat
new file mode 100644
index 000000000..4d2520055
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py27_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNet 2.7
diff --git a/tools/teamcity/pull_request/msvc_2017_py27_dotnetstandard.bat b/tools/teamcity/pull_request/msvc_2017_py27_dotnetstandard.bat
new file mode 100644
index 000000000..f0c22a52d
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py27_dotnetstandard.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNetStandard 2.7
diff --git a/tools/teamcity/pull_request/msvc_2017_py34_dotnet.bat b/tools/teamcity/pull_request/msvc_2017_py34_dotnet.bat
new file mode 100644
index 000000000..1cf99b73b
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py34_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNet 3.4 package_wheel
diff --git a/tools/teamcity/pull_request/msvc_2017_py35_dotnet.bat b/tools/teamcity/pull_request/msvc_2017_py35_dotnet.bat
new file mode 100644
index 000000000..3b8895f6c
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py35_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNet 3.5 package_wheel
diff --git a/tools/teamcity/pull_request/msvc_2017_py36_dotnet.bat b/tools/teamcity/pull_request/msvc_2017_py36_dotnet.bat
new file mode 100644
index 000000000..1ebdce559
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py36_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNet 3.6 package_wheel
diff --git a/tools/teamcity/pull_request/msvc_2017_py37_dotnet.bat b/tools/teamcity/pull_request/msvc_2017_py37_dotnet.bat
new file mode 100644
index 000000000..be09fe9cb
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py37_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNet 3.7 package_wheel
diff --git a/tools/teamcity/pull_request/msvc_2017_py38_dotnet.bat b/tools/teamcity/pull_request/msvc_2017_py38_dotnet.bat
new file mode 100644
index 000000000..35a7f6ee1
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2017_py38_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 15 2017" DotNet 3.8 package_wheel
diff --git a/tools/teamcity/pull_request/msvc_2019_py27_dotnet.bat b/tools/teamcity/pull_request/msvc_2019_py27_dotnet.bat
new file mode 100644
index 000000000..777135076
--- /dev/null
+++ b/tools/teamcity/pull_request/msvc_2019_py27_dotnet.bat
@@ -0,0 +1,2 @@
+set build_number=%1%
+call tools\teamcity\pull_request\msvc.bat %build_number% "Visual Studio 16 2019" DotNet 2.7