diff --git a/Analysis_Platform(x64).lnk b/Analysis_Platform(x64).lnk
new file mode 100644
index 0000000..4cc22e0
Binary files /dev/null and b/Analysis_Platform(x64).lnk differ
diff --git a/Analysis_Platform(x86).lnk b/Analysis_Platform(x86).lnk
new file mode 100644
index 0000000..d592df8
Binary files /dev/null and b/Analysis_Platform(x86).lnk differ
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..4f0b560
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,2415 @@
+# License of Analysis Platform + DN7 (Digital Native QC 7 Tools)
+
+The MIT License (MIT)
+
+Copyright (c) 2019-2021 DENSO Corporation, F-IoT DAC Team & Rainbow7 + Bridge7, Tatsunori Kojo, Genta Kikuchi, Sho Takahashi, Takero Arakawa, Le Sy Khanh Duy, Tran Ngoc Tinh, Nguyen Van Hoai, Ho Hoang Tung, Pham Minh Hoang, Tran Thi Kim Tuyen, Toshikuni Shinohara and DENSO contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
+
+
+# Redistributed Content
+The Content includes items that have been sourced from third parties as follows:
+
+## MIT
+
+### js/Chart.bundle.min.js
+---------------------
+
+* Chart.js v2.9.4
+* https://www.chartjs.org
+* (c) 2020 Chart.js Contributors
+
+> licensed under the MIT License
+
+### js/Chart.min.js
+---------------------
+
+* Chart.js v3.3.0
+* https://www.chartjs.org
+* (c) 2021 Chart.js Contributors
+
+> licensed under the MIT License
+
+### css/all.min.css
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+
+### js/all.min.js
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+
+### css/bootstrap-select.min.css
+---------------------
+
+* Bootstrap-select v1.12.4 (http://silviomoreto.github.io/bootstrap-select)
+*
+* Copyright 2013-2017 bootstrap-select
+* Licensed under MIT (https://github.com/silviomoreto/bootstrap-select/blob/master/LICENSE)
+
+### js/bootstrap-select.min.js
+---------------------
+
+* Bootstrap-select v1.12.4 (http://silviomoreto.github.io/bootstrap-select)
+*
+* Copyright 2013-2017 bootstrap-select
+* Licensed under MIT (https://github.com/silviomoreto/bootstrap-select/blob/master/LICENSE)
+
+> licensed under the MIT License
+
+### js/bootstrap-table-filter-control.min.js
+---------------------
+
+* bootstrap-table - An extended table to integrate with some of the most widely used CSS frameworks. (Supports Bootstrap, Semantic UI, Bulma, Material Design, Foundation)
+*
+* @version v1.16.0
+* @homepage https://bootstrap-table.com
+* @author wenzhixin (http://wenzhixin.net.cn/)
+* @license MIT
+
+> licensed under the MIT License
+
+### js/bootstrap-table-locale-all.min.js
+---------------------
+
+* bootstrap-table - An extended table to integrate with some of the most widely used CSS frameworks. (Supports Bootstrap, Semantic UI, Bulma, Material Design, Foundation)
+*
+* @version v1.16.0
+* @homepage https://bootstrap-table.com
+* @author wenzhixin (http://wenzhixin.net.cn/)
+* @license MIT
+
+> licensed under the MIT License
+
+### css/bootstrap-table.min.css
+---------------------
+
+* bootstrap-table - An extended table to integrate with some of the most widely used CSS frameworks. (Supports Bootstrap, Semantic UI, Bulma, Material Design, Foundation)
+*
+* @version v1.16.0
+* @homepage https://bootstrap-table.com
+* @author wenzhixin (http://wenzhixin.net.cn/)
+* @license MIT
+
+> licensed under the MIT License
+
+### js/bootstrap-table.min.js
+---------------------
+
+* bootstrap-table - An extended table to integrate with some of the most widely used CSS frameworks. (Supports Bootstrap, Semantic UI, Bulma, Material Design, Foundation)
+*
+* @version v1.16.0
+* @homepage https://bootstrap-table.com
+* @author wenzhixin (http://wenzhixin.net.cn/)
+* @license MIT
+
+> licensed under the MIT License
+
+### css/bootstrap-theme.css
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+
+> licensed under the MIT License
+
+### css/bootstrap-theme.css.map
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+
+> licensed under the MIT License
+
+### css/bootstrap-theme.min.css
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+
+> licensed under the MIT License
+
+### css/bootstrap-theme.min.css.map
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+
+> licensed under the MIT License
+
+### css/bootstrap.css
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+*
+* normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css
+
+> licensed under the MIT License
+
+### css/bootstrap.css.map
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+*
+* normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css
+
+> licensed under the MIT License
+
+### js/bootstrap.js
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+
+> licensed under the MIT License
+
+### css/bootstrap.min.css
+---------------------
+
+* Bootswatch v4.3.1
+* Homepage: https://bootswatch.com
+* Copyright 2012-2019 Thomas Park
+* Licensed under MIT
+* Based on Bootstrap
+*
+* Bootstrap v4.3.1 (https://getbootstrap.com/)
+* Copyright 2011-2019 The Bootstrap Authors
+* Copyright 2011-2019 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+
+### css/bootstrap.min.css.map
+---------------------
+
+* Bootstrap v3.3.7 (http://getbootstrap.com)
+* Copyright 2011-2016 Twitter, Inc.
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+*
+* normalize.css v3.0.3 | MIT License | github.com/necolas/normalize.css
+
+### js/bootstrap.min.js
+---------------------
+
+* Bootstrap v4.3.1 (https://getbootstrap.com/)
+* Copyright 2011-2019 The Bootstrap Authors (https://github.com/twbs/bootstrap/graphs/contributors)
+* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
+
+> licensed under the MIT License
+
+### js/chartjs-adapter-moment.min.js
+---------------------
+
+* chartjs-adapter-moment v1.0.0
+* https://www.chartjs.org
+* (c) 2021 chartjs-adapter-moment Contributors
+
+> licensed under the MIT License
+
+### js/chartjs-plugin-annotation-latest.min.js
+---------------------
+
+* chartjs-plugin-annotation v1.0.1
+* https://www.chartjs.org/chartjs-plugin-annotation/index
+* (c) 2021 chartjs-plugin-annotation Contributors
+
+> licensed under the MIT License
+
+### js/chartjs-plugin-annotation.min.js
+---------------------
+
+* chartjs-plugin-annotation.js
+* http://chartjs.org/
+* Version: 0.5.7
+*
+* Copyright 2016 Evert Timberg
+* https://github.com/chartjs/Chart.Annotation.js/blob/master/LICENSE.md
+
+> licensed under the MIT License
+
+### js/clipboard.min.js
+---------------------
+
+* clipboard.js v2.0.8
+* https://clipboardjs.com/
+*
+* Licensed MIT © Zeno Rocha
+
+### sigmajs/conrad.js
+---------------------
+
+* conrad.js is a tiny JavaScript jobs scheduler,
+*
+* Version: 0.1.0
+* Sources: http://github.com/jacomyal/conrad.js
+* Doc: http://github.com/jacomyal/conrad.js#readme
+*
+* License:
+* --------
+* Copyright © 2013 Alexis Jacomy, Sciences-Po médialab
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+*
+* The Software is provided "as is", without warranty of any kind, express or
+implied, including but not limited to the warranties of merchantability,
+fitness for a particular purpose and noninfringement. In no event shall the
+authors or copyright holders be liable for any claim, damages or other
+liability, whether in an action of contract, tort or otherwise, arising
+from, out of or in connection with the software or the use or other dealings
+in the Software.
+
+### js/dataTables.fixedHeader.min.js
+---------------------
+
+* Copyright 2009-2021 SpryMedia Ltd.
+*
+* This source file is free software, available under the following license:
+* MIT license - http://datatables.net/license/mit
+*
+* This source file is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+* or FITNESS FOR A PARTICULAR PURPOSE. See the license files for details.
+*
+* For details please refer to: http://www.datatables.net
+* FixedHeader 3.1.9
+* ©2009-2021 SpryMedia Ltd - datatables.net/license
+
+> licensed under the MIT License
+
+### js/datepicker.js
+---------------------
+
+* jQuery UI Datepicker 1.12.1
+* http://jqueryui.com
+*
+* Copyright jQuery Foundation and other contributors
+* http://jquery.org/license
+
+> licensed under the MIT License
+
+
+### date-range-picker/daterangepicker.js
+
+* copyright: Copyright (c) 2012-2019 Dan Grossman. All rights reserved.
+* license: Licensed under the MIT license.
+* See http://www.opensource.org/licenses/mit-license.php
+* website: http://www.daterangepicker.com/
+
+> licensed under the MIT License
+
+### webfonts/fa-brands-400.svg
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+* Created by FontForge 20190112 at Tue Jun 4 15:16:44 2019
+* By Robert Madole
+* Copyright (c) Font Awesome
+
+### webfonts/fa-regular-400.svg
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+* Created by FontForge 20190112 at Tue Jun 4 15:16:44 2019
+* By Robert Madole
+* Copyright (c) Font Awesome
+
+### webfonts/fa-solid-900.svg
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+* Created by FontForge 20190112 at Tue Jun 4 15:16:44 2019
+* By Robert Madole
+* Copyright (c) Font Awesome
+
+### js/html2canvas.min.js
+---------------------
+
+* html2canvas 1.3.2
+* Copyright (c) 2021 Niklas von Hertzen
+* Released under MIT License
+*
+* Copyright (c) Microsoft Corporation.
+*
+* Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted.
+*
+* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/index.html
+---------------------
+
+1. jQuery
+2. jQueryUI (with datepicker and slider widgets)
+3. Timepicker
+
+* Version 1.6.3
+* Last updated on 2016-04-20
+* jQuery Timepicker Addon is currently available for use in all personal or commercial projects under the MIT license.
+
+> licensed under the MIT License
+
+### js/jexcel.js
+---------------------
+
+* Jspreadsheet v4.7.3
+*
+* Website: https://bossanova.uk/jspreadsheet/
+* Description: Create amazing web based spreadsheets.
+*
+* This software is distributed under the MIT License
+*
+* Copyright and license
+* Jspreadsheet is released under the MIT license.
+* The software is registered under UK law. Contact contact@jspreadsheet.com
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/jquery-ui-sliderAccess.js
+---------------------
+
+* jQuery UI Slider Access
+* By: Trent Richardson [http://trentrichardson.com]
+* Version 0.3
+* Last Modified: 10/20/2012
+*
+* Copyright 2011 Trent Richardson
+* Dual licensed under the MIT and GPL licenses.
+* http://trentrichardson.com/Impromptu/GPL-LICENSE.txt
+* http://trentrichardson.com/Impromptu/MIT-LICENSE.txt
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/i18n/jquery-ui-timepicker-addon-i18n.js
+---------------------
+
+* jQuery Timepicker Addon - v1.6.3 - 2016-04-20
+* http://trentrichardson.com/examples/timepicker
+* Copyright (c) 2016 Trent Richardson; Licensed MIT
+*
+* source: src/i18n/jquery-ui-timepicker-af.js
+* Afrikaans translation for the jQuery Timepicker Addon
+Written by Deon Heyns
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/i18n/jquery-ui-timepicker-addon-i18n.min.js
+---------------------
+
+* jQuery Timepicker Addon - v1.6.3 - 2016-04-20
+* http://trentrichardson.com/examples/timepicker
+* Copyright (c) 2016 Trent Richardson; Licensed MIT
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/jquery-ui-timepicker-addon.js
+---------------------
+
+* jQuery Timepicker Addon - v1.6.3 - 2016-04-20
+* http://trentrichardson.com/examples/timepicker
+* Copyright (c) 2016 Trent Richardson; Licensed MIT
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/jquery-ui-timepicker-addon.min.css
+---------------------
+
+* jQuery Timepicker Addon - v1.6.3 - 2016-04-20
+* http://trentrichardson.com/examples/timepicker
+* Copyright (c) 2016 Trent Richardson; Licensed MIT
+
+> licensed under the MIT License
+
+### jquery-ui-timepicker-addon/jquery-ui-timepicker-addon.min.js
+---------------------
+
+* jQuery Timepicker Addon - v1.6.3 - 2016-04-20
+* http://trentrichardson.com/examples/timepicker
+* Copyright (c) 2016 Trent Richardson; Licensed MIT
+
+> licensed under the MIT License
+
+### custom-jquery/jquery-ui.css
+---------------------
+
+* jQuery UI - v1.12.1 - 2019-07-17
+* http://jqueryui.com
+* Includes: core.css, datepicker.css, theme.css
+* To view and modify this theme, visit http://jqueryui.com/themeroller/?scope=&folderName=custom-theme&bgImgOpacityError=&bgImgOpacityHighlight=&bgImgOpacityActive=&bgImgOpacityHover=&bgImgOpacityDefault=&bgImgOpacityContent=&bgImgOpacityHeader=&cornerRadiusShadow=8px&offsetLeftShadow=0px&offsetTopShadow=0px&thicknessShadow=5px&opacityShadow=30&bgImgOpacityShadow=0&bgTextureShadow=flat&bgColorShadow=%23666666&opacityOverlay=30&bgImgOpacityOverlay=0&bgTextureOverlay=flat&bgColorOverlay=%23aaaaaa&iconColorError=%23cc0000&fcError=%235f3f3f&borderColorError=%23f1a899&bgTextureError=flat&bgColorError=%23fddfdf&iconColorHighlight=%23ffffff&fcHighlight=%23ffffff&borderColorHighlight=%23375a7f&bgTextureHighlight=flat&bgColorHighlight=%23375a7f&iconColorActive=%23ffffff&fcActive=%23ffffff&borderColorActive=%23375a7f&bgTextureActive=flat&bgColorActive=%23375a7f&iconColorHover=%23ffffff&fcHover=%23ffffff&borderColorHover=%23375a7f&bgTextureHover=flat&bgColorHover=%23375a7f&iconColorDefault=%23ffffff&fcDefault=%23ffffff&borderColorDefault=%23303030&bgTextureDefault=flat&bgColorDefault=%23303030&iconColorContent=%23ffffff&fcContent=%23ffffff&borderColorContent=%23222222&bgTextureContent=flat&bgColorContent=%23222222&iconColorHeader=%23ffffff&fcHeader=%23fff&borderColorHeader=%23303030&bgTextureHeader=flat&bgColorHeader=%23303030&cornerRadius=3px&fwDefault=normal&fsDefault=1em&ffDefault=Arial%2CHelvetica%2Csans-serif
+* Copyright jQuery Foundation and other contributors; Licensed MIT
+
+### css/jquery-ui.css
+---------------------
+
+* jQuery UI - v1.10.4 - 2014-01-17
+* http://jqueryui.com
+* Includes: jquery.ui.core.css, jquery.ui.accordion.css, jquery.ui.autocomplete.css, jquery.ui.button.css, jquery.ui.datepicker.css, jquery.ui.dialog.css, jquery.ui.menu.css, jquery.ui.progressbar.css, jquery.ui.resizable.css, jquery.ui.selectable.css, jquery.ui.slider.css, jquery.ui.spinner.css, jquery.ui.tabs.css, jquery.ui.tooltip.css, jquery.ui.theme.css
+* To view and modify this theme, visit http://jqueryui.com/themeroller/?ffDefault=Lucida%20Grande%2CLucida%20Sans%2CArial%2Csans-serif&fwDefault=bold&fsDefault=1.1em&cornerRadius=5px&bgColorHeader=5c9ccc&bgTextureHeader=gloss_wave&bgImgOpacityHeader=55&borderColorHeader=4297d7&fcHeader=ffffff&iconColorHeader=d8e7f3&bgColorContent=fcfdfd&bgTextureContent=inset_hard&bgImgOpacityContent=100&borderColorContent=a6c9e2&fcContent=222222&iconColorContent=469bdd&bgColorDefault=dfeffc&bgTextureDefault=glass&bgImgOpacityDefault=85&borderColorDefault=c5dbec&fcDefault=2e6e9e&iconColorDefault=6da8d5&bgColorHover=d0e5f5&bgTextureHover=glass&bgImgOpacityHover=75&borderColorHover=79b7e7&fcHover=1d5987&iconColorHover=217bc0&bgColorActive=f5f8f9&bgTextureActive=inset_hard&bgImgOpacityActive=100&borderColorActive=79b7e7&fcActive=e17009&iconColorActive=f9bd01&bgColorHighlight=fbec88&bgTextureHighlight=flat&bgImgOpacityHighlight=55&borderColorHighlight=fad42e&fcHighlight=363636&iconColorHighlight=2e83ff&bgColorError=fef1ec&bgTextureError=glass&bgImgOpacityError=95&borderColorError=cd0a0a&fcError=cd0a0a&iconColorError=cd0a0a&bgColorOverlay=aaaaaa&bgTextureOverlay=flat&bgImgOpacityOverlay=0&opacityOverlay=30&bgColorShadow=aaaaaa&bgTextureShadow=flat&bgImgOpacityShadow=0&opacityShadow=30&thicknessShadow=8px&offsetTopShadow=-8px&offsetLeftShadow=-8px&cornerRadiusShadow=8px
+* Copyright 2014 jQuery Foundation and other contributors; Licensed MIT
+
+### js/jquery-ui.js
+---------------------
+
+* jQuery UI - v1.12.1 - 2019-07-14
+* http://jqueryui.com
+* Includes: widget.js, position.js, data.js, disable-selection.js, focusable.js, form-reset-mixin.js, jquery-1-7.js, keycode.js, labels.js, scroll-parent.js, tabbable.js, unique-id.js, widgets/draggable.js, widgets/droppable.js, widgets/resizable.js, widgets/selectable.js, widgets/sortable.js, widgets/accordion.js, widgets/autocomplete.js, widgets/button.js, widgets/checkboxradio.js, widgets/controlgroup.js, widgets/datepicker.js, widgets/dialog.js, widgets/menu.js, widgets/mouse.js, widgets/progressbar.js, widgets/selectmenu.js, widgets/slider.js, widgets/spinner.js, widgets/tabs.js, widgets/tooltip.js, effect.js, effects/effect-blind.js, effects/effect-bounce.js, effects/effect-clip.js, effects/effect-drop.js, effects/effect-explode.js, effects/effect-fade.js, effects/effect-fold.js, effects/effect-highlight.js, effects/effect-puff.js, effects/effect-pulsate.js, effects/effect-scale.js, effects/effect-shake.js, effects/effect-size.js, effects/effect-slide.js, effects/effect-transfer.js
+* Copyright jQuery Foundation and other contributors; Licensed MIT
+*
+* jQuery UI Widget 1.12.1
+* http://jqueryui.com
+*
+* Copyright jQuery Foundation and other contributors
+* http://jquery.org/license
+
+> licensed under the MIT License
+
+
+### custom-jquery/jquery-ui.js
+---------------------
+
+* jQuery UI - v1.12.1 - 2019-07-17
+* http://jqueryui.com
+* Includes: keycode.js, widgets/datepicker.js
+* Copyright jQuery Foundation and other contributors; Licensed MIT
+*
+* jQuery UI Keycode 1.12.1
+* http://jqueryui.com
+*
+* Copyright jQuery Foundation and other contributors
+* http://jquery.org/license
+
+> licensed under the MIT License
+
+
+### js/jquery-ui.min.js
+---------------------
+
+* jQuery UI - v1.12.0 - 2016-08-17
+* http://jqueryui.com
+* Includes: widget.js, data.js, scroll-parent.js, widgets/sortable.js, widgets/mouse.js
+* Copyright jQuery Foundation and other contributors; Licensed MIT
+
+> licensed under the MIT License
+
+### js-datatables/images/Sorting icons.psd
+
+* Sorting icons.psd
+* Copyright (c) 1998 Hewlett-Packard Company
+
+### js-datatables/lib/jquery.dataTables.min.js
+---------------------
+
+* Copyright 2008-2019 SpryMedia Ltd.
+*
+* This source file is free software, available under the following license:
+* MIT license - http://datatables.net/license
+*
+* This source file is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+* or FITNESS FOR A PARTICULAR PURPOSE. See the license files for details.
+*
+* For details please refer to: http://www.datatables.net
+* DataTables 1.10.20
+* ©2008-2019 SpryMedia Ltd - datatables.net/license
+
+> licensed under the MIT License
+
+### js/jquery.dataTables.min.js
+---------------------
+
+* Copyright 2008-2020 SpryMedia Ltd.
+*
+* This source file is free software, available under the following license:
+* MIT license - http://datatables.net/license
+*
+* This source file is distributed in the hope that it will be useful, but
+* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+* or FITNESS FOR A PARTICULAR PURPOSE. See the license files for details.
+*
+* For details please refer to: http://www.datatables.net
+* DataTables 1.10.21
+* ©2008-2020 SpryMedia Ltd - datatables.net/license
+
+> licensed under the MIT License
+
+### js/jquery.floatThead.js
+---------------------
+
+* @preserve jQuery.floatThead 2.0.3 - http://mkoryak.github.io/floatThead/
+* Copyright (c) 2012 - 2017 Misha Koryak
+* @license MIT
+*
+* @author Misha Koryak
+* @projectDescription lock a table header in place while scrolling - without breaking styles or events bound to the header
+*
+* Dependencies:
+* jquery 1.9.0 + [required] OR jquery 1.7.0 + jquery UI core
+*
+* http://mkoryak.github.io/floatThead/
+
+> licensed under the MIT License
+
+### js/jquery.js
+---------------------
+
+* jQuery JavaScript Library v3.4.1
+* https://jquery.com/
+*
+* Includes Sizzle.js
+* https://sizzlejs.com/
+*
+* Copyright JS Foundation and other contributors
+* https://jquery.org/license
+*
+* Date: 2019-05-01T21:04Z
+
+> licensed under the MIT License
+
+### js/jquery.ui.datepicker-ja.min.js
+---------------------
+
+* jQuery UI - v1.10.4 - 2014-01-17
+* http://jqueryui.com
+* Copyright 2014 jQuery Foundation and other contributors; Licensed MIT
+
+> licensed under the MIT License
+
+### css/jsuites.css
+---------------------
+
+* (c) jSuites Javascript Web Components
+*
+* Website: https://jsuites.net
+* Description: Create amazing web based applications.
+*
+* MIT License
+*
+* Copyright and license
+* This software is distributed as MIT. Contact: contact@jsuites.net
+
+> licensed under the MIT License
+
+### js/jsuites.js
+---------------------
+
+* (c) jSuites Javascript Web Components
+*
+* Website: https://jsuites.net
+* Description: Create amazing web based applications.
+*
+* MIT License
+*
+* Copyright and license
+* This software is distributed as MIT. Contact: contact@jsuites.net
+
+> licensed under the MIT License
+
+### js/pagination.min.js
+
+* Copyright 2014-2100, superRaytin
+
+> licensed under the MIT License
+
+### js/plotly.min.js
+
+* Copyright 2012-2022, Plotly, Inc. All rights reserved. Licensed under the MIT license
+*
+* (c) Kyle Simpson MIT License: http://getify.mit-license.org
+*
+* (c) Sindre Sorhus license MIT
+*
+* copyright 2016 Sean Connelly (@voidqk), http://syntheti.cc license MIT preserve Project
+* Home: https://github.com/voidqk/polybooljs
+*
+* copyright OpenStreetMap
+* https://www.openstreetmap.org/copyright
+*
+* Copyright (c) 2014-2015, Jon Schlinkert. Licensed under the MIT License.
+
+> licensed under the MIT License
+
+### js/plotly-latest.min.js
+
+* plotly.js v1.58.4
+* Copyright 2012-2020, Plotly, Inc.
+* All rights reserved.
+* Licensed under the MIT license
+*
+* @author Feross Aboukhadijeh
+* @license MIT
+
+> licensed under the MIT License
+
+### popper/umd/popper-utils.js
+
+* @fileOverview Kickass library to create and place poppers near their reference elements.
+* @version 1.14.7
+* @license
+* Copyright (c) 2016 Federico Zivolo and contributors
+
+> licensed under the MIT License
+
+### popper/umd/popper-utils.min.js
+---------------------
+
+* Copyright (C) Federico Zivolo 2019
+* Distributed under the MIT License (license terms are at http://opensource.org/licenses/MIT).
+
+> licensed under the MIT License
+
+### popper/umd/popper.js
+---------------------
+
+* @fileOverview Kickass library to create and place poppers near their reference elements.
+* @version 1.14.7
+* @license
+* Copyright (c) 2016 Federico Zivolo and contributors
+
+> licensed under the MIT License
+
+### popper/umd/popper.min.js
+---------------------
+
+* Copyright (C) Federico Zivolo 2019
+* Distributed under the MIT License (license terms are at http://opensource.org/licenses/MIT).
+
+> licensed under the MIT License
+
+### sigmajs/build/sigma.min.js
+---------------------
+
+* sigma.js - A JavaScript library dedicated to graph drawing. - Version: 1.2.1
+* Author: Alexis Jacomy, Sciences-Po Médialab - License: MIT
+
+> licensed under the MIT License
+
+### sigmajs/utils/sigma.polyfills.js
+---------------------
+
+* http://paulirish.com/2011/requestanimationframe-for-smart-animating/
+* http://my.opera.com/emoller/blog/2011/12/20/requestanimationframe-for-smart-er-animating
+* requestAnimationFrame polyfill by Erik Möller.
+* fixes from Paul Irish and Tino Zijdel
+* MIT license
+
+> licensed under the MIT License
+
+### sigmajs/build/sigma.require.js
+---------------------
+
+* conrad.js is a tiny JavaScript jobs scheduler,
+*
+* Version: 0.1.0
+* Sources: http://github.com/jacomyal/conrad.js
+* Doc: http://github.com/jacomyal/conrad.js#readme
+*
+* License:
+* --------
+* Copyright © 2013 Alexis Jacomy, Sciences-Po médialab
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+*
+* The Software is provided "as is", without warranty of any kind, express or
+implied, including but not limited to the warranties of merchantability,
+fitness for a particular purpose and noninfringement. In no event shall the
+authors or copyright holders be liable for any claim, damages or other
+liability, whether in an action of contract, tort or otherwise, arising
+from, out of or in connection with the software or the use or other dealings
+in the Software.
+*
+* http://paulirish.com/2011/requestanimationframe-for-smart-animating/
+* http://my.opera.com/emoller/blog/2011/12/20/requestanimationframe-for-smart-er-animating
+* requestAnimationFrame polyfill by Erik Möller.
+* fixes from Paul Irish and Tino Zijdel
+* MIT license
+
+> licensed under the MIT License
+
+### js/socket.io.js
+---------------------
+
+* Socket.IO v2.2.0
+* (c) 2014-2018 Guillermo Rauch
+
+> licensed under the MIT License
+
+### vis/vis-network.min.js
+---------------------
+
+* vis.js
+* https://github.com/almende/vis
+*
+* A dynamic, browser-based visualization library.
+*
+* @version 4.19.0
+* @date 2017-03-18
+*
+* @license
+* Copyright (C) 2011-2017 Almende B.V, http://almende.com
+*
+* Vis.js is dual licensed under both
+*
+* * The Apache 2.0 License
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* and
+*
+* * The MIT License
+* http://opensource.org/licenses/MIT
+*
+* Vis.js may be distributed under either license.
+
+> licensed under the MIT License
+
+## OFL-1.1
+
+### css/all.min.css
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+
+### js/all.min.js
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+
+> licensed under the MIT License
+
+### webfonts/fa-brands-400.svg
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+*
+* Created by FontForge 20190112 at Tue Jun 4 15:16:44 2019
+* By Robert Madole
+* Copyright (c) Font Awesome
+
+### webfonts/fa-regular-400.svg
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+*
+* Created by FontForge 20190112 at Tue Jun 4 15:16:44 2019
+* By Robert Madole
+* Copyright (c) Font Awesome
+
+### webfonts/fa-solid-900.svg
+---------------------
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+*
+* Created by FontForge 20190112 at Tue Jun 4 15:16:44 2019
+* By Robert Madole
+* Copyright (c) Font Awesome
+
+### webfonts/fa-brands-400.ttf
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+* Copyright (c) Font Awesome
+
+### webfonts/fa-regular-400.ttf
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+* Copyright (c) Font Awesome
+
+### webfonts/fa-solid-900.ttf
+
+* Font Awesome Free 5.9.0 by @fontawesome - https://fontawesome.com
+* License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License)
+* Copyright (c) Font Awesome
+
+## See-URL
+
+### js/dataTables.bootstrap4.min.js
+---------------------
+
+* DataTables Bootstrap 4 integration
+* ©2011-2017 SpryMedia Ltd - datatables.net/license
+*
+* DataTables
+* DataTables 1.10 and newer is available under the terms MIT license. This places almost no
+ restrictions on what you can do with DataTables, and you are free to use it in any way (including commercial projects), as long as the copyright header is left intact. Please see the MIT license page for the complete license.
+* DataTables 1.9 and earlier
+* DataTables 1.9 (and previous versions) were made available under both the GPL v2 license and the BSD (3-point) style license. These licenses still apply to those software releases, but newer versions use the MIT license (see above).
+
+> licensed under the MIT License
+
+### js/jquery.min.js
+---------------------
+
+* jQuery v1.9.1 | (c) 2005, 2012 jQuery Foundation, Inc. | jquery.org/license
+* @ sourceMappingURL=jquery.min.map
+
+### js/lodash.min.js
+---------------------
+
+* @license
+* Lodash lodash.com/license | Underscore.js 1.8.3 underscorejs.org/LICENSE
+* Copyright (c) 2009-2021 Jeremy Ashkenas, Julian Gonggrijp, and DocumentCloud and Investigative Reporters & Editors
+
+> licensed under the MIT License
+
+### js/moment-with-locales.js
+---------------------
+
+* Copyright (c) JS Foundation and other contributors
+
+> licensed under the MIT License
+
+## Dual-license
+
+### jquery-ui-timepicker-addon/jquery-ui-sliderAccess.js
+---------------------
+
+* jQuery UI Slider Access
+* By: Trent Richardson [http://trentrichardson.com]
+* Version 0.3
+* Last Modified: 10/20/2012
+*
+* Copyright 2011 Trent Richardson
+* Dual licensed under the MIT and GPL licenses.
+* http://trentrichardson.com/Impromptu/GPL-LICENSE.txt
+* http://trentrichardson.com/Impromptu/MIT-LICENSE.txt
+
+> licensed under the MIT License
+
+### vis/vis-network.min.js
+---------------------
+
+* vis.js
+* https://github.com/almende/vis
+*
+* A dynamic, browser-based visualization library.
+*
+* @version 4.19.0
+* @date 2017-03-18
+*
+* @license
+* Copyright (C) 2011-2017 Almende B.V, http://almende.com
+*
+* Vis.js is dual licensed under both
+*
+* * The Apache 2.0 License
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* and
+*
+* * The MIT License
+* http://opensource.org/licenses/MIT
+*
+* Vis.js may be distributed under either license.
+
+> licensed under the MIT License
+
+## Apache-2.0
+
+### js/gtag.js
+---------------------
+
+* Copyright 2012 Google Inc. All rights reserved.
+
+> licensed under the Apache-2.0 License
+
+### vis/vis-network.min.js
+---------------------
+
+* vis.js
+* https://github.com/almende/vis
+*
+* A dynamic, browser-based visualization library.
+*
+* @version 4.19.0
+* @date 2017-03-18
+*
+* @license
+* Copyright (C) 2011-2017 Almende B.V, http://almende.com
+*
+* Vis.js is dual licensed under both
+*
+* * The Apache 2.0 License
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* and
+*
+* * The MIT License
+* http://opensource.org/licenses/MIT
+*
+* Vis.js may be distributed under either license.
+
+> licensed under the MIT License
+
+## Public-domain-ref
+
+### sigmajs/utils/sigma.polyfills.js
+---------------------
+
+* http://paulirish.com/2011/requestanimationframe-for-smart-animating/
+* http://my.opera.com/emoller/blog/2011/12/20/requestanimationframe-for-smart-er-animating
+* requestAnimationFrame polyfill by Erik Möller.
+* fixes from Paul Irish and Tino Zijdel
+* MIT license
+
+> licensed under the MIT License
+
+## CC0-1.0
+
+### js/sizeof.compressed.js
+---------------------
+
+* sizeof.js
+*
+* A function to calculate the approximate memory usage of objects
+*
+* Created by Kate Morley - http://code.iamkate.com/ - and released under the terms
+* of the CC0 1.0 Universal legal code:
+*
+* http://creativecommons.org/publicdomain/zero/1.0/legalcode
+
+## 0BSD
+
+### js/html2canvas.min.js
+---------------------
+
+* html2canvas 1.3.2
+* Copyright (c) 2021 Niklas von Hertzen
+* Released under MIT License
+*
+* Copyright (c) Microsoft Corporation.
+*
+* Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted.
+*
+* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+> licensed under the MIT License
+
+## Python Related
+
+### APScheduler
+
+* Version: 3.9.1
+* Copyright (c) 2009 Alex Grönholm
+
+> licensed under the MIT License
+
+### Babel
+
+* Version: 2.10.1
+* https://babel.pocoo.org/en/latest/license.html#babel-license
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### Brotli
+
+* Version: 1.0.9
+* Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+> licensed under the MIT License
+
+### Flask
+
+* Version: 1.1.1
+* https://flask.palletsprojects.com/en/1.1.x/license/
+* This license applies to all files in the Flask repository and source distribution. This includes Flask’s source code, the examples, and tests, as well as the documentation.
+* Copyright 2010 Pallets
+*
+* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### Flask-APScheduler
+
+* Version: 1.11.0
+* Copyright 2015 Vinicius Chiele
+* http://www.apache.org/licenses/LICENSE-2.0
+
+> licensed under the Apache License Version 2.0
+
+### Flask-Babel
+
+* Version: 1.0.0
+* Copyright (c) 2010 by Armin Ronacher.
+* Some rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### Flask-Compress
+
+* Version: 1.10.1
+* Copyright (c) 2013-2017 William Fagan
+
+> licensed under the MIT License
+
+### Flask-Migrate
+
+* Version: 2.5.2
+* Copyright (c) 2013 Miguel Grinberg
+
+> licensed under the MIT License
+
+### Flask-SQLAlchemy
+
+* Version: 2.4.1
+* Copyright 2010 Pallets
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### Flask-WTF
+
+* Version: 0.14.3
+* Copyright 2010 WTForms
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### Jinja2
+
+* Version: 2.11.1
+* Copyright 2007 Pallets
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### Mako
+
+* Version: 1.2.0
+* Copyright 2006-2020 the Mako authors and contributors.
+
+> licensed under the MIT License
+
+### MarkupSafe
+
+* Version: 1.1.1
+* Copyright 2010 Pallets
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### PyMySQL
+
+* Version: 0.9.3
+* Copyright (c) 2010, 2013 PyMySQL contributors
+
+> licensed under the MIT License
+
+### SQLAlchemy
+
+* Version: 1.3.24
+* Copyright 2005-2021 SQLAlchemy authors and contributors.
+
+> licensed under the MIT License
+
+### WTForms
+
+* Version: 2.3.3
+* Copyright 2008 WTForms
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### WTForms-JSON
+
+* Version: 0.3.5
+* Copyright (c) 2012-2014, Konsta Vesterinen
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### Werkzeug
+
+* Version: 1.0.0
+* Copyright 2007 Pallets
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### alembic
+
+* Version: 1.4.3
+* TM & © 2009-2015 Lucasfilm Entertainment Company Ltd. or Lucasfilm Ltd.
+* All rights reserved.
+*
+* Industrial Light & Magic, ILM and the Bulb and Gear design logo are all
+registered trademarks or service marks of Lucasfilm Ltd.
+*
+* © 2009-2015 Sony Pictures Imageworks Inc. All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of Industrial Light & Magic nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+* ------------------------------------------------------------------------
+* ALEMBIC ATTACHMENT A —
+* REQUIRED NOTICES FOR DISTRIBUTION
+*
+* The Alembic Software is distributed along with certain third party
+components licensed under various open source software licenses ("Open
+Source Components"). In addition to the warranty disclaimers contained
+in the open source licenses found below, Industrial Light & Magic, a
+division of Lucasfilm Entertainment Company Ltd. ("ILM") makes the
+following disclaimers regarding the Open Source Components on behalf of
+itself, the copyright holders, contributors, and licensors of such Open
+Source Components:
+* TO THE FULLEST EXTENT PERMITTED UNDER APPLICABLE LAW, THE OPEN SOURCE
+COMPONENTS ARE PROVIDED BY THE COPYRIGHT HOLDERS, CONTRIBUTORS,
+LICENSORS, AND ILM "AS IS" AND ANY REPRESENTATIONS OR WARRANTIES OF ANY
+KIND, WHETHER ORAL OR WRITTEN, WHETHER EXPRESS, IMPLIED, OR ARISING BY
+STATUTE, CUSTOM, COURSE OF DEALING, OR TRADE USAGE, INCLUDING WITHOUT
+LIMITATION THE IMPLIED WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR
+A PARTICULAR PURPOSE, AND NON-INFRINGEMENT, ARE DISCLAIMED. IN NO EVENT
+WILL THE COPYRIGHT OWNER, CONTRIBUTORS, LICENSORS, OR ILM AND/OR ITS
+AFFILIATES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION), HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THE OPEN
+SOURCE COMPONENTS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* Boost C++ Libraries
+* ------------------------------------------------------------------------
+* Boost Software License – Version 1.0 August 17th, 2003 Permission is
+hereby granted, free of charge, to any person or organization obtaining
+a copy of the software and accompanying documentation covered by this
+license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of
+the Software, and to permit third-parties to whom the Software is
+furnished to do so, all subject to the following:
+*
+* The copyright notices in the Software and this entire statement,
+including the above license grant, this restriction and the following
+disclaimer, must be included in all copies of the Software, in whole or
+in part, and all derivative works of the Software, unless such copies or
+derivative works are solely in the form of machine-executable object
+code generated by a source language processor.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
+NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE
+DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY,
+WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+### backports.zoneinfo
+
+* Version: 0.2.1
+* Copyright (c) 2020, Paul Ganssle (Google)
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+
+> licensed under the Apache License Version 2.0
+
+### cffi
+
+* Version: 1.15.0
+* Copyright (C) 2005-2007, James Bielman
+
+> licensed under the MIT License
+
+### chardet
+
+* Version: 3.0.4
+* Copyright (C) 2006, 2007, 2008 Mark Pilgrim
+*
+* This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+02110-1301 USA
+*
+* -----------------------------------------------------------
+* The Original Code is mozilla.org code.
+*
+* The Initial Developer of the Original Code is
+Netscape Communications Corporation.
+Portions created by the Initial Developer are Copyright (C) 1998, 2005
+the Initial Developer. All Rights Reserved.
+*
+* Contributor(s):
+* Mark Pilgrim - port to Python
+*
+* This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+02110-1301 USA
+*
+* -----------------------------------------------------------
+* The Universal Encoding Detector documentation is copyright (C) 2006-2009 Mark Pilgrim.
+* All rights reserved.
+*
+* Redistribution and use in source (XML DocBook) and "compiled" forms (SGML,
+HTML, PDF, PostScript, RTF and so forth) with or without modification, are
+permitted provided that the following conditions are met: Redistributions of
+source code (XML DocBook) must retain the above copyright notice, this list of
+conditions and the following disclaimer unmodified. Redistributions in
+compiled form (transformed to other DTDs, converted to PDF, PostScript, RTF and
+other formats) must reproduce the above copyright notice, this list of
+conditions and the following disclaimer in the documentation and/or other
+materials provided with the distribution.
+*
+* THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS DOCUMENTATION, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+### click
+
+* Version: 8.1.3
+* Copyright 2014 Pallets
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### colorama
+
+* Version: 0.4.4
+* Copyright (c) 2010 Jonathan Hartley
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of the copyright holders, nor those of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### cryptography
+
+* Version: 2.8
+* Copyright and licensing of the Python Cryptography Toolkit ("PyCrypto"):
+*
+* Previously, the copyright and/or licensing status of the Python
+Cryptography Toolkit ("PyCrypto") had been somewhat ambiguous. The
+original intention of Andrew M. Kuchling and other contributors has
+been to dedicate PyCrypto to the public domain, but that intention was
+not necessarily made clear in the original disclaimer (see
+LEGAL/copy/LICENSE.orig).
+*
+* Additionally, some files within PyCrypto had specified their own
+licenses that differed from the PyCrypto license itself. For example,
+the original RIPEMD.c module simply had a copyright statement and
+warranty disclaimer, without clearly specifying any license terms.
+(An updated version on the author's website came with a license that
+contained a GPL-incompatible advertising clause.)
+*
+* To rectify this situation for PyCrypto 2.1, the following steps have
+been taken:
+* 1. Obtaining explicit permission from the original contributors to dedicate their contributions to the public domain if they have not already done so. (See the "LEGAL/copy/stmts" directory for contributors' statements.)
+* 2. Replacing some modules with clearly-licensed code from other sources (e.g. the DES and DES3 modules were replaced with new ones
+ based on Tom St. Denis's public-domain LibTomCrypt library.)
+* 3. Replacing some modules with code written from scratch (e.g. the RIPEMD and Blowfish modules were re-implemented from their respective algorithm specifications without reference to the old implementations).
+* 4. Removing some modules altogether without replacing them.
+*
+* To the best of our knowledge, with the exceptions noted below or
+within the files themselves, the files that constitute PyCrypto are in
+the public domain. Most are distributed with the following notice:
+*
+* The contents of this file are dedicated to the public domain. To
+the extent that dedication to the public domain is not available,
+everyone is granted a worldwide, perpetual, royalty-free,
+non-exclusive license to exercise all rights associated with the
+contents of this file for any purpose whatsoever.
+No rights are reserved.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+* Exceptions:
+*
+* - Portions of HMAC.py and setup.py are derived from Python 2.2, and are therefore Copyright (c) 2001, 2002, 2003 Python Software Foundation (All Rights Reserved). They are licensed by the PSF under the terms of the Python 2.2 license. (See the file LEGAL/copy/LICENSE.python-2.2 for details.)
+* - The various GNU autotools (autoconf, automake, aclocal, etc.) are used during the build process. This includes macros from autoconf-archive, which are located in the m4/ directory. As is customary, some files from the GNU autotools are included in the source tree (in the root directory, and in the build-aux/ directory). These files are merely part of the build process, and are not included in binary builds of the software.
+*
+* EXPORT RESTRICTIONS:
+*
+* Note that the export or re-export of cryptographic software and/or
+source code may be subject to regulation in your jurisdiction.
+
+### cutlet
+
+* Version: 0.1.19
+* Copyright (c) 2020 Paul O'Leary McCann
+
+> licensed under the MIT License
+
+### cx-Oracle
+
+* Version: 7.3.0
+* LICENSE AGREEMENT FOR CX_ORACLE
+*
+* Copyright 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+*
+* Portions Copyright 2007-2015, Anthony Tuininga. All rights reserved.
+*
+* Portions Copyright 2001-2007, Computronix (Canada) Ltd., Edmonton, Alberta,
+Canada. All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the disclaimer that follows.
+* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* 3. Neither the names of the copyright holders nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+*AS IS* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* Computronix is a registered trademark of Computronix (Canada) Ltd.
+
+### detect-delimiter
+
+* Version: 0.1.1
+* Copyright (c) 2015 Tim Ojo
+
+> licensed under the MIT License
+
+### flask-marshmallow
+
+* Version: 0.14.0
+* Copyright 2014-2020 Steven Loria and contributors
+
+> licensed under the MIT License
+
+### fugashi
+
+* Version: 1.1.2
+* Copyright (c) 2019 Paul O'Leary McCann
+
+> licensed under the MIT License
+
+### group-lasso
+
+* Version: 1.5.0
+* Copyright (c) 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+limitations under the License.
+
+> licensed under the Apache License Version 2.0
+
+### importlib-metadata
+
+* Version: 4.11.4
+* Copyright (c) 2015-2019, conda-forge
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### itsdangerous
+
+* Version: 1.1.0
+* Copyright 2011 Pallets
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### jaconv
+
+* Version: 0.2.4
+* Copyright (c) 2014 Yukino Ikegami
+
+> licensed under the MIT License
+
+### joblib
+
+* Version: 1.1.0
+* Copyright (c) 2008-2021, The joblib developers.
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### loguru
+
+* Version: 0.5.1
+* Copyright (c) 2017
+
+> licensed under the MIT License
+
+### markdown2
+
+* Version: 2.3.10
+* This implementation of Markdown is licensed under the MIT License:
+*
+* The MIT License
+*
+* Copyright (c) 2012 Trent Mick
+* Copyright (c) 2010 ActiveState Software Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to permit
+persons to whom the Software is furnished to do so, subject to the
+following conditions:
+*
+* The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
+NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+*
+*
+* All files in a *source package* of markdown2 (i.e. those available on
+pypi.python.org and the Google Code project "downloads" page) are under the
+MIT license. However, in the *subversion repository* there are some files
+(used for performance and testing purposes) that are under different licenses
+as follows:
+* - perf/recipes.pprint
+* - - Python License. This file includes a number of real-world examples of Markdown from the ActiveState Python Cookbook, used for doing some performance testing of markdown2.py.
+* - test/php-markdown-cases/...
+* - test/php-markdown-extra-cases/...
+* - - GPL. These are from the MDTest package announced here:
+* - - http://six.pairlist.net/pipermail/markdown-discuss/2007-July/000674.html
+* - test/markdown.py
+* - - GPL 2 or BSD. A copy (currently old) of Python-Markdown -- the other Python Markdown implementation.
+* - test/markdown.php
+* - - BSD-style. This is PHP Markdown (http://michelf.com/projects/php-markdown/).
+* - test/Markdown.pl: BSD-style
+* - - A copy of Perl Markdown (http://daringfireball.net/projects/markdown/).
+
+> licensed under the MIT License
+
+### marshmallow
+
+* Version: 3.9.1
+* Copyright 2021 Steven Loria and contributors
+
+> licensed under the MIT License
+
+### marshmallow-sqlalchemy
+
+* Version: 0.28.0
+* Copyright 2015-2022 Steven Loria and contributors
+
+> licensed under the MIT License
+
+### mecab-python3
+
+* Version: 1.0.5
+* MeCab is copyrighted free software by Taku Kudo and
+Nippon Telegraph and Telephone Corporation, and is released under
+any of the GPL (see the file GPL), the LGPL (see the file LGPL), or the
+BSD License (see the file BSD).
+
+### mojimoji
+
+* Version: 0.0.12
+* Copyright 2013 Studio Ousia
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+limitations under the License.
+
+> licensed under the Apache License Version 2.0
+
+### numpy
+
+* Version: 1.19.3
+* Copyright (c) 2005-2022, NumPy Developers.
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of the NumPy Developers nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### pandas
+
+* Version: 1.2.5
+* BSD 3-Clause License
+*
+* Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+* All rights reserved.
+*
+* Copyright (c) 2011-2022, Open source contributors.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### psycopg2
+
+* Version: 2.8.4
+* psycopg2 and the LGPL
+* ---------------------
+* psycopg2 is free software: you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+*
+* psycopg2 is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+*
+* In addition, as a special exception, the copyright holders give
+permission to link this program with the OpenSSL library (or with
+modified versions of OpenSSL that use the same license as OpenSSL),
+and distribute linked combinations including the two.
+*
+* You must obey the GNU Lesser General Public License in all respects for
+all of the code used other than OpenSSL. If you modify file(s) with this
+exception, you may extend this exception to your version of the file(s),
+but you are not obligated to do so. If you do not wish to do so, delete
+this exception statement from your version. If you delete this exception
+statement from all source files in the program, then also delete it here.
+*
+* You should have received a copy of the GNU Lesser General Public License
+along with psycopg2 (see the doc/ directory.)
+* If not, see <https://www.gnu.org/licenses/>.
+*
+*
+* Alternative licenses
+* --------------------
+* The following BSD-like license applies (at your option) to the files following
+the pattern ``psycopg/adapter*.{h,c}`` and ``psycopg/microprotocol*.{h,c}``:
+*
+* Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+* 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+* 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+* 3. This notice may not be removed or altered from any source distribution.
+
+### pycparser
+
+* Version: 2.21
+* pycparser -- A C parser in Python
+*
+* Copyright (c) 2008-2020, Eli Bendersky
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of Eli Bendersky nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### pymssql
+
+* Version: 2.1.4
+* Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+> licensed under the GNU Lesser General Public License v2.1
+
+### python-dateutil
+
+* Version: 2.8.2
+* Copyright 2017- Paul Ganssle
+* Copyright 2017- dateutil contributors (see AUTHORS file)
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+* http://www.apache.org/licenses/LICENSE-2.0
+* Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+limitations under the License.
+*
+* The above license applies to all contributions after 2017-12-01, as well as
+all contributions that have been re-licensed (see AUTHORS file for the list of
+contributors who have re-licensed their code).
+* --------------------------------------------------------------------------------
+* dateutil - Extensions to the standard Python datetime module.
+*
+* Copyright (c) 2003-2011 - Gustavo Niemeyer
+* Copyright (c) 2012-2014 - Tomi Pieviläinen
+* Copyright (c) 2014-2016 - Yaron de Leeuw
+* Copyright (c) 2015- - Paul Ganssle
+* Copyright (c) 2015- - dateutil contributors (see AUTHORS file)
+*
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* The above BSD License Applies to all code, even that also covered by Apache 2.0.
+
+### python-editor
+
+* Version: 1.0.4
+* Copyright 2020 University of Helsinki
+
+> licensed under the Apache License Version 2.0
+
+### pytz
+
+* Version: 2021.3
+* Copyright (c) 2003-2005 Stuart Bishop
+
+> licensed under the MIT License
+
+### pytz-deprecation-shim
+
+* Version: 0.1.0.post0
+* Apache Software License 2.0
+*
+* Copyright (c) 2020, Paul Ganssle (Google)
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+limitations under the License.
+
+> licensed under the Apache License Version 2.0
+
+### ruamel.yaml
+
+* Version: 0.16.5
+* Copyright (c) 2014-2021 Anthon van der Neut, Ruamel bvba
+
+> licensed under the MIT License
+
+### ruamel.yaml.clib
+
+* Version: 0.2.6
+* Copyright (c) 2019-2021 Anthon van der Neut, Ruamel bvba
+
+> licensed under the MIT License
+
+### scikit-learn
+
+* Version: 0.24.2
+* BSD 3-Clause License
+*
+* Copyright (c) 2007-2021 The scikit-learn developers.
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### scipy
+
+* Version: 1.4.1
+* Copyright (c) 2001-2002 Enthought, Inc. 2003-2022, SciPy Developers.
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### simplejson
+
+* Version: 3.17.6
+* simplejson is dual-licensed software. It is available under the terms
+of the MIT license, or the Academic Free License version 2.1. The full
+text of each license agreement is included below. This code is also
+licensed to the Python Software Foundation (PSF) under a Contributor
+Agreement.
+*
+* MIT License
+* ===========
+*
+* Copyright (c) 2006 Bob Ippolito
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+* Academic Free License v. 2.1
+* ============================
+* Copyright (c) 2006 Bob Ippolito. All rights reserved.
+*
+* This Academic Free License (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following notice immediately following the copyright notice for the Original Work:
+*
+* Licensed under the Academic Free License version 2.1
+*
+* 1) Grant of Copyright License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license to do the following:
+* * * a) to reproduce the Original Work in copies;
+* * * b) to prepare derivative works ("Derivative Works") based upon the Original Work;
+* * * c) to distribute copies of the Original Work and Derivative Works to the public;
+* * * d) to perform the Original Work publicly; and
+* * * e) to display the Original Work publicly.
+* 2) Grant of Patent License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, to make, use, sell and offer for sale the Original Work and Derivative Works.
+* 3) Grant of Source Code License. The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor hereby agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work, and by publishing the address of that information repository in a notice immediately following the copyright notice that applies to the Original Work.
+* 4) Exclusions From License Grant. Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior written permission of the Licensor. Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor except as expressly stated herein. No patent license is granted to make, use, sell or offer to sell embodiments of any patent claims other than the licensed claims defined in Section 2. No right is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any Original Work that Licensor otherwise would have a right to license.
+* 5) This section intentionally omitted.
+* 6) Attribution Rights. You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work.
+* 7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately proceeding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of NON-INFRINGEMENT, MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. No license to Original Work is granted hereunder except under this disclaimer.
+* 8) Limitation of Liability. Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to any person for any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to liability for death or personal injury resulting from Licensor's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You.
+* 9) Acceptance and Termination. If You distribute copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. Nothing else but this License (or another written agreement between Licensor and You) grants You permission to create Derivative Works based upon the Original Work or to exercise any of the rights granted in Section 1 herein, and any attempt to do so except under the terms of this License (or another written agreement between Licensor and You) is expressly prohibited by U.S. copyright law, the equivalent laws of other countries, and by international treaty. Therefore, by exercising any of the rights granted to You in Section 1 herein, You indicate Your acceptance of this License and all of its terms and conditions.
+* 10) Termination for Patent Action. This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware.
+* 11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of the U.S. Copyright Act, 17 U.S.C. § 101 et seq., the equivalent laws of other countries, and international treaty. This section shall survive the termination of this License.
+* 12) Attorneys Fees. In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License.
+* 13) Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable.
+* 14) Definition of "You" in This License. "You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+* 15) Right to Use. You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You.
+*
+* This license is Copyright (C) 2003-2004 Lawrence E. Rosen. All rights reserved. Permission is hereby granted to copy and distribute this license without modification. This license may not be modified without the express written permission of its copyright owner.
+
+> licensed under the MIT License
+
+### six
+
+* Version: 1.16.0
+* Copyright (c) 2010-2020 Benjamin Peterson
+
+> licensed under the MIT License
+
+### threadpoolctl
+
+* Version: 3.1.0
+* Copyright (c) 2019, threadpoolctl contributors
+
+> licensed under the BSD-3-Clause "New" or "Revised" License
+
+### typing-extensions
+
+* Version: 4.2.0
+* A. HISTORY OF THE SOFTWARE
+* ==========================
+* Python was created in the early 1990s by Guido van Rossum at Stichting
+Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands
+as a successor of a language called ABC. Guido remains Python's
+principal author, although it includes many contributions from others.
+* In 1995, Guido continued his work on Python at the Corporation for
+National Research Initiatives (CNRI, see http://www.cnri.reston.va.us)
+in Reston, Virginia where he released several versions of the
+software.
+* In May 2000, Guido and the Python core development team moved to
+BeOpen.com to form the BeOpen PythonLabs team. In October of the same
+year, the PythonLabs team moved to Digital Creations (now Zope
+Corporation, see http://www.zope.com). In 2001, the Python Software
+Foundation (PSF, see http://www.python.org/psf/) was formed, a
+non-profit organization created specifically to own Python-related
+Intellectual Property. Zope Corporation is a sponsoring member of
+the PSF.
+* All Python releases are Open Source (see http://www.opensource.org for
+the Open Source Definition). Historically, most, but not all, Python
+releases have also been GPL-compatible; the table below summarizes
+the various releases.
+```
+ Release Derived Year Owner GPL-
+ from compatible? (1)
+ 0.9.0 thru 1.2 1991-1995 CWI yes
+ 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes
+ 1.6 1.5.2 2000 CNRI no
+ 2.0 1.6 2000 BeOpen.com no
+ 1.6.1 1.6 2001 CNRI yes (2)
+ 2.1 2.0+1.6.1 2001 PSF no
+ 2.0.1 2.0+1.6.1 2001 PSF yes
+ 2.1.1 2.1+2.0.1 2001 PSF yes
+ 2.1.2 2.1.1 2002 PSF yes
+ 2.1.3 2.1.2 2002 PSF yes
+ 2.2 and above 2.1.1 2001-now PSF yes
+```
+* Footnotes:
+* (1) GPL-compatible doesn't mean that we're distributing Python under
+ the GPL. All Python licenses, unlike the GPL, let you distribute
+ a modified version without making your changes open source. The
+ GPL-compatible licenses make it possible to combine Python with
+ other software that is released under the GPL; the others don't.
+* (2) According to Richard Stallman, 1.6.1 is not GPL-compatible,
+ because its license has a choice of law clause. According to
+ CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1
+ is "not incompatible" with the GPL.
+*
+* Thanks to the many outside volunteers who have worked under Guido's
+* direction to make these releases possible.
+*
+*
+* B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON
+* ===============================================================
+* PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
+* --------------------------------------------
+*
+* 1. This LICENSE AGREEMENT is between the Python Software Foundation
+("PSF"), and the Individual or Organization ("Licensee") accessing and
+otherwise using this software ("Python") in source or binary form and
+its associated documentation.
+* 2. Subject to the terms and conditions of this License Agreement, PSF hereby
+grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+analyze, test, perform and/or display publicly, prepare derivative works,
+distribute, and otherwise use Python alone or in any derivative version,
+provided, however, that PSF's License Agreement and PSF's notice of copyright,
+i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are
+retained in Python alone or in any derivative version prepared by Licensee.
+* 3. In the event Licensee prepares a derivative work that is based on
+or incorporates Python or any part thereof, and wants to make
+the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to Python.
+* 4. PSF is making Python available to Licensee on an "AS IS"
+basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
+INFRINGE ANY THIRD PARTY RIGHTS.
+* 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+* 6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+* 7. Nothing in this License Agreement shall be deemed to create any
+relationship of agency, partnership, or joint venture between PSF and
+Licensee. This License Agreement does not grant permission to use PSF
+trademarks or trade name in a trademark sense to endorse or promote
+products or services of Licensee, or any third party.
+* 8. By copying, installing or otherwise using Python, Licensee
+agrees to be bound by the terms and conditions of this License
+Agreement.
+*
+* BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
+* -------------------------------------------
+* BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
+*
+* 1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an
+office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the
+Individual or Organization ("Licensee") accessing and otherwise using
+this software in source or binary form and its associated
+documentation ("the Software").
+* 2. Subject to the terms and conditions of this BeOpen Python License
+Agreement, BeOpen hereby grants Licensee a non-exclusive,
+royalty-free, world-wide license to reproduce, analyze, test, perform
+and/or display publicly, prepare derivative works, distribute, and
+otherwise use the Software alone or in any derivative version,
+provided, however, that the BeOpen Python License is retained in the
+Software, alone or in any derivative version prepared by Licensee.
+* 3. BeOpen is making the Software available to Licensee on an "AS IS"
+basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT
+INFRINGE ANY THIRD PARTY RIGHTS.
+* 4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE
+SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS
+AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY
+DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+* 5. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+* 6. This License Agreement shall be governed by and interpreted in all
+respects by the law of the State of California, excluding conflict of
+law provisions. Nothing in this License Agreement shall be deemed to
+create any relationship of agency, partnership, or joint venture
+between BeOpen and Licensee. This License Agreement does not grant
+permission to use BeOpen trademarks or trade names in a trademark
+sense to endorse or promote products or services of Licensee, or any
+third party. As an exception, the "BeOpen Python" logos available at
+http://www.pythonlabs.com/logos.html may be used according to the
+permissions granted on that web page.
+* 7. By copying, installing or otherwise using the software, Licensee
+agrees to be bound by the terms and conditions of this License
+Agreement.
+*
+* CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1
+* ---------------------------------------
+*
+* 1. This LICENSE AGREEMENT is between the Corporation for National
+Research Initiatives, having an office at 1895 Preston White Drive,
+Reston, VA 20191 ("CNRI"), and the Individual or Organization
+("Licensee") accessing and otherwise using Python 1.6.1 software in
+source or binary form and its associated documentation.
+* 2. Subject to the terms and conditions of this License Agreement, CNRI
+hereby grants Licensee a nonexclusive, royalty-free, world-wide
+license to reproduce, analyze, test, perform and/or display publicly,
+prepare derivative works, distribute, and otherwise use Python 1.6.1
+alone or in any derivative version, provided, however, that CNRI's
+License Agreement and CNRI's notice of copyright, i.e., "Copyright (c)
+1995-2001 Corporation for National Research Initiatives; All Rights
+Reserved" are retained in Python 1.6.1 alone or in any derivative
+version prepared by Licensee. Alternately, in lieu of CNRI's License
+Agreement, Licensee may substitute the following text (omitting the
+quotes): "Python 1.6.1 is made available subject to the terms and
+conditions in CNRI's License Agreement. This Agreement together with
+Python 1.6.1 may be located on the Internet using the following
+unique, persistent identifier (known as a handle): 1895.22/1013. This
+Agreement may also be obtained from a proxy server on the Internet
+using the following URL: http://hdl.handle.net/1895.22/1013".
+* 3. In the event Licensee prepares a derivative work that is based on
+or incorporates Python 1.6.1 or any part thereof, and wants to make
+the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to Python 1.6.1.
+* 4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS"
+basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT
+INFRINGE ANY THIRD PARTY RIGHTS.
+* 5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
+1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
+A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1,
+OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+* 6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+* 7. This License Agreement shall be governed by the federal
+intellectual property law of the United States, including without
+limitation the federal copyright law, and, to the extent such
+U.S. federal law does not apply, by the law of the Commonwealth of
+Virginia, excluding Virginia's conflict of law provisions.
+Notwithstanding the foregoing, with regard to derivative works based
+on Python 1.6.1 that incorporate non-separable material that was
+previously distributed under the GNU General Public License (GPL), the
+law of the Commonwealth of Virginia shall govern this License
+Agreement only as to issues arising under or with respect to
+Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this
+License Agreement shall be deemed to create any relationship of
+agency, partnership, or joint venture between CNRI and Licensee. This
+License Agreement does not grant permission to use CNRI trademarks or
+trade name in a trademark sense to endorse or promote products or
+services of Licensee, or any third party.
+* 8. By clicking on the "ACCEPT" button where indicated, or by copying,
+installing or otherwise using Python 1.6.1, Licensee agrees to be
+bound by the terms and conditions of this License Agreement.
+* ACCEPT
+* CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2
+* --------------------------------------------------
+* Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam,
+The Netherlands. All rights reserved.
+*
+* Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the name of Stichting Mathematisch
+Centrum or CWI not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission.
+* STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
+THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
+FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+### tzdata
+
+* Version: 2022.1
+* Copyright (c) 2014 Lau Taarnskov
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+* The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+> licensed under the MIT License
+
+### tzlocal
+
+* Version: 4.2
+* Copyright 2011-2017 Lennart Regebro
+
+> licensed under the MIT License
+
+### unidic-lite
+
+* Version: 1.0.8
+* Copyright (c) 2011-2017, The UniDic Consortium
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+* * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the
+ distribution.
+* * Neither the name of the UniDic Consortium nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+### win32-setctime
+
+* Version: 1.1.0
+* Copyright (c) 2019 Delgan
+
+> licensed under the MIT License
+
+### zipp
+
+* Version: 3.8.0
+* Copyright Jason R. Coombs
+
+> licensed under the MIT License
+
+
+
+# Reference
+
+## MIT License : https://opensource.org/licenses/MIT
+
+## Apache License : http://www.apache.org/licenses/LICENSE-2.0
+
+## BSD-3-Clause License : https://opensource.org/licenses/BSD-3-Clause
+
+## LGPL License : https://www.gnu.org/licenses/licenses.en.html
+
+## Creative Commons : https://creativecommons.org/publicdomain/zero/1.0/legalcode
+
+## Chart.js : https://www.chartjs.org/docs/latest/
+
+### https://github.com/chartjs/Chart.js/blob/master/LICENSE.md
+
+The MIT License (MIT)
+Copyright (c) 2014-2021 Chart.js Contributors
+
+> licensed under the MIT License
+
+## Bootstrap : https://getbootstrap.com/
+
+### https://github.com/twbs/bootstrap/blob/main/LICENSE (base, theme)
+
+The MIT License (MIT)
+Copyright (c) 2011-2021 Twitter, Inc.
+Copyright (c) 2011-2021 The Bootstrap Authors
+
+> licensed under the MIT License
+
+### https://github.com/snapappointments/bootstrap-select/blob/master/LICENSE
+
+The MIT License (MIT)
+Copyright (c) 2013-2015 bootstrap-select
+
+> licensed under the MIT License
+
+### https://github.com/wenzhixin/bootstrap-table/blob/develop/LICENSE
+
+(The MIT License)
+Copyright (c) 2012-2019 Zhixin Wen
+
+> licensed under the MIT License
+
+
+
+## jQuery : http://jquery.org/license/
+
+Source Code
+Projects referencing this document are released under the terms of the MIT license.
+The MIT License is simple and easy to understand and it places almost no restrictions on what you can do with the Project.
+You are free to use the Project in any other project (even commercial projects) as long as the copyright header is left intact.
+
+Sample Code
+All demos and examples, whether in a Project's repository or displayed on a Project site, are released under the terms of the license as specified in the relevant repository. Many Projects choose to release their sample code under the terms of CC0.
+CC0 is even more permissive than the MIT license, allowing you to use the code in any manner you want, without any copyright headers, notices, or other attribution.
+
+Web Sites
+The content on a Project web site referencing this document in its header is released under the terms of the license specified in the website's repository or if not specified, under the MIT license.
+The design, layout, and look-and-feel of JS Foundation project web sites are not licensed for use and may not be used on any site, personal or commercial, without prior written consent from the JS Foundation.
+For further information regarding JS Foundation licensing and intellectual property, please review the JS Foundation IP Policy.
+
+> licensed under the MIT License
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a1b5d49
--- /dev/null
+++ b/README.md
@@ -0,0 +1,179 @@
+# Analysis Platform
+
+Analysis Platform is an open source web application to import, connect and visualize factory IoT data. It helps to collect, link and integrate data from multiple data sources.
+Visualizations include the Digital Native QC7 Tools, which are designed especially for data from the manufacturing domain.
+Let's try data-driven process improvement by utilizing the data lying dormant in the field.
+
+
+
+## What can we do with Analysis Platform?
+
+The main activities that users can perform with Analysis Platform are:
+
+* Check the behavior of time-series or id-series data, stratified across processes (Full Points Plot)
+* Check fluctuations in the shape of the distribution over time (Ridgeline Plot)
+* Check correlations between variables and clusters in the distribution (Multiple Scatter Plot)
+* Check the behavior of each category/group (Stratified Plot)
+* Check behavior in relation to human activities, and process/product behavior (Calendar Heatmap)
+* Look for key variables that strongly relate to the output (Sankey Diagram)
+* Grasp phenomena that occur at the same time (Cooccurrence Graph)
+
+Analysis Platform currently supports the following data sources:
+
+* CSV/TSV/SSV (semicolon)
+* SQLite
+* MySQL
+* PostgreSQL
+* SQL Server
+* Oracle Database
+
+## Terms of Use
+
+On your first access to the application, you must read and agree to the [Terms of Use](/about/terms_of_use_en.md) shown on a modal screen.
+If you are going to use "`oss_start_app.bat`" to run Analysis Platform with the Windows embeddable package,
+running the batch file is regarded as agreeing to the Terms of Use.
+
+## Requirements
+
+Analysis Platform uses the [Flask](https://flask.palletsprojects.com/en/latest/) framework.
+
+- Python (>=3.6) (Tested with Python 3.7.3)
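+
+You can quickly confirm your interpreter version before proceeding (depending on your setup, the command may be `py --version` or `python3 --version` instead):
+
+```
+python --version
+```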
+
+
+## How can we start using Analysis Platform?
+
+First, clone this repository to your PC.
+
+```shell
+git clone https://github.com/apdn7/AnalysisPlatform.git
+cd AnalysisPlatform
+```
+
+Alternatively, you can download the zip file and unzip it.
+
+### For users: Run Analysis Platform with Windows embeddable package
+
+If you use a Windows machine, you can use the Windows
+embeddable package to run Analysis Platform
+without installing Python.
+To download the necessary packages and start Analysis Platform,
+run "`oss_start_app.bat`" (beforehand, read the [Terms of Use](/about/terms_of_use_en.md) carefully).
+
+```
+Double click "oss_start_app.bat"
+```
+
+This batch file will automatically
+download:
+
+* [Windows embeddable package](https://www.python.org/downloads/windows/): To run Analysis Platform without installing Python
+* [pip](https://github.com/pypa/pip): To manage Python packages
+* [other necessary Python packages](requirements/common.txt): To run Analysis Platform
+* [Oracle Instant Client](https://www.oracle.com/database/technologies/instant-client.html): To connect to Oracle Database
+
+```
+Note:
+If you are connecting to the internet through a proxy,
+you might have to edit "oss_start_app.bat" and specify the proxy address, unless it is already registered in your environment variables.
+Open "oss_start_app.bat" with any text editor, and you will find the
+proxy settings on rows 7-8. Remove REM from those rows and fill in HTTP_PROXY and HTTPS_PROXY.
+```
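+
+For example, after editing, the two rows might look like the sketch below (the proxy address is only a placeholder and batch-file `set` syntax is assumed here; check the actual lines in the batch file before editing):
+
+```
+set HTTP_PROXY=http://proxy.example.com:8080
+set HTTPS_PROXY=http://proxy.example.com:8080
+```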
+
+Analysis Platform is activated after all downloads are finished.
+
+Analysis Platform uses port 6868 by default.
+Access the URL below to open Analysis Platform.
+If another program already uses port 6868, you can change the port (see Basic Settings).
+
+```
+http://127.0.0.1:6868/
+```
+
+Downloads are executed only if the above files are not detected, so you can also use "`oss_start_app.bat`" for subsequent activations. (Analysis Platform detects a folder named `python_embedded` on the same level as `AnalysisPlatform`.)
+
+Analysis Platform can run without an internet connection.
+If you want to use this application on a machine that has no internet connection (for example, on an intranet),
+you can first download all required files on another machine (which has an internet connection),
+then copy the entire set of files (the `AnalysisPlatform` folder together with the downloaded `python_embedded` folder).
+
+### For developers: Run Analysis Platform with Python installed on your machine
+
+Install requirements:
+
+```shell
+pip install -r requirements/common.txt
+```
+If pip install fails, try using Python 3.7.3.
+Analysis Platform is activated by the following command:
+
+```bash
+python main.py
+```
+
+The corresponding ODBC driver must be installed to use SQL Server and Oracle Database.
+
+
+## How do we shut down Analysis Platform?
+
+To shut down Analysis Platform,
+press the shutdown button at the bottom of the sidebar (this button is only available on the host machine),
+or press `Ctrl + C` in your console.
+
+## Basic Settings
+
+Basic settings of Analysis Platform are defined in
+the `info` field of `histview2/config/basic_config.yml`.
+You can open the file with any text editor and set the following (see the example after this list):
+
+* `port-no`: Port number that Analysis Platform uses (default: 6868)
+* `language`: Language used in the GUI. For example, if you want to use Japanese, set "JA". If an empty or invalid value is set, English is used.
+See the language selectbox in the upper right corner of the GUI for the abbreviation of each language (default: empty)
+* `hide-setting-page`: If True, hides the link to the config page (default: False)
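+
+For illustration, the relevant part of `basic_config.yml` might look like the sketch below (the field names come from the list above; the values are examples and the surrounding layout of the generated file may differ):
+
+```yaml
+info:
+  port-no: 7000            # change this if port 6868 is already in use
+  language: JA             # GUI language abbreviation; leave empty for English
+  hide-setting-page: False
+```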
+
+If you want to initialize the application, remove the `instance` folder and `histview2/config/basic_config.yml`.
+These files are regenerated on the next activation.
+
+## Is there any sample data that we can use?
+
+By default, Analysis Platform contains sample data and corresponding settings so that you can get an overview of each visualization.
+Data is stored as TSV files under the subdirectories in [/sample_data](/sample_data):
+
+* /assembly: Quality data
+ * /1_parts_feed
+ * 20220228.tsv
+ * /2_inspection
+ * 20220228.tsv
+* /parts_processing: Machine data
+ * /1_machine_parameter_a
+ * 20220311.tsv
+ * /2_machine_parameter_b
+ * 20220311.tsv
+ * /3_finishing
+ * 20220311.tsv
+* /alarm_signal_cog: Daily occurrence of machine alarms
+ * 20200401.tsv
+
+The above data is automatically imported after activation.
+You can call each sample visualization from 'Load' or 'Bookmark' in the upper right corner of the GUI.
+
+If you do not need this data,
+you can either initialize the application by removing the `instance`
+folder before activation,
+or simply remove each data source setting from the GUI.
+
+## License
+
+Analysis Platform is released under the MIT License.
+See our [LICENSE](LICENSE.md) for more details.
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..763ab44
--- /dev/null
+++ b/VERSION
@@ -0,0 +1,5 @@
+v4.0.0.141.7bd44ad7
+1
+OSS
+
+
diff --git a/about/Endroll.md b/about/Endroll.md
new file mode 100644
index 0000000..36e2486
--- /dev/null
+++ b/about/Endroll.md
@@ -0,0 +1,183 @@
+
+
+
+
+
+- [Project Analysis Platform](#project-analysis-platform)
+ - [Globalization, i18n](#globalization-i18n)
+ - [Collaborators FY22](#collaborators-fy22)
+ - [Collaborators FY21](#collaborators-fy21)
+ - [Early Bird Collaborators](#early-bird-collaborators)
+- [Research & Development Team](#research--development-team)
+ - [Data Analysis Education, Research & Promotion](#data-analysis-education-research--promotion)
+ - [Data Analysis Package](#data-analysis-package)
+ - [Data Analysis Interface](#data-analysis-interface)
+ - [Data Analysis Platform](#data-analysis-platform)
+ - [© F-IoT DAC Team & Rainbow7 + Bridge7](#-f-iot-dac-team--rainbow7--bridge7)
+
+
+
+
+
+
+
+# Project Analysis Platform
+
+
+
+
+## Globalization, i18n
+
+||||
+|--:|:-:|:--|
+|Software GUI translation into various languages||Support members|
+|English en 英語||Tran Ngoc Tinh チャン ゴク ティン Trần Ngọc Tình FPT Software Japan|
+|Japanese ja 日本語||Le Sy Khanh Duy レイシー カイン ユイ Lê Sỹ Khánh Duy FPT Software Japan|
+|Vietnamese vi ベトナム語||Ho Hoang Tung ホ ホアン トゥン Hồ Hoàng Tùng FPT Software Japan|
+|Italian it イタリア語|||
+|Spanish es スペイン語|||
+|Czech cs チェコ語|||
+|Hungarian hu ハンガリ語|||
+|Portuguese pt ポルトガル語|||
+|German de ドイツ語|||
+|Hindi hi ヒンディ語|||
+|Thai th タイ語|||
+|Simplified Chinese zh-CN 簡体中国語|||
+|Traditional Chinese zh-TW 繁体中国語|||
+
+
+
+## Collaborators FY22
+
+||||
+|--:|:-:|:--|
+|Field Introduction Project Member & Data Preparation||Satoru Fukumori 福森 聖 DNJP Zemmyo & Nishio Injection Components Mfg. Mgt. Div.|
+
+
+
+## Collaborators FY21
+
+||||
+|--:|:-:|:--|
+|Field Introduction Project Leader & Data Preparation||Takeshi Mori 森 剛志 DNJP Nishio Gasoline Injection Mfg. Div.|
+|Specification Examination & Practical Testing||Koji Otaka 大鷹 浩二 DNJP Nishio Gasoline Injection Mfg. Div.|
+|Testing and Field Introduction||Masakazu Iwata 岩田 雅和 DNJP Nishio Gasoline Injection Mfg. Div.|
+|||Tatsunori Uehara 上原 辰徳 DNJP Nishio Gasoline Injection Mfg. Div.|
+|||Norikatsu Sengoku 仙石 典克 DNJP Nishio Gasoline Injection Mfg. Div.|
+|||Kousaku Nagano 永野 公作 DNJP Nishio Gasoline Injection Mfg. Div.|
+|Field Introduction Management||Hisatoshi Tsukahara 塚原 久敏 DNJP Nishio Gasoline Injection Mfg. Div.|
+
+
+
+## Early Bird Collaborators
+
+||||
+|--:|:-:|:--|
+|Testing and Field Introduction||Masanobu Kito 鬼頭 雅伸 DNJP Zenmyo Diesel Injection Mfg. Div.|
+|Field Introduction Management||Kenichi Niinuma 新沼 賢一 DNJP Monozukuri DX Promotion Div.|
+|Specification Examination & R&D Cooperation||Yukinori Orihara 折原 幸宣 DNJP Powertrain Systems Production Eng. R&D Div.|
+
+
+
+
+
+# Research & Development Team
+
+
+
+
+## Data Analysis Education, Research & Promotion
+
+||||
+|--:|:-:|:--|
+|Data Analysis Education Development & Management Leader||Sho Takahashi 髙橋 翔 DNJP Monozukuri DX Promotion Div.|
+|Data Analysis Education Development & Management||Takero Arakawa 荒川 毅郎 DNJP Monozukuri DX Promotion Div.|
+
+
+## Data Analysis Package
+
+||||
+|--:|:-:|:--|
+|Data Analysis Package Development & Management Leader||Genta Kikuchi 菊池 元太 DNJP Monozukuri DX Promotion Div.|
+|Data Analysis Package Development||Sho Takahashi 髙橋 翔 DNJP Monozukuri DX Promotion Div.|
+
+
+## Data Analysis Interface
+
+||||
+|--:|:-:|:--|
+|Developer Leader & Bridge SE of Rainbow7 & Bridge7||Le Sy Khanh Duy レイシー カイン ユイ Lê Sỹ Khánh Duy FPT Software Japan|
+|Developer of Rainbow7 & Bridge7||Tran Ngoc Tinh チャン ゴク ティン Trần Ngọc Tình FPT Software Japan|
+|||Nguyen van Hoai グエン ヴァン ホアイ Nguyễn Văn Hoài FPT Software Japan|
+|||Ho Hoang Tung ホ ホアン トゥン Hồ Hoàng Tùng FPT Software Japan|
+|||Tran Thi Kim Tuyen チャン ティ キム トゥエン Trần Thị Kim Tuyền FPT Software Japan|
+|||Nguyen van Hoai グエン ヴァン ホアイ Nguyễn Văn Hoài|
+|Technology Leader of Rainbow7||Masato Yasuda 安田 真人 DNJP Monozukuri DX Promotion Div.|
+|Agile Master of Rainbow7 & Bridge7||Yasutomo Kawashima 川島 恭朋 DNJP Monozukuri DX Promotion Div.|
+
+
+## Data Analysis Platform
+
+||||
+|--:|:-:|:--|
+|Data Analysis Platform Product Owner FY20-||Tatsunori Kojo 古城 達則 DNJP Monozukuri DX Promotion Div.|
+|Technology Leader of Data Analysis & Data Analysis Platform Product Owner FY19||Genta Kikuchi 菊池 元太 DNJP Monozukuri DX Promotion Div.|
+|Data Analysis Platform Development||Takero Arakawa 荒川 毅郎 DNJP Monozukuri DX Promotion Div.|
+|||Sho Takahashi 髙橋 翔 DNJP Monozukuri DX Promotion Div.|
+|Supervisor & Technology Leader of Data Analysis & SQC||Mutsumi Yoshino 吉野 睦 DNJP Monozukuri DX Promotion Div.|
+|Supervisor & Senior Manager||Toshikuni Shinohara 篠原 壽邦 DNJP Monozukuri DX Promotion Div.|
+
+
+
+
+## © F-IoT DAC Team & Rainbow7 + Bridge7
+
+
diff --git a/about/terms_of_use_en.md b/about/terms_of_use_en.md
new file mode 100644
index 0000000..9e06c6f
--- /dev/null
+++ b/about/terms_of_use_en.md
@@ -0,0 +1,35 @@
+# Terms of use
+
+BY CLICKING THE "I ACCEPT" BUTTON, OR USING THE SOFTWARE, YOU ACKNOWLEDGE THAT YOU HAVE REVIEWED AND ACCEPT THIS TERMS OF USE AGREEMENT AND ARE AUTHORIZED TO ACT ON BEHALF OF, AND BIND TO THIS AGREEMENT, THE OWNER OF THIS SOFTWARE. IN CONSIDERATION OF THE FOREGOING, THE PARTIES AGREE AS FOLLOWS:
+
+IF LICENSEE DOES NOT AGREE TO THE TERMS AND CONDITIONS OF THIS AGREEMENT, LICENSEE SHALL NOT USE THE SOFTWARE.
+
+UNDER THE MIT LICENSE, THE SOFTWARE IS WITHOUT WARRANTY AND DENSO WILL HAVE NO LIABILITY ARISING OUT OF OR IN CONNECTION WITH THE SOFTWARE. THERE IS NO WARRANTY FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE SOFTWARE “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. THIS INCLUDES THE INDUSTRIAL QUALITY INFORMATION AND RESULTS OBTAINED WITH THE SOFTWARE. DENSO ALSO DOES NOT PROVIDE SUPPORT.
+
+Redistribution of the Software should be subject to the terms of the OSS license. Please do not redistribute if you cannot comply with the OSS license. There is a risk of violation of the Copyright Act, Export Administration Regulations, etc. and compensation claims, etc.
+
+The Software includes a function to acquire data from a user-owned database, but problems may occur due to increased load on the database, so please verify safety beforehand and use the function at your own risk. If you do not perform this check, do not connect directly to the database; limit use to data acquisition via files such as CSV.
+
+The Software tracks certain types of information during use of its data analytics to perform web analytics. This information helps us monitor the traffic and improve the performance of the Software that meets the needs of users. This information does not include personal information which is any information relating to an identified or identifiable natural person. The information that we collect includes, but is not limited to:
+
+* Internet Protocol (IP) address of the device that has contacted the Internet
+* Type of browser used
+* Type of operating system used
+* Date and time of the visit and the analytics execution
+* Pages visited and analytics executed
+* Referral sources
+* Data size of the data analytics
+* Calculation time of the data analytics
+
+DENSO may also use the personal information collected for non-administrative uses, such as audits, evaluations, research, and planning. Personal information collected will not be used for decision making processes about an individual or to profile individual visitors.
+
+DENSO carries out web analytics (included information of the data analytics) using services from third-party service providers, Google Inc. ("Google") and website hosting services from GitHub Inc. ("GitHub").
+
+In carrying out these services, these third parties may have access to information and these providers may be based in the United States of America (“USA”), which means that the personal information collected is transmitted outside of Japan and may be subject to USA laws. In addition, Google and GitHub operate servers in other countries on which the web analytics and user data may be processed. Consequently, the data may be subject to the governing legislation of the country where it is processed. The reason for them having access to information is to permit them to perform the tasks assigned to them on our behalf.
+
+For more information about the privacy policies of these service providers, please visit their individual websites by using the following links:
+
+* Google Analytics Terms of Service https://marketingplatform.google.com/about/analytics/terms/us/
+* GitHub Privacy Statement https://help.github.com/en/articles/github-privacy-statement
+
+To track users for the use of web analytics, Google requires the use of cookies. A cookie is a piece of data sent from a website and the Software and stored on a user's computer by the web browser the user is browsing with and by the Software. Cookies are generally used to enhance the user's browsing and user experience. These cookies are encrypted for security purposes. Users can choose to set their browser to detect and reject cookies. If users change their browser settings to refuse cookies or disable JavaScript so that their visits will not be tracked, no personal information will be collected; however, use of the Software may be affected in other ways, including making it difficult to access information on the Software. If users use the browser add-ons from Google, their use of the Software will not be affected.
diff --git a/about/terms_of_use_jp.md b/about/terms_of_use_jp.md
new file mode 100644
index 0000000..0b402d7
--- /dev/null
+++ b/about/terms_of_use_jp.md
@@ -0,0 +1,35 @@
+# 利用規約
+
+[同意する]ボタンをクリックするか、本ソフトウェアを使用することにより、ユーザは、本利用条件の内容を確認および同意し、本ソフトウェアの所有者に代わって本条件に拘束されることを了承するものとします。上記を約因として、両当事者は以下のとおり合意するものとします。
+
+ライセンシが本条件の条項に同意しない場合、ライセンシは本ソフトウェアを使用しないものとします。
+
+MITライセンスに基づき本ソフトウェアは無保証であり、DENSOは本ソフトウェアに起因または関連する一切の責任を負わないものとします。適用される法律で許可されている範囲で、ソフトウェアの保証はありません。書面で別途記載されている場合を除き、著作権所有者および/またはその他の当事者は、明示または黙示を問わず、商品性および特定の目的への適合性の黙示保証を含む(ただしこれらに限定されない)、いかなる種類の保証もなしにプログラムを「現状のまま」提供します。 本ソフトウェアの品質および性能に関するすべてのリスクはユーザにあります。プログラムに欠陥があることが判明した場合は、必要なすべてのサービス、修理または修正の費用を自己負担するものとします。これには本ソフトウェアで得られる工業上の品質情報や得られた結果等も含まれます。またDENSOはサポートも行いません。
+
+本ソフトウェアを再配布する際はOSSライセンス規約に基く必要があります。OSSライセンス規約を守らない場合の再配布はお止めください。著作権法/輸出管理規則等への違反・賠償請求等のリスクがあります。
+
+ユーザ保有のデータベースからデータを取得する機能がありますが、データベースへの負荷増大によるトラブルが発生する可能性もありますので事前に十分安全を確認したうえ自己責任でご使用ください。確認等を行わない場合はデータベースへの直接接続は避け、CSV等のファイルを介したデータ取得に利用を限定してください。
+
+本ソフトウェアは、Web分析を実行するために分析中に特定の情報を追跡します。これらの情報はソフトウェアのトラフィックをモニタしユーザのニーズを満たすパフォーマンスの向上に役立ちます。この情報に個人情報(氏名、住所その他個人を特定できる情報)は含まれません。収集する情報には、次のものが含まれますが、これらに限定されません。
+
+* インターネットに接続したデバイスのインターネットプロトコル(IP)アドレス
+* 使用するブラウザの種類
+* 使用するオペレーティング・システムの種類
+* 訪問/実行日時
+* 訪問したページ/実行した分析
+* 参照元
+* 分析のデータサイズ
+* 分析の計算時間
+
+DENSOは、収集した情報を監査・評価・調査・計画などの非管理目的で使用することもあります。収集された情報は、個人に関する意思決定プロセスや個人ユーザのプロファイリングには使用されません。
+
+DENSOは、サードパーティのサービスプロバイダであるGoogle Inc.(以下Google)のサービス、およびGitHub Inc.(以下GitHub)のWebサイトホスティングサービスを利用してWeb分析を行っています。
+
+これらのサービスを実施するにあたり、第三者がユーザの個人情報にアクセスし、提供者がアメリカ合衆国(USA)に拠点を置く場合があります。これは、収集された個人情報が日本国外に送信されることを意味し、アメリカ合衆国の法律の適用を受ける場合があることを意味します。また、GoogleとGitHubは場合によってWeb分析とユーザデータの処理を他国のサーバで運用しています。従って、データはそれが処理される国の準拠法の対象となる場合があります。彼らがユーザの個人情報にアクセスできるのは、弊社に代わりユーザに割り当てられたタスクを実行するためです。
+
+これらのサービスプロバイダのプライバシポリシーの詳細については、次のリンクを使用して各サービスプロバイダのWebサイトにアクセスしてください。
+
+Google Analytics サービス利用規約 https://www.google.com/analytics/terms/
+GitHub プライバシに関する声明 https://help.github.com/en/articles/github-privacy-statement
+
+Web分析でユーザの使用状況を追跡するには、GoogleはCookie(クッキー)を使用する必要があります。Cookieとは、Webサイトおよび本ソフトウェアから送信されユーザが閲覧しているWebブラウザおよび本ソフトウェアによってユーザのコンピュータに保存されるデータのことです。Cookieは、一般的にブラウジングとユーザエクスペリエンスを向上させるために使用されます。これらのCookieは、セキュリティの目的で暗号化されています。ユーザはCookieを検出して拒否するようにブラウザを設定できます。Cookieを拒否するようにブラウザの設定を変更したり、JavaScriptを無効にしてユーザのアクセスが追跡されないようにした場合、情報は収集されませんが、ユーザは本ソフトウェア上の情報へのアクセスが困難になるなど、その他の影響を受ける可能性があります。Googleのブラウザアドオンを使用する場合、本ソフトウェアの使用は影響を受けません。
diff --git a/babel.cfg b/babel.cfg
new file mode 100644
index 0000000..aced825
--- /dev/null
+++ b/babel.cfg
@@ -0,0 +1,3 @@
+[python: **.py]
+[jinja2: **/templates/**]
+extensions=jinja2.ext.autoescape,jinja2.ext.with_
\ No newline at end of file
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..2a38f56
--- /dev/null
+++ b/config.py
@@ -0,0 +1,132 @@
+import os
+
+from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor
+from loguru import logger
+
+from histview2.common.common_utils import resource_path
+
+basedir = os.getcwd()
+
+
+class Config(object):
+ SECRET_KEY = '736670cb10a600b695a55839ca3a5aa54a7d7356cdef815d2ad6e19a2031182b'
+ POSTS_PER_PAGE = 10
+ PORT = 80
+ parent_dir = os.path.dirname(basedir)
+ os.environ['FLASK_ENV'] = os.environ.get('FLASK_ENV', 'development')
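+    # Prepend the portable R and Oracle Instant Client directories to PATH
+    # so that the bundled binaries are found without a system-wide install.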
+ R_PORTABLE = os.environ.get('R-PORTABLE')
+ if not R_PORTABLE:
+ R_PORTABLE = os.path.join(parent_dir, 'R-Portable', 'bin')
+ os.environ['PATH'] = '{};{}'.format(R_PORTABLE, os.environ.get('PATH', ''))
+
+    # Set the R-PORTABLE environment variable.
+ os.environ['R-PORTABLE'] = os.path.join(parent_dir, 'R-Portable')
+
+ ORACLE_PATH = os.path.join(parent_dir, 'Oracle-Portable')
+ os.environ['PATH'] = '{};{}'.format(ORACLE_PATH, os.environ.get('PATH', ''))
+
+ ORACLE_PATH_WITH_VERSION = os.path.join(ORACLE_PATH, 'instantclient_21_3')
+ os.environ['PATH'] = '{};{}'.format(ORACLE_PATH_WITH_VERSION, os.environ.get('PATH', ''))
+
+ logger.info(os.environ['PATH'])
+ print(R_PORTABLE)
+
+ BABEL_DEFAULT_LOCALE = "en"
+
+ # yaml config files name
+ YAML_CONFIG_BASIC = 'basic_config.yml'
+ YAML_CONFIG_DB = 'db_config.yml'
+ YAML_CONFIG_PROC = 'proc_config.yml'
+ YAML_CONFIG_HISTVIEW2 = 'histview2_config.yml'
+ YAML_TILE_INTERFACE_DN7 = 'tile_interface_dn7.yml'
+ YAML_TILE_INTERFACE_AP = 'tile_interface_analysis_platform.yml'
+
+ # run `python histview2/script/generate_db_secret_key.py` to generate DB_SECRET_KEY
+ DB_SECRET_KEY = "4hlAxWLWt8Tyqi5i1zansLPEXvckXR2zrl_pDkxVa-A="
+
+ # timeout
+ SQLALCHEMY_ENGINE_OPTIONS = {'connect_args': {'timeout': 30}}
+
+ # APScheduler
+ SCHEDULER_EXECUTORS = {
+ 'default': ThreadPoolExecutor(100),
+ 'processpool': ProcessPoolExecutor(5)
+ }
+
+ SCHEDULER_JOB_DEFAULTS = {
+ 'coalesce': True,
+ 'max_instances': 1,
+ 'misfire_grace_time': 2 * 60
+ }
+ VERSION_FILE_PATH = resource_path('VERSION')
+ BASE_DIR = basedir
+ GA_TRACKING_ID = 'UA-156244372-2'
+ PARTITION_NUMBER = 100
+
+ COMPRESS_MIMETYPES = [
+ "text/html",
+ "text/css",
+ "text/xml",
+ "text/csv",
+ "text/tsv",
+ "application/json",
+ "application/javascript",
+ ]
+ COMPRESS_LEVEL = 6
+ COMPRESS_MIN_SIZE = 500
+
+
+class ProdConfig(Config):
+ DEBUG = False
+ SQLALCHEMY_TRACK_MODIFICATIONS = False
+ SQLITE_CONFIG_DIR = os.path.join(basedir, 'instance')
+ UNIVERSAL_DB_FILE = os.path.join(SQLITE_CONFIG_DIR, 'universal.sqlite3')
+ SQLALCHEMY_DATABASE_URI = 'sqlite:///' + UNIVERSAL_DB_FILE
+ APP_DB_FILE = os.path.join(SQLITE_CONFIG_DIR, 'app.sqlite3')
+ SQLALCHEMY_DATABASE_APP_URI = 'sqlite:///' + APP_DB_FILE
+ # have to keep SQLALCHEMY_BINDS before SCHEDULER_JOBSTORES -> avoid overwrite
+ SQLALCHEMY_BINDS = {
+ 'app_metadata': SQLALCHEMY_DATABASE_APP_URI
+ }
+ # SCHEDULER_JOBSTORES = {
+ # 'default': SQLAlchemyJobStore(url='sqlite:///' + os.path.join(basedir, 'instance', 'app.sqlite3'))
+ # }
+ YAML_CONFIG_DIR = os.path.join(basedir, 'histview2', 'config')
+
+
+class DevConfig(Config):
+ DEBUG = True
+ SQLALCHEMY_TRACK_MODIFICATIONS = True
+ SQLITE_CONFIG_DIR = os.path.join(basedir, 'instance')
+ UNIVERSAL_DB_FILE = os.path.join(SQLITE_CONFIG_DIR, 'universal.sqlite3')
+ SQLALCHEMY_DATABASE_URI = 'sqlite:///' + UNIVERSAL_DB_FILE
+ APP_DB_FILE = os.path.join(SQLITE_CONFIG_DIR, 'app.sqlite3')
+ SQLALCHEMY_DATABASE_APP_URI = 'sqlite:///' + APP_DB_FILE
+ # have to keep SQLALCHEMY_BINDS before SCHEDULER_JOBSTORES -> avoid overwrite
+ SQLALCHEMY_BINDS = {
+ 'app_metadata': SQLALCHEMY_DATABASE_APP_URI
+ }
+ # SCHEDULER_JOBSTORES = {
+ # 'default': SQLAlchemyJobStore(url='sqlite:///' + os.path.join(basedir, 'instance', 'app.sqlite3'))
+ # }
+ YAML_CONFIG_DIR = os.path.join(basedir, 'histview2', 'config')
+
+
+class TestingConfig(Config):
+ DEBUG = False
+ TESTING = True
+ SQLALCHEMY_TRACK_MODIFICATIONS = True
+ SQLITE_CONFIG_DIR = os.path.join(basedir, 'tests', 'instances')
+ UNIVERSAL_DB_FILE = os.path.join(SQLITE_CONFIG_DIR, 'universal.sqlite3')
+ SQLALCHEMY_DATABASE_URI = 'sqlite:///' + UNIVERSAL_DB_FILE
+ APP_DB_FILE = os.path.join(SQLITE_CONFIG_DIR, 'app.sqlite3')
+ SQLALCHEMY_DATABASE_APP_URI = 'sqlite:///' + APP_DB_FILE
+
+ SQLALCHEMY_BINDS = {
+ 'app_metadata': SQLALCHEMY_DATABASE_APP_URI
+ }
+ # SCHEDULER_JOBSTORES = {
+ # 'default': SQLAlchemyJobStore(url='sqlite:///' + os.path.join(basedir, 'tests', 'instances', 'app.sqlite3'))
+ # }
+ YAML_CONFIG_DIR = os.path.join(basedir, 'tests', 'histview2', 'config')
+ PARTITION_NUMBER = 2
diff --git a/histview2/__init__.py b/histview2/__init__.py
new file mode 100644
index 0000000..cb1902b
--- /dev/null
+++ b/histview2/__init__.py
@@ -0,0 +1,375 @@
+import atexit
+import os
+import time
+from datetime import datetime
+
+import wtforms_json
+from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
+from flask import Flask, render_template, Response, g, json
+from flask_apscheduler import APScheduler, STATE_STOPPED
+from flask_babel import Babel
+from flask_compress import Compress
+from flask_marshmallow import Marshmallow
+from flask_migrate import Migrate
+from flask_sqlalchemy import SQLAlchemy
+from loguru import logger
+from sqlalchemy import create_engine, event
+from sqlalchemy.orm import scoped_session, create_session
+
+from histview2.common.common_utils import check_exist, make_dir, find_babel_locale
+from histview2.common.common_utils import set_sqlite_params
+from histview2.common.constants import FlaskGKey, SQLITE_CONFIG_DIR, PARTITION_NUMBER, UNIVERSAL_DB_FILE, APP_DB_FILE, \
+ TESTING
+from histview2.common.logger import log_execution
+from histview2.common.services.request_time_out_handler import requestTimeOutAPI
+from histview2.common.trace_data_log import get_log_attr, TraceErrKey
+
+db = SQLAlchemy()
+migrate = Migrate()
+scheduler = APScheduler()
+ma = Marshmallow()
+wtforms_json.init()
+
+background_jobs = {}
+
+LOG_IGNORE_CONTENTS = ('.html', '.js', '.css', '.ico', '.png')
+# yaml config files
+dic_yaml_config_file = dict(basic=None, db=None, proc=None, histview2=None, version='0', ti_dn7=None,
+ ti_analysis_platform=None)
+dic_config = {'db_secret_key': None, SQLITE_CONFIG_DIR: None, PARTITION_NUMBER: None, APP_DB_FILE: None,
+ UNIVERSAL_DB_FILE: None, TESTING: None}
+
+# last request time
+dic_request_info = {'last_request_time': datetime.utcnow()}
+
+# ############## init application metadata db ###############
+db_engine = None
+
+
+def init_engine(app, uri, **kwargs):
+ global db_engine
+    # By default, SQLAlchemy does not overwrite existing tables, so there is no need to manually check whether the file or the tables already exist.
+ # https://docs.sqlalchemy.org/en/14/core/metadata.html?highlight=create_all#sqlalchemy.schema.MetaData.create_all
+ db.create_all(app=app)
+ db_engine = create_engine(uri, **kwargs)
+
+ @event.listens_for(db_engine, 'connect')
+ def do_connect(dbapi_conn, connection_record):
+ set_sqlite_params(dbapi_conn)
+
+ return db_engine
+
+
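+# Scoped session factory for the app-metadata DB; the lambda defers binding so that
+# db_engine can be created later by init_engine().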
+Session = scoped_session(lambda: create_session(
+ bind=db_engine,
+ autoflush=True,
+ autocommit=False,
+ expire_on_commit=True
+))
+
+
+def close_sessions():
+ # close universal db session
+ try:
+ db.session.rollback()
+ db.session.close()
+ except Exception:
+ pass
+
+ # close app db session
+ try:
+ session = g.get(FlaskGKey.APP_DB_SESSION)
+ if session:
+ session.rollback()
+ session.close()
+ except Exception:
+ pass
+
+ # Flask g
+ try:
+ g.pop(FlaskGKey.APP_DB_SESSION)
+ except Exception:
+ pass
+
+
+# ##########################################################
+
+
+def create_app(object_name=None):
+ """Create and configure an instance of the Flask application."""
+
+ from .api import create_module as api_create_module
+ from .tile_interface import create_module as tile_interface_create_module
+ from .setting_module import create_module as setting_create_module
+ from .trace_data import create_module as trace_data_create_module
+ from .analyze import create_module as analyze_create_module
+ from .table_viewer import create_module as table_viewer_create_module
+ from .scatter_plot import create_module as scatter_plot_create_module
+ from .heatmap import create_module as heatmap_create_module
+ from .categorical_plot import create_module as categorical_create_module
+ from .ridgeline_plot import create_module as ridgeline_create_module
+ from .parallel_plot import create_module as parallel_create_module
+ from .sankey_plot import create_module as sankey_create_module
+ from .co_occurrence import create_module as co_occurrence_create_module
+ from .multiple_scatter_plot import create_module as multiple_scatter_create_module
+ from .common.logger import bind_user_info
+ from flask import request
+
+ app = Flask(__name__)
+ app.config.from_object(object_name)
+
+ app.config.update(
+ SCHEDULER_JOBSTORES={
+ 'default': SQLAlchemyJobStore(
+ url=app.config['SQLALCHEMY_DATABASE_APP_URI'])
+ },
+ )
+ # table partition number
+ dic_config[PARTITION_NUMBER] = app.config[PARTITION_NUMBER]
+
+ # db directory
+ dic_config[SQLITE_CONFIG_DIR] = app.config[SQLITE_CONFIG_DIR]
+
+ # db files
+ dic_config[APP_DB_FILE] = app.config[APP_DB_FILE]
+ dic_config[UNIVERSAL_DB_FILE] = app.config[UNIVERSAL_DB_FILE]
+
+ # testing param
+ dic_config[TESTING] = app.config.get(TESTING, None)
+
+ # check and create instance folder before run db init
+ if not check_exist(dic_config[SQLITE_CONFIG_DIR]):
+ make_dir(dic_config[SQLITE_CONFIG_DIR])
+
+ should_reset_import_history = False
+ if not check_exist(app.config['UNIVERSAL_DB_FILE']):
+ should_reset_import_history = True
+
+ db.init_app(app)
+ migrate.init_app(app, db)
+ ma.init_app(app)
+ init_engine(app, app.config['SQLALCHEMY_DATABASE_APP_URI'])
+
+ # reset import history when no universal db
+ if should_reset_import_history:
+ from histview2.script.hot_fix.fix_db_issues import reset_import_history
+ reset_import_history(app)
+
+ # yaml files path
+ yaml_config_dir = app.config.get('YAML_CONFIG_DIR')
+ dic_yaml_config_file['basic'] = os.path.join(yaml_config_dir, app.config['YAML_CONFIG_BASIC'])
+ dic_yaml_config_file['db'] = os.path.join(yaml_config_dir, app.config['YAML_CONFIG_DB'])
+ dic_yaml_config_file['proc'] = os.path.join(yaml_config_dir, app.config['YAML_CONFIG_PROC'])
+ dic_yaml_config_file['histview2'] = os.path.join(yaml_config_dir, app.config['YAML_CONFIG_HISTVIEW2'])
+ dic_yaml_config_file['ti_dn7'] = os.path.join(yaml_config_dir, app.config['YAML_TILE_INTERFACE_DN7'])
+ dic_yaml_config_file['ti_analysis_platform'] = os.path.join(yaml_config_dir, app.config['YAML_TILE_INTERFACE_AP'])
+
+ # db secret key
+ dic_config['DB_SECRET_KEY'] = app.config['DB_SECRET_KEY']
+
+ # sqlalchemy echo flag
+ app.config['SQLALCHEMY_ECHO'] = app.config.get('DEBUG')
+
+ babel = Babel(app)
+ Compress(app)
+
+ api_create_module(app)
+ scatter_plot_create_module(app)
+ heatmap_create_module(app)
+ setting_create_module(app)
+ trace_data_create_module(app)
+ analyze_create_module(app)
+ table_viewer_create_module(app)
+ categorical_create_module(app)
+ ridgeline_create_module(app)
+ parallel_create_module(app)
+ sankey_create_module(app)
+ co_occurrence_create_module(app)
+ multiple_scatter_create_module(app)
+ tile_interface_create_module(app)
+ app.add_url_rule('/', endpoint='tile_interface.tile_interface')
+
+ from histview2.common.yaml_utils import BasicConfigYaml
+ basic_config_yaml = BasicConfigYaml()
+ basic_config = basic_config_yaml.dic_config
+ hide_setting_page = BasicConfigYaml.get_node(basic_config, ['info', 'hide-setting-page'], False)
+ lang = BasicConfigYaml.get_node(basic_config, ['info', 'language'], False)
+ lang = find_babel_locale(lang)
+ lang = lang or app.config["BABEL_DEFAULT_LOCALE"]
+
+ @babel.localeselector
+ def get_locale():
+ return request.cookies.get('locale') or lang
+
+ # get app version
+ version_file = app.config.get('VERSION_FILE_PATH') or os.path.join(os.getcwd(), 'VERSION')
+ with open(version_file) as f:
+ rows = f.readlines()
+ app_ver = rows[0]
+ if '%%VERSION%%' in app_ver:
+ app_ver = 'v00.00.000.00000000'
+
+ yaml_ver = rows[1] if len(rows) > 1 else '0'
+ dic_yaml_config_file['version'] = yaml_ver
+
+ app_location = rows[2] if len(rows) > 2 else 'DN'
+ app_location = str(app_location).strip('\n')
+ app_location = app_location if app_location != '' else 'DN'
+
+    # start scheduler (Note: start the scheduler last, because it may run jobs before the above settings are in place)
+ if scheduler.state != STATE_STOPPED:
+ scheduler.shutdown(wait=False)
+
+ scheduler.init_app(app)
+ scheduler.start()
+
+ # Shut down the scheduler when exiting the app
+ atexit.register(lambda: scheduler.shutdown() if scheduler.state !=
+ STATE_STOPPED else print('Scheduler is already shutdown'))
+
+ @app.before_request
+ def before_request_callback():
+ g.request_start_time = time.time()
+ # get the last time user request
+ global dic_request_info
+
+ resource_type = request.base_url or ''
+ is_ignore_content = any(resource_type.endswith(extension) for extension in LOG_IGNORE_CONTENTS)
+ if not is_ignore_content:
+ dic_request_info['last_request_time'] = datetime.utcnow()
+ req_logger = bind_user_info(request)
+ req_logger.info("REQUEST ")
+ browser_info = request.user_agent.browser or 'chrome'
+ print("user's browser:", browser_info)
+ browser_info = str(browser_info).lower()
+        is_good_browser = any(name in browser_info for name in ('chrome', 'edge'))
+ # if request.headers.environ.get('HTTP_SEC_CH_UA') and 'chrome' not in request.user_agent.browser.lower():
+ if not dic_config.get(TESTING) and not is_good_browser:
+ return render_template('none.html', **{
+ "title": "お使いのブラウザーはサポートされていません。",
+ "message": "現在のバージョンはChromeブラウザのみをサポートしています!",
+ "action": "Chrome を今すぐダウンロード: ",
+ "url": "https://www.google.com/chrome/"
+ })
+
+ @app.after_request
+ def after_request_callback(response: Response):
+ if 'event-stream' in str(request.accept_mimetypes):
+ return response
+
+ # In case of text/html request, add information of disk capacity to show up on UI.
+ if 'text/html' in str(request.accept_mimetypes) or 'text/html' in str(response.headers):
+ from histview2.common.disk_usage import get_disk_capacity_to_load_UI, add_disk_capacity_into_response
+ dict_capacity = get_disk_capacity_to_load_UI()
+ add_disk_capacity_into_response(response, dict_capacity)
+ if not request.cookies.get('locale'):
+ response.set_cookie('locale', lang)
+
+ # close app db session
+ close_sessions()
+
+ response.cache_control.public = True
+
+ # better performance
+ if not request.content_type:
+ response.cache_control.max_age = 60 * 5
+ response.cache_control.must_revalidate = True
+
+ # check everytime (acceptable performance)
+ # response.cache_control.no_cache = True
+
+ response.add_etag()
+ response.make_conditional(request)
+ if response.status_code == 304:
+ return response
+
+ resource_type = request.base_url or ''
+ is_ignore_content = any(resource_type.endswith(extension) for extension in LOG_IGNORE_CONTENTS)
+ if not is_ignore_content:
+ res_logger = bind_user_info(request, response)
+ res_logger.info("RESPONSE")
+ response.set_cookie('hide_setting_page', str(hide_setting_page))
+ response.set_cookie('app_version', str(app_ver).strip('\n'))
+ response.set_cookie('app_location', str(app_location).strip('\n'))
+
+ return response
+
+ @app.errorhandler(404)
+ def page_not_found(e):
+ # note that we set the 404 status explicitly
+ return render_template('404.html'), 404
+
+ @app.errorhandler(500)
+ def internal_server_error(e):
+ # close app db session
+ close_sessions()
+ logger.exception(e)
+
+ response = json.dumps({
+ "code": e.code,
+ "message": str(e),
+ "dataset_id": get_log_attr(TraceErrKey.DATASET)
+ })
+ return Response(response=response, status=500)
+ # return render_template('500.html'), 500
+
+ # @app.errorhandler(Exception)
+ # def unhandled_exception(e):
+ # # close app db session
+ # close_sessions()
+ # logger.exception(e)
+ #
+ # response = json.dumps({
+ # "code": e.status_code,
+ # "message": e.message,
+ # "dataset_id": get_log_attr(TraceErrKey.DATASET)
+ # })
+ # return Response(response=response)
+
+ @app.errorhandler(requestTimeOutAPI)
+ def request_timeout_api_error(e):
+ """Return JSON instead of HTML for HTTP errors."""
+ # close app db session
+ close_sessions()
+
+ # logger.error(e)
+
+ # start with the correct headers and status code from the error
+ # replace the body with JSON
+ response = json.dumps({
+ "code": e.status_code,
+ "message": e.message,
+ })
+ return Response(response=response, status=408)
+
+ @app.teardown_appcontext
+ def shutdown_session(exception=None):
+ # close app db session
+ close_sessions()
+ Session.remove()
+
+ return app
+
+
+@log_execution()
+def init_db(app):
+ """
+ init db with some parameter
+ :return:
+ """
+ from histview2.common.common_utils import sql_regexp, set_sqlite_params
+ from sqlalchemy import event
+
+ db.create_all(app=app)
+ # Universal DB init
+ # if not universal_db_exists():
+
+ universal_engine = db.get_engine(app)
+
+ @event.listens_for(universal_engine, 'connect')
+ def do_connect(dbapi_conn, connection_record):
+ set_sqlite_params(dbapi_conn)
+
+ @event.listens_for(universal_engine, "begin")
+ def do_begin(dbapi_conn):
+ dbapi_conn.connection.create_function('REGEXP', 2, sql_regexp)
diff --git a/histview2/analyze/__init__.py b/histview2/analyze/__init__.py
new file mode 100644
index 0000000..0b6aa06
--- /dev/null
+++ b/histview2/analyze/__init__.py
@@ -0,0 +1,4 @@
+
+def create_module(app, **kwargs):
+ from .controllers import analyze_blueprint
+ app.register_blueprint(analyze_blueprint)
diff --git a/histview2/analyze/controllers.py b/histview2/analyze/controllers.py
new file mode 100644
index 0000000..02c0068
--- /dev/null
+++ b/histview2/analyze/controllers.py
@@ -0,0 +1,27 @@
+from flask import Blueprint, render_template
+from flask_babel import gettext as _
+
+from histview2.common.services.form_env import get_common_config_data
+from histview2.common.yaml_utils import *
+
+analyze_blueprint = Blueprint(
+ 'analyze',
+ __name__,
+ template_folder=os.path.join('..', 'templates', 'analyze'),
+ static_folder=os.path.join('..', 'static', 'analyze'),
+ url_prefix='/histview2/analyze'
+)
+
+local_params = {
+ "config_yaml_fname_proc": dic_yaml_config_file[YAML_CONFIG_PROC],
+ "config_yaml_fname_histview2": dic_yaml_config_file[YAML_CONFIG_HISTVIEW2],
+ "config_yaml_fname_db": dic_yaml_config_file[YAML_CONFIG_DB]
+}
+
+
+@analyze_blueprint.route('/anomaly_detection/pca')
+def pca():
+ output_dict = get_common_config_data()
+ output_dict['sensor_list'] = []
+ output_dict['page_title'] = _('Principle Component Analysis')
+ return render_template("hotelling_tsquare.html", **output_dict)
diff --git a/histview2/analyze/services/__init__.py b/histview2/analyze/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/analyze/services/sensor_list.py b/histview2/analyze/services/sensor_list.py
new file mode 100644
index 0000000..f7adea1
--- /dev/null
+++ b/histview2/analyze/services/sensor_list.py
@@ -0,0 +1,180 @@
+from functools import lru_cache
+
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.services.sse import background_announcer, AnnounceEvent
+from histview2.common.sigificant_digit import signify_digit_pca_vector
+from histview2.setting_module.models import CfgProcess
+from histview2.trace_data.models import Sensor, find_sensor_class
+
+NUM_SENSOR = 11
+
+
+def get_checked_cols():
+ """get all checked columns
+
+ Yields:
+        dict -- process id/name and column id/name/data type of each checked column
+ """
+ procs: [CfgProcess] = CfgProcess.get_all_order_by_id()
+ for proc in procs:
+ checked_cols = proc.columns or []
+ for col in checked_cols:
+ yield dict(proc_id=proc.id, proc_name=proc.name, col_id=col.id,
+ col_name=col.column_name, col_type=col.data_type)
+
+
+def filter_data(data, filter_func):
+ """filter data func
+
+ Arguments:
+        data {iterable} -- rows to filter
+        filter_func {callable} -- predicate applied to each row
+
+    Yields:
+        [row] -- rows for which filter_func returns a truthy value
+ """
+ for row in data:
+ if filter_func(row):
+ yield row
+
+
+def filter_data_type(dic_row):
+ """filter only real and integer columns
+
+ Arguments:
+        dic_row {dict} -- column info dict containing a 'col_type' entry
+
+    Returns:
+        bool -- True if the column data type is INTEGER or REAL
+ """
+ if not dic_row:
+ return False
+
+ data_type = dic_row.get('col_type', None)
+
+ if not data_type:
+ return False
+
+ return data_type in (DataType.INTEGER.name, DataType.REAL.name)
+
+
+def produce_sample_value_str(sensor_vals=[], effective_length=29, max_length=32):
+ """
+ Produce list of sample values of sensor for PCA page
+ :param sensor_vals:
+ :param effective_length:
+ :param max_length:
+ :return:
+ """
+ sensor_vals = list(map(lambda x: str(x), sensor_vals))
+ sensor_vals_str = ", ".join(sensor_vals)
+ len_vals = len(sensor_vals_str)
+ if len_vals > effective_length:
+ sensor_vals_str = sensor_vals_str[0:effective_length]
+ sensor_vals_str = sensor_vals_str.ljust(max_length, ".")
+    return sensor_vals_str
+
+
+def produce_tool_tip_data(col_name='', lst_sensor_vals=[], num_head_tail=10):
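+    # Build tooltip rows: a header with the column name, the first num_head_tail samples,
+    # a "..." separator block, and (when enough samples exist) the last num_head_tail samples.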
+ tooltip = [{'pos': '{col}:'.format(col=col_name), 'val': ''}]
+ head = [{'pos': idx + 1, 'val': sample} for idx, sample in enumerate(lst_sensor_vals[0:num_head_tail])]
+ tooltip.extend(head)
+
+ mid = [{'pos': '.', 'val': '.'}, {'pos': '.', 'val': '.'}, {'pos': '.', 'val': '.'}]
+ tooltip.extend(mid)
+
+ num_sample = len(lst_sensor_vals)
+ if num_sample > num_head_tail + 3:
+ tail_sensors = lst_sensor_vals[num_head_tail + 3:][-num_head_tail:]
+ len_tail = len(tail_sensors)
+
+ tail = [{'pos': num_sample + idx - len_tail + 1, 'val': sample}
+ for idx, sample in enumerate(tail_sensors)]
+ tooltip.extend(tail)
+
+ return tooltip
+
+
+@log_execution_time()
+def get_sample_data(columns, limit=None):
+ """get sample data from database
+
+ Arguments:
+        columns {iterable} -- checked column info dicts (proc_id, col_id, col_name, ...)
+
+    Keyword Arguments:
+        limit {int} -- maximum number of records fetched per sensor (default: {None})
+ """
+ samples = []
+ count = 1
+ for col in columns:
+ proc_id = col.get('proc_id')
+ cfg_col_id = col.get('col_id')
+ cfg_col_name = col.get('col_name')
+ sensor = Sensor.get_sensor_by_col_name(proc_id, cfg_col_name)
+ if not sensor:
+ continue
+
+ sensor_id = sensor.id
+ sensor_type = sensor.type
+ sensor_vals = get_sensor_first_records(cfg_col_id, cfg_col_name, sensor_id, sensor_type, limit)
+
+ signified_sensor_vals = signify_digit_pca_vector(sensor_vals, sig_dig=4)
+ sensor_vals_str = produce_sample_value_str(signified_sensor_vals[0:11])
+ col['sample'] = sensor_vals_str
+
+ # produce tooltip
+ col['tooltip'] = produce_tool_tip_data(col_name=cfg_col_name, lst_sensor_vals=signified_sensor_vals)
+ samples.append(col)
+ if count % 60 == 0:
+ background_announcer.announce(samples, AnnounceEvent.PCA_SENSOR.name)
+ samples = []
+
+ if count > 3000:
+ break
+ count += 1
+
+ if samples:
+ background_announcer.announce(samples, AnnounceEvent.PCA_SENSOR.name)
+
+
+@lru_cache(2000)
+def get_sensor_first_records(cfg_col_id, cfg_col_name, sensor_id, sensor_type, limit=100):
+ data_type = DataType(sensor_type)
+ sensor_val_cls = find_sensor_class(sensor_id, data_type)
+ sensor_val = sensor_val_cls.coef(cfg_col_id)
+ sensor_vals = sensor_val_cls.get_first_records(cfg_col_name, limit=limit, coef_col=sensor_val)
+ sensor_vals = [sensor_val[0] for sensor_val in sensor_vals]
+
+ return sensor_vals
+
+
+# def get_sensors(num_sensor=NUM_SENSOR):
+# """get sensors with filtered
+#
+# Returns:
+# [type] -- [description]
+# """
+# data = get_checked_cols()
+# data = filter_data(data, filter_data_type)
+# data = get_sample_data(data, limit=100)
+# samples = []
+# try:
+# [samples.append(next(data)) for i in range(num_sensor)]
+# except StopIteration:
+# pass
+# return samples
+
+
+def get_sensors_incrementally():
+ """get sensors with filtered
+
+ Returns:
+ [type] -- [description]
+ """
+ data = get_checked_cols()
+ columns = filter_data(data, filter_data_type)
+ get_sample_data(columns, limit=100)
diff --git a/histview2/analyze/services/utils.py b/histview2/analyze/services/utils.py
new file mode 100644
index 0000000..ae4627d
--- /dev/null
+++ b/histview2/analyze/services/utils.py
@@ -0,0 +1,71 @@
+import json
+import math
+
+import numpy as np
+import pandas as pd
+from scipy.stats import multivariate_normal as mn
+
+
+def get_valid_procs(procs):
+ """
+    Get valid processes to show on the 起点 (start point) selectbox
+ Arguments:
+ procs {dict}
+
+ Returns:
+        dict -- valid processes for 起点 (start point)
+ """
+ proc_list = {}
+ filter_info = procs['filter_info']
+ proc_master = procs['proc_master']
+
+ for key, value in filter_info.items():
+ if len(filter_info[key]) > 0:
+ filter_time = False
+ for item in filter_info[key]:
+ if item.get('item_info', {}) \
+ and item['item_info'].get('type') \
+ and item['item_info']['type'] == 'datehour-range':
+ filter_time = True
+ if filter_time:
+ proc_list.update({key: proc_master[key]})
+
+ return proc_list
+
+
+def get_multivariate_normal(num_samples=500):
+ cov = [[1, 0], [0, 1]] # Covariance
+ mean = [0, 0]
+ dt = np.zeros([num_samples])
+ dt = mn.rvs(mean=mean, cov=cov, size=num_samples, random_state=35)
+ radius_1 = 1
+ radius_2 = 2
+ radius_3 = 3
+ radius_4 = 3.73
+
+ # df = pd.DataFrame(dt, columns=["x", "y"])
+ # get line from normal distribution of x/y
+ # sns.distplot(df['x'], fit=norm, kde=False).get_lines()[0].get_data()
+ # get histogram distribution bar from x/y
+ # [h.get_height() for h in sns.distplot(df['x'], fit=norm, kde=False).patches]
+ return dt
+
+
+def generateCircum(r, n=720):
+ pi = math.pi
+ dt = []
+ for x in range(0, n + 1):
+ dt.append({'x': math.cos(2 * pi / n * x) * r, 'y': math.sin(2 * pi / n * x) * r})
+
+ return dt
+
+
+class JEncoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, np.ndarray):
+ return obj.tolist()
+ if isinstance(obj, pd.DataFrame):
+ return obj.to_dict('list')
+ if isinstance(obj, pd.Series):
+ return obj.tolist()
+ return json.JSONEncoder.default(self, obj)
diff --git a/histview2/api/__init__.py b/histview2/api/__init__.py
new file mode 100644
index 0000000..0259172
--- /dev/null
+++ b/histview2/api/__init__.py
@@ -0,0 +1,25 @@
+def create_module(app, **kwargs):
+ from .setting_module.controllers import api_setting_module_blueprint
+ from .trace_data.controllers import api_trace_data_blueprint
+ from .table_viewer.controllers import api_table_viewer_blueprint
+ from .scatter_plot.controllers import api_scatter_blueprint
+ from .multi_scatter_plot.controllers import api_multi_scatter_blueprint
+ from .sankey_plot.controllers import api_sankey_plot_blueprint
+ from .co_occurrence.controllers import api_co_occurrence_blueprint
+ from .categorical_plot.controllers import api_categorical_plot_blueprint
+ from .analyze.controllers import api_analyze_module_blueprint
+ from .ridgeline_plot.controllers import api_ridgeline_plot_blueprint
+ from .heatmap.controllers import api_heatmap_blueprint
+ from .parallel_plot.controllers import api_paracords_blueprint
+ app.register_blueprint(api_setting_module_blueprint)
+ app.register_blueprint(api_trace_data_blueprint)
+ app.register_blueprint(api_table_viewer_blueprint)
+ app.register_blueprint(api_scatter_blueprint)
+ app.register_blueprint(api_multi_scatter_blueprint)
+ app.register_blueprint(api_sankey_plot_blueprint)
+ app.register_blueprint(api_co_occurrence_blueprint)
+ app.register_blueprint(api_categorical_plot_blueprint)
+ app.register_blueprint(api_analyze_module_blueprint)
+ app.register_blueprint(api_ridgeline_plot_blueprint)
+ app.register_blueprint(api_heatmap_blueprint)
+ app.register_blueprint(api_paracords_blueprint)
diff --git a/histview2/api/analyze/__init__.py b/histview2/api/analyze/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/analyze/controllers.py b/histview2/api/analyze/controllers.py
new file mode 100644
index 0000000..c33b9d2
--- /dev/null
+++ b/histview2/api/analyze/controllers.py
@@ -0,0 +1,90 @@
+import timeit
+
+import simplejson
+from flask import Blueprint, request, jsonify
+
+from histview2.analyze.services.sensor_list import get_sensors_incrementally
+from histview2.api.analyze.services.pca import run_pca, calculate_data_size
+from histview2.common.constants import *
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.http_content import json_serial
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import is_send_google_analytics, save_input_data_to_file, EventType
+
+api_analyze_module_blueprint = Blueprint(
+ 'api_analyze_module',
+ __name__,
+ url_prefix='/histview2/api/analyze'
+)
+
+
+@api_analyze_module_blueprint.route('/pca', methods=['POST'])
+def pca_modelling():
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+
+ if not dic_form.get(START_PROC, None):
+ if dic_form.get('end_proc1'):
+ dic_form[START_PROC] = dic_form.get('end_proc1')
+ else:
+ return
+
+ sample_no = dic_form.get('sampleNo')
+ if sample_no:
+ sample_no = int(sample_no[0]) - 1
+ else:
+ sample_no = 0
+
+ # save dic_form to pickle (for future debug)
+ save_input_data_to_file(dic_form, EventType.PCA)
+
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ # run PCA script
+ orig_send_ga_flg = is_send_google_analytics
+ dic_data, errors = run_pca(dic_param, sample_no)
+
+ if errors:
+ output = simplejson.dumps(dict(json_errors=errors), ensure_ascii=False, default=json_serial)
+ return jsonify(output), 400
+
+ plotly_jsons = dic_data[PLOTLY_JSON]
+ data_point_info = dic_data[DATAPOINT_INFO]
+ output_dict = plotly_jsons
+
+ output_dict.update({
+ DATAPOINT_INFO: data_point_info,
+ SHORT_NAMES: dic_data.get(SHORT_NAMES),
+ IS_RES_LIMITED_TRAIN: dic_data.get(IS_RES_LIMITED_TRAIN),
+ IS_RES_LIMITED_TEST: dic_data.get(IS_RES_LIMITED_TEST),
+ ACTUAL_RECORD_NUMBER_TRAIN: dic_data.get(ACTUAL_RECORD_NUMBER_TRAIN),
+ ACTUAL_RECORD_NUMBER_TEST: dic_data.get(ACTUAL_RECORD_NUMBER_TEST),
+ REMOVED_OUTLIER_NAN_TRAIN: dic_data.get(REMOVED_OUTLIER_NAN_TRAIN),
+ REMOVED_OUTLIER_NAN_TEST: dic_data.get(REMOVED_OUTLIER_NAN_TEST),
+ })
+
+ # send google analytics changed flag
+ if orig_send_ga_flg and not is_send_google_analytics:
+ output_dict.update({'is_send_ga_off': True})
+
+ calculate_data_size(output_dict)
+
+ stop = timeit.default_timer()
+ output_dict['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ output_dict = simplejson.dumps(output_dict, ensure_ascii=False, default=json_serial, ignore_nan=True)
+ return output_dict, 200
+
+
+@api_analyze_module_blueprint.route('/sensor', methods=['GET'])
+def pca():
+ get_sensors_incrementally()
+ output_dict = simplejson.dumps({}, ensure_ascii=False, default=json_serial, ignore_nan=True)
+ return output_dict, 200
diff --git a/histview2/api/analyze/services/__init__.py b/histview2/api/analyze/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/analyze/services/pca.py b/histview2/api/analyze/services/pca.py
new file mode 100644
index 0000000..f386c68
--- /dev/null
+++ b/histview2/api/analyze/services/pca.py
@@ -0,0 +1,693 @@
+from typing import List
+
+import numpy as np
+import pandas as pd
+from pandas import DataFrame
+from sklearn.preprocessing import StandardScaler
+
+from histview2.api.trace_data.services.csv_export import to_csv
+from histview2.api.trace_data.services.time_series_chart import get_data_from_db, get_procs_in_dic_param
+from histview2.common.common_utils import gen_sql_label, gen_abbr_name, zero_variance
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.pysize import get_size
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.services.sse import notify_progress
+from histview2.common.trace_data_log import set_log_attr, TraceErrKey, save_trace_log_db, trace_log, \
+ EventAction, EventType, Target
+from histview2.setting_module.models import CfgProcess
+# ------------------------------------START TRACING DATA TO SHOW ON GRAPH-----------------------------
+from histview2.trace_data.schemas import DicParam
+
+
+@log_execution_time('[PCA]')
+@notify_progress(75)
+def run_pca(dic_param, sample_no=0):
+ """run pca package to get graph jsons"""
+
+ dic_output, dic_biplot, dic_t2q_lrn, dic_t2q_tst, errors = gen_base_object(dic_param)
+ if errors:
+ return None, errors
+
+ plotly_jsons, errors = get_sample_no_data(dic_biplot, dic_t2q_lrn, dic_t2q_tst, sample_no)
+ if errors:
+ return None, errors
+
+ # get sample no info
+ graph_param, dic_proc_cfgs, dic_serials, dic_get_date = pca_bind_dic_param_to_class(dic_param)
+ data_point_info = get_data_point_info(sample_no, dic_output.get('df'), graph_param, dic_proc_cfgs, dic_serials,
+ dic_get_date)
+ return {PLOTLY_JSON: plotly_jsons, DATAPOINT_INFO: data_point_info,
+ IS_RES_LIMITED_TRAIN: dic_output.get(IS_RES_LIMITED_TRAIN),
+ IS_RES_LIMITED_TEST: dic_output.get(IS_RES_LIMITED_TEST),
+ ACTUAL_RECORD_NUMBER_TRAIN: dic_output.get(ACTUAL_RECORD_NUMBER_TRAIN),
+ ACTUAL_RECORD_NUMBER_TEST: dic_output.get(ACTUAL_RECORD_NUMBER_TEST),
+ REMOVED_OUTLIER_NAN_TRAIN: dic_output.get(REMOVED_OUTLIER_NAN_TRAIN),
+ REMOVED_OUTLIER_NAN_TEST: dic_output.get(REMOVED_OUTLIER_NAN_TEST),
+ SHORT_NAMES: dic_output.get(SHORT_NAMES)
+ }, None
+
+
+@log_execution_time()
+@memoize(is_save_file=True)
+def gen_base_object(dic_param):
+ dic_biplot, dic_t2q_lrn, dic_t2q_tst, errors = None, None, None, None
+ dic_output, dict_data, dict_train_data, errors = get_test_n_train_data(dic_param)
+ if not errors:
+ # call pca function
+ dic_sensor_headers = dict_train_data[DIC_SENSOR_HEADER]
+ x_train = dict_train_data['df'][dic_sensor_headers].rename(columns=dic_sensor_headers)
+ x_test = dict_data['df'][dic_sensor_headers].rename(columns=dic_sensor_headers)
+ var_names = x_train.columns.values
+ dic_biplot, dic_t2q_lrn, dic_t2q_tst = run_pca_and_calc_t2q(x_train, x_test, var_names)
+
+ return dic_output, dic_biplot, dic_t2q_lrn, dic_t2q_tst, errors
+
+
+@log_execution_time()
+def get_test_n_train_data(dic_param):
+ dic_output, dict_data, dict_train_data, errors = None, None, None, None
+ # is remove outlier
+ is_remove_outlier = int(dic_param[COMMON][IS_REMOVE_OUTLIER])
+
+ # bind dic_param
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ graph_param, dic_proc_cfgs, dic_serials, dic_get_date = pca_bind_dic_param_to_class(dic_param)
+ train_graph_param, *_ = pca_bind_dic_param_to_class(dic_param, dic_proc_cfgs, is_train_data=True)
+
+ dict_train_data = gen_trace_data(dic_proc_cfgs, train_graph_param, orig_graph_param,
+ training_data=True, is_remove_outlier=is_remove_outlier)
+
+ errors = dict_train_data.get('errors')
+ if errors:
+ return dic_output, dict_data, dict_train_data, errors
+
+ dict_data = gen_trace_data(dic_proc_cfgs, graph_param, orig_graph_param)
+
+ errors = dict_data.get('errors')
+ if errors:
+ return dic_output, dict_data, dict_train_data, errors
+
+ # count removed outlier, nan
+ dic_output = {IS_RES_LIMITED_TRAIN: dict_train_data.get(IS_RES_LIMITED),
+ IS_RES_LIMITED_TEST: dict_data.get(IS_RES_LIMITED),
+ REMOVED_OUTLIER_NAN_TRAIN: int(dict_train_data[ACTUAL_RECORD_NUMBER]) - len(dict_train_data['df']),
+ ACTUAL_RECORD_NUMBER_TEST: dict_data.get(ACTUAL_RECORD_NUMBER),
+ REMOVED_OUTLIER_NAN_TEST: int(dict_data[ACTUAL_RECORD_NUMBER]) - len(dict_data['df']),
+ 'df': dict_data['df'],
+ SHORT_NAMES: dict_data[SHORT_NAMES]}
+
+ return dic_output, dict_data, dict_train_data, errors
+
+
+@log_execution_time()
+def get_sample_no_data(dic_biplot, dic_t2q_lrn, dic_t2q_tst, sample_no=0):
+ """
+ clicked sample no data
+ :param dic_biplot:
+ :param dic_t2q_lrn:
+ :param dic_t2q_tst:
+ :param sample_no:
+ :return:
+ """
+ plotly_jsons = _gen_jsons_for_plotly(dic_biplot, dic_t2q_lrn, dic_t2q_tst, sample_no)
+ # check R script error
+ if plotly_jsons:
+ if isinstance(plotly_jsons, str):
+ errors = [plotly_jsons]
+ else:
+ errors = plotly_jsons.get('err')
+
+ if errors:
+ set_log_attr(TraceErrKey.MSG, str(errors))
+ save_trace_log_db()
+ return None, errors
+ else:
+ return None, ['No output from R']
+
+ return plotly_jsons, None
+
+
+@log_execution_time()
+def gen_trace_data(dic_proc_cfgs, graph_param, orig_graph_param, training_data=False, is_remove_outlier=False):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition points that between start point and end_point
+ """
+
+ # get sensor cols
+ dic_sensor_headers, short_names = gen_sensor_headers(orig_graph_param)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_trace_data(dic_proc_cfgs, graph_param)
+
+ if not actual_record_number:
+ return dict(errors=[ErrorMsg.E_ALL_NA.name])
+
+ # sensor headers
+ cols = list(dic_sensor_headers)
+
+ # replace inf -inf to NaN , so we can dropNA later
+ df.loc[:, cols] = df[cols].replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan))
+
+ # sensors
+ df_sensors: DataFrame = df[dic_sensor_headers]
+
+ # if training_data and int(dic_param[COMMON][IS_REMOVE_OUTLIER]):
+ if training_data and is_remove_outlier:
+ df_sensors = remove_outlier(df_sensors, threshold=0.05)
+ if df_sensors is None or not df_sensors.size:
+ return dict(errors=[ErrorMsg.E_ALL_NA.name])
+
+ df[cols] = df_sensors[cols].to_numpy()
+
+ # remove NaN row
+ df.dropna(subset=cols, inplace=True)
+
+ # zero variance check
+ if zero_variance(df[df_sensors.columns]):
+ return dict(errors=[ErrorMsg.E_ZERO_VARIANCE.name])
+
+ # if there is no data
+ if not df.size:
+ return dict(errors=[ErrorMsg.E_ALL_NA.name])
+
+ return {'df': df, 'dic_sensor_headers': dic_sensor_headers, IS_RES_LIMITED: is_res_limited,
+ ACTUAL_RECORD_NUMBER: actual_record_number, SHORT_NAMES: short_names}
+
+
+@log_execution_time()
+def remove_outlier(df: pd.DataFrame, threshold=0.05):
+ if df is None:
+ return None
+
+ cols = list(df.columns)
+ total = df.index.size
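+    # Rank-based trimming: for each column, values whose rank falls below p1 or above p9
+    # (the lower/upper threshold positions) are replaced with NaN; the caller drops NaN rows afterwards.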
+ for col in cols:
+ num_nan = df[col].isna().sum()
+ num_numeric = total - num_nan
+ if num_numeric < 20: # when n=19, p1=0, p9=19 -> remove nothing -> skip
+ continue
+ p1 = np.floor(num_numeric * threshold)
+ p9 = num_numeric - p1
+ df['rank_{}'.format(col)] = df[col].replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan)).rank(
+ method='first')
+ df[col] = np.where((df['rank_{}'.format(col)] > p9) | (df['rank_{}'.format(col)] < p1), np.nan, df[col])
+
+ return df[cols]
+
+
+@log_execution_time()
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.PCA, EventAction.READ, Target.DATABASE), send_ga=True)
+def get_trace_data(dic_proc_cfgs, graph_param):
+ """get data from universal db
+
+ Arguments:
+ trace {Trace} -- [DataFrame Trace]
+ dic_param {dictionary} -- parameter form client
+
+ Returns:
+ [type] -- data join from start to end by global_id
+ """
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ return df, actual_record_number, is_res_limited
+
+
+@log_execution_time()
+@trace_log((TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventAction.SAVE, Target.TSV),
+ output_key=TraceErrKey.DUMPFILE, send_ga=True)
+def write_csv(dfs, dic_proc_cfgs, graph_param, file_paths, dic_headers):
+ for df, file_path in zip(dfs, file_paths):
+ to_csv(df, dic_proc_cfgs, graph_param, delimiter=CsvDelimiter.TSV.value,
+ output_path=file_path, output_col_ids=dic_headers, len_of_col_name=10)
+
+ return file_paths[0]
+
+
+@log_execution_time()
+def change_path(file_path):
+ return file_path.replace('dat_', 'ret_').replace('tsv', 'pickle')
+
+
+@log_execution_time()
+def get_data_point_info(sample_no, df: DataFrame, graph_param: DicParam, dic_proc_cfgs, dic_serials, dic_get_date):
+ row = df.iloc[sample_no]
+ proc_cnt = -1
+ col_infos = []
+ for proc in graph_param.array_formval:
+ proc_cnt += 1
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ for col_id, col_name, show_name in zip(proc.col_ids, proc.col_names, proc.col_show_names):
+ col_name = gen_sql_label(col_id, col_name)
+ if col_name not in df.columns:
+ continue
+
+ if col_name in dic_serials.values():
+ order = f'{proc_cnt:03}_1'
+ elif col_name in dic_get_date.values():
+ order = f'{proc_cnt:03}_2'
+ else:
+ order = f'{proc_cnt:03}_3'
+
+ col_infos.append((proc_cfg.name, show_name, row[col_name], order))
+
+ return col_infos
+
+
+@log_execution_time()
+def calculate_data_size(output_dict):
+ """
+ Calculate data size for each chart.
+ """
+ dtsize_pca_score_train = get_size(output_dict.get('json_pca_score_train'))
+ output_dict['dtsize_pca_score_train'] = dtsize_pca_score_train
+
+ dtsize_pca_score_test = get_size(output_dict.get('json_pca_score_test'))
+ output_dict['dtsize_pca_score_test'] = dtsize_pca_score_test
+
+ dtsize_t2_time_series = get_size(output_dict.get('json_t2_time_series'))
+ output_dict['dtsize_t2_time_series'] = dtsize_t2_time_series
+
+ dtsize_q_time_series = get_size(output_dict.get('json_q_time_series'))
+ output_dict['dtsize_q_time_series'] = dtsize_q_time_series
+
+ dtsize_t2_contribution = get_size(output_dict.get('json_t2_contribution'))
+ output_dict['dtsize_t2_contribution'] = dtsize_t2_contribution
+
+ dtsize_q_contribution = get_size(output_dict.get('json_q_contribution'))
+ output_dict['dtsize_q_contribution'] = dtsize_q_contribution
+
+ dtsize_pca_biplot = get_size(output_dict.get('json_pca_biplot'))
+ output_dict['dtsize_pca_biplot'] = dtsize_pca_biplot
+
+
+@log_execution_time()
+def pca_bind_dic_param_to_class(dic_param, dic_proc_cfgs: dict = None, is_train_data=False):
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ # move start proc to first
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ if dic_proc_cfgs is None:
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param) # add start proc
+
+ # get serials and get_date
+ dic_serials = {}
+ dic_get_dates = {}
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serials = proc_cfg.get_serials(column_name_only=False)
+ serial_ids = [serial.id for serial in serials]
+ get_date = proc_cfg.get_date_col(column_name_only=False)
+ get_date_id = get_date.id
+ text_cols = [col.id for col in proc_cfg.get_cols_by_data_type(DataType.TEXT, column_name_only=False)]
+ proc.add_cols(text_cols, append_first=True)
+ proc.add_cols(get_date_id, append_first=True)
+ proc.add_cols(serial_ids, append_first=True)
+
+ dic_serials.update({col.id: gen_sql_label(col.id, col.column_name) for col in serials})
+ dic_get_dates[get_date_id] = gen_sql_label(get_date.id, get_date.column_name)
+
+ if is_train_data:
+ time_idx = 0
+ else:
+ time_idx = 1
+
+ graph_param.common.start_date = graph_param.common.start_date[time_idx]
+ graph_param.common.start_time = graph_param.common.start_time[time_idx]
+ graph_param.common.end_date = graph_param.common.end_date[time_idx]
+ graph_param.common.end_time = graph_param.common.end_time[time_idx]
+
+ return graph_param, dic_proc_cfgs, dic_serials, dic_get_dates
+
+
+def gen_sensor_headers(orig_graph_param):
+ dic_labels = {}
+ short_names = {}
+ used_names = set()
+ for proc in orig_graph_param.array_formval:
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ name = gen_sql_label(col_id, col_name)
+ dic_labels[name] = col_name
+
+ # gen short name
+ new_name = gen_abbr_name(col_name)
+ i = 1
+ while new_name in used_names:
+ new_name = f'{new_name[0:-3]}({i})'
+ i += 1
+
+            used_names.add(new_name)
+            short_names[name] = new_name
+
+ return dic_labels, short_names
+
+
+# ------------------------------------------------------
+
+# run_pca_and_calc_t2q()
+# - _calc_biplot_data()
+# - _calc_biplot_circle_radius()
+# - _gen_biplot_circles_dataframe()
+# - _calc_biplot_arrows()
+# - _gen_biplot_axislabs()
+#
+# - _calc_mspc_t2q()
+#
+# - _gen_jsons_for_plotly()
+# - _extract_clicked_sample()
+# - _convert_df_circles_to_dict()
+
+
+@log_execution_time()
+def run_pca_and_calc_t2q(X_train, X_test, varnames: list) -> dict:
+ ''' Run PCA and Calculate T2/Q Statistics/Contributions
+
+    X_train and X_test must have the same number of columns.
+    All data must be integer or float, and NA/NaN values must be removed beforehand.
+    The number of rows cannot be 0, and columns with constant values are not allowed.
+
+ Inputs
+ ----------
+ X_train: dataframe or 2d ndarray
+ (ntrain x p) pd.DataFrame of train data.
+ X_test: dataframe or 2d ndarray
+ (ntest x p) pd.DataFrame of test data. Must be X_test.shape[1] == X_train.shape[1]
+ varnames: list
+ (p) Column names. Must be len(varnames) == X_train.shape[1]
+ Returns
+ ----------
+ output_dict: dict
+ A dictionary of jsons (dictionaries) to draw Biplot and T2/Q chart with plotly.js
+ json_pca_score_test
+ json_pca_score_train
+ json_t2_time_series
+ json_q_time_series
+ json_t2_contribution
+ json_q_contribution
+ json_pca_biplot
+
+ '''
+ pca = PCA()
+ pca.fit(X_train)
+ pca.x = pca.transform(X_train)
+ pca.newx = pca.transform(X_test)
+ dic_biplot = _calc_biplot_data(pca, varnames)
+
+ threshold = 80
+ num_pc = np.where(pca.cum_explained >= threshold)[0][0] + 1
+ dic_t2q_lrn = _calc_mspc_t2q(pca, X_train, num_pc)
+ dic_t2q_tst = _calc_mspc_t2q(pca, X_test, num_pc)
+
+ # move getting sample no data out of this function to cache base object.
+ # output_dict = _gen_jsons_for_plotly(dic_biplot, dic_t2q_lrn, dic_t2q_tst, clicked_sample_no)
+ # return output_dict
+
+ return dic_biplot, dic_t2q_lrn, dic_t2q_tst
+
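+
+# A minimal sketch of how run_pca_and_calc_t2q is expected to be called
+# (illustrative only and never called by the application; the shapes and the
+# variable names below are made up, and numpy is assumed to be imported at the
+# top of this module).
+def _example_run_pca_and_calc_t2q():
+    rng = np.random.RandomState(0)
+    x_train = rng.normal(size=(100, 3))   # (ntrain x p) training data, no NaN
+    x_test = rng.normal(size=(30, 3))     # (ntest x p) test data, same p
+    varnames = ['sensor_a', 'sensor_b', 'sensor_c']
+    dic_biplot, dic_t2q_lrn, dic_t2q_tst = run_pca_and_calc_t2q(x_train, x_test, varnames)
+    # dic_t2q_tst['dic_stats']['t2'] and ['q'] hold one T2/Q value per test row
+    return dic_biplot, dic_t2q_lrn, dic_t2q_tst
+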
+
+class PCA:
+    ''' Principal Component Analysis with an sklearn-like interface
+
+    Note that sklearn's PCA does not return a rotation matrix in this form.
+
+    Attributes
+    ----------
+    sdev: ndarray
+        1D array containing standard deviations of the principal components (calculated from eigenvalues).
+    rotation: ndarray
+        2D array of loadings.
+    var_explained: ndarray
+        1D array of the ratio [%] of variance explained by each principal component.
+    cum_explained: ndarray
+        1D array of the ratio [%] of cumulative variance explained.
+    scale: bool
+        If True, scale data to zero mean and unit variance.
+    scaler: StandardScaler instance
+        Scaler fitted with the data given to fit().
+    '''
+
+ def __init__(self, scale=True):
+ self.sdev = None
+ self.rotation = None
+ self.var_explained = None
+ self.cum_explained = None
+ self.scale = scale
+ self.scaler = None
+ self.x = None
+ self.newx = None
+
+ def fit(self, X):
+ if self.scale:
+ self.scaler = StandardScaler().fit(X)
+ X = self.scaler.transform(X)
+ covmat = np.cov(X.T)
+
+ # note that eig() does not return eigen values in descending order
+ eig_vals, eig_vecs = np.linalg.eig(covmat)
+ idx_desc = np.argsort(eig_vals)[::-1]
+ eig_vals = eig_vals[idx_desc]
+ eig_vecs = eig_vecs[:, idx_desc]
+
+ self.rotation = eig_vecs
+ self.sdev = np.sqrt(eig_vals)
+ self.var_explained = eig_vals / np.sum(eig_vals) * 100
+ self.cum_explained = np.cumsum(self.var_explained)
+
+ def transform(self, X):
+ if self.scale:
+ X = self.scaler.transform(X)
+ return X.dot(self.rotation)
+
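+
+# Note: the attribute names above (sdev, rotation, x) appear to mirror the
+# fields of R's prcomp() result, which is presumably why this thin wrapper is
+# used instead of sklearn.decomposition.PCA. A minimal, hypothetical usage sketch:
+#
+#     pca = PCA(scale=True)
+#     pca.fit(x_train)                 # eigendecomposition of the covariance matrix
+#     scores = pca.transform(x_test)   # scores in principal-component space
+#     pca.cum_explained                # cumulative explained variance [%]
+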
+
+# ---------------------------
+# Biplot (PCA)
+# ---------------------------
+
+def _calc_biplot_data(pca: PCA, varnames: list, tgt_pc=[1, 2]) -> dict:
+ ''' Generate a set of data for biplot
+ Data for scatter plot, arrows, circles (and axis labels)
+ '''
+ dic_radius = _calc_biplot_circle_radius(pca.x)
+ dic_arrows = _calc_biplot_arrows(rotation=pca.rotation, sdev=pca.sdev, max_train=dic_radius['max'])
+ df_circles = _gen_biplot_circles_dataframe(dic_radius)
+ axislabs = _gen_biplot_axislabs(pca.var_explained)
+ idx_tgt_pc = [x - 1 for x in tgt_pc]
+
+ res = {'pca_obj': pca,
+ 'varnames': varnames,
+ 'arr_biplot_lrn': pca.x[:, idx_tgt_pc],
+ 'arr_biplot_tst': pca.newx[:, idx_tgt_pc],
+ 'dic_arrows': dic_arrows,
+ 'dic_radius': dic_radius,
+ 'df_circles': df_circles,
+ 'axislab': axislabs}
+ return res
+
+
+def _calc_biplot_circle_radius(pca_score_train, prob_manual=85) -> dict:
+ ''' Calculate radius for circles in biplot
+ '''
+ # standardized to unit variance
+ score_sqsums = pca_score_train[:, 0] ** 2 + pca_score_train[:, 1] ** 2
+
+ dic_radius = {'sigma': 1,
+ '2sigma': 2,
+ '3sigma': 3,
+ 'max': np.sqrt(np.max(score_sqsums)),
+ # 'train': np.sqrt(np.percentile(score_sqsums, 99.5)),
+ 'train': np.sqrt(np.max(score_sqsums)),
+ 'percentile': np.sqrt(np.percentile(score_sqsums, prob_manual)),
+ 'prob_manual': str(prob_manual)}
+ return dic_radius
+
+
+def _gen_biplot_circles_dataframe(dic_radius: dict):
+ ''' Generate a dataframe with x,y values to plot circles in biplot
+ '''
+ theta = np.concatenate([np.linspace(-np.pi, np.pi, 50), np.linspace(np.pi, -np.pi, 50)])
+ px = np.cos(theta)
+ py = np.sin(theta)
+ df_1sigma = pd.DataFrame({'pc1.x': dic_radius['sigma'] * px,
+ 'pc2.y': dic_radius['sigma'] * py,
+ 'border': 'Sigma'})
+ df_2sigma = pd.DataFrame({'pc1.x': dic_radius['2sigma'] * px,
+ 'pc2.y': dic_radius['2sigma'] * py,
+ 'border': '2Sigma'})
+ df_3sigma = pd.DataFrame({'pc1.x': dic_radius['3sigma'] * px,
+ 'pc2.y': dic_radius['3sigma'] * py,
+ 'border': '3Sigma'})
+ df_maxval = pd.DataFrame({'pc1.x': dic_radius['max'] * px,
+ 'pc2.y': dic_radius['max'] * py,
+ 'border': 'Outlier'})
+ df_normal = pd.DataFrame({'pc1.x': dic_radius['train'] * px,
+ 'pc2.y': dic_radius['train'] * py,
+ 'border': 'Range'})
+ df_percen = pd.DataFrame({'pc1.x': dic_radius['percentile'] * px,
+ 'pc2.y': dic_radius['percentile'] * py,
+ 'border': 'Percentile' + dic_radius['prob_manual']})
+ df_circles = pd.concat([df_1sigma, df_2sigma, df_3sigma, df_maxval, df_normal, df_percen])
+ return df_circles
+
+
+def _calc_biplot_arrows(rotation, sdev, max_train, var_name_adjust=1.5, tgt_pc=[1, 2]) -> dict:
+ ''' Calculate direction of arrows (loadings) for biplot
+ '''
+ dic_arrows = {'xval': None, 'yval': None, 'angle': None, 'hjust': None, 'varname': None}
+
+ # x,y direction for arrows (length corresponds to eigen values)
+ scaled_eig_vecs = rotation * sdev
+ max_len = np.sqrt(np.max(np.sum(scaled_eig_vecs ** 2, axis=1)))
+
+ idx_tgt_pc = [x - 1 for x in tgt_pc]
+ direc = scaled_eig_vecs[:, idx_tgt_pc] * (max_train / max_len)
+ dic_arrows['xval'] = direc[:, 0].copy()
+ dic_arrows['yval'] = direc[:, 1].copy()
+
+ # angles and hjust for labels
+    angle = (180 / np.pi) * np.arctan2(direc[:, 1], direc[:, 0])
+ hjust = (1 - var_name_adjust * np.sign(direc[:, 0])) / 2.0
+ dic_arrows['angle'] = angle
+ dic_arrows['hjust'] = hjust
+ return dic_arrows
+
+
+def _gen_biplot_axislabs(var_explained, tgt_pc=[1, 2]) -> list:
+ ''' Generate axis labels for biplot
+ '''
+ axislabs = ['PC{}({:.1f} [%] explained Var.)'.format(x, var_explained[x - 1]) for x in tgt_pc]
+ return axislabs
+
+
+# ---------------------------
+# PCA-MSPC
+# ---------------------------
+
+def _calc_mspc_t2q(pca, X, num_pc=2) -> dict:
+    ''' PCA-MSPC: Calculate T2/Q statistics and contributions
+ '''
+ dic_t2q = dict(stats=None, contr_t2=None, contr_q=None)
+
+ # calculate T2 stats and contributions
+ pc_score = pca.transform(X)
+ sigma = np.std(pc_score, axis=0)
+ t2_stats = np.sum((pc_score[:, :num_pc] ** 2) / sigma[:num_pc] ** 2, axis=1)
+ t2_contr = (pc_score / sigma) @ pca.rotation.T
+
+ # calculate Q stats and contributions
+ xstd = pca.scaler.transform(X)
+ xhat = xstd @ pca.rotation[:, :num_pc] @ pca.rotation[:, :num_pc].T
+ q_contr = (xstd - xhat) ** 2
+ q_stats = np.sum(q_contr, axis=1)
+
+ dic_t2q['dic_stats'] = {'t2': t2_stats, 'q': q_stats}
+ dic_t2q['contr_t2'] = t2_contr
+ dic_t2q['contr_q'] = (q_contr.T / q_stats).T
+ return dic_t2q
+
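+
+# For reference, the standard PCA-MSPC definitions implemented above:
+#   T2 of a sample is the sum over the first num_pc components of (t_i / sigma_i)^2,
+#   where t_i is the i-th PC score and sigma_i its standard deviation over the data passed in.
+#   Q (SPE) is the squared reconstruction error ||x_std - x_hat||^2, with x_hat
+#   rebuilt from the first num_pc loadings; contr_q is each variable's share of
+#   that squared error.
+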
+
+# ---------------------------
+# Utilities
+# ---------------------------
+
+def _gen_jsons_for_plotly(dic_biplot: dict, dic_t2q_lrn: dict, dic_t2q_tst: dict, sample_no=0) -> dict:
+ """
+ Generate a json to pass to plotly (Biplot and T2/Q Chart)
+ :param dic_biplot:
+ :param dic_t2q_lrn:
+ :param dic_t2q_tst:
+ :param sample_no:
+ :return:
+ sample_no: int
+ An integer specifing sample no of clicked data point.
+ For example, clicked_sample_no=1 is the first data point of X_test.
+ clicked_sample_no=-1 is the last data point of X_train.
+ """
+
+ # Biplots (scatter, circles)
+ dic_circles = _convert_df_circles_to_dict(dic_biplot['df_circles'])
+ json_pca_score_train = {
+ 'scatter': {
+ 'x': dic_biplot['arr_biplot_lrn'][:, 0],
+ 'y': dic_biplot['arr_biplot_lrn'][:, 1],
+ },
+ 'circles': dic_circles,
+ 'axislab': dic_biplot['axislab'],
+ 'r': dic_biplot['dic_radius']
+ }
+
+ json_pca_score_test = {
+ 'scatter': {
+ 'x': dic_biplot['arr_biplot_tst'][:, 0],
+ 'y': dic_biplot['arr_biplot_tst'][:, 1],
+ },
+ 'circles': dic_circles,
+ 'axislab': dic_biplot['axislab'],
+ 'r': dic_biplot['dic_radius']
+ }
+
+ # Biplots (arrows): same graph for train and test
+ score_clicked = _extract_clicked_sample(dic_biplot['arr_biplot_lrn'], dic_biplot['arr_biplot_tst'], sample_no)
+ json_pca_biplot = {
+ 'x': dic_biplot['dic_arrows'].get('xval'),
+ 'y': dic_biplot['dic_arrows'].get('yval'),
+ 'varname': dic_biplot['varnames'],
+ 'angle': dic_biplot['dic_arrows'].get('angle'),
+ 'hjust': dic_biplot['dic_arrows'].get('hjust'),
+ 'r': dic_biplot['dic_radius'],
+ 'clicked_point': {
+ "x": score_clicked[0],
+ "y": score_clicked[1],
+ }
+ }
+
+ # PCA-MSPC
+ json_t2_time_series = {'train': dic_t2q_lrn['dic_stats']['t2'], 'test': dic_t2q_tst['dic_stats']['t2']}
+ json_q_time_series = {'SPE': dic_t2q_lrn['dic_stats']['q'], 'test': dic_t2q_tst['dic_stats']['q']}
+ t2_contr = _extract_clicked_sample(dic_t2q_lrn['contr_t2'], dic_t2q_tst['contr_t2'], sample_no)
+ q_contr = _extract_clicked_sample(dic_t2q_lrn['contr_q'], dic_t2q_tst['contr_q'], sample_no)
+
+ df_t2_contr = pd.DataFrame({'Var': dic_biplot['varnames'], 'Ratio': t2_contr / np.sum(np.abs(t2_contr))})
+ df_q_contr = pd.DataFrame({'Var': dic_biplot['varnames'], 'Ratio': q_contr / np.sum(np.abs(q_contr))})
+ df_t2_contr = df_t2_contr.sort_values('Ratio', ascending=False, key=abs).reset_index()
+ df_q_contr = df_q_contr.sort_values('Ratio', ascending=False, key=abs).reset_index()
+
+ output_dict = {
+ 'json_pca_score_test': json_pca_score_test,
+ 'json_pca_score_train': json_pca_score_train,
+ 'json_t2_time_series': json_t2_time_series,
+ 'json_q_time_series': json_q_time_series,
+ 'json_t2_contribution': df_t2_contr,
+ 'json_q_contribution': df_q_contr,
+ 'json_pca_biplot': json_pca_biplot}
+ return output_dict
+
+
+def _convert_df_circles_to_dict(df_circles) -> dict:
+ ''' Convert dataframe of circles to dictionary, to pass it to plotly
+ '''
+ dic_circles = {}
+ for label in df_circles['border'].unique():
+ dic_circles[label] = {'x': [], 'y': []}
+ idx = np.where(df_circles['border'].values == label)[0]
+ dic_circles[label]['x'] = df_circles['pc1.x'].values[idx]
+ dic_circles[label]['y'] = df_circles['pc2.y'].values[idx]
+ return dic_circles
+
+
+def _extract_clicked_sample(df_train, df_test, sample_no=0):
+ ''' Extract a row from train/test data, according to given sample_no
+ '''
+
+ if sample_no >= 0:
+ return df_test[sample_no, :]
+ else:
+ return df_train[sample_no + df_train.shape[0], :]
diff --git a/histview2/api/categorical_plot/controllers.py b/histview2/api/categorical_plot/controllers.py
new file mode 100644
index 0000000..e5860e9
--- /dev/null
+++ b/histview2/api/categorical_plot/controllers.py
@@ -0,0 +1,84 @@
+import timeit
+
+import simplejson
+from flask import Blueprint, request, send_from_directory
+
+from histview2.api.categorical_plot.services \
+ import gen_trace_data_by_categorical_var, customize_dict_param, \
+ gen_trace_data_by_term, convert_end_cols_to_array, \
+ gen_trace_data_by_cyclic
+from histview2.common.common_utils import resource_path
+from histview2.common.logger import logger
+from histview2.common.services import http_content
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import save_input_data_to_file, EventType
+from histview2.common.yaml_utils import *
+
+api_categorical_plot_blueprint = Blueprint(
+ 'api_categorical_plot',
+ __name__,
+ url_prefix='/histview2/api/stp'
+)
+
+# local parameter settings
+local_params = {
+ "config_yaml_fname_proc": dic_yaml_config_file[YAML_PROC],
+ "config_yaml_fname_histview2": dic_yaml_config_file[YAML_CONFIG_HISTVIEW2],
+ "config_yaml_fname_db": dic_yaml_config_file[YAML_CONFIG_DB],
+}
+
+MAX_GRAPH_PER_TAB = 32
+
+
+@api_categorical_plot_blueprint.route('/index', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+ save_input_data_to_file(dic_form, EventType.STP)
+
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ customize_dict_param(dic_param)
+
+ proc_name = dic_param.get(COMMON).get(END_PROC)
+ time_conds = dic_param.get(TIME_CONDS)
+ compare_type = dic_param.get(COMMON).get(COMPARE_TYPE)
+
+ if not proc_name or not time_conds:
+ return {}, 200
+
+ if compare_type == CATEGORICAL:
+ convert_end_cols_to_array(dic_param)
+ dic_param = gen_trace_data_by_categorical_var(dic_param, MAX_GRAPH_PER_TAB)
+ elif compare_type == RL_CYCLIC_TERM:
+ dic_param = gen_trace_data_by_cyclic(dic_param, MAX_GRAPH_PER_TAB)
+ else:
+ dic_param = gen_trace_data_by_term(dic_param, MAX_GRAPH_PER_TAB)
+
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+    # generate HTML based on trace_data.html
+ out_dict = simplejson.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial, ignore_nan=True)
+ return out_dict, 200
+
+
+@api_categorical_plot_blueprint.route('/image/<filename>')
+def download_file(filename):
+    dir_data_view = resource_path('data', 'view', level=AbsPath.SHOW)
+    logger.info(f'dir_data_view: {dir_data_view}; filename: {filename}')
+
+ return send_from_directory(dir_data_view, filename)
diff --git a/histview2/api/categorical_plot/services.py b/histview2/api/categorical_plot/services.py
new file mode 100644
index 0000000..dfef764
--- /dev/null
+++ b/histview2/api/categorical_plot/services.py
@@ -0,0 +1,868 @@
+import traceback
+from collections import defaultdict
+from copy import deepcopy
+from datetime import datetime, timedelta
+
+import pandas as pd
+from dateutil import tz
+from pandas import DataFrame
+
+from histview2.api.efa.services.etl import FILE as FILE_ETL_SPRAY_SHAPE
+from histview2.api.efa.services.etl import call_com_view
+from histview2.api.trace_data.services.time_series_chart import (get_data_from_db, get_chart_infos,
+ get_procs_in_dic_param,
+ gen_dic_data_from_df, get_min_max_of_all_chart_infos,
+ get_chart_infos_by_stp_var,
+ get_chart_infos_by_stp_value, build_regex_index,
+ apply_coef_text,
+ main_check_filter_detail_match_graph_data,
+ set_chart_infos_to_plotdata, calc_raw_common_scale_y,
+ calc_scale_info, get_cfg_proc_col_info)
+from histview2.common.common_utils import (start_of_minute, end_of_minute, create_file_path,
+ get_view_path, get_basename, gen_sql_label,
+ make_dir_from_file_path,
+ any_not_none_in_dict, DATE_FORMAT_STR, TIME_FORMAT, DATE_FORMAT,
+ RL_DATETIME_FORMAT, convert_time)
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.services.ana_inf_data import calculate_kde_trace_data
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.services.sse import notify_progress
+from histview2.common.services.statistics import calc_summaries_cate_var, calc_summaries
+from histview2.common.trace_data_log import EventType, trace_log, TraceErrKey, EventAction, Target
+from histview2.setting_module.models import CfgProcess, CfgDataSource, CfgProcessColumn
+from histview2.trace_data.models import Cycle
+
+
+@log_execution_time()
+def gen_graph_param(dic_param):
+ # bind dic_param
+ graph_param = category_bind_dic_param_to_class(dic_param)
+
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # TODO: check start proc cols( difference to time series)
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add category
+ graph_param.add_cate_procs_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # add cat exp
+ graph_param.add_cat_exp_to_array_formval()
+
+ proc_cfg = dic_proc_cfgs[graph_param.common.start_proc]
+
+ non_sensor_cols = []
+ if use_etl_spray_shape(proc_cfg):
+ # get all checked cols
+ non_sensor_cols = [column.id for column in proc_cfg.columns if not column.data_type == DataType.REAL.name]
+
+ # get serials
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ proc_sensor_ids = proc.col_sensor_only_ids
+ proc_col_ids = proc.col_ids.copy()
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids + non_sensor_cols)
+ if len(proc_sensor_ids) != len(proc_col_ids):
+ proc.add_sensor_col_ids(proc_col_ids)
+
+ return graph_param, dic_proc_cfgs
+
+
+@log_execution_time()
+@notify_progress(50)
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.STP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_trace_data_by_categorical_var(dic_param, max_graph=None):
+ """tracing data to show graph
+ 1 start point x n end point
+    filter by condition points that are between the start point and end point
+ """
+
+ # gen graph_param
+ graph_param, dic_proc_cfgs = gen_graph_param(dic_param)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # apply coef for text
+ df = apply_coef_text(df, graph_param, dic_proc_cfgs)
+
+ # convert proc to cols dic
+ # transform raw data to graph data
+ # create output data
+ graph_param_with_cate = category_bind_dic_param_to_class(dic_param)
+ graph_param_with_cate.add_cate_procs_to_array_formval()
+ graph_param_with_cate.add_cat_exp_to_array_formval()
+ dic_data = gen_dic_data_from_df(df, graph_param_with_cate)
+ orig_graph_param = category_bind_dic_param_to_class(dic_param)
+ dic_data, is_graph_limited = split_data_by_condition(dic_data, orig_graph_param, max_graph)
+ dic_plots = gen_plotdata_for_var(dic_data)
+ for col_id, plots in dic_plots.items():
+ if max_graph and max_graph < len(plots):
+ is_graph_limited = True
+ dic_plots[col_id] = plots[:max_graph]
+
+ dic_param[ARRAY_PLOTDATA] = dic_plots
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+ # flag to show that trace result was limited
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # get visualization setting
+ add_threshold_configs(dic_param, orig_graph_param)
+
+ # calculating the summaries information
+ calc_summaries_cate_var(dic_param)
+
+ # calc common scale y min max
+ for end_col, plotdatas in dic_param[ARRAY_PLOTDATA].items():
+ min_max_list, all_graph_min, all_graph_max = calc_raw_common_scale_y(plotdatas)
+ calc_scale_info(plotdatas, min_max_list, all_graph_min, all_graph_max)
+
+ # generate kde for each trace output array
+ dic_param = gen_kde_data_cate_var(dic_param)
+
+ remove_array_x_y_cyclic(dic_param)
+
+ # images
+ img_files = dump_img_files(df, graph_param, dic_proc_cfgs)
+ dic_param['images'] = img_files
+
+ return dic_param
+
+
+@log_execution_time()
+def add_threshold_configs(dic_param, orig_graph_param):
+ try:
+ chart_infos_by_cond_procs, chart_infos_org = get_chart_infos(orig_graph_param, no_convert=True)
+ chart_infos_by_stp_var = get_chart_infos_by_stp_var(orig_graph_param)
+ var_col_id = orig_graph_param.get_cate_var_col_id()
+ dic_filter_detail_2_regex = build_regex_index(var_col_id)
+ if chart_infos_by_cond_procs:
+ for end_col, plotdatas in dic_param[ARRAY_PLOTDATA].items():
+ # TODO proc_id, col_id are str vs int
+ chart_info_cond_proc \
+ = chart_infos_by_cond_procs[int(dic_param[COMMON][END_PROC][end_col])].get(int(end_col)) or {}
+ chart_info_cond_proc_org \
+ = chart_infos_org[int(dic_param[COMMON][END_PROC][end_col])].get(int(end_col)) or {}
+ for plotdata in plotdatas:
+ stp_value = plotdata[RL_CATE_NAME]
+ chart_info_stp_value = get_chart_infos_by_stp_value(
+ stp_value,
+ end_col,
+ dic_filter_detail_2_regex,
+ chart_infos_by_stp_var,
+ )
+ # selected_chart_infos = chart_info_stp_value or chart_info_cond_proc # OR for now, may union
+ if any_not_none_in_dict(chart_info_stp_value):
+ selected_chart_infos = chart_info_stp_value
+ chart_info_cond_proc_org = chart_info_stp_value
+ else:
+ selected_chart_infos = chart_info_cond_proc
+
+ y_min, y_max = get_min_max_of_all_chart_infos(selected_chart_infos)
+ plotdata[CHART_INFOS] = selected_chart_infos
+ plotdata[CHART_INFOS_ORG] = chart_info_cond_proc_org
+ plotdata[Y_MIN] = y_min
+ plotdata[Y_MAX] = y_max
+ except Exception:
+ traceback.print_exc()
+
+
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.STP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_trace_data_by_cyclic(dic_param, max_graph=None):
+ dic_param = gen_trace_data_by_cyclic_common(dic_param, max_graph)
+
+ dic_plotdata = defaultdict(list)
+ for plotdata in dic_param[ARRAY_PLOTDATA]:
+ dic_plotdata[plotdata['end_col']].append(plotdata)
+
+ dic_param[ARRAY_PLOTDATA] = dic_plotdata
+
+ # calculating the summaries information
+ calc_summaries_cate_var(dic_param)
+
+ # calc common scale y min max
+ for end_col, plotdatas in dic_param[ARRAY_PLOTDATA].items():
+ min_max_list, all_graph_min, all_graph_max = calc_raw_common_scale_y(plotdatas)
+ calc_scale_info(plotdatas, min_max_list, all_graph_min, all_graph_max)
+
+ # generate kde for each trace output array
+ dic_param = gen_kde_data_cate_var(dic_param)
+
+ # kde
+ remove_array_x_y_cyclic(dic_param)
+
+ return dic_param
+
+
+@log_execution_time()
+@notify_progress(75)
+def gen_trace_data_by_cyclic_common(dic_param, max_graph=None):
+ """tracing data to show graph
+    filter by condition points that are between the start point and end point
+ """
+
+ produce_cyclic_terms(dic_param)
+ terms = gen_dic_param_terms(dic_param)
+
+ dic_param = gen_graph_cyclic(dic_param, terms, max_graph)
+ dic_param[TIME_CONDS] = terms
+ return dic_param
+
+
+def gen_dic_param_terms(dic_param):
+ terms = dic_param[COMMON].get(CYCLIC_TERMS) or []
+ terms = [{START_DATE: convert_time(start_dt, DATE_FORMAT),
+ START_TM: convert_time(start_dt, TIME_FORMAT),
+ START_DT: start_dt,
+ END_DATE: convert_time(end_dt, DATE_FORMAT),
+ END_TM: convert_time(end_dt, TIME_FORMAT),
+ END_DT: end_dt} for start_dt, end_dt in terms]
+ return terms
+
+
+@log_execution_time()
+@notify_progress(75)
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.STP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_trace_data_by_term(dic_param, max_graph=None):
+ """tracing data to show graph
+    filter by condition points that are between the start point and end point
+ """
+ is_graph_limited = False
+ terms = dic_param.get(TIME_CONDS) or []
+ dic_param[ARRAY_PLOTDATA] = []
+ dic_param[MATCHED_FILTER_IDS] = []
+ dic_param[UNMATCHED_FILTER_IDS] = []
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = []
+ dic_param[ACTUAL_RECORD_NUMBER] = 0
+
+ if max_graph and len(terms) > max_graph:
+ terms = terms[:max_graph]
+ is_graph_limited = True
+
+ for term_id, term in enumerate(terms):
+ # create dic_param for each term from original dic_param
+ term_dic_param = deepcopy(dic_param)
+ term_dic_param[TIME_CONDS] = [term]
+ term_dic_param[COMMON][START_DATE] = term[START_DATE]
+ term_dic_param[COMMON][START_TM] = term[START_TM]
+ term_dic_param[COMMON][END_DATE] = term[END_DATE]
+ term_dic_param[COMMON][END_TM] = term[END_TM]
+ term_dic_param['term_id'] = term_id
+
+ # get data from database + visual setting from yaml
+ term_result = gen_graph_term(term_dic_param, max_graph)
+ if term_result.get(IS_GRAPH_LIMITED):
+ is_graph_limited = True
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] += term_result.get(MATCHED_FILTER_IDS, [])
+ dic_param[UNMATCHED_FILTER_IDS] += term_result.get(UNMATCHED_FILTER_IDS, [])
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] += term_result.get(NOT_EXACT_MATCH_FILTER_IDS, [])
+ dic_param[ACTUAL_RECORD_NUMBER] += term_result.get(ACTUAL_RECORD_NUMBER, 0)
+
+ # update term data to original dic_param
+ dic_param[ARRAY_PLOTDATA].extend(term_result.get(ARRAY_PLOTDATA))
+
+ dic_param[ARRAY_PLOTDATA], is_graph_limited_second = limit_graph_per_tab(dic_param[ARRAY_PLOTDATA], max_graph)
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited or is_graph_limited_second
+
+ # calculating the summaries information
+ calc_summaries(dic_param)
+
+ # calc common scale y min max
+ min_max_list, all_graph_min, all_graph_max = calc_raw_common_scale_y(dic_param[ARRAY_PLOTDATA])
+ calc_scale_info(dic_param[ARRAY_PLOTDATA], min_max_list, all_graph_min, all_graph_max)
+
+ # generate kde for each trace output array
+ dic_param = gen_kde_data(dic_param)
+
+ remove_array_x_y(dic_param)
+
+ return dic_param
+
+
+@log_execution_time()
+def customize_dict_param(dic_param):
+ """ Combine start_time, end_time, start_date, end_date into one object
+
+ Arguments:
+        dic_param {dict} -- parameter dictionary parsed from the client form
+ """
+ # end_proc
+ dic_end_procs = customize_dict_param_common(dic_param)
+ dic_param[COMMON][END_PROC] = dic_end_procs
+ dic_param[COMMON][GET02_VALS_SELECT] = list(dic_end_procs)
+
+ # time
+ dic_param[TIME_CONDS] = gen_time_conditions(dic_param)
+
+
+def gen_time_conditions(dic_param):
+ start_dates = dic_param.get(COMMON).get(START_DATE)
+ start_times = dic_param.get(COMMON).get(START_TM)
+ end_dates = dic_param.get(COMMON).get(END_DATE)
+ end_times = dic_param.get(COMMON).get(END_TM)
+ # if type(start_dates) is not list and type(start_dates) is not tuple:
+ if not isinstance(start_dates, (list, tuple)):
+ start_dates = [start_dates]
+ start_times = [start_times]
+ end_dates = [end_dates]
+ end_times = [end_times]
+
+ lst_datetimes = []
+ if start_dates and start_times and end_dates and end_times and len(start_dates) == len(start_times) == len(
+ end_dates) == len(end_times):
+ names = [START_DATE, START_TM, END_DATE, END_TM]
+ lst_datetimes = [dict(zip(names, row)) for row in zip(start_dates, start_times, end_dates, end_times)]
+ for idx, time_cond in enumerate(lst_datetimes):
+ start_dt = start_of_minute(time_cond.get(START_DATE), time_cond.get(START_TM))
+ end_dt = end_of_minute(time_cond.get(END_DATE), time_cond.get(END_TM))
+ lst_datetimes[idx][START_DT] = start_dt
+ lst_datetimes[idx][END_DT] = end_dt
+
+ return lst_datetimes
+
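+
+# Shape of the result of gen_time_conditions (values are illustrative only;
+# START_DT/END_DT come from start_of_minute()/end_of_minute()):
+#   input : START_DATE=['2020/11/01'], START_TM=['00:00'],
+#           END_DATE=['2020/11/02'],   END_TM=['12:00']
+#   output: [{START_DATE: '2020/11/01', START_TM: '00:00',
+#             END_DATE: '2020/11/02', END_TM: '12:00',
+#             START_DT: <start of minute>, END_DT: <end of minute>}]
+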
+
+@log_execution_time()
+def convert_end_cols_to_array(dic_param):
+ end_col_alias = dic_param[COMMON][GET02_VALS_SELECT]
+    if isinstance(end_col_alias, str):
+ dic_param[COMMON][GET02_VALS_SELECT] = [end_col_alias]
+
+ from_end_col_alias = dic_param[ARRAY_FORMVAL][0][GET02_VALS_SELECT]
+    if isinstance(from_end_col_alias, str):
+ dic_param[ARRAY_FORMVAL][0][GET02_VALS_SELECT] = [from_end_col_alias]
+
+
+@log_execution_time()
+def gen_kde_data(dic_param, dic_array_full=None):
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+ for num, plotdata in enumerate(array_plotdata):
+ full_array_y = dic_array_full[num] if dic_array_full else None
+ kde_list = calculate_kde_trace_data(plotdata, full_array_y=full_array_y)
+ plotdata[SCALE_SETTING][KDE_DATA], plotdata[SCALE_COMMON][KDE_DATA], plotdata[SCALE_THRESHOLD][KDE_DATA], \
+ plotdata[SCALE_AUTO][KDE_DATA], plotdata[SCALE_FULL][KDE_DATA] = kde_list
+
+ return dic_param
+
+
+@log_execution_time()
+def gen_kde_data_cate_var(dic_param, dic_array_full=None):
+ array_plotdatas = dic_param.get(ARRAY_PLOTDATA)
+ for end_col, array_plotdata in array_plotdatas.items():
+ for num, plotdata in enumerate(array_plotdata):
+ full_array_y = dic_array_full[num] if dic_array_full else None
+ kde_list = calculate_kde_trace_data(plotdata, full_array_y=full_array_y)
+ plotdata[SCALE_SETTING][KDE_DATA], plotdata[SCALE_COMMON][KDE_DATA], plotdata[SCALE_THRESHOLD][KDE_DATA], \
+ plotdata[SCALE_AUTO][KDE_DATA], plotdata[SCALE_FULL][KDE_DATA] = kde_list
+
+ return dic_param
+
+
+@log_execution_time()
+def split_data_by_condition(dic_data, graph_param, max_graph=None):
+ """split data by condition
+
+ Arguments:
+ data {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ is_graph_limited = False
+ dic_output = {}
+ for proc in graph_param.array_formval:
+ proc_id = proc.proc_id
+ cat_exp_cols = graph_param.common.cat_exp
+
+ end_cols = proc.col_ids
+ dic_data_for_df = {Cycle.time.key: dic_data[proc_id][Cycle.time.key],
+ **{end_col: dic_data[proc_id][end_col] for end_col in end_cols}
+ }
+ group_by_cols = []
+ for cat_exp_col in cat_exp_cols or []:
+ group_by_cols.append(cat_exp_col)
+ if cat_exp_col not in dic_data_for_df:
+ for dic_col in dic_data.values():
+ vals = dic_col.get(cat_exp_col)
+ if vals:
+ dic_data_for_df[cat_exp_col] = vals
+ break
+
+ df = pd.DataFrame(dic_data_for_df)
+ if not len(df):
+ continue
+
+ df = df.convert_dtypes()
+
+ if group_by_cols:
+ dic_col, is_graph_limited = gen_plotdata_with_group_by(df, end_cols, group_by_cols, max_graph)
+ else:
+ dic_col = gen_plotdata_without_group_by(df, end_cols)
+
+ dic_output.update(dic_col)
+
+ return dic_output, is_graph_limited
+
+
+def gen_plotdata_without_group_by(df, end_cols):
+ dic_output = {}
+ array_x = df[Cycle.time.key].to_list()
+ for end_col in end_cols:
+ dic_cate = defaultdict(dict)
+ dic_output[end_col] = dic_cate
+ dic_cate[None] = {ARRAY_X: array_x, ARRAY_Y: df[end_col].to_list()}
+
+ return dic_output
+
+
+def gen_plotdata_with_group_by(df, end_cols, group_by_cols, max_graph=None):
+ is_graph_limit = False
+ dic_output = {}
+ df_group = df.groupby(group_by_cols)
+ limit_cols = end_cols
+ if max_graph and max_graph < len(end_cols):
+ is_graph_limit = True
+ limit_cols = end_cols[:max_graph]
+
+ for end_col in limit_cols:
+ dic_cate = defaultdict(dict)
+ dic_output[end_col] = dic_cate
+ for group_name, idxs in df_group.groups.items():
+ if isinstance(group_name, (list, tuple)):
+ group_name = ' | '.join([str(NA_STR if pd.isna(val) else val) for val in group_name])
+
+ rows = df.loc[idxs, end_col]
+ if len(rows.dropna()) == 0:
+ continue
+
+ dic_cate[group_name] = {ARRAY_X: df.loc[idxs, Cycle.time.key].to_list(), ARRAY_Y: rows.to_list()}
+
+ return dic_output, is_graph_limit
+
+
+def gen_plotdata_for_var(dic_data):
+ plotdatas = {}
+ col_ids = list(dic_data.keys())
+ dic_procs, dic_cols = get_cfg_proc_col_info(col_ids)
+ for end_col, cat_exp_data in dic_data.items():
+ plotdatas[end_col] = []
+ cfg_col: CfgProcessColumn = dic_cols[end_col]
+ cfg_proc: CfgProcess = dic_procs[cfg_col.process_id]
+ for cat_exp_name, data in cat_exp_data.items():
+ if not data:
+ continue
+
+ plotdata = {ARRAY_Y: data[ARRAY_Y], ARRAY_X: data[ARRAY_X],
+ END_PROC_ID: cfg_col.process_id, END_PROC_NAME: cfg_proc.name,
+ END_COL: end_col, END_COL_NAME: cfg_col.name, CAT_EXP_BOX: cat_exp_name}
+ plotdatas[end_col].append(plotdata)
+
+ return plotdatas
+
+
+def gen_plotdata_one_proc(dic_data):
+ plotdatas = []
+ col_ids = list(dic_data.keys())
+ dic_procs, dic_cols = get_cfg_proc_col_info(col_ids)
+ for end_col, cat_exp_data in dic_data.items():
+ cfg_col: CfgProcessColumn = dic_cols[end_col]
+ cfg_proc: CfgProcess = dic_procs[cfg_col.process_id]
+ for cat_exp_name, data in cat_exp_data.items():
+ plotdata = {ARRAY_Y: data[ARRAY_Y], ARRAY_X: data[ARRAY_X],
+ END_PROC_ID: cfg_col.process_id, END_PROC_NAME: cfg_proc.name,
+ END_COL: end_col, END_COL_NAME: cfg_col.name, CAT_EXP_BOX: cat_exp_name}
+ plotdatas.append(plotdata)
+
+ return plotdatas
+
+
+@log_execution_time()
+def save_input_data_to_gen_images(df: DataFrame, graph_param):
+ dic_rename_columns = {}
+ for proc in graph_param.array_formval:
+ col_ids_names = sorted(zip(proc.col_ids, proc.col_names))
+ for col_id, col_name in col_ids_names:
+ sql_label = gen_sql_label(col_id, col_name)
+ if sql_label in df.columns:
+ dic_rename_columns[sql_label] = col_name
+
+ file_path = create_file_path('dat_' + EventType.STP.value + '_image')
+
+ make_dir_from_file_path(file_path)
+ df.rename(columns=dic_rename_columns).to_csv(file_path, sep=CsvDelimiter.TSV.value, index=False,
+ columns=list(dic_rename_columns.values()))
+ return file_path
+
+
+def get_checked_cols(trace, dic_param):
+ dic_header = {}
+ for proc in dic_param[ARRAY_FORMVAL]:
+ proc_name = proc[END_PROC]
+ end_cols = proc[GET02_VALS_SELECT]
+ if isinstance(end_cols, str):
+ end_cols = [end_cols]
+
+ checked_cols = trace.proc_yaml.get_checked_columns(proc_name)
+ cols = []
+ for col, col_detail in checked_cols.items():
+ data_type = col_detail[YAML_DATA_TYPES]
+ # alias_name = col_detail[YAML_ALIASES]
+ if data_type == DataType.REAL.name or col in end_cols:
+ continue
+
+ cols.append(col)
+
+ dic_header[proc_name] = cols
+ return dic_header
+
+
+@log_execution_time()
+def use_etl_spray_shape(proc: CfgProcess):
+ data_source: CfgDataSource = proc.data_source
+ if data_source.type.lower() == DBType.CSV.name.lower():
+ etl_func = data_source.csv_detail.etl_func
+ if etl_func == FILE_ETL_SPRAY_SHAPE:
+ return True
+ return False
+
+
+@log_execution_time()
+def category_bind_dic_param_to_class(dic_param):
+ graph_param = bind_dic_param_to_class(dic_param)
+ if dic_param[COMMON].get(CYCLIC_TERMS):
+ graph_param.cyclic_terms += dic_param[COMMON][CYCLIC_TERMS]
+
+ return graph_param
+
+
+@log_execution_time()
+def gen_graph_cyclic(dic_param, terms, max_graph=None):
+ """tracing data to show graph
+ 1 start point x n end point
+    filter by condition points that are between the start point and end point
+ """
+ # bind dic_param
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+
+ graph_param_with_cat_exp = bind_dic_param_to_class(dic_param)
+ graph_param_with_cat_exp.add_cat_exp_to_array_formval()
+
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # add cat exp (use for category page)
+ graph_param.add_cat_exp_to_array_formval()
+
+ # get serials
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # apply coef for text
+ df = apply_coef_text(df, orig_graph_param, dic_proc_cfgs)
+
+ # flag to show that trace result was limited
+ dic_param[DATA_SIZE] = df.memory_usage(deep=True).sum()
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+    # create output data
+ dic_param[ARRAY_PLOTDATA] = []
+ end_procs = orig_graph_param.array_formval
+ df.set_index(Cycle.time.key, inplace=True, drop=False)
+ all_plots = []
+ is_graph_limited = False
+ for term_id, term in enumerate(terms):
+ df_chunk = df[(df.index >= term['start_dt']) & (df.index < term['end_dt'])]
+ if not len(df_chunk):
+ continue
+
+ dic_data = gen_dic_data_from_df(df_chunk, graph_param_with_cat_exp)
+ dic_data, _is_graph_limited = split_data_by_condition(dic_data, orig_graph_param, max_graph)
+ if _is_graph_limited:
+ is_graph_limited = True
+
+ plots = gen_plotdata_one_proc(dic_data)
+ # get graph configs
+ times = df_chunk[Cycle.time.key].tolist() or []
+ dic_data_for_graph_configs = {}
+ for end_proc in end_procs:
+ time_col_alias = f'{Cycle.time.key}_{end_proc.proc_id}'
+ end_col_time = df_chunk[time_col_alias].to_list()
+ dic_data_for_graph_configs[end_proc.proc_id] = {Cycle.time.key: end_col_time}
+
+ chart_infos, original_graph_configs = get_chart_infos(orig_graph_param, dic_data_for_graph_configs, times)
+ for plot in plots:
+ plot['term_id'] = term_id
+ set_chart_infos_to_plotdata(plot[END_COL], chart_infos, original_graph_configs, plot)
+
+ all_plots += plots
+
+ dic_param[ARRAY_PLOTDATA], dic_param[IS_GRAPH_LIMITED] = limit_graph_per_tab(all_plots, max_graph)
+
+ if is_graph_limited:
+ dic_param[IS_GRAPH_LIMITED] = True
+
+ return dic_param
+
+
+def gen_graph_term(dic_param, max_graph=None):
+ """tracing data to show graph
+ 1 start point x n end point
+    filter by condition points that are between the start point and end point
+ """
+ # bind dic_param
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+
+ graph_param_with_cat_exp = bind_dic_param_to_class(dic_param)
+ graph_param_with_cat_exp.add_cat_exp_to_array_formval()
+
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # add cat exp (use for category page)
+ graph_param.add_cat_exp_to_array_formval()
+
+ # get serials
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # apply coef for text
+ df = apply_coef_text(df, orig_graph_param, dic_proc_cfgs)
+
+ # flag to show that trace result was limited
+ dic_param[DATA_SIZE] = df.memory_usage(deep=True).sum()
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ # create output data
+ dic_data = gen_dic_data_from_df(df, graph_param_with_cat_exp)
+ dic_data, is_graph_limited = split_data_by_condition(dic_data, orig_graph_param, max_graph)
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited
+ dic_param[ARRAY_PLOTDATA] = gen_plotdata_one_proc(dic_data)
+
+ # get graph configs
+ times = df[Cycle.time.key].tolist() or []
+ end_procs = orig_graph_param.array_formval
+ dic_data_for_graph_configs = {}
+ for end_proc in end_procs:
+ if not len(df):
+ continue
+ time_col_alias = f'{Cycle.time.key}_{end_proc.proc_id}'
+ end_col_time = df[time_col_alias].to_list()
+ dic_data_for_graph_configs[end_proc.proc_id] = {Cycle.time.key: end_col_time}
+
+ chart_infos, original_graph_configs = get_chart_infos(orig_graph_param, dic_data_for_graph_configs, times)
+
+ for plot in dic_param[ARRAY_PLOTDATA]:
+ plot['term_id'] = dic_param['term_id']
+ set_chart_infos_to_plotdata(plot[END_COL], chart_infos, original_graph_configs, plot)
+
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+ return dic_param
+
+
+@log_execution_time()
+def produce_cyclic_terms(dic_param): # TODO reverse when interval is negative
+ num_ridge_lines = int(dic_param[COMMON][CYCLIC_DIV_NUM])
+ interval = float(dic_param[COMMON][CYCLIC_INTERVAL])
+ window_len = float(dic_param[COMMON][CYCLIC_WINDOW_LEN])
+ start_date = dic_param[COMMON][START_DATE]
+ start_time = dic_param[COMMON][START_TM]
+ start_datetime = '{}T{}'.format(start_date, start_time) # '2020/11/01T00:00'
+
+ cyclic_terms = []
+ prev_start = datetime.strptime(start_datetime, RL_DATETIME_FORMAT)
+ end = prev_start + timedelta(hours=window_len)
+ start_utc_str = datetime.strftime(prev_start.replace(tzinfo=tz.tzutc()), DATE_FORMAT_STR)
+ end_utc_str = datetime.strftime(end.replace(tzinfo=tz.tzutc()), DATE_FORMAT_STR)
+ cyclic_terms.append((start_utc_str, end_utc_str))
+
+ for i in range(1, num_ridge_lines):
+ start = prev_start + timedelta(hours=interval)
+ end = start + timedelta(hours=window_len)
+ start_utc_str = datetime.strftime(start.replace(tzinfo=tz.tzutc()), DATE_FORMAT_STR)
+ end_utc_str = datetime.strftime(end.replace(tzinfo=tz.tzutc()), DATE_FORMAT_STR)
+ cyclic_terms.append((start_utc_str, end_utc_str))
+ prev_start = start
+
+ # get new start/end datetime
+ last_cyclic_term_end = cyclic_terms[-1][1]
+ end_dt = datetime.strptime(last_cyclic_term_end, DATE_FORMAT_STR)
+ end_date = datetime.strftime(end_dt, DATE_FORMAT)
+ end_time = datetime.strftime(end_dt, TIME_FORMAT)
+
+ if interval < 0: # exchange start time and end time when interval is negative
+ dic_param[COMMON][END_DATE] = start_date
+ dic_param[COMMON][END_TM] = start_time
+ dic_param[COMMON][START_DATE] = end_date
+ dic_param[COMMON][START_TM] = end_time
+ dic_param[TIME_CONDS] = {
+ END_DATE: start_date,
+ END_TM: start_time,
+ END_DT: end_of_minute(start_date, start_time),
+ START_DATE: end_date,
+ START_TM: end_time,
+ START_DT: start_of_minute(end_date, end_time),
+ }
+ else:
+ # set END date/time
+ dic_param[COMMON][END_DATE] = end_date
+ dic_param[COMMON][END_TM] = end_time
+ if dic_param.get(TIME_CONDS):
+ time_cond = dic_param[TIME_CONDS][0]
+ time_cond[END_DATE] = end_date
+ time_cond[END_TM] = end_time
+ time_cond[END_DT] = end_of_minute(end_date, end_time)
+
+ dic_param[COMMON][CYCLIC_TERMS] = cyclic_terms
+
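+
+# Worked example for produce_cyclic_terms (illustrative): CYCLIC_DIV_NUM=3,
+# CYCLIC_INTERVAL=2 (hours), CYCLIC_WINDOW_LEN=1 (hour) and a start of
+# '2020/11/01T00:00' yield the terms (00:00-01:00), (02:00-03:00), (04:00-05:00)
+# as UTC strings, and END_DATE/END_TM in dic_param are moved to the end of the
+# last term (05:00 here) so the data query covers every window.
+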
+
+def remove_array_x_y(dic_param):
+ for plot in dic_param[ARRAY_PLOTDATA]:
+ if not plot:
+ continue
+
+ del plot[ARRAY_X]
+ del plot[ARRAY_Y]
+ return True
+
+
+def remove_array_x_y_cyclic(dic_param):
+ for plots in dic_param[ARRAY_PLOTDATA].values():
+ for plot in plots:
+ if not plot:
+ continue
+
+ del plot[ARRAY_X]
+ del plot[ARRAY_Y]
+
+ return True
+
+
+@log_execution_time()
+@notify_progress(75)
+def dump_img_files(df, graph_param, dic_proc_cfgs):
+ # TODO: minor trick to resolve nested-trace_log problem
+ img_files = []
+ if not df.index.size:
+ return img_files
+
+ # make input tsv file
+ tsv_file = save_input_data_to_gen_images(df, graph_param)
+
+ if use_etl_spray_shape(dic_proc_cfgs[graph_param.common.start_proc]):
+ img_files = call_com_view(tsv_file, get_view_path())
+
+ # strip folder
+ if img_files is not None and not isinstance(img_files, Exception):
+ if isinstance(img_files, str):
+ img_files = [img_files]
+ img_files = [get_basename(img) for img in img_files]
+
+ return img_files
+
+
+def customize_dict_param_common(dic_param):
+ dic_end_procs = {}
+ end_procs = dic_param.get(ARRAY_FORMVAL)
+ for end_proc in end_procs:
+ proc_id = end_proc.get(END_PROC)
+ if isinstance(proc_id, list):
+ proc_id = proc_id[0]
+
+ col_ids = end_proc.get(GET02_VALS_SELECT)
+ if not isinstance(col_ids, list):
+ col_ids = [col_ids]
+
+ for col_id in col_ids:
+ dic_end_procs[int(col_id)] = int(proc_id)
+
+ return dic_end_procs
+
+
+@log_execution_time()
+def limit_graph_per_tab(plots, max_graph=None):
+ is_limited = False
+ if max_graph is None:
+ return plots, is_limited
+
+ dic_count = defaultdict(int)
+ limit_plots = []
+ for plot in plots:
+ col_id = plot[END_COL]
+ dic_count[col_id] += 1
+ if dic_count[col_id] > max_graph:
+ is_limited = True
+ continue
+
+ limit_plots.append(plot)
+
+ return limit_plots, is_limited
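+
+
+# Example for limit_graph_per_tab: with max_graph=2 and plots whose END_COL ids
+# are [7, 7, 7, 8], the first two plots of column 7 and the single plot of
+# column 8 are kept, and is_limited is returned as True.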
diff --git a/histview2/api/co_occurrence/controllers.py b/histview2/api/co_occurrence/controllers.py
new file mode 100644
index 0000000..21d7825
--- /dev/null
+++ b/histview2/api/co_occurrence/controllers.py
@@ -0,0 +1,77 @@
+import json
+
+from flask import Blueprint, request, jsonify
+
+from histview2.api.co_occurrence.services import validate_csv_data, calc_csv_graph_data, add_node_coordinate, \
+ filter_edge_by_threshold, calc_pareto
+from histview2.api.setting_module.services.csv_import import csv_to_df
+from histview2.common.common_utils import get_csv_delimiter
+from histview2.common.services import http_content
+from histview2.common.yaml_utils import *
+
+api_co_occurrence_blueprint = Blueprint(
+ 'api_co_occurrence',
+ __name__,
+ url_prefix='/histview2/api/cog'
+)
+
+
+@api_co_occurrence_blueprint.route('/check_file', methods=['POST'])
+def check_file():
+ try:
+ data = request.json.get('url')
+ return jsonify({
+ 'status': 200,
+ 'url': data,
+ 'is_exists': os.path.isfile(data) and os.path.exists(data),
+ 'dir': os.path.dirname(data)
+ })
+ except Exception:
+ # raise
+ return jsonify({
+ 'status': 500,
+ })
+
+
+@api_co_occurrence_blueprint.route('/show_graph', methods=['POST'])
+def show_graph():
+ from_file = False
+ file = request.files.get('file')
+ file_path = request.form.get('url')
+ if file:
+ file_path = file.read()
+ from_file = True
+
+ delimiter = request.form.get('delimiter')
+ aggregate_by = request.form.get('aggregate_by')
+ threshold = request.form.get('threshold') or 100
+ layout = request.form.get('layout')
+ aggregate_by = AggregateBy(aggregate_by)
+
+ data_src: CfgDataSourceCSV = CfgDataSourceCSV()
+ data_src.delimiter = delimiter
+
+ # csv delimiter
+ csv_delimiter = get_csv_delimiter(delimiter)
+
+ # read csv file
+ data_first_row = 1
+ skip_row = 0
+ df = csv_to_df(file_path, data_src, [], data_first_row, skip_row,
+ csv_delimiter, default_csv_param={}, from_file=from_file)
+
+ validate_result = validate_csv_data(df)
+ if isinstance(validate_result, Exception):
+ return validate_result, 200
+
+ # calc pareto data
+ pareto = calc_pareto(df)
+
+ # calc_data
+ nodes, edges = calc_csv_graph_data(df, aggregate_by, pareto)
+ nodes = add_node_coordinate(nodes, layout=layout)
+ edges = filter_edge_by_threshold(edges, threshold)
+
+ result = dict(nodes=nodes, edges=edges, pareto=pareto)
+ out_dict = json.dumps(result, ensure_ascii=False, default=http_content.json_serial)
+ return out_dict, 200
diff --git a/histview2/api/co_occurrence/services.py b/histview2/api/co_occurrence/services.py
new file mode 100644
index 0000000..441c89e
--- /dev/null
+++ b/histview2/api/co_occurrence/services.py
@@ -0,0 +1,196 @@
+import itertools
+
+import numpy as np
+import pandas as pd
+from flask_babel import gettext as _
+
+from histview2.common.constants import *
+from histview2.common.logger import logger
+from histview2.common.services.sse import notify_progress
+from histview2.common.trace_data_log import *
+from histview2.common.yaml_utils import YamlConfig
+
+dic_colors = {
+ "bar_highlight": "#729e44", # green, same as other charts
+ "bar_normal": "#8d8b8b",
+ "line_80": "#a9a9a9",
+ "chart_title": "#3385b7", # blue, same as other charts
+}
+
+
+def validate_csv_data(df: DataFrame):
+ if df is None or df.size == 0:
+ return Exception('There is no data')
+
+ cols = df.columns.tolist()
+ date_time_col = cols[0]
+ data_cols = cols[1:]
+
+ # fill na
+ df.dropna(how='all', inplace=True)
+ df.fillna(0, inplace=True)
+
+ # convert time
+ try:
+ df[date_time_col] = pd.to_datetime(df[date_time_col])
+ except Exception as e:
+ logger.exception(e)
+        return Exception('There are some non-datetime values in the Datetime column (first column)')
+
+ # check numeric
+ try:
+ for col in data_cols:
+ df[col] = pd.to_numeric(df[col])
+ except Exception as e:
+ logger.exception(e)
+        return Exception('There are some non-numeric values in the data columns')
+
+ # check is int data type
+ for col in data_cols:
+ try:
+ df[col] = pd.to_numeric(df[col])
+ except Exception as e:
+ logger.exception(e)
+            return Exception('There are some non-integer values in the data columns')
+
+ # convert float to int
+ df[col] = df[col].convert_dtypes()
+
+ # check < zero
+ if (df[col] < 0).any():
+ return Exception('There are some values < 0 in data columns')
+
+ # check not integer ( float )
+ if df.select_dtypes(include=['integer']).columns.size < len(data_cols):
+ return Exception('There are some float values in data columns')
+
+ return True
+
+
+@notify_progress(60)
+def calc_csv_graph_data(df: DataFrame, aggregate_by: AggregateBy, pareto=None):
+ if pareto is None:
+ pareto = {}
+ cols = df.columns.tolist()
+ date_time_col = cols[0]
+ data_cols = cols[1:]
+
+ df.set_index(date_time_col, inplace=True)
+
+ if aggregate_by is AggregateBy.HOUR:
+ freq = 'H'
+ else:
+ freq = 'D'
+
+ df_sum = df.groupby(pd.Grouper(freq=freq)).sum()
+ nodes_cum_rate_80 = YamlConfig.get_node(pareto, ['bar', 'highlight_bars'], set()) or set()
+ nodes = []
+ for key, val in df_sum.sum().to_dict().items():
+ color = dic_colors["bar_highlight"] if key in nodes_cum_rate_80 else ''
+ nodes.append(dict(id=key, label=key, size=int(val), color=color))
+
+ edges = []
+ for source, target in itertools.combinations(data_cols, 2):
+ edge = [source, target]
+ size = int(df_sum[edge].min(axis=1).sum())
+ edge_id = '-'.join([str(node_id) for node_id in edge])
+ dic_edge = dict(id=edge_id, label=size, source=source, target=target)
+ edges.append(dic_edge)
+
+ return nodes, edges
+
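+
+# Edge weights produced by calc_csv_graph_data are a simple co-occurrence count:
+# for each pair of columns, the per-bucket (hour or day) minimum of the two
+# occurrence counts is summed over all buckets. E.g. if alarm A fired [2, 0, 3]
+# times and alarm B fired [1, 4, 3] times over three buckets, the A-B edge size
+# is min(2, 1) + min(0, 4) + min(3, 3) = 4.
+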
+
+def add_node_coordinate(nodes, layout='CIRCLE'):
+ if layout == 'CIRCLE':
+ nodes = gen_circular_coordinate(nodes)
+ elif layout == 'FORCE_ATLAS_2':
+ nodes = gen_random_coordinate(nodes)
+ return nodes
+
+
+def gen_circular_coordinate(nodes):
+ if not nodes:
+ return []
+ radius = 200 # TODO
+ num_nodes = len(nodes) # number of sensors (what user selected)
+ # note that 0 and 2*pi indicate same position
+ theta = np.linspace(0.5 * np.pi, 2.5 * np.pi, num_nodes + 1)
+ theta = theta[:num_nodes]
+
+ pos_x = np.cos(theta) * radius
+ pos_y = np.sin(theta) * radius
+ for idx, node in enumerate(nodes):
+ node['x'] = -pos_x[idx]
+ node['y'] = -pos_y[idx]
+ return nodes
+
+
+def gen_random_coordinate(nodes):
+ if not nodes:
+ return []
+ import random
+ for idx, node in enumerate(nodes):
+ node['x'] = random.randint(1, 5)
+ node['y'] = random.randint(1, 5)
+ return nodes
+
+
+def filter_edge_by_threshold(edges, threshold=100):
+ if not edges:
+ return []
+ edges = sorted(edges, key=lambda x: x.get('label') or -1, reverse=True)
+ limit = round(int(threshold) * len(edges) / 100)
+ edges = edges[0:limit]
+ return edges
+
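+
+# Example for filter_edge_by_threshold: with 10 edges and threshold=30, edges
+# are sorted by label (size) in descending order and only the top
+# round(30 * 10 / 100) = 3 edges are kept.
+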
+
+@notify_progress(30)
+def calc_pareto(df: pd.DataFrame):
+ drop_col = df.columns.values[0]
+
+ # ----- summarize -----
+ # sort with descending order and take cumsum for pareto chart
+ total_occurrences = df.drop(drop_col, axis="columns").sum(axis="index").sort_values(ascending=False)
+ cum_occurrences_ratio = total_occurrences.cumsum() / total_occurrences.sum()
+ alarm_names = total_occurrences.index.values
+ print("alarm names: ", alarm_names[:5], "...")
+ print("total number of alarms: ", total_occurrences.values[:5], "...")
+ print("cumulative ratio of number of alarms [%]: ", cum_occurrences_ratio.values[:5], "...")
+
+ # change color of bar (highlight cumulative ratio <= 80%)
+ # note that this list is not in the original column order.
+ # bar_colors = [dic_colors["bar_highlight"] if x <= 0.8 else dic_colors["bar_normal"] for x in cum_occurrences_ratio]
+ highlight_bars = set()
+ bar_colors = []
+ for alarm_name, cum_ratio_value in list(zip(cum_occurrences_ratio.index, cum_occurrences_ratio)):
+ if cum_ratio_value <= 0.8:
+ color = dic_colors["bar_highlight"]
+ highlight_bars.add(alarm_name)
+ else:
+ color = dic_colors["bar_normal"]
+ bar_colors.append(color)
+
+ return {
+ 'title': _('Pareto Chart'),
+ 'bar': {
+ 'y': alarm_names,
+ 'x': total_occurrences,
+ 'name': _('Total Occurrences'),
+ 'orientation': "h",
+ 'marker_color': bar_colors,
+ 'text': total_occurrences.values,
+ 'highlight_bars': highlight_bars,
+ },
+ 'line_cum_ratio': {
+ 'x': cum_occurrences_ratio * total_occurrences.max(),
+ 'name': _('Cumulative Ratio [%]'),
+ 'text': cum_occurrences_ratio.values * 100,
+ 'mode': 'lines+markers',
+ },
+ 'line_80_percent': {
+ 'x': np.repeat(0.8, len(alarm_names)) * total_occurrences.max(),
+ 'name': "80 [%]",
+ 'marker_color': dic_colors["line_80"],
+ 'mode': "lines",
+ }
+ }
diff --git a/histview2/api/efa/services/__init__.py b/histview2/api/efa/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/efa/services/etl.py b/histview2/api/efa/services/etl.py
new file mode 100644
index 0000000..a7f21d6
--- /dev/null
+++ b/histview2/api/efa/services/etl.py
@@ -0,0 +1,156 @@
+from histview2.common.common_utils import detect_encoding
+from histview2.common.common_utils import get_temp_path, get_wrapr_path, get_etl_path, get_base_dir, make_dir
+from histview2.common.constants import CfgConstantType, CsvDelimiter
+from histview2.common.logger import logger, log_execution_time
+from histview2.common.services.sse import notify_progress
+from histview2.script.r_scripts.wrapr import wrapr_utils
+from histview2.setting_module.models import CfgConstant
+
+FILE = 'etl_spray_shape.R'
+
+UNKNOWN_ERROR_MESSAGE = 'NO OUTPUT FROM R SCRIPT'
+NO_DATA_ERROR = 'NoDataError'
+
+
+@log_execution_time()
+def preview_data(fname):
+ """
+ transform data , output will be put in temp folder
+ """
+ output_fname = call_com_read(fname, get_temp_path())
+ return output_fname
+
+
+@log_execution_time()
+def csv_transform(proc_id, fname):
+ """transform to standard csv
+
+ """
+ out_dir = get_base_dir(fname)
+ etl_dir = get_etl_path(str(proc_id), out_dir)
+ make_dir(etl_dir)
+
+ output_fname = call_com_read(fname, etl_dir)
+ return output_fname
+
+
+@log_execution_time()
+def call_com_read(fname, out_dir):
+ """call com read func to transform data
+
+ Args:
+ fname ([type]): [description]
+ out_dir ([type]): [description]
+
+ Returns:
+ [type]: [description]
+ """
+ target_func = 'com_read'
+
+ # define parameters
+ dic_data = {} # filecheckr does not need input data
+ dic_task = dict(func=target_func, file=FILE, fpath=fname)
+
+ # define and run pipeline
+ try:
+ pipe = wrapr_utils.RPipeline(get_wrapr_path(), out_dir, use_pkl=False, verbose=True)
+ out = pipe.run(dic_data, [dic_task])
+ except Exception as e:
+ logger.error(e)
+ return e
+
+ if out:
+ error = out.get('err', None)
+ error_type = out.get('err_type', None)
+ if error:
+ if error_type == NO_DATA_ERROR:
+ return None
+
+ logger.error(error)
+ return Exception(error)
+
+ # save latest json string
+ json_str = out['results']['pass']
+ save_etl_json(FILE, json_str)
+
+ # return
+ return out['results']['fname_out']
+
+ return Exception(UNKNOWN_ERROR_MESSAGE)
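+# Note: call_com_read (and call_com_view below) return Exception objects instead of raising them, so
+# callers are expected to check the result, e.g. (illustrative usage sketch):
+#     output = call_com_read(fname, get_temp_path())
+#     if isinstance(output, Exception):
+#         ...  # handle the error; a None result means the R script reported no data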
+
+
+@log_execution_time()
+@notify_progress(60)
+def call_com_view(fname, out_dir):
+ """call com view func to export image
+
+ Args:
+ fname ([type]): [description]
+ out_dir ([type]): [description]
+
+ Returns:
+ [type]: [description]
+ """
+ target_func = 'com_view'
+
+ # get json string
+ json_str = load_etl_json(FILE)
+
+ # define parameters
+ dic_data = {}
+ dic_task = {'func': target_func, 'file': FILE, 'fpath': fname, 'pass': json_str}
+
+ # define and run pipeline
+ try:
+ pipe = wrapr_utils.RPipeline(get_wrapr_path(), out_dir, use_pkl=False, verbose=True)
+ out = pipe.run(dic_data, [dic_task])
+ except Exception as e:
+ logger.error(e)
+ return e
+
+ if out:
+ error = out.get('err', None)
+ if error:
+ logger.error(error)
+ return Exception(error)
+
+ return out['results']['fname_out']
+
+ return Exception(UNKNOWN_ERROR_MESSAGE)
+
+
+@log_execution_time()
+def save_etl_json(script_fname, json_str):
+ CfgConstant.create_or_update_by_type(const_type=CfgConstantType.ETL_JSON.name,
+ const_value=json_str, const_name=script_fname)
+
+
+@log_execution_time()
+def load_etl_json(script_fname):
+ # get json string
+ json_str = CfgConstant.get_value_by_type_name(CfgConstantType.ETL_JSON.name, script_fname, str)
+ return json_str
+
+
+@log_execution_time()
+def detect_file_delimiter(file_path, default_delimiter):
+ white_list = [CsvDelimiter.CSV.value, CsvDelimiter.TSV.value, CsvDelimiter.SMC.value]
+ encoding = detect_encoding(file_path)
+ candidates = []
+    with open(file_path, "r", encoding=encoding) as f:
+        # inspect up to the first 200 lines; readline() returns '' at EOF (it does not raise StopIteration)
+        for i in range(200):
+            line = f.readline()
+            if not line:
+                break
+
+            # each line votes for the candidate that splits it into the most fields
+            _, row_delimiter = max([(len(line.split(split_char)), split_char) for split_char in white_list])
+            candidates.append(row_delimiter)
+
+    if candidates:
+        # majority vote across the inspected lines
+        return max(candidates, key=candidates.count)
+
+ return default_delimiter
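+# e.g. for a file whose first lines look like "a,b;c,d", splitting on ',' yields 3 fields while the
+# other candidate delimiters yield fewer, so ',' gets that line's vote; the delimiter with the most
+# votes over the sampled lines wins, and default_delimiter is returned when no line could be read.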
diff --git a/histview2/api/heatmap/__init__.py b/histview2/api/heatmap/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/heatmap/controllers.py b/histview2/api/heatmap/controllers.py
new file mode 100644
index 0000000..2826615
--- /dev/null
+++ b/histview2/api/heatmap/controllers.py
@@ -0,0 +1,56 @@
+import timeit
+
+import simplejson
+from flask import Blueprint, request
+
+from histview2.api.categorical_plot.services import customize_dict_param
+from histview2.api.heatmap.services import gen_heatmap_data
+from histview2.common.constants import COMMON, START_PROC, ARRAY_FORMVAL, END_PROC
+from histview2.common.services import http_content
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import save_input_data_to_file, EventType
+
+api_heatmap_blueprint = Blueprint(
+ 'api_heatmap',
+ __name__,
+ url_prefix='/histview2/api/chm'
+)
+
+
+@api_heatmap_blueprint.route('/plot', methods=['POST'])
+def generate_heatmap():
+ """ [summary]
+ Returns:
+ [type] -- [description]
+ """
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+ mode = dic_form.get('mode') or []
+ if mode and '1' in set(mode): # 1 for daily and 7 for weekly
+ dic_form['step'] = dic_form['step_minute']
+ else:
+ dic_form['step'] = dic_form['step_hour']
+
+ # save dic_form to pickle (for future debug)
+ save_input_data_to_file(dic_form, EventType.CHM)
+
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ start_proc = dic_param[COMMON].get(START_PROC)
+ dic_param[COMMON][START_PROC] = start_proc if start_proc else dic_param[ARRAY_FORMVAL][0][END_PROC]
+
+ customize_dict_param(dic_param)
+ dic_param = gen_heatmap_data(dic_param)
+
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ return simplejson.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial, ignore_nan=True)
diff --git a/histview2/api/heatmap/services.py b/histview2/api/heatmap/services.py
new file mode 100644
index 0000000..2e469ab
--- /dev/null
+++ b/histview2/api/heatmap/services.py
@@ -0,0 +1,660 @@
+import math
+from datetime import timedelta, datetime
+from typing import Dict, List
+
+import numpy as np
+import pandas as pd
+from dateutil import parser
+from dateutil.tz import tz
+from flask_babel import gettext as _
+from scipy.stats import iqr
+
+from histview2.api.categorical_plot.services import gen_graph_param
+from histview2.api.trace_data.services.time_series_chart import validate_data, graph_one_proc, \
+ main_check_filter_detail_match_graph_data
+from histview2.common.common_utils import start_of_minute, end_of_minute, DATE_FORMAT_QUERY, gen_sql_label, \
+ reformat_dt_str
+from histview2.common.constants import TIME_COL, CELL_SUFFIX, AGG_COL, ARRAY_PLOTDATA, HMFunction, DataType, \
+ MATCHED_FILTER_IDS, UNMATCHED_FILTER_IDS, NOT_EXACT_MATCH_FILTER_IDS, ACTUAL_RECORD_NUMBER, IS_RES_LIMITED, \
+ AGG_FUNC, CATE_VAL, END_COL, X_TICKTEXT, X_TICKVAL, Y_TICKTEXT, Y_TICKVAL, ACT_CELLS, MAX_TICKS, NA_STR
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.services.sse import notify_progress, background_announcer, AnnounceEvent
+from histview2.common.sigificant_digit import signify_digit2
+from histview2.common.trace_data_log import TraceErrKey, EventType, EventAction, Target, trace_log
+from histview2.setting_module.models import CfgProcess
+from histview2.trace_data.schemas import DicParam
+
+
+@log_execution_time()
+@notify_progress(75)
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.CHM, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_heatmap_data(dic_param):
+ # gen graph_param
+ graph_param, dic_proc_cfgs = gen_graph_param(dic_param)
+
+ hm_mode = int(graph_param.common.hm_mode)
+ hm_step = int(graph_param.common.hm_step)
+ # start proc
+ start_tm = start_of_minute(graph_param.common.start_date, graph_param.common.start_time)
+ end_tm = end_of_minute(graph_param.common.end_date, graph_param.common.end_time)
+ client_timezone = graph_param.get_client_timezone()
+ # client_tz = pytz.timezone(client_timezone) if client_timezone else tz.tzlocal()
+ client_tz = tz.gettz(client_timezone or None) or tz.tzlocal()
+
+ # generate all cells
+ cells = gen_cells(start_tm, end_tm, hm_mode, hm_step)
+ df_cells = pd.DataFrame({TIME_COL: cells})
+ # time_delta = calc_time_delta(hm_mode, hm_step, start_tm)
+ offset = get_utc_offset(client_tz)
+ df_cells = convert_cell_tz(df_cells, offset)
+ df_cells = gen_agg_col(df_cells, hm_mode, hm_step)
+
+ # limit to 10000 cells
+ dic_param.update({ACT_CELLS: df_cells.index.size})
+ df_cells, end_tm, is_res_limited = limit_num_cells(df_cells, end_tm, offset)
+ dic_param.update({IS_RES_LIMITED: is_res_limited})
+
+ # generate x, y, x_label, y_label
+ df_cells = gen_x_y(df_cells, hm_mode, hm_step, start_tm, end_tm, client_tz)
+
+ # build dic col->function
+ dic_col_func = build_dic_col_func(dic_proc_cfgs, graph_param)
+
+ # get stratified variable
+ var_col_id = graph_param.get_cate_var_col_id()
+ var_agg_col = None
+ if var_col_id:
+ var_col_name = graph_param.get_cate_var_col_name()
+ var_agg_col = gen_sql_label(var_col_id, var_col_name)
+
+ dic_df_proc = dict()
+ num_proc = len(dic_proc_cfgs.keys()) or 1
+ total_actual_record_number = 0
+ for idx, (proc_id, proc_config) in enumerate(dic_proc_cfgs.items()):
+ if graph_param.is_end_proc(proc_id):
+ pct_start = (idx + 1) * 50 / num_proc # to report progress
+ dic_df_proc[proc_id], dic_filter_results, actual_record_number = graph_heatmap_data_one_proc(
+ proc_config, graph_param, start_tm, end_tm, offset, dic_col_func, var_agg_col, pct_start)
+ total_actual_record_number += actual_record_number
+
+ # fill empty cells
+ dic_df_proc = fill_empty_cells(df_cells, dic_df_proc, var_agg_col)
+
+ # gen plotly data + gen array_plotdata from here
+ dic_param = gen_plotly_data(dic_param, dic_df_proc, hm_mode, hm_step, dic_col_func, df_cells, var_agg_col)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param.update(dic_filter_results)
+
+ # real records
+ dic_param[ACTUAL_RECORD_NUMBER] = total_actual_record_number
+
+ return dic_param
+
+
+def get_utc_offset(time_zone):
+ """
+ get utc time offset
+ :param time_zone: str, timezone object
+ :return: timedelta(seconds)
+ """
+ if isinstance(time_zone, str):
+ time_zone = tz.gettz(time_zone)
+
+ time_in_tz = datetime.now(tz=time_zone)
+ time_offset = time_in_tz.utcoffset().seconds
+ time_offset = timedelta(seconds=time_offset)
+
+ return time_offset
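+# e.g. get_utc_offset('Asia/Tokyo') returns timedelta(hours=9); this offset is added to the generated
+# cell timestamps (convert_cell_tz) so the heatmap cells line up with the client's local time.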
+
+
+def limit_num_cells(df_cells: pd.DataFrame, end_tm, offset, limit=10000):
+ """ Limit number of cells to 10k including empty cells """
+ is_res_limited = df_cells.index.size > limit
+
+ print("///// is_res_limited: ", is_res_limited, ': ', df_cells.index.size)
+ df_cells: pd.DataFrame = df_cells.loc[:limit]
+
+ # update new end_time to 10000 cells
+ last_cell_time = list(df_cells.tail(1)[TIME_COL])[0]
+ end_tm_tz = pd.Timestamp(end_tm) + offset
+ new_end_time = np.minimum(end_tm_tz, last_cell_time)
+ new_end_tm = new_end_time.strftime(DATE_FORMAT_QUERY)
+
+ return df_cells, new_end_tm, is_res_limited
+
+
+@log_execution_time()
+def gen_cells(start_tm, end_tm, hm_mode, hm_step):
+ """ Generate cells of heatmap """
+ floor_start_tm = pd.Timestamp(start_tm)
+ floor_end_tm = pd.Timestamp(end_tm).replace(microsecond=0)
+ cells = [floor_start_tm]
+ prev = floor_start_tm
+ while prev < floor_end_tm:
+ if hm_mode == 1:
+ next_cell = prev + pd.Timedelta(minutes=hm_step)
+ else:
+ next_cell = prev + pd.Timedelta(hours=hm_step)
+ cells.append(next_cell)
+ prev = next_cell
+
+ return cells[:-1]
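+# e.g. gen_cells('2021-01-01 00:00:00', '2021-01-01 01:00:00', hm_mode=1, hm_step=15) produces the four
+# cell start times 00:00, 00:15, 00:30 and 00:45; the end boundary itself is excluded by cells[:-1].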
+
+
+@log_execution_time()
+def fill_empty_cells(df_cells: pd.DataFrame, dic_df_proc, var_agg_col=None):
+ """ Some cells don't have data -> need to fill """
+ for proc_id, proc_data in dic_df_proc.items():
+ for end_col in proc_data:
+ df_sensor: pd.DataFrame = dic_df_proc[proc_id][end_col].set_index(AGG_COL)
+ if var_agg_col:
+ dic_df_proc[proc_id][end_col] = dict()
+ dic_df_cate = {cate_value: df_cate for cate_value, df_cate in df_sensor.groupby(var_agg_col)}
+ df_cates = list(dic_df_cate.items())[:30]
+ for cate_value, df_cate in df_cates:
+ dic_df_proc[proc_id][end_col][cate_value] = df_cells.set_index(AGG_COL) \
+ .join(df_cate, how="left", lsuffix=CELL_SUFFIX).replace({np.nan: None})
+ else:
+ dic_df_proc[proc_id][end_col] = df_cells.set_index(AGG_COL) \
+ .join(df_sensor, how="left", lsuffix=CELL_SUFFIX).replace({np.nan: None})
+ return dic_df_proc
+
+
+@log_execution_time()
+def gen_y_ticks(hm_mode, hm_step):
+ """ Generate ticks of y-axis """
+ if hm_mode == 7:
+ ticktext = ['Sat', 'Fri', 'Thu', 'Wed', 'Tue', 'Mon', 'Sun']
+ row_per_day = int(24 / hm_step)
+ tickvals = [(i + 1) * row_per_day for i in range(len(ticktext))]
+ else:
+ ticktext = ['24:00', '22:00', '20:00', '18:00', '16:00', '14:00', '12:00', '10:00', ' 8:00', ' 6:00', ' 4:00',
+ ' 2:00', ' 0:00']
+ row_per_2hour = 120 / hm_step
+ tickvals = [i * row_per_2hour for i in range(len(ticktext))]
+
+ return ticktext, tickvals
+
+
+@log_execution_time()
+def get_x_ticks(df: pd.DataFrame):
+ """ Generate ticks of x-axis """
+ df_ticks = df.drop_duplicates('x', keep='last')
+ df_ticks = df_ticks.drop_duplicates('x_label', keep='first')
+ size = df_ticks.index.size
+ if size <= MAX_TICKS:
+ return df_ticks['x'].tolist(), df_ticks['x_label'].tolist()
+
+ step = math.ceil(size / MAX_TICKS)
+ indices = np.array(range(size))
+ selected_indices = indices[0:-1:step]
+ df_ticks = df_ticks.reset_index()
+ df_ticks = df_ticks.loc[df_ticks.index.intersection(selected_indices)]
+ return df_ticks['x'].tolist(), df_ticks['x_label'].tolist()
+
+
+def build_hover(df, end_col, hm_function):
+ df['hover'] = df['from'].astype(str) + \
+ df['to'].astype(str) + \
+ hm_function + ': ' + df[end_col].astype(str).str.replace('None', NA_STR) + ''
+ return df['hover'].to_list()
+
+
+def build_plot_data(df, end_col, hm_function):
+ """ Build data for heatmap trace of Plotly """
+ df = df.sort_values(by=['x', 'y'])
+ df = df.replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan))
+ df = df.where(pd.notnull(df), None)
+
+ x = df['x'].to_list()
+ y = df['y'].to_list()
+ z = df[end_col].to_list()
+
+ # build color scale
+ z_min = df[end_col].dropna().min()
+ if np.isnan(z_min):
+ z_min = None
+ z_max = df[end_col].dropna().max()
+ if np.isnan(z_max):
+ z_max = None
+
+ hover_texts = build_hover(df, end_col, hm_function)
+
+ return {
+ 'x': x,
+ 'y': y,
+ 'z': z,
+ 'z_min': z_min,
+ 'z_max': z_max,
+ 'hover': hover_texts,
+ }
+
+
+def get_function_i18n(hm_function): # TODO better. can be moved to frontend
+ """ Generate i18n aggregate function name """
+ return _('CHM' + hm_function.replace('_', ' ').title().replace(' ', ''))
+
+
+@log_execution_time()
+@notify_progress(60)
+def gen_plotly_data(dic_param: dict, dic_df_proc: dict, hm_mode, hm_step, dic_col_func: dict, df_cells,
+                    var_agg_col=None):
+ dic_param[ARRAY_PLOTDATA] = dict()
+
+ # gen x-axis ticks: ticktexts + tickvals daily, weekly, monthly, yearly
+ x_tickvals, x_ticktext = get_x_ticks(df_cells)
+
+ # gen y-axis ticks: ticktexts + tickvals
+ y_ticktext, y_tickvals = gen_y_ticks(hm_mode, hm_step)
+ plot_count = 0
+ for proc_id, proc_data in dic_df_proc.items():
+ dic_param[ARRAY_PLOTDATA][proc_id] = []
+ for end_col, end_col_data in proc_data.items():
+ hm_function = get_function_i18n(dic_col_func[proc_id][end_col].name)
+ if var_agg_col:
+ for cate_value, df_cate in end_col_data.items():
+ df_sensor_agg: pd.DataFrame = df_cate
+ plotdata: dict = build_plot_data(df_sensor_agg, end_col, hm_function)
+ plotdata.update({
+ AGG_FUNC: hm_function,
+ CATE_VAL: cate_value,
+ END_COL: end_col,
+ X_TICKTEXT: x_ticktext,
+ X_TICKVAL: x_tickvals,
+ Y_TICKTEXT: y_ticktext,
+ Y_TICKVAL: y_tickvals,
+ })
+ dic_param[ARRAY_PLOTDATA][proc_id].append(plotdata)
+ else:
+ df_sensor_agg: pd.DataFrame = end_col_data
+ plotdata: dict = build_plot_data(df_sensor_agg, end_col, hm_function)
+ plotdata.update({
+ AGG_FUNC: hm_function,
+ END_COL: end_col,
+ X_TICKTEXT: x_ticktext,
+ X_TICKVAL: x_tickvals,
+ Y_TICKTEXT: y_ticktext,
+ Y_TICKVAL: y_tickvals,
+ })
+ dic_param[ARRAY_PLOTDATA][proc_id].append(plotdata)
+
+ plot_count += len(dic_param[ARRAY_PLOTDATA][proc_id])
+
+ # limit to show only 30 graphs
+ if plot_count > 30:
+ remain = 30
+ for proc_id, plot_datas in dic_param[ARRAY_PLOTDATA].items():
+ num_plot = len(plot_datas)
+ keep = min(remain, num_plot)
+ dic_param[ARRAY_PLOTDATA][proc_id] = plot_datas[:keep]
+ remain -= keep
+ remain = max(0, remain)
+
+ return dic_param
+
+
+@log_execution_time()
+def gen_agg_col(df: pd.DataFrame, hm_mode, hm_step):
+ """ Aggregate data by time """
+ pd_step = convert_to_pandas_step(hm_step, hm_mode)
+ print(df.index.size)
+ if hm_mode == 7:
+        # slicing the string representation (.astype(str).str[:13] or [:16]) sometimes doesn't work as expected, so use strftime
+ df[AGG_COL] = df[TIME_COL].dt.floor(pd_step).dt.strftime('%Y-%m-%d %H')
+ else:
+ df[AGG_COL] = df[TIME_COL].dt.floor(pd_step).dt.strftime('%Y-%m-%d %H:%M')
+ return df
+
+
+def gen_weekly_ticks(df: pd.DataFrame):
+ # tick weekly, first day of week, sunday
+ df['x_label'] = df[TIME_COL] - ((df[TIME_COL].dt.weekday + 1) % 7) * np.timedelta64(1, 'D')
+ df['x_label'] = get_year_week_in_df_column(df['x_label']) \
+                    + "\n" + df['x_label'].dt.month.astype(str).str.pad(2, fillchar='0') \
+ + "-" + df['x_label'].dt.day.astype(str).str.pad(2, fillchar='0')
+ return df['x_label']
+
+
+def get_year_week_in_df_column(column: pd.Series):
+ """ get year and week with format 'yy,w' -> '22, 20' """
+ return column.dt.year.astype(str).str[-2:] + ", " \
+ + (column.dt.strftime('%U').astype(int) + 1).astype(str).str.pad(2, fillchar='0')
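+# e.g. pd.Timestamp('2021-01-01') is formatted as '21, 01' (two-digit year, comma, week of year);
+# the +1 shifts pandas' zero-based %U week number so the first week of the year is labelled 01.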
+
+
+def convert_cell_tz(df: pd.DataFrame, offset):
+ df[TIME_COL] = df[TIME_COL] + offset
+ return df
+
+
+@log_execution_time()
+def gen_x_y(df: pd.DataFrame, hm_mode, hm_step, start_tm, end_tm, client_tz=tz.tzlocal()):
+ """ Generate x, y values and text labels of x and y axes """
+ start_dt = parser.parse(start_tm)
+ end_dt = parser.parse(end_tm)
+ diff: timedelta = end_dt - start_dt
+ num_days = diff.days
+
+ if hm_mode == 7:
+ # gen y
+ row_per_day = int(24 / hm_step)
+ df['dayofweek'] = df[TIME_COL].dt.day_name().astype(str).str[:3]
+ df['newdayofweek'] = (12 - df[TIME_COL].dt.dayofweek) % 7 # sat,fri,...,mon,sun
+ df['y'] = int(24 / hm_step) - (df[TIME_COL].dt.hour / hm_step).astype(int) + df[
+ 'newdayofweek'] * row_per_day
+
+ # gen x
+ df['year'] = df[TIME_COL].dt.year
+ min_year = df['year'].min()
+ df['x'] = df[TIME_COL].dt.strftime('%U').astype(int) + 1 + (df['year'] % min_year) * 53
+
+ # x_label
+ if num_days <= 140:
+ df['x_label'] = gen_weekly_ticks(df)
+ elif num_days <= 365 * 2:
+ # tick monthly
+            df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) + '\n' \
+ + df[TIME_COL].dt.month.astype(str).str.pad(2, fillchar='0') + '-01'
+ else:
+ # tick yearly
+            df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) + '\n01-01'
+ else:
+ # gen y
+ num_rows = int(1440 / hm_step)
+ row_per_hour = 60 / hm_step
+ df['dayofweek'] = df[TIME_COL].dt.day_name().astype(str).str[:3]
+ if hm_step > 60:
+ df['y'] = num_rows - (
+ ((df[TIME_COL].dt.minute + df[TIME_COL].dt.hour * 60) / hm_step).astype(float))
+ else:
+ df['y'] = num_rows - (
+ (df[TIME_COL].dt.minute / hm_step).astype(int) + (df[TIME_COL].dt.hour * row_per_hour).astype(int))
+
+ # gen x
+ df['year'] = df[TIME_COL].dt.year
+ min_year = df['year'].min()
+ df['x'] = df[TIME_COL].dt.dayofyear + 366 * (df['year'] % min_year)
+
+ # x_label
+ if num_days <= 21:
+ # tick daily
+ df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) \
+                             + "\n" + df[TIME_COL].dt.date.astype(str).str[5:]
+ elif num_days <= 140:
+ df['x_label'] = gen_weekly_ticks(df)
+ elif num_days <= 365 * 2:
+ # tick monthly
+            df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) + '\n' \
+ + df[TIME_COL].dt.month.astype(str).str.pad(2, fillchar='0') + '-01'
+ else:
+ # tick yearly
+            df['x_label'] = get_year_week_in_df_column(df[TIME_COL]) + '\n01-01'
+
+    df['from'] = 'From: ' + df[TIME_COL].astype(str).str[:16] + '\n'
+ unit = 'min' if hm_mode == 1 else 'h'
+ df['to_temp'] = df[TIME_COL] + pd.to_timedelta(hm_step, unit=unit)
+ df.loc[df['to_temp'].astype(str).str[11:16] == '00:00', 'to'] = \
+ df['to_temp'].astype(str).str[:8] + df[TIME_COL].astype(str).str[8:11] + '24:00'
+ df.loc[df['to_temp'].astype(str).str[11:16] != '00:00', 'to'] = df['to_temp'].astype(str).str[:16]
+ df['to'] = 'To : ' + df['to'] + ''
+
+ return df
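+# Note: 'x' indexes the calendar column (week of year in weekly mode, day of year otherwise) while 'y'
+# indexes the row within that column (day of week x time of day in weekly mode, time of day otherwise);
+# 'x_label' switches from daily to weekly, monthly and yearly ticks as the queried range grows, and the
+# 'from'/'to' columns feed the hover text.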
+
+
+@log_execution_time()
+def build_dic_col_func(dic_proc_cfgs: Dict[int, CfgProcess], graph_param: DicParam):
+ """ Each column needs an aggregate function """
+ dic_col_func = dict()
+ for proc_id, proc_config in dic_proc_cfgs.items():
+ if graph_param.is_end_proc(proc_id):
+ dic_col_func[proc_id] = dict()
+ end_col_ids = graph_param.get_sensor_cols(proc_id)
+ end_cols = proc_config.get_cols(end_col_ids)
+ for end_col in end_cols:
+ if DataType[end_col.data_type] is DataType.REAL:
+ hm_function = HMFunction[graph_param.common.hm_function_real]
+ else:
+ hm_function = HMFunction[graph_param.common.hm_function_cate]
+ dic_col_func[proc_id][end_col.id] = hm_function
+ return dic_col_func
+
+
+@log_execution_time()
+def apply_significant_digit(dic_df_col):
+ for end_col, df_end in dic_df_col.items():
+ df_end[end_col] = df_end[end_col].apply(signify_digit2)
+
+ return dic_df_col
+
+
+def append_result(dic_df_col, df_end_col, end_col_id):
+ """ Append result of each batch to the whole result """
+ if dic_df_col.get(end_col_id) is None: # no need if use default dict
+ dic_df_col[end_col_id] = df_end_col
+ else:
+ dic_df_col[end_col_id] = dic_df_col[end_col_id].append(df_end_col)
+
+
+def get_batch_data(proc_cfg: CfgProcess, graph_param: DicParam, start_tm, end_tm, sql_limit=None) -> pd.DataFrame:
+ """ Query data for each batch. """
+ batches = generate_batches(start_tm, end_tm, batch_size=7)
+ num_batches = len(batches)
+
+ for idx, (batch_start_tm, batch_end_tm) in enumerate(batches):
+ df_batch = graph_one_proc(proc_cfg.id, batch_start_tm, batch_end_tm,
+ graph_param.common.cond_procs, graph_param.array_formval, sql_limit,
+ same_proc_only=True)
+ df_batch = validate_data(df_batch)
+
+ # to report progress
+ progress = idx / num_batches
+
+ yield progress, df_batch
+
+
+def gen_empty_df(end_col_id, var_agg_col):
+ if var_agg_col:
+ return pd.DataFrame({TIME_COL: [], AGG_COL: [], end_col_id: [], var_agg_col: []})
+ else:
+ return pd.DataFrame({TIME_COL: [], AGG_COL: [], end_col_id: []})
+
+
+def gen_df_end_col(df_batch, end_col, var_agg_col):
+ """ Use separate data frame for each column """
+ end_col_label = gen_sql_label(end_col.id, end_col.column_name)
+ if var_agg_col:
+ df_end_col = df_batch[[TIME_COL, AGG_COL, var_agg_col, end_col_label]]
+ df_end_col[var_agg_col] = df_end_col[var_agg_col].astype(str)
+ else:
+ df_end_col = df_batch[[TIME_COL, AGG_COL, end_col_label]]
+
+ if end_col_label == var_agg_col:
+ c_id = list(df_end_col.columns).index(end_col_label)
+ df_end_col.columns.values[c_id] = end_col.id
+ else:
+ df_end_col = df_end_col.rename({end_col_label: end_col.id}, axis=1)
+ return df_end_col
+
+
+def gen_agg_col_names(var_agg_col):
+ """ If use stratify variable -> aggregate by [stratify variable, time], otherwise, aggregate by time. """
+ if var_agg_col:
+ return [var_agg_col, AGG_COL]
+ else:
+ return [AGG_COL]
+
+
+@log_execution_time()
+def graph_heatmap_data_one_proc(proc_cfg: CfgProcess, graph_param: DicParam, start_tm, end_tm, offset,
+ dic_col_func, var_agg_col=None, pct_start=0.0):
+ """ Build heatmap data for all columns of each process """
+
+ # start proc
+ proc_id = proc_cfg.id
+
+ # get end cols
+ end_col_ids = graph_param.get_sensor_cols(proc_id)
+ end_cols = proc_cfg.get_cols(end_col_ids)
+ hm_mode = int(graph_param.common.hm_mode)
+ hm_step = int(graph_param.common.hm_step)
+
+ num_rows = 0
+ dic_df_col = dict()
+ dic_filter_results = {MATCHED_FILTER_IDS: [], UNMATCHED_FILTER_IDS: [], NOT_EXACT_MATCH_FILTER_IDS: []}
+ for (progress, df_batch) in get_batch_data(proc_cfg, graph_param, start_tm, end_tm):
+ percent = pct_start + progress * 10 # to report progress
+ background_announcer.announce(percent, AnnounceEvent.SHOW_GRAPH.name)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = \
+ main_check_filter_detail_match_graph_data(graph_param, df_batch)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_filter_results[MATCHED_FILTER_IDS] += matched_filter_ids
+ dic_filter_results[UNMATCHED_FILTER_IDS] += unmatched_filter_ids
+ dic_filter_results[NOT_EXACT_MATCH_FILTER_IDS] += not_exact_match_filter_ids
+
+ if df_batch is None or df_batch.empty:
+ for end_col in end_cols:
+ df_end_col = gen_empty_df(end_col.id, var_agg_col)
+ append_result(dic_df_col, df_end_col, end_col.id)
+ else:
+ num_rows += df_batch.index.size
+
+ # gen aggregate endcol
+ pd_step = convert_to_pandas_step(hm_step, hm_mode)
+ df_batch: pd.DataFrame = create_agg_column(df_batch, pd_step, AGG_COL, hm_mode, offset)
+
+ # transform + aggregate
+ for end_col in end_cols:
+ agg_cols = gen_agg_col_names(var_agg_col)
+ df_end_col = gen_df_end_col(df_batch, end_col, var_agg_col)
+ hm_function = dic_col_func[proc_id][end_col.id]
+
+ df_end_col: pd.DataFrame \
+ = gen_heat_map_cell_value(df_end_col, graph_param, agg_cols, end_col.id, hm_function)
+
+ append_result(dic_df_col, df_end_col, end_col.id)
+
+ print('/////// proc_id: {}, num_rows: '.format(proc_id), num_rows)
+ dic_df_col = apply_significant_digit(dic_df_col)
+
+ return dic_df_col, dic_filter_results, num_rows
+
+
+@log_execution_time()
+def trim_data(df: pd.DataFrame, agg_cols: List, end_col):
+ """ Trim first 5% biggest and 5% smallest. Alternative, rank by pct. """
+
+ df = df.replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan)).dropna()
+ df['rank'] = df.groupby(agg_cols)[end_col].rank(method='first')
+ df['group_size'] = df.groupby(agg_cols)['rank'].transform(np.size)
+ df['p1'] = (df['group_size'] * 0.05).transform(np.floor)
+ df['p9'] = df['group_size'] - df['p1']
+ return df[(df['p1'] <= df['rank']) & (df['rank'] <= df['p9'])].reset_index()
+
+
+def range_func(x):
+ return np.max(x) - np.min(x)
+
+
+def convert_to_pandas_step(hm_step, hm_mode):
+ """ Pandas steps are: 4h, 15min, ... """
+ if hm_mode == 7:
+ return '{}h'.format(hm_step)
+ return '{}min'.format(hm_step)
+
+
+@log_execution_time()
+def create_agg_column(df, pd_step='4h', agg_col=AGG_COL, hm_mode=7, offset=timedelta(0)):
+ """ Create aggregate column data """
+ if hm_mode == 7:
+ length = 13
+ else:
+ length = 16
+ temp = pd.to_datetime(df[TIME_COL], format='%Y-%m-%dT%H:%M') + offset
+ df[agg_col] = temp.dt.floor(pd_step).astype(str).str[:length]
+ return df
+
+
+@log_execution_time()
+def groupby_and_aggregate(df: pd.DataFrame, hm_function: HMFunction, hm_mode, hm_step, agg_cols, end_col):
+ """ Group by time and calculate aggregates """
+ if hm_function is HMFunction.count_per_hour:
+ agg_params = {end_col: HMFunction.count.name, TIME_COL: HMFunction.first.name}
+ df = df.groupby(agg_cols).agg(agg_params).reset_index()
+ if hm_mode == 7:
+ df[end_col] = df[end_col].div(hm_step)
+ else:
+ df[end_col] = df[end_col].div(hm_step / 60)
+ elif hm_function is HMFunction.count_per_min:
+ agg_params = {end_col: HMFunction.count.name, TIME_COL: HMFunction.first.name}
+ df = df.groupby(agg_cols).agg(agg_params).reset_index()
+ if hm_mode == 7:
+ df[end_col] = df[end_col].div(hm_step * 60)
+ else:
+ df[end_col] = df[end_col].div(hm_step)
+ elif hm_function is HMFunction.range:
+ agg_params = {end_col: range_func, TIME_COL: HMFunction.first.name}
+ df = df.groupby(agg_cols).agg(agg_params).reset_index()
+ elif hm_function is HMFunction.iqr:
+ agg_params = {end_col: iqr, TIME_COL: HMFunction.first.name}
+ df = df.groupby(agg_cols).agg(agg_params).reset_index()
+ elif hm_function is HMFunction.time_per_count:
+ agg_params = {end_col: HMFunction.count.name, TIME_COL: HMFunction.first.name}
+ df = df.groupby(agg_cols).agg(agg_params).reset_index()
+ step_time = (hm_step * 60) if hm_mode == 1 else (hm_step * 3600)
+ df[end_col] = step_time / df[end_col]
+ else:
+ agg_params = {end_col: hm_function.name, TIME_COL: HMFunction.first.name}
+ df = df.groupby(agg_cols).agg(agg_params).reset_index()
+ return df
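+# Note: count_per_hour and count_per_min divide the raw count by the cell length (hm_step is hours in
+# weekly mode, minutes otherwise), time_per_count returns the average number of seconds per record in a
+# cell, range and iqr use custom reducers, and any other function name is passed straight to pandas agg().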
+
+
+@log_execution_time()
+def gen_heat_map_cell_value(df: pd.DataFrame, graph_param: DicParam, agg_cols, end_col, hm_function: HMFunction):
+ """ Value z for each cell (x,y) """
+ hm_mode = int(graph_param.common.hm_mode)
+ hm_step = int(graph_param.common.hm_step)
+ hm_trim = int(graph_param.common.hm_trim)
+ # trim data
+ if 'count' not in hm_function.name and hm_trim:
+ df = trim_data(df, agg_cols, end_col)
+
+ # groupby + aggregate
+ df = groupby_and_aggregate(df, hm_function, hm_mode, hm_step, agg_cols, end_col)
+
+ return df
+
+
+@log_execution_time()
+def generate_batches(start_tm, end_tm, batch_size=7):
+ """ Divide [start_time, end_time] to small batches. Default 7 days for each batch. """
+ batch_start_str = reformat_dt_str(start_tm, DATE_FORMAT_QUERY)
+ batch_start = datetime.strptime(batch_start_str, DATE_FORMAT_QUERY)
+
+ batch_end = batch_start + timedelta(days=batch_size)
+ batch_end_str = datetime.strftime(batch_end, DATE_FORMAT_QUERY)
+ batch_end_str = min(batch_end_str, end_tm)
+
+ batches = [(batch_start_str, batch_end_str)]
+
+ # previous_start = batch_start
+ previous_end = batch_end
+ while batch_end_str < end_tm:
+ batch_start = previous_end
+ batch_start_str = datetime.strftime(batch_start, DATE_FORMAT_QUERY)
+
+ batch_end = batch_start + timedelta(days=batch_size)
+ batch_end_str = datetime.strftime(batch_end, DATE_FORMAT_QUERY)
+ batch_end_str = min(batch_end_str, end_tm)
+
+ batches.append((batch_start_str, batch_end_str))
+ # previous_start = batch_start
+ previous_end = batch_end
+ # break
+
+ return batches
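+# e.g. generate_batches('2021-01-01 ...', '2021-01-20 ...') yields three (start, end) pairs covering
+# Jan 01-08, Jan 08-15 and Jan 15-20: each batch spans batch_size days and the last one is clipped to
+# end_tm (assuming end_tm uses the same string format so the min() comparison is valid).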
diff --git a/histview2/api/multi_scatter_plot/controllers.py b/histview2/api/multi_scatter_plot/controllers.py
new file mode 100644
index 0000000..4e800ba
--- /dev/null
+++ b/histview2/api/multi_scatter_plot/controllers.py
@@ -0,0 +1,64 @@
+import timeit
+
+import simplejson
+from flask import Blueprint, request
+
+from histview2.api.multi_scatter_plot.services import gen_scatter_plot, remove_unused_params, gen_scatter_n_contour_data
+from histview2.common.constants import SCATTER_CONTOUR
+from histview2.common.pysize import get_size
+from histview2.common.services import http_content
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import is_send_google_analytics, save_input_data_to_file, EventType
+
+api_multi_scatter_blueprint = Blueprint(
+ 'api_multi_scatter_module',
+ __name__,
+ url_prefix='/histview2/api/msp'
+)
+
+
+@api_multi_scatter_blueprint.route('/plot', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+ save_input_data_to_file(dic_form, EventType.MSP)
+
+ use_contour = int(request.form.get('use_contour')) if request.form.get('use_contour') else 0
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ # if universal call gen_dframe else gen_results
+ orig_send_ga_flg = is_send_google_analytics
+ dic_param, dic_data = gen_scatter_plot(dic_param)
+
+ # generate SCATTER_CONTOUR
+ dic_param[SCATTER_CONTOUR] = gen_scatter_n_contour_data(dic_param, dic_data, use_contour)
+
+ # send Google Analytics changed flag
+ if orig_send_ga_flg and not is_send_google_analytics:
+ dic_param.update({'is_send_ga_off': True})
+
+ # remove unused params
+ remove_unused_params(dic_param)
+
+ # calculate data size to send gtag
+ data_size = get_size(dic_param)
+ dic_param['data_size'] = data_size
+
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ out_dict = simplejson.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial, ignore_nan=True)
+
+ return out_dict, 200
diff --git a/histview2/api/multi_scatter_plot/services.py b/histview2/api/multi_scatter_plot/services.py
new file mode 100644
index 0000000..b981818
--- /dev/null
+++ b/histview2/api/multi_scatter_plot/services.py
@@ -0,0 +1,489 @@
+import numpy as np
+import pandas as pd
+from loguru import logger
+from scipy.linalg import pinv
+from scipy.signal import convolve2d
+from scipy.stats import gaussian_kde, binned_statistic_2d
+
+from histview2.api.trace_data.services.time_series_chart import (get_data_from_db, get_chart_infos,
+ gen_plotdata, make_irregular_data_none,
+ get_procs_in_dic_param, gen_dic_data_from_df,
+ main_check_filter_detail_match_graph_data,
+ calc_setting_scale_y)
+from histview2.common.constants import ARRAY_FORMVAL, ARRAY_PLOTDATA, ACTUAL_RECORD_NUMBER, \
+ IS_RES_LIMITED, ARRAY_Y, MATCHED_FILTER_IDS, UNMATCHED_FILTER_IDS, NOT_EXACT_MATCH_FILTER_IDS, END_PROC, \
+ GET02_VALS_SELECT, END_COL_ID, CORRS, CORR, PCORR, ARRAY_X, SCALE_SETTING, KDE_DATA, NTOTALS, \
+ NORMAL_MODE_MAX_RECORD
+from histview2.common.memoize import memoize
+from histview2.common.services.ana_inf_data import calculate_kde_trace_data
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.services.sse import notify_progress
+from histview2.common.sigificant_digit import signify_digit_vector, signify_digit
+from histview2.common.trace_data_log import *
+from histview2.trace_data.models import Cycle
+from sklearn.preprocessing import StandardScaler
+
+
+@log_execution_time('[MULTI SCATTER PLOT]')
+@notify_progress(60)
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.MSP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_scatter_plot(dic_param):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition points that between start point and end_point
+ """
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ dic_proc_name = {}
+ # get serials
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+ dic_proc_name[proc.proc_id] = proc_cfg.name
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+ dic_param['proc_name'] = dic_proc_name
+
+ # create output data
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ dic_data = gen_dic_data_from_df(df, orig_graph_param)
+ times = df[Cycle.time.key].tolist() or []
+
+ # TODO: ask Tinh san how about serial hover
+ # gen_dic_serial_data_scatter(df, dic_proc_cfgs, dic_param)
+
+ # get chart infos
+ chart_infos, chart_infos_org = get_chart_infos(orig_graph_param, dic_data, times)
+
+ dic_param[ARRAY_FORMVAL], dic_param[ARRAY_PLOTDATA] = \
+ gen_plotdata(orig_graph_param, dic_data, chart_infos, chart_infos_org, reorder=False)
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+ # calculate_summaries
+ # calc_summaries(dic_param)
+
+ # scale settings
+ # min_max_list, all_graph_min, all_graph_max = calc_raw_common_scale_y(dic_param[ARRAY_PLOTDATA])
+ # calc_scale_info(dic_param[ARRAY_PLOTDATA], min_max_list, all_graph_min, all_graph_max)
+
+ # flag to show that trace result was limited
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ # convert irregular data
+ make_irregular_data_none(dic_param)
+
+ # remove none data points
+ # remove_none_data(dic_param)
+
+ # generate kde for each trace output array
+ # dic_param = gen_kde_data_trace_data(dic_param)
+ for plotdata in dic_param.get(ARRAY_PLOTDATA, []):
+ series_y = pd.Series(plotdata.get(ARRAY_Y, []))
+ plotdata[SCALE_SETTING] = calc_setting_scale_y(plotdata, series_y)
+ plotdata[SCALE_SETTING][KDE_DATA], *_ = calculate_kde_trace_data(plotdata)
+
+ # partial correlation
+ calc_partial_corr(dic_param)
+
+ return dic_param, dic_data
+
+
+@log_execution_time()
+def remove_none_data(dic_param):
+ num_sensors = len(dic_param.get(ARRAY_PLOTDATA) or [])
+ if not num_sensors or not dic_param[ARRAY_PLOTDATA][0] or not len(dic_param[ARRAY_PLOTDATA][0].get(ARRAY_Y) or []):
+ return
+
+ # find none vals
+ array_ys = zip(*[dic_param[ARRAY_PLOTDATA][ss].get(ARRAY_Y) or [] for ss in range(num_sensors)])
+ list_nones = [idx for idx, vals in enumerate(array_ys) if any([v is None or np.isnan(v) for v in vals])]
+
+ # remove none vals
+ num_points = len(dic_param[ARRAY_PLOTDATA][0].get(ARRAY_Y) or [])
+ for ss in range(num_sensors):
+ array_y = dic_param[ARRAY_PLOTDATA][ss][ARRAY_Y]
+ dic_param[ARRAY_PLOTDATA][ss][ARRAY_Y] = [array_y[i] for i in range(num_points) if i not in list_nones]
+
+
+@log_execution_time()
+def gen_scatter_n_contour_data_pair(array_y1, array_y2, use_contour):
+ # parameters
+ num_bins = 50 # 50 x 50 cells
+ outlier_ratio = 0.05 # ratio for number of points to plot
+ max_num_points = 1000 # maximum number of points for scatter plot
+ max_num_points_kde = 10000 # maximum number of points for kde
+
+ df = pd.DataFrame({'array_y1': array_y1, 'array_y2': array_y2})
+ df = df.replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan)).dropna()
+ array_y1 = df['array_y1'].to_numpy()
+ array_y2 = df['array_y2'].to_numpy()
+
+ contour_data = {
+ 'x': [],
+ 'y': [],
+ 'z': [],
+ 'contours_coloring': 'heatmap',
+ 'line_width': 0
+ }
+ scatter_data = {
+ 'x': [],
+ 'y': [],
+ 'mode': 'markers'
+ }
+
+ if len(array_y1) < 2 or len(array_y2) < 2:
+ return [contour_data, scatter_data]
+
+ try:
+ # fit kde and generate/evaluate gridpoints
+ kernel = fit_2d_kde(array_y1, array_y2, max_num_points_kde)
+ hist = calc_2d_hist(array_y1, array_y2, num_bins)
+ kde_gridpoints, x_grid, y_grid = calc_kde_gridpoints(kernel, hist.x_edge, hist.y_edge)
+
+ # for contour: store x-y value and density of each gridpoints
+ # dic_contour = {'x': x_grid, 'y': y_grid, 'z': kde_gridpoints.ravel()} # TODO comment out for now
+
+ if len(array_y1) < NORMAL_MODE_MAX_RECORD and not use_contour:
+ # return full point
+ scatter_data = {
+ 'x': signify_digit_vector(array_y1),
+ 'y': signify_digit_vector(array_y2),
+ 'mode': 'markers'
+ }
+
+ if len(array_y1) >= NORMAL_MODE_MAX_RECORD or use_contour:
+ # return contour;
+ num_outliers = np.min([int(len(array_y1) * outlier_ratio), max_num_points])
+ flg_outlier = get_outlier_flg(hist, kde_gridpoints, num_outliers, num_bins)
+ # normalize and change to log scale (fit to Logarithm)
+ z_value = np.log(kde_gridpoints.ravel() / np.max(kde_gridpoints.ravel()) * 1000 + 1)
+ contour_data = {
+ 'x': signify_digit_vector(x_grid),
+ 'y': signify_digit_vector(y_grid),
+ 'z': signify_digit_vector(z_value),
+ 'contours_coloring': 'heatmap',
+ 'line_width': 1
+ }
+
+ scatter_data = {
+ 'x': signify_digit_vector(array_y1[flg_outlier]),
+ 'y': signify_digit_vector(array_y2[flg_outlier]),
+ 'mode': 'markers'
+ }
+
+ except Exception as ex:
+ logger.exception(ex)
+
+ return [contour_data, scatter_data]
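+# Note: below NORMAL_MODE_MAX_RECORD points (with use_contour off) the raw scatter points are returned
+# as-is; for larger data the function returns a log-scaled KDE contour plus only the lowest-density
+# points (about outlier_ratio of the data, capped at max_num_points), keeping the payload small.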
+
+
+@log_execution_time()
+def fit_2d_kde(x, y, max_num_points_kde=10000):
+ """
+ Fit density estimator
+ scipy.stats.gaussian_kde is used.
+
+ Parameters
+ ----------
+ x, y: array
+ Raw data
+ max_num_points_kde: int
+ x and y is under-sampled to this length
+
+ Returns
+ -------
+ kernel:
+ Fitted gaussian_kde
+ """
+
+ idx_sample = np.arange(len(x))
+ if len(x) > max_num_points_kde:
+ idx_sample = np.random.choice(np.arange(len(x)), size=max_num_points_kde)
+ x = x[idx_sample].copy()
+ y = y[idx_sample].copy()
+
+ # add jitter when x and y have almost complete linear correlation, or constant value
+ x, y = add_jitter_for_kde(x, y)
+
+ kernel = gaussian_kde(np.vstack((x, y)))
+ # kde with Silverman bandwidth estimation method
+ # kernel = gen_gaussian_kde_1d_same_as_r(np.vstack((x, y)))
+ return kernel
+
+
+@log_execution_time()
+def add_jitter_for_kde(x, y):
+ """
+ Add jitter (random noise) to x, y
+ If x, y have almost complete linear correlation, or constant value.
+ This is due to usage of inverse covariance matrix in gaussian_kde()
+
+ Parameters
+ ----------
+ x, y: array
+ Raw data
+
+ Returns
+ -------
+ x_, y_: array
+ Added jitter if necessary
+ """
+
+ xrng = (np.nanmax(x) - np.nanmin(x))
+ yrng = (np.nanmax(y) - np.nanmin(y))
+
+ add_jitter = False
+ if (xrng == 0.0) or (yrng == 0.0):
+ add_jitter = True
+ elif np.abs(np.corrcoef(x, y)[0, 1]) > 0.999:
+ # corrcoef returns nan if y or x is constant
+ add_jitter = True
+
+ if add_jitter:
+ x_offset = xrng / 50
+ y_offset = yrng / 50
+ x_offset = x_offset if xrng > 0.0 else 0.01
+ y_offset = y_offset if yrng > 0.0 else 0.01
+ x_ = x + np.random.uniform(-x_offset, x_offset, len(x))
+ y_ = y + np.random.uniform(-y_offset, y_offset, len(y))
+ return x_, y_
+
+ return x, y
+
+
+@log_execution_time()
+def calc_2d_hist(x, y, num_bins):
+ """
+ Calculate 2D histogram
+ scipy.stats.binned_statistic_2d is used.
+
+ Parameters
+ ----------
+ x, y: array
+ Raw data
+ num_bins: int
+ Total number of cells = num_bins^2
+
+ Returns
+ -------
+ hist:
+ Fitted 2D histogram.
+ Counts for each cell, and x-y value for binning is contained.
+ """
+
+ # range of x and y
+ xmin = np.nanmin(x)
+ xmax = np.nanmax(x)
+ ymin = np.nanmin(y)
+ ymax = np.nanmax(y)
+
+ # 2D histogram. TypeError when values=None
+ # https://stackoverflow.com/questions/60623899/why-is-binned-statistic-2d-now-throwing-typeerror
+ hist = binned_statistic_2d(x=x, y=y, values=x, statistic='count',
+ bins=num_bins, range=[[xmin, xmax], [ymin, ymax]])
+ return hist
+
+
+@log_execution_time()
+def calc_kde_gridpoints(kernel, x_edge, y_edge):
+ """
+ Calculate density values on each gridpoint
+
+ Parameters
+ ----------
+ kernel:
+ Fitted density estimator
+ x_edge, y_edge: array
+        Values for binning
+
+ Returns
+ -------
+ kde_gridpoints: array
+ Density values on each gridpoint
+ shape = len(x_edge), len(y_edge)
+ x_grid, y_grid: array
+ x-y values of each gridpoint
+ len = len(x_edge) * len(y_edge)
+ """
+
+ # calculate density of each gridpoints
+ x_grid, y_grid = np.meshgrid(x_edge, y_edge)
+ x_grid = x_grid.ravel()
+ y_grid = y_grid.ravel()
+ kde_gridpoints = kernel(np.vstack((x_grid, y_grid))).reshape((len(x_edge), len(y_edge)))
+ return kde_gridpoints, x_grid, y_grid
+
+
+@log_execution_time()
+def get_outlier_flg(hist, kde_gridpoints, num_outliers, num_bins):
+ """
+ Get outlier flag of each data point
+ Data with low density is estimated as outliers.
+
+ Parameters
+ ----------
+ hist:
+ Fitted 2D histogram
+ kde_gridpoints: array
+ Density values on each gridpoint
+ num_outliers: int
+ Number of outliers to show in scatter plot
+
+ Returns
+ -------
+ flg_outlier: array
+ True/False values
+ """
+
+ num_cells = hist.statistic.shape[0]
+
+ # we have to be careful that
+ # `hist.binnumber` returns bin index of (num_bins+2, num_bins+2) array,
+ # where +2 is for boundaries of each dimension
+ # https://stackoverflow.com/questions/31708773/translate-scipy-stats-binned-statistic-2ds-binnumber-to-a-x-y-bin
+ idx_cells_scipy = (np.arange(0, (num_cells + 2) ** 2)).reshape(num_cells + 2, num_cells + 2)
+ idx_cells_scipy = idx_cells_scipy[1:(num_cells + 1), 1:(num_cells + 1)].ravel()
+
+    # density of each cell (average of the surrounding gridpoints)
+ ave_filter = np.ones((2, 2)) / 4.0
+ kde_cells = convolve2d(ave_filter, kde_gridpoints, mode='valid')
+
+ cnts = hist.statistic.ravel()
+ idx_cells = np.argsort(kde_cells.T.ravel())
+
+ csum = 0
+ for k, cell in enumerate(idx_cells):
+ csum += cnts[cell]
+ if csum > num_outliers:
+ break
+
+ idx_outlier_cells = idx_cells_scipy[idx_cells[:(k + 1)]]
+ flg_outlier = np.isin(hist.binnumber, idx_outlier_cells)
+ return flg_outlier
+
+
+@log_execution_time()
+def gen_scatter_n_contour_data(dic_param: dict, dic_data, use_contour):
+ scatter_contours = {}
+ array_formval = dic_param[ARRAY_FORMVAL]
+ num_sensor = len(array_formval)
+ for i in range(num_sensor - 1):
+ c_idx = i + 1
+ array_formval_i = array_formval[i]
+ proc_id_i = array_formval_i.get(END_PROC)
+ col_id_i = array_formval_i.get(GET02_VALS_SELECT)
+ array_y_i = dic_data[proc_id_i][col_id_i]
+
+ for k in range(i + 1, num_sensor):
+ r_idx = k + 1
+ array_formval_k = array_formval[k]
+ proc_id_k = array_formval_k.get(END_PROC)
+ col_id_k = array_formval_k.get(GET02_VALS_SELECT)
+ array_y_k = dic_data[proc_id_k][col_id_k]
+
+ contour_data, scatter_data = gen_scatter_n_contour_data_pair(array_y_i, array_y_k, use_contour)
+ scatter_contours['{}-{}'.format(r_idx, c_idx)] = {
+ 'contour_data': contour_data,
+ 'scatter_data': scatter_data,
+ 'proc_id_x': proc_id_i,
+ 'col_id_x': col_id_i,
+ 'proc_id_y': proc_id_k,
+ 'col_id_y': col_id_k,
+ }
+
+ return scatter_contours
+
+
+@log_execution_time()
+def partial_corr(data):
+    # transpose the dataframe before computing the correlation
+    # correlation_mat = np.corrcoef(data.T)
+    correlation_mat = np.cov(data.T, ddof=0)
+    # it is safer to compute the inverse via the (Moore-Penrose) pseudo-inverse, in case the matrix is singular
+ precision_mat = pinv(correlation_mat)
+
+ parcor_mat = np.zeros_like(correlation_mat)
+ np.fill_diagonal(parcor_mat, np.diag(correlation_mat))
+
+ rowidx, colidx = np.triu_indices(parcor_mat.shape[0])
+ for i, j in zip(rowidx, colidx):
+ if i == j:
+ continue
+ parcor = - precision_mat[i, j] / np.sqrt(precision_mat[i, i] * precision_mat[j, j])
+ parcor_mat[i, j] = parcor
+ parcor_mat[j, i] = parcor
+
+ return parcor_mat, correlation_mat
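+# Note: the partial correlation between variables i and j is computed from the precision matrix P
+# (pseudo-inverse of the covariance) as -P[i, j] / sqrt(P[i, i] * P[j, j]); the diagonal of parcor_mat
+# is copied from the covariance matrix rather than being set to 1.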
+
+
+@log_execution_time()
+def calc_partial_corr(dic_param):
+ plot_list = {}
+ for plotdata in dic_param[ARRAY_PLOTDATA]:
+ plot_list[plotdata[END_COL_ID]] = plotdata[ARRAY_Y]
+
+ df = pd.DataFrame(plot_list)
+ df.dropna(inplace=True)
+ columns = df.columns.to_list()
+
+ corrs = {
+ CORR: {}, # correlation coefficient
+ PCORR: {}, # partial correlation
+ NTOTALS: {}
+ }
+
+ if df.shape[0]:
+ scaler = StandardScaler()
+ data = scaler.fit_transform(df)
+ p_corr_mat, corr_mat = partial_corr(data)
+
+ # df:
+ # 0 col1 col2 col3 col4
+ # col1 1 x x x
+ # col2 x 1 x x
+ # col3 x x 1 x
+ # col4 x x x 1
+
+ # push item into dict_param
+ for k, col in enumerate(columns):
+ corrs[CORR][col] = {}
+ corrs[PCORR][col] = {}
+ corrs[NTOTALS][col] = df.shape[0]
+ for i, row in enumerate(columns):
+ corrs[CORR][col][row] = signify_digit(corr_mat[k][i])
+ corrs[PCORR][col][row] = signify_digit(p_corr_mat[k][i])
+
+ dic_param[CORRS] = corrs
+ return dic_param
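+# Shape sketch: corrs[CORR][col][row] and corrs[PCORR][col][row] hold the pairwise correlation and
+# partial correlation for every pair of selected sensor columns, and corrs[NTOTALS][col] is the number
+# of complete rows left after dropna(); all values are passed through signify_digit().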
+
+
+def remove_unused_params(dic_param):
+ for plot_data in dic_param[ARRAY_PLOTDATA]:
+ del plot_data[ARRAY_X]
+ del plot_data[ARRAY_Y]
+
+ # del dic_param[SERIAL_DATA]
+ # del dic_param[TIMES]
+
+ return dic_param
diff --git a/histview2/api/parallel_plot/controllers.py b/histview2/api/parallel_plot/controllers.py
new file mode 100644
index 0000000..1d63db1
--- /dev/null
+++ b/histview2/api/parallel_plot/controllers.py
@@ -0,0 +1,59 @@
+import timeit
+
+import simplejson
+from flask import Blueprint, request
+
+from histview2 import dic_yaml_config_file
+from histview2.api.parallel_plot.services import gen_graph_paracords
+from histview2.common.constants import *
+from histview2.common.services import http_content
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import save_input_data_to_file, EventType
+
+api_paracords_blueprint = Blueprint(
+ 'api_paracords',
+ __name__,
+ url_prefix='/histview2/api/pcp'
+)
+
+# local parameter settings
+local_params = {
+ "config_yaml_fname_proc": dic_yaml_config_file[YAML_PROC],
+ "config_yaml_fname_histview2": dic_yaml_config_file[YAML_CONFIG_HISTVIEW2],
+ "config_yaml_fname_db": dic_yaml_config_file[YAML_CONFIG_DB],
+}
+
+
+@api_paracords_blueprint.route('/index', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+ save_input_data_to_file(dic_form, EventType.PCP)
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ dic_param = gen_graph_paracords(dic_param)
+
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+    # generate HTML based on trace_data.html
+ out_dict = simplejson.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial, ignore_nan=True)
+ return out_dict, 200
+
+
+@api_paracords_blueprint.route('/testme', methods=['GET'])
+def testme():
+ # TODO: remove API test function
+ return 'OK', 200
diff --git a/histview2/api/parallel_plot/services.py b/histview2/api/parallel_plot/services.py
new file mode 100644
index 0000000..6a3e6c0
--- /dev/null
+++ b/histview2/api/parallel_plot/services.py
@@ -0,0 +1,1142 @@
+import json
+import traceback
+from collections import defaultdict
+from typing import Dict, List
+
+import numpy as np
+import pandas as pd
+from loguru import logger
+from numpy import quantile
+from pandas import DataFrame
+from sqlalchemy import and_, or_
+
+from histview2 import db
+from histview2.api.analyze.services.pca import remove_outlier
+from histview2.api.trace_data.services.regex_infinity import validate_numeric_minus, validate_numeric_plus, \
+ validate_string
+from histview2.api.trace_data.services.time_series_chart import main_check_filter_detail_match_graph_data, \
+ get_data_from_db
+from histview2.common.common_utils import convert_time, add_days, gen_sql_label, \
+ gen_sql_like_value, chunks
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.scheduler import dic_running_job, JobType
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.services.sse import notify_progress
+from histview2.common.trace_data_log import trace_log, TraceErrKey, EventAction, Target, EventType
+from histview2.setting_module.models import CfgConstant, CfgProcess, CfgProcessColumn, CfgFilterDetail
+from histview2.trace_data.models import find_cycle_class, GlobalRelation, Sensor, Cycle, find_sensor_class
+from histview2.trace_data.schemas import DicParam, EndProc, ConditionProc, CategoryProc
+
+
+@log_execution_time('[TRACE DATA]')
+@notify_progress(60)
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.PCP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_graph_paracords(dic_param):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition point
+ https://files.slack.com/files-pri/TJHPR9BN3-F01GG67J84C/image.pngnts that between start point and end_point
+ """
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # add category
+ graph_param.add_cate_procs_to_array_formval()
+
+ # get serials
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # order data by serials
+ # df = check_and_order_data(df, graph_param, dic_proc_cfgs)
+ if int(dic_param[COMMON][IS_REMOVE_OUTLIER]) == 1:
+ numeric_cols = []
+ objective_var = int(dic_param[COMMON]['objectiveVar'][0]) if dic_param[COMMON]['objectiveVar'] else None
+ for proc in graph_param.array_formval:
+ end_cols = []
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ end_col_ids = graph_param.get_sensor_cols(proc.proc_id)
+ if objective_var and objective_var in end_col_ids:
+ end_cols = proc_cfg.get_cols([objective_var])
+
+ if len(end_cols):
+ numeric_cols = [gen_sql_label(col.id, col.column_name) for col in end_cols
+ if DataType[col.data_type] in [DataType.REAL, DataType.INTEGER]]
+
+ df_numeric: DataFrame = df[numeric_cols]
+ df_numeric = remove_outlier(df_numeric)
+ df[numeric_cols] = df_numeric[numeric_cols].to_numpy()
+ df = df.dropna(subset=numeric_cols)
+
+ # flag to show that trace result was limited
+ dic_param[DATA_SIZE] = df.memory_usage(deep=True).sum()
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ # create output data
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ orig_graph_param.add_cate_procs_to_array_formval()
+ dic_data = gen_dic_data_from_df(df, orig_graph_param)
+ gen_dic_serial_data_from_df(df, dic_proc_cfgs, dic_param)
+
+ dic_param[ARRAY_FORMVAL], dic_param[ARRAY_PLOTDATA] \
+ = gen_plotdata(orig_graph_param, dic_data, dic_proc_cfgs)
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+ return dic_param
+
+
+def gen_blank_df_end_col(proc: EndProc):
+ params = {gen_sql_label(col_id, proc.col_names[idx]): [] for idx, col_id in enumerate(proc.col_ids)}
+ params.update({Cycle.id.key: [], Cycle.global_id.key: [], Cycle.time.key: []})
+ return pd.DataFrame(params)
+
+
+def gen_df_end(proc: EndProc, proc_cfg: CfgProcess, start_relate_ids=None, start_tm=None, end_tm=None):
+ proc_id = proc.proc_id
+
+ # get serials
+ serials = proc_cfg.get_serials(column_name_only=False)
+ serials = [gen_sql_label(serial.id, serial.column_name) for serial in serials]
+
+ # get sensor values
+ df_end = get_sensor_values(proc, start_relate_ids=start_relate_ids, start_tm=start_tm, end_tm=end_tm)
+ if df_end.empty:
+ df_end = gen_blank_df_end_col(proc)
+
+ # filter duplicate
+ if df_end.columns.size:
+ df_end = df_end[df_end.eval('global_id.notnull()')]
+
+ # drop duplicate
+ if df_end.columns.size:
+ df_end = df_end.drop_duplicates(subset=serials, keep='last')
+
+ # set index
+ if df_end.columns.size:
+ df_end.set_index(Cycle.global_id.key, inplace=True)
+
+ return df_end
+
+
+def gen_df_end_same_with_start(proc: EndProc, proc_cfg: CfgProcess, start_tm, end_tm, drop_duplicate=True):
+ # proc_id = proc.proc_id
+
+ # get serials
+ serials = proc_cfg.get_serials(column_name_only=False)
+ serials = [gen_sql_label(serial.id, serial.column_name) for serial in serials]
+
+ # get sensor values
+ df_end = get_sensor_values(proc, start_tm=start_tm, end_tm=end_tm, use_global_id=False)
+ if df_end.empty:
+ return pd.DataFrame()
+
+ df_end.set_index(Cycle.id.key, inplace=True)
+
+ # if only 1 proc, show all data without filter duplicate
+ if drop_duplicate: # TODO ask PO
+ df_end.drop_duplicates(subset=serials, keep='last', inplace=True)
+
+ return df_end
+
+
+def filter_proc_same_with_start(proc: ConditionProc, start_tm, end_tm):
+ if not proc.dic_col_id_filters:
+ return None
+
+ cond_records = get_cond_data(proc, start_tm=start_tm, end_tm=end_tm, use_global_id=False)
+ # important : None is no filter, [] is no data
+ if cond_records is None:
+ return None
+
+ return [cycle.id for cycle in cond_records]
+
+
+def filter_proc(proc: ConditionProc, start_relate_ids=None, start_tm=None, end_tm=None):
+ if not proc.dic_col_id_filters:
+ return None
+
+ cond_records = get_cond_data(proc, start_relate_ids=start_relate_ids, start_tm=start_tm, end_tm=end_tm)
+ # important : None is no filter, [] is no data
+ if cond_records is None:
+ return None
+
+ return [cycle.global_id for cycle in cond_records]
+
+
+def create_rsuffix(proc_id):
+ return '_{}'.format(proc_id)
+
+
+def graph_one_proc(proc_cfg: CfgProcess, graph_param: DicParam, start_tm, end_tm, sql_limit):
+ """ get data from database
+
+ Arguments:
+ trace {[type]} -- [description]
+ dic_param {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+
+ # start proc
+ # proc_id = graph_param.common.start_proc
+ proc_id = proc_cfg.id # or graph_param.common.start_proc
+ data = get_start_proc_data(proc_id, start_tm, end_tm, with_limit=sql_limit, with_time_order=True)
+ # no data
+ if not data:
+ return gen_blank_df(graph_param)
+
+ df_start = pd.DataFrame(data)
+ df_start.set_index(Cycle.id.key, inplace=True)
+
+ # condition
+ for proc in graph_param.common.cond_procs:
+ ids = filter_proc_same_with_start(proc, start_tm, end_tm)
+ if ids is None:
+ continue
+
+ df_start = df_start[df_start.index.isin(ids)]
+
+ # end proc
+ for proc in graph_param.array_formval:
+ df_end = gen_df_end_same_with_start(proc, proc_cfg, start_tm, end_tm, drop_duplicate=False)
+ df_start = df_start.join(df_end, rsuffix=create_rsuffix(proc.proc_id)).reset_index()
+
+ return df_start
+
+
+def graph_many_proc(dic_proc_cfgs: Dict[int, CfgProcess], graph_param: DicParam, start_tm, end_tm, sql_limit):
+ """ get data from database
+
+ Arguments:
+ trace {[type]} -- [description]
+ dic_param {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ # start proc
+ start_proc_id = graph_param.common.start_proc
+
+ # without relate
+ data = get_start_proc_data(start_proc_id, start_tm, end_tm, with_limit=sql_limit)
+ # no data
+ if not data:
+ return gen_blank_df(graph_param), False
+
+ df_start = pd.DataFrame(data)
+
+ # with relate
+ data_with_relate_id = get_start_proc_data_with_relate_id(start_proc_id, start_tm, end_tm, with_limit=sql_limit)
+ if data_with_relate_id:
+ df_start_with_relate_id = pd.DataFrame(data_with_relate_id)
+ df_start = df_start.append(df_start_with_relate_id, ignore_index=True)
+
+ # downcast data type
+ # data_types = {Cycle.global_id.key: np.int64, Cycle.is_outlier.key: 'category'}
+ # for col in data_types:
+ # df_start[col].replace({np.nan: None}, inplace=True)
+ # df_start = df_start.astype(data_types)
+
+ start_relate_ids = list(df_start[df_start.eval('global_id.notnull()')][Cycle.global_id.key])
+
+ is_res_limited = True
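+    # when the linked id list is small enough, query by id in chunks of 900 so each
+    # IN() clause stays below common SQL bound-parameter limits (e.g. SQLite's default
+    # of 999); otherwise fall back to time-range filtering and keep the "limited" flag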
+ if len(start_relate_ids) < 5000:
+ start_relate_ids = [start_relate_ids[x:x + 900] for x in range(0, len(start_relate_ids), 900)]
+ is_res_limited = False
+ else:
+ start_relate_ids = None
+
+ # set index
+ df_start.set_index(Cycle.id.key, drop=False, inplace=True)
+
+ # condition that same with start
+ cycle_ids = None
+ is_filter = False
+ for proc in graph_param.common.cond_procs:
+ if not proc.proc_id == start_proc_id:
+ continue
+
+ ids = filter_proc_same_with_start(proc, start_tm, end_tm)
+ if ids is None:
+ continue
+
+ if cycle_ids is None:
+ cycle_ids = set(ids)
+ else:
+ cycle_ids.intersection_update(ids)
+
+ is_filter = True
+
+ if is_filter:
+ df_start = df_start[df_start.index.isin(cycle_ids)]
+ if not df_start.columns.size:
+ return gen_blank_df(graph_param), False
+
+ # end proc that same with start
+ for proc in graph_param.array_formval:
+ if not proc.proc_id == start_proc_id:
+ continue
+
+ # get sensor value data
+ df_end = gen_df_end_same_with_start(proc, dic_proc_cfgs[proc.proc_id], start_tm, end_tm)
+ df_start = df_start.join(df_end, how='inner', rsuffix=create_rsuffix(proc.proc_id))
+
+ if not df_start.columns.size:
+ return gen_blank_df(graph_param), False
+
+    # widen the time window by 14 days on both sides so that records of other procs,
+    # linked via global ids but time-stamped outside the start proc window, are still found
+ e_start_tm = convert_time(start_tm, return_string=False)
+ e_start_tm = add_days(e_start_tm, -14)
+ e_start_tm = convert_time(e_start_tm)
+ e_end_tm = convert_time(end_tm, return_string=False)
+ e_end_tm = add_days(e_end_tm, 14)
+ e_end_tm = convert_time(e_end_tm)
+
+ global_ids = None
+ is_filter = False
+ for proc in graph_param.common.cond_procs:
+ if proc.proc_id == start_proc_id:
+ continue
+
+ ids = filter_proc(proc, start_relate_ids, e_start_tm, e_end_tm)
+ if ids is None:
+ continue
+
+ if global_ids is None:
+ global_ids = set(ids)
+ else:
+ global_ids.intersection_update(ids)
+
+ is_filter = True
+
+ if is_filter:
+ if data_with_relate_id:
+ idxs = df_start[df_start[Cycle.global_id.key].isin(global_ids)].index
+ idxs = set(idxs)
+ df_start = df_start.loc[idxs]
+ # df_start_grp = df_start.groupby(df_start.index)
+ # df_start = df_start[
+ # df_start_grp[Cycle.global_id.key].transform(lambda sub_df: sub_df.isin(global_ids).any())]
+ else:
+ df_start = df_start[df_start[Cycle.global_id.key].isin(global_ids)]
+
+ # set new Index
+ df_start.set_index(Cycle.global_id.key, inplace=True)
+
+ # end proc
+ for proc in graph_param.array_formval:
+ if proc.proc_id == start_proc_id:
+ continue
+
+ df_end = gen_df_end(proc, dic_proc_cfgs[proc.proc_id], start_relate_ids, e_start_tm, e_end_tm)
+ df_start = df_start.join(df_end, rsuffix=create_rsuffix(proc.proc_id))
+
+ # group by cycle id to drop duplicate ( 1:n with global relation)
+ df_start.set_index(Cycle.id.key, inplace=True)
+ if data_with_relate_id:
+ df_start = df_start.groupby(df_start.index).first().reset_index()
+ # df_start = df_start.groupby(df_start.index).agg(lambda vals: vals.loc[~vals.isnull()].iloc[0])
+ # df_start = df_start.groupby(df_start.index).agg(
+ # lambda vals: next((val for val in vals if val is not None), None))
+
+ # sort by time
+ df_start.sort_values(Cycle.time.key, inplace=True)
+
+ return df_start, is_res_limited
+
+
+@log_execution_time()
+def validate_data(df):
+    # columns excluded from validation
+ exclude_cols = [Cycle.id.key, Cycle.global_id.key, Cycle.time.key, Cycle.is_outlier.key]
+
+ # convert data types
+ df = df.convert_dtypes()
+
+ # integer cols
+ int_cols = df.select_dtypes(include='integer').columns.tolist()
+ return_vals = [pd.NA, pd.NA]
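+    # return_vals are assumed to be the substitution values that validate_numeric_minus /
+    # validate_numeric_plus apply to out-of-range entries (NA for integers, +/-inf or NaN for floats)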
+ for col in int_cols:
+ if col in exclude_cols:
+ continue
+
+ df = validate_numeric_minus(df, col, return_vals)
+ df = validate_numeric_plus(df, col, return_vals + [pd.NA])
+
+ # float
+ float_cols = df.select_dtypes(include='float').columns.tolist()
+ return_neg_vals = [float('-inf'), float('-inf')]
+ return_pos_vals = [float('inf'), float('inf'), np.NAN]
+ for col in float_cols:
+ if col in exclude_cols:
+ continue
+
+ df = validate_numeric_minus(df, col, return_neg_vals)
+ df = validate_numeric_plus(df, col, return_pos_vals)
+
+ # non-numeric cols
+ for col in df.columns:
+ if col in exclude_cols:
+ continue
+
+ if col in int_cols or col in float_cols:
+ continue
+ df = validate_string(df, col)
+
+ return df
+
+
+@log_execution_time()
+def gen_dic_data_from_df(df: DataFrame, graph_param: DicParam):
+ dic_data = defaultdict(dict)
+ for proc in graph_param.array_formval:
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ sql_label = gen_sql_label(col_id, col_name)
+ if sql_label in df.columns:
+ dic_data[proc.proc_id][col_id] = df[sql_label].replace({np.nan: None}).tolist()
+ else:
+ dic_data[proc.proc_id][col_id] = [None] * df.index.size
+
+ dic_data[proc.proc_id][Cycle.time.key] = []
+ time_col_alias = '{}_{}'.format(Cycle.time.key, proc.proc_id)
+ if time_col_alias in df:
+ dic_data[proc.proc_id][Cycle.time.key] = df[time_col_alias].replace({np.nan: None}).tolist()
+
+ return dic_data
+
+
+@log_execution_time()
+def gen_dic_serial_data_from_df(df: DataFrame, dic_proc_cfgs, dic_param):
+ dic_param[SERIAL_DATA] = dict()
+ for proc_id, proc_cfg in dic_proc_cfgs.items():
+ serial_cols = proc_cfg.get_serials(column_name_only=False)
+ sql_labels = [gen_sql_label(serial_col.id, serial_col.column_name) for serial_col in serial_cols]
+ if sql_labels and all(item in df.columns for item in sql_labels):
+ dic_param[SERIAL_DATA][proc_id] = df[sql_labels] \
+ .replace({np.nan: ''}) \
+ .to_records(index=False) \
+ .tolist()
+ else:
+ dic_param[SERIAL_DATA][proc_id] = []
+
+
+@log_execution_time()
+def group_by_start_cycle(data):
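+    # group query rows by cycle id: keep the first row index per cycle and collect every
+    # relate id that belongs to the same cycle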
+ dic_cycle_idx = {}
+ relate_ids = []
+ idxs = []
+ cycle_ids = []
+
+ cnt = 0
+ for idx, row in enumerate(data):
+ current_idx = dic_cycle_idx.get(row.id)
+ if current_idx is None:
+ dic_cycle_idx[row.id] = cnt
+
+ # relate
+ relate_ids.append(gen_relate_ids(row))
+
+ cycle_ids.append([row.id])
+ idxs.append(idx)
+ cnt += 1
+ else:
+ if row.relate_id:
+ relate_ids[current_idx].append(row.relate_id)
+
+ return cycle_ids, relate_ids, idxs
+
+
+@log_execution_time()
+def filter_data(start_proc_name, cycle_ids, relate_ids, cycle_conds):
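+    # keep only indexes whose cycle (or any related cycle) matches every condition proc;
+    # with no conditions, every index passes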
+ if not cycle_conds:
+        return list(range(len(relate_ids)))
+
+ filter_idxs = []
+ for idx, [cycle_id, relate_id] in enumerate(zip(cycle_ids, relate_ids)):
+ checks = [None] * len(cycle_conds)
+ for chk_idx, [proc_name, chk_keys] in enumerate(cycle_conds):
+ if proc_name == start_proc_name:
+ ids = cycle_id
+ else:
+ ids = relate_id
+
+ for id in ids:
+ if checks[chk_idx] is None and id in chk_keys:
+ checks[chk_idx] = True
+
+ if all(checks):
+ filter_idxs.append(idx)
+
+ return filter_idxs
+
+
+@log_execution_time()
+def gen_filtered_data(relate_ids, filtered_relate_idxs, data, data_idxs):
+ times = []
+ cycles = []
+ outliers = []
+ new_relate_ids = []
+
+ for idx in filtered_relate_idxs:
+ data_idx = data_idxs[idx]
+ row = data[data_idx]
+ new_relate_ids.append(relate_ids[idx])
+ times.append(row.time)
+ cycles.append([row.id])
+ outliers.append(row.is_outlier)
+
+ return new_relate_ids, times, cycles, outliers
+
+
+@log_execution_time()
+def gen_null_array_for_sensor(graph_param: DicParam, arr_len, show_none=True):
+ ele = None if show_none else ''
+ dic_proc_sensors = defaultdict(dict)
+
+ for proc in graph_param.array_formval:
+ for sensor in proc.col_ids:
+ dic_proc_sensors[proc.proc_id][sensor] = [ele] * arr_len
+
+ return dic_proc_sensors
+
+
+@log_execution_time()
+def gen_final_data(graphic_param: DicParam, cycle_ids, relate_ids, dic_sensor_vals, show_none=True):
+ # create null arrays
+ dic_data = gen_null_array_for_sensor(graphic_param, len(relate_ids), show_none=show_none)
+
+ for idx, [cycle_id, relate_id] in enumerate(zip(cycle_ids, relate_ids)):
+ for proc_id, dic_col_data in dic_data.items():
+ if proc_id == graphic_param.common.start_proc:
+ ids = cycle_id
+ else:
+ ids = relate_id
+
+ for id in ids:
+ if id is None:
+ continue
+
+ row = dic_sensor_vals[proc_id].get(id)
+ if not row:
+ continue
+
+ for col_id in dic_col_data:
+ # TODO : Duy name is id or column name
+ # val = getattr(row, SQL_COL_PREFIX + sensor_name, None)
+ val = getattr(row, str(col_id), None)
+ if val is None:
+ continue
+
+ dic_data[proc_id][col_id][idx] = val
+
+ return dic_data
+
+
+@log_execution_time()
+def get_proc_ids(procs):
+ pass
+
+
+@log_execution_time()
+def get_start_proc_data_with_relate_id(proc_id, start_tm, end_tm, with_limit=None):
+ """
+ inner join with relate table
+ :param proc_id:
+ :param start_tm:
+ :param end_tm:
+ :param with_limit:
+ :return:
+ """
+ # start proc subquery
+ cycle_cls = find_cycle_class(proc_id)
+ data = db.session.query(cycle_cls.id, GlobalRelation.relate_id.label(Cycle.global_id.key), cycle_cls.time,
+ cycle_cls.is_outlier)
+ data = data.filter(cycle_cls.process_id == proc_id)
+ data = data.filter(cycle_cls.time >= start_tm)
+ data = data.filter(cycle_cls.time < end_tm)
+
+ # join global relation
+ data = data.join(GlobalRelation, GlobalRelation.global_id == cycle_cls.global_id)
+
+ if with_limit:
+ data = data.limit(with_limit)
+
+ data = data.all()
+
+ return data
+
+
+@log_execution_time()
+def get_start_proc_data(proc_id, start_tm, end_tm, with_limit=None, with_time_order=None):
+ """
+    get start proc data only (without relation)
+ :param proc_id:
+ :param start_tm:
+ :param end_tm:
+ :param with_limit:
+ :param with_time_order:
+ :return:
+ """
+ cycle_cls = find_cycle_class(proc_id)
+ cycle = db.session.query(cycle_cls.id, cycle_cls.global_id, cycle_cls.time, cycle_cls.is_outlier)
+ cycle = cycle.filter(cycle_cls.process_id == proc_id)
+ cycle = cycle.filter(cycle_cls.time >= start_tm)
+ cycle = cycle.filter(cycle_cls.time < end_tm)
+
+ if with_time_order:
+ cycle = cycle.order_by(cycle_cls.time)
+
+ if with_limit:
+ cycle = cycle.limit(with_limit)
+
+ cycle = cycle.all()
+
+ return cycle
+
+
+def get_sensor_values_chunk(data_query, chunk_sensor, dic_sensors, cycle_cls, start_relate_ids, start_tm, end_tm):
+ for col_id, col_name in chunk_sensor:
+ sensor = dic_sensors[col_name]
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+ sensor_val = sensor_val_cls.coef(col_id)
+
+ data_query = data_query.outerjoin(
+ sensor_val_cls,
+ and_(sensor_val_cls.cycle_id == cycle_cls.id, sensor_val_cls.sensor_id == sensor.id)
+ )
+
+ data_query = data_query.add_columns(sensor_val)
+
+ # chunk
+ if start_relate_ids:
+ records = []
+ for ids in start_relate_ids:
+ temp = data_query.filter(cycle_cls.global_id.in_(ids))
+ records += temp.all()
+ id_key = Cycle.global_id.key
+ else:
+ data_query = data_query.filter(cycle_cls.time >= start_tm)
+ data_query = data_query.filter(cycle_cls.time < end_tm)
+ records = data_query.all()
+ id_key = Cycle.id.key
+
+ if records:
+ return pd.DataFrame(records)
+    # no records: return an empty frame that still has the id/time columns so that
+    # set_index / concat in get_sensor_values keep working
+    params = {id_key: [], Cycle.time.key: []}
+    params.update({gen_sql_label(col_id, col_name): [] for col_id, col_name in chunk_sensor})
+    return pd.DataFrame(params)
+
+
+@log_execution_time()
+def get_sensor_values(proc: EndProc, start_relate_ids=None, start_tm=None, end_tm=None, use_global_id=True):
+ """gen inner join sql for all column in 1 proc
+
+ Arguments:
+ proc_id {[string]} -- [process id]
+ cols {[list]} -- [column name list]
+ """
+ dic_sensors = gen_dic_sensors(proc.proc_id, proc.col_names)
+
+ cycle_cls = find_cycle_class(proc.proc_id)
+ if use_global_id:
+ data = db.session.query(cycle_cls.global_id, cycle_cls.time)
+ else:
+ data = db.session.query(cycle_cls.id, cycle_cls.time)
+
+ data = data.filter(cycle_cls.process_id == proc.proc_id)
+ dataframes = []
+ all_sensors = list(zip(proc.col_ids, proc.col_names))
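+    # join sensor value tables in chunks of 50 columns per query to keep each statement's
+    # join count manageable, then concatenate the partial frames on the id column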
+ for idx, chunk_sensor in enumerate(chunks(all_sensors, 50)):
+ df_chunk = get_sensor_values_chunk(data, chunk_sensor, dic_sensors, cycle_cls, start_relate_ids,
+ start_tm, end_tm)
+ if idx != 0 and Cycle.time.key in df_chunk.columns:
+ df_chunk = df_chunk.drop(Cycle.time.key, axis=1)
+ dataframes.append(df_chunk)
+
+ df = pd.concat([dfc.set_index(dfc.columns[0]) for dfc in dataframes], ignore_index=False, axis=1).reset_index()
+
+ return df
+
+
+@log_execution_time()
+def get_cond_data(proc: ConditionProc, start_relate_ids=None, start_tm=None, end_tm=None, use_global_id=True):
+ """generate subquery for every condition procs
+ """
+ # get sensor info ex: sensor id , data type (int,real,text)
+ filter_query = Sensor.query.filter(Sensor.process_id == proc.proc_id)
+
+ # filter
+ cycle_cls = find_cycle_class(proc.proc_id)
+ if use_global_id:
+ data = db.session.query(cycle_cls.global_id)
+ else:
+ data = db.session.query(cycle_cls.id)
+
+ data = data.filter(cycle_cls.process_id == proc.proc_id)
+
+ # for filter_sensor in filter_sensors:
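+    # each filter column adds an inner join; its ON clause requires every filter detail to be
+    # satisfied by at least one of its conditions (exact match, LIKE pattern or regexp)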
+ for col_name, filter_details in proc.dic_col_name_filters.items():
+ sensor = filter_query.filter(Sensor.column_name == col_name).first()
+ sensor_val = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+
+ ands = []
+ for filter_detail in filter_details:
+ comp_ins = []
+ comp_likes = []
+ comp_regexps = []
+ cfg_filter_detail: CfgFilterDetail
+ for cfg_filter_detail in filter_detail.cfg_filter_details:
+ val = cfg_filter_detail.filter_condition
+ if cfg_filter_detail.filter_function == FilterFunc.REGEX.name:
+ comp_regexps.append(val)
+ elif not cfg_filter_detail.filter_function \
+ or cfg_filter_detail.filter_function == FilterFunc.MATCHES.name:
+ comp_ins.append(val)
+ else:
+ comp_likes.extend(gen_sql_like_value(val, FilterFunc[cfg_filter_detail.filter_function],
+ position=cfg_filter_detail.filter_from_pos))
+
+ ands.append(
+ or_(
+ sensor_val.value.in_(comp_ins),
+ *[sensor_val.value.op(SQL_REGEXP_FUNC)(val) for val in comp_regexps if val is not None],
+ *[sensor_val.value.like(val) for val in comp_likes if val is not None],
+ )
+ )
+
+ data = data.join(
+ sensor_val, and_(
+ sensor_val.cycle_id == cycle_cls.id,
+ sensor_val.sensor_id == sensor.id,
+ *ands,
+ )
+ )
+
+ # chunk
+ if start_relate_ids:
+ records = []
+ for ids in start_relate_ids:
+ temp = data.filter(cycle_cls.global_id.in_(ids))
+ records += temp.all()
+ else:
+ data = data.filter(cycle_cls.time >= start_tm)
+ data = data.filter(cycle_cls.time < end_tm)
+ records = data.all()
+
+ return records
+
+
+@log_execution_time()
+def gen_dic_sensors(proc_id, cols=None):
+ """gen dictionary of sensors
+ {column_name: T_sensor instance}
+
+ Arguments:
+ proc_id {string} -- process id
+ """
+
+ sensors = Sensor.query.filter(Sensor.process_id == proc_id)
+ if cols:
+ sensors = sensors.filter(Sensor.column_name.in_(cols))
+
+ return {sensor.column_name: sensor for sensor in sensors}
+
+
+def order_end_proc_sensor(orig_graph_param: DicParam):
+ dic_orders = {}
+ for proc in orig_graph_param.array_formval:
+ proc_id = proc.proc_id
+ orders = CfgConstant.get_value_by_type_name(type=CfgConstantType.TS_CARD_ORDER.name, name=proc_id) or '{}'
+ orders = json.loads(orders)
+ if orders:
+ dic_orders[proc_id] = orders
+
+ lst_proc_end_col = []
+ for proc in orig_graph_param.array_formval:
+ proc_id = proc.proc_id
+ for col_id in proc.col_ids:
+ proc_order = dic_orders.get(proc_id) or {}
+ order = proc_order.get(str(col_id)) or 999
+ lst_proc_end_col.append((proc_id, col_id, order))
+
+ return sorted(lst_proc_end_col, key=lambda x: x[-1])
+
+
+@log_execution_time()
+@notify_progress(50)
+def gen_plotdata(orig_graph_param: DicParam, dic_data, dic_proc_cfg):
+ # re-order proc-sensors to show to UI
+ lst_proc_end_col = order_end_proc_sensor(orig_graph_param)
+
+ plotdatas = []
+ array_formval = []
+ for proc_id, col_id, _ in lst_proc_end_col:
+ col_detail = {}
+ rank_value = {}
+ get_cols = dic_proc_cfg[proc_id].get_cols([col_id])
+ array_y = dic_data[proc_id][col_id]
+
+ if get_cols:
+ # remove none from data
+ array_y_without_na = pd.DataFrame(array_y).dropna()
+ array_y_without_na = array_y_without_na[0].to_list() if not array_y_without_na.empty else []
+ # if get_cols[0].data_type == DataType.TEXT.name and len(array_y_without_na):
+ if get_cols[0].data_type in [DataType.TEXT.name, DataType.INTEGER.name]:
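+                # text/integer columns are plotted as category codes; rank_value maps each
+                # code back to its original label (-1 is the code pandas uses for N/A)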
+ cat_array_y = pd.Series(array_y).astype('category').cat
+ array_y = cat_array_y.codes.tolist()
+
+ rank_value = {-1: NA_STR}
+                if len(cat_array_y.categories):
+ rank_value = dict(enumerate(cat_array_y.categories))
+ col_detail = {
+ 'id': col_id,
+ 'name': get_cols[0].name,
+ 'type': get_cols[0].data_type,
+ 'proc_id': proc_id,
+ 'proc_name': get_cols[0].cfg_process.name
+ }
+
+ plotdata = dict(array_y=array_y, col_detail=col_detail, rank_value=rank_value)
+ plotdatas.append(plotdata)
+
+ array_formval.append({
+ END_PROC: proc_id,
+ GET02_VALS_SELECT: col_id
+ })
+
+ return array_formval, plotdatas
+
+
+@log_execution_time()
+def gen_category_data(dic_proc_cfgs: Dict[int, CfgProcess], cate_procs: List[CategoryProc], dic_data):
+ plotdatas = []
+ for proc in cate_procs:
+ proc_id = proc.proc_id
+ dic_proc = dic_data.get(proc_id)
+ if dic_proc is None:
+ continue
+
+ proc_cfg = dic_proc_cfgs[proc_id]
+ dic_column_cfgs: Dict[int, CfgProcessColumn] = {col.id: col for col in proc_cfg.columns}
+
+ for col_id, col_show_name in zip(proc.col_ids, proc.col_show_names):
+ array_y = dic_proc.get(col_id)
+ if array_y is None:
+ continue
+
+ plotdata = dict(proc_name=proc_id, proc_master_name=proc_cfg.name,
+ column_name=col_id, column_master_name=col_show_name,
+ data=array_y)
+ plotdatas.append(plotdata)
+
+ return plotdatas
+
+
+@log_execution_time()
+def clear_all_keyword(dic_param):
+ """ clear [All] keyword in selectbox
+
+ Arguments:
+ dic_param {json} -- [params from client]
+ """
+ dic_common = dic_param[COMMON]
+ cate_procs = dic_common.get(CATE_PROCS, [])
+ dic_formval = dic_param[ARRAY_FORMVAL]
+ for idx in range(len(dic_formval)):
+ select_vals = dic_formval[idx][GET02_VALS_SELECT]
+ if isinstance(select_vals, (list, tuple)):
+ dic_formval[idx][GET02_VALS_SELECT] = [val for val in select_vals if val not in [SELECT_ALL, NO_FILTER]]
+ else:
+ dic_formval[idx][GET02_VALS_SELECT] = [select_vals]
+
+ for idx in range(len(cate_procs)):
+ select_vals = cate_procs[idx][GET02_CATE_SELECT]
+ if isinstance(select_vals, (list, tuple)):
+ cate_procs[idx][GET02_CATE_SELECT] = [val for val in select_vals if val not in [SELECT_ALL, NO_FILTER]]
+ else:
+ cate_procs[idx][GET02_CATE_SELECT] = [select_vals]
+
+    # The NO_FILTER keyword is needed to decide whether to filter, so it cannot be removed here.
+ for cond in dic_common[COND_PROCS]:
+ for key, value in cond.items():
+ if isinstance(value, (list, tuple)):
+ vals = value
+ else:
+ vals = [value]
+
+ if NO_FILTER in vals:
+ continue
+
+ cond[key] = [val for val in vals if not val == SELECT_ALL]
+
+
+@log_execution_time()
+def update_outlier_flg(proc_id, cycle_ids, is_outlier):
+ """update outlier to t_cycle table
+
+ Arguments:
+ cycle_ids {[type]} -- [description]
+ is_outlier {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+
+ # get global_ids linked to target cycles
+ cycle_cls = find_cycle_class(proc_id)
+ cycle_recs = cycle_cls.get_cycles_by_ids(cycle_ids)
+ if not cycle_recs:
+ return True
+
+ global_ids = []
+ for rec in cycle_recs:
+ if rec.global_id:
+ global_ids.append(rec.global_id)
+ else:
+ rec.is_outlier = is_outlier
+
+ target_global_ids = GlobalRelation.get_all_relations_by_globals(global_ids, set_done_globals=set())
+
+ # update outlier for linked global ids
+ # TODO: fix front end
+ cycle_cls.update_outlier_by_global_ids(list(target_global_ids), is_outlier)
+
+ db.session.commit()
+ return True
+
+
+@log_execution_time()
+def get_serials(trace, proc_name):
+ return [s.split()[0] for s in trace.hist2_yaml.get_serial_col(proc_name) if s]
+
+
+@log_execution_time()
+def get_date_col(trace, proc_name):
+ date_col = trace.hist2_yaml.get_date_col(proc_name)
+ date_col = date_col.split()[0]
+ return date_col
+
+
+def gen_new_dic_param(dic_param, dic_non_sensor, start_proc_first=False):
+ pass
+
+
+def get_non_sensor_cols(dic_proc_cfgs: Dict[int, CfgProcess], graph_param: DicParam):
+ """get non sensor headers
+
+ Arguments:
+ trace {[type]} -- [description]
+ dic_param {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ dic_header = {}
+
+ for proc in graph_param.array_formval:
+ proc_id = proc.proc_id
+ proc_cfg = dic_proc_cfgs[proc_id]
+ serials = proc_cfg.get_serials()
+ date_col = proc_cfg.get_date_col()
+ cols = serials + [date_col]
+ dic_header[proc_id] = cols
+
+ # start proc
+ proc_id = graph_param.common.start_proc
+ if not dic_header.get(proc_id):
+ proc_cfg = dic_proc_cfgs[proc_id]
+ serials = proc_cfg.get_serials()
+ date_col = proc_cfg.get_date_col()
+ cols = serials + [date_col]
+ dic_header[proc_id] = cols
+
+ return dic_header
+
+
+def get_cate_var(graph_param: DicParam):
+ cate_procs = graph_param.common.cate_procs
+ if cate_procs:
+ return {ele[CATE_PROC]: ele[GET02_CATE_SELECT] for ele in cate_procs if
+ ele.get(CATE_PROC) and ele.get(GET02_CATE_SELECT)}
+
+ return None
+
+
+def gen_relate_ids(row):
+ """
+ gen start proc relate ids
+ """
+
+ relate_ids = []
+ if row.global_id:
+ relate_ids.append(row.global_id)
+ if row.relate_id:
+ relate_ids.append(row.relate_id)
+
+ return relate_ids
+
+
+def is_import_job_running():
+ return any([job.startswith(str(JobType.FACTORY_IMPORT)) or job.startswith(str(JobType.CSV_IMPORT))
+ for job in set(dic_running_job.keys())])
+
+
+@log_execution_time()
+def make_irregular_data_none(dic_param):
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+ for num, plotdata in enumerate(array_plotdata):
+ array_y = plotdata.get(ARRAY_Y) or []
+ array_y_type = plotdata.get(ARRAY_Y_TYPE) or []
+ if array_y_type: # use y_type to check for irregular data
+ array_plotdata[num][ARRAY_Y] = \
+ [None if array_y_type[idx] not in (
+ YType.NORMAL.value, YType.OUTLIER.value, YType.NEG_OUTLIER.value) else e
+ for idx, e in enumerate(array_y)]
+ else: # or use value to check for irregular data directly
+ array_plotdata[num][ARRAY_Y] = \
+ [None if e == float('inf') or e == float('-inf') else e for e in array_y]
+ return dic_param
+
+
+def get_maxmax_minmin_chartinfo(chart_infos):
+ y_min = float('inf')
+ y_max = float('-inf')
+ for chart_info in chart_infos:
+ c_y_min = chart_info.get(Y_MIN) if chart_info.get(Y_MIN) is not None else float('inf')
+ if y_min > c_y_min:
+ y_min = c_y_min
+ c_y_max = chart_info.get(Y_MAX) if chart_info.get(Y_MAX) is not None else float('-inf')
+ if y_max < c_y_max:
+ y_max = c_y_max
+ # Default (y_min, y_max) = (0, 1) if can not found min/max
+ # y_min = 0 if y_min == float('inf') else y_min
+ # y_max = (y_min + 1) if y_max == float('-inf') else y_max
+ y_min = None if y_min == float('inf') else y_min
+ y_max = None if y_max == float('-inf') else y_max
+ return [y_min, y_max]
+
+
+def produce_irregular_plotdata(dic_param):
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+ for num, plotdata in enumerate(array_plotdata):
+ array_y = plotdata.get(ARRAY_Y) or []
+
+ # calculate upper/lower limit
+ chart_infos = plotdata[CHART_INFOS] or []
+ y_min, y_max = get_maxmax_minmin_chartinfo(chart_infos)
+
+ if y_max is None or y_min is None:
+ whisker_lower, whisker_upper = calc_upper_lower_whisker(array_y)
+ y_min = whisker_lower if y_min is None else y_min
+ y_max = whisker_upper if y_max is None else y_max
+
+ # create new irregular_plotdata of array_y
+ array_y_type = []
+ for idx, e in enumerate(array_y):
+ # convert inf/none to min/max; nan/na is not supported
+ if e is None:
+ array_y_type.append(YType.NONE.value)
+ elif e == float('inf'):
+ array_y_type.append(YType.INF.value)
+ elif e == float('-inf'):
+ array_y_type.append(YType.NEG_INF.value)
+ else: # normal values
+ # convert outlier to min/max
+ # if e > y-max or e < y-min:
+ if y_max is not None and e > y_max:
+ # Sprint 79 #12: Keep actual value, FE display actual value
+ # array_plotdata[num][ARRAY_Y][idx] = y_max
+ array_y_type.append(YType.OUTLIER.value)
+ elif y_min is not None and e < y_min:
+ # array_plotdata[num][ARRAY_Y][idx] = y_min
+ array_y_type.append(YType.NEG_OUTLIER.value)
+ else:
+ array_y_type.append(YType.NORMAL.value)
+
+ array_plotdata[num][ARRAY_Y_TYPE] = array_y_type
+ array_plotdata[num][Y_MAX] = y_max
+ array_plotdata[num][Y_MIN] = y_min
+
+
+def calc_upper_lower_whisker(arr):
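+    # whiskers are Q1 - 2.5*IQR and Q3 + 2.5*IQR computed on finite values only;
+    # when IQR is zero, fall back to 90% / 110% of the min / max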
+ arr = [e for e in arr if e not in {None, float('inf'), float('-inf')}]
+ if arr:
+ q1 = quantile(arr, 0.25, interpolation='midpoint')
+ q3 = quantile(arr, 0.75, interpolation='midpoint')
+ iqr = q3 - q1
+ if iqr:
+ whisker_lower = q1 - 2.5 * iqr
+ whisker_upper = q3 + 2.5 * iqr
+ else:
+ whisker_lower = 0.9 * min(arr)
+ whisker_upper = 1.1 * max(arr)
+ return whisker_lower, whisker_upper
+ return None, None
+
+
+def save_proc_sensor_order_to_db(orders):
+ try:
+ for proc_code, new_orders in orders.items():
+ CfgConstant.create_or_merge_by_type(const_type=CfgConstantType.TS_CARD_ORDER.name,
+ const_name=proc_code,
+ const_value=new_orders)
+ except Exception as ex:
+ traceback.print_exc()
+ logger.error(ex)
+
+
+def get_procs_in_dic_param(graph_param: DicParam):
+ """
+ get process
+ :param graph_param:
+ :return:
+ """
+ procs = set()
+ procs.add(graph_param.common.start_proc)
+ for proc in graph_param.common.cond_procs:
+ procs.add(proc.proc_id)
+
+ for proc in graph_param.common.cate_procs:
+ procs.add(proc.proc_id)
+
+ for proc in graph_param.array_formval:
+ procs.add(proc.proc_id)
+
+ return {proc.id: proc for proc in CfgProcess.get_procs(procs)}
+
+
+def gen_blank_df(graph_param: DicParam):
+ data = {Cycle.time.key: [], Cycle.is_outlier.key: []}
+ return pd.DataFrame(data)
diff --git a/histview2/api/ridgeline_plot/controllers.py b/histview2/api/ridgeline_plot/controllers.py
new file mode 100644
index 0000000..e3b59d4
--- /dev/null
+++ b/histview2/api/ridgeline_plot/controllers.py
@@ -0,0 +1,142 @@
+import json
+import timeit
+
+from flask import Blueprint, request, Response
+
+from histview2.api.ridgeline_plot.services import (gen_trace_data_by_categorical_var,
+ customize_dict_param, gen_rlp_data_by_term, gen_csv_data,
+ csv_export_dispatch, save_input_data_to_file,
+ merge_multiple_dic_params,
+ gen_trace_data_by_cyclic)
+from histview2.common.services import http_content, csv_content
+from histview2.common.services.form_env import (parse_multi_filter_into_one,
+ parse_request_params,
+ bind_multiple_end_proc_rlp)
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import EventType
+from histview2.common.yaml_utils import *
+
+api_ridgeline_plot_blueprint = Blueprint(
+ 'api_ridgeline_plot',
+ __name__,
+ url_prefix='/histview2/api/rlp'
+)
+
+RLP_MAX_GRAPH = 20
+
+
+@api_ridgeline_plot_blueprint.route('/index', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+ save_input_data_to_file(dic_form, EventType.RLP)
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ customize_dict_param(dic_param)
+
+ compare_type = dic_param.get(COMMON).get(COMPARE_TYPE)
+
+    if compare_type == RL_CATEGORY:
+        dic_param = gen_trace_data_by_categorical_var(dic_param)
+    elif compare_type == RL_CYCLIC_TERM:
+        dic_param = gen_trace_data_by_cyclic(dic_param, RLP_MAX_GRAPH)
+    elif compare_type == RL_DIRECT_TERM:
+        dic_param = gen_rlp_data_by_term(dic_param, RLP_MAX_GRAPH)
+
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ # remove raw data
+ for plot in dic_param[ARRAY_PLOTDATA]:
+ del plot[RL_DATA]
+
+ out_dict = json.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial)
+ return out_dict, 200
+
+
+@api_ridgeline_plot_blueprint.route('/csv_export', methods=['GET'])
+def csv_export():
+ """csv export
+
+ Returns:
+        Response -- csv file attachment
+ """
+
+ dic_form = parse_request_params(request)
+ multiple_dform = bind_multiple_end_proc_rlp(dic_form)
+
+ dic_datas = []
+ dic_params = []
+ for dform in multiple_dform:
+ dic_param = parse_multi_filter_into_one(dform)
+ customize_dict_param(dic_param)
+
+ dic_data = csv_export_dispatch(dic_param)
+ dic_datas.append(dic_data)
+ dic_params.append(dic_param)
+ mdic_data = merge_multiple_dic_params(dic_datas)
+ mdic_param = merge_multiple_dic_params(dic_params)
+ if not mdic_data:
+ return {}, 200
+
+ csv_str = gen_csv_data(mdic_param, mdic_data, mdic_param[COMMON][GET02_VALS_SELECT],
+ mdic_param[COMMON][CLIENT_TIMEZONE])
+
+ csv_filename = csv_content.gen_csv_fname()
+
+ response = Response(csv_str.encode("utf-8-sig"), mimetype="text/csv",
+ headers={
+ "Content-Disposition": "attachment;filename={}".format(csv_filename),
+ })
+ response.charset = "utf-8-sig"
+
+ return response
+
+
+@api_ridgeline_plot_blueprint.route('/tsv_export', methods=['GET'])
+def tsv_export():
+ """tsv export
+
+ Returns:
+        Response -- tsv file attachment
+ """
+ dic_form = parse_request_params(request)
+ multiple_dform = bind_multiple_end_proc_rlp(dic_form)
+
+ dic_datas = []
+ dic_params = []
+ for dform in multiple_dform:
+ dic_param = parse_multi_filter_into_one(dform)
+ customize_dict_param(dic_param)
+ dic_data = csv_export_dispatch(dic_param)
+ dic_datas.append(dic_data)
+ dic_params.append(dic_param)
+
+ mdic_data = merge_multiple_dic_params(dic_datas)
+ mdic_param = merge_multiple_dic_params(dic_params)
+ if not mdic_data:
+ return {}, 200
+
+ csv_str = gen_csv_data(mdic_param, mdic_data, mdic_param[COMMON][GET02_VALS_SELECT],
+ mdic_param[COMMON][CLIENT_TIMEZONE], delimiter='\t')
+
+ csv_filename = csv_content.gen_csv_fname("tsv")
+
+ response = Response(csv_str.encode("utf-8-sig"), mimetype="text/tsv",
+ headers={
+ "Content-Disposition": "attachment;filename={}".format(csv_filename),
+ })
+ response.charset = "utf-8-sig"
+
+ return response
diff --git a/histview2/api/ridgeline_plot/services.py b/histview2/api/ridgeline_plot/services.py
new file mode 100644
index 0000000..4c2c2ca
--- /dev/null
+++ b/histview2/api/ridgeline_plot/services.py
@@ -0,0 +1,845 @@
+from collections import defaultdict
+from copy import deepcopy
+
+import numpy as np
+import pandas as pd
+import pytz
+from dateutil import tz
+
+from histview2.api.categorical_plot.services import (gen_graph_param,
+ category_bind_dic_param_to_class,
+ gen_trace_data_by_cyclic_common, split_data_by_condition,
+ customize_dict_param_common)
+from histview2.api.trace_data.services.time_series_chart import (get_data_from_db, gen_new_dic_param,
+ get_non_sensor_cols, gen_graph,
+ gen_dic_data_from_df, get_procs_in_dic_param,
+ main_check_filter_detail_match_graph_data,
+ get_cfg_proc_col_info)
+from histview2.common.common_utils import (start_of_minute, end_of_minute)
+from histview2.common.constants import *
+from histview2.common.memoize import memoize
+from histview2.common.services.ana_inf_data import get_bound, get_grid_points, calculate_kde_for_ridgeline
+from histview2.common.services.sse import notify_progress
+from histview2.common.timezone_utils import convert_dt_str_to_timezone
+from histview2.common.trace_data_log import *
+from histview2.setting_module.models import CfgProcess, CfgProcessColumn
+from histview2.trace_data.models import Cycle
+from histview2.trace_data.schemas import DicParam
+
+
+@log_execution_time()
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.RLP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_trace_data_by_cyclic(dic_param, max_graph=None):
+ dic_param = gen_trace_data_by_cyclic_common(dic_param)
+ dic_plotdata = defaultdict(list)
+ for plotdata in dic_param[ARRAY_PLOTDATA]:
+ dic_plotdata[plotdata['end_col']].append(plotdata)
+
+ dic_param[ARRAY_PLOTDATA], dic_param[IS_GRAPH_LIMITED] = gen_cyclic_term_plotdata(dic_plotdata, dic_param,
+ max_graph)
+
+ # calculate emd data
+ cal_emd_data(dic_param)
+ gen_rlp_kde(dic_param)
+
+ return dic_param
+
+
+@log_execution_time()
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.RLP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_trace_data_by_categorical_var(dic_param, max_graph=None):
+ """tracing data to show graph
+ 1 start point x n end point
+    filter by condition procs for points between the start point and end point
+ """
+ # gen graph_param
+ graph_param, dic_proc_cfgs = gen_graph_param(dic_param)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # gen dic_data
+ dic_data = gen_dic_data_from_df(df, graph_param)
+
+ # flag to show that trace result was limited
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ # convert proc to cols dic
+ # transform raw data to graph data
+ # create output data
+ orig_graph_param: DicParam = category_bind_dic_param_to_class(dic_param)
+ dic_data, is_graph_limited = split_data_by_condition(dic_data, orig_graph_param, max_graph)
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited
+
+ end_cols = []
+ for param in orig_graph_param.array_formval:
+ end_cols += param.col_ids
+
+ dic_param[ARRAY_PLOTDATA] = gen_custom_plotdata(dic_data, end_cols)
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+ dic_param[TIMES] = df[Cycle.time.key].tolist()
+
+ # calculate emd data
+ cal_emd_data(dic_param)
+ gen_rlp_kde(dic_param)
+
+ return dic_param
+
+
+@log_execution_time()
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.RLP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_rlp_data_by_term(dic_param, max_graph=None):
+ """rlp data to show graph
+    filter by condition procs for points between the start point and end point
+ """
+
+ dic_param[ARRAY_PLOTDATA] = []
+ terms = dic_param.get(TIME_CONDS) or []
+
+ dic_param[MATCHED_FILTER_IDS] = []
+ dic_param[UNMATCHED_FILTER_IDS] = []
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = []
+
+ dic_rlp = defaultdict(dict)
+ term_results = []
+ for term in terms:
+ # create dic_param for each term from original dic_param
+ term_dic_param = deepcopy(dic_param)
+ term_dic_param[TIME_CONDS] = [term]
+ term_dic_param[COMMON][START_DATE] = term[START_DATE]
+ term_dic_param[COMMON][START_TM] = term[START_TM]
+ term_dic_param[COMMON][END_DATE] = term[END_DATE]
+ term_dic_param[COMMON][END_TM] = term[END_TM]
+
+ # get data from database + visual setting from yaml
+ term_result = gen_graph(term_dic_param)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] += term_result.get(MATCHED_FILTER_IDS, [])
+ dic_param[UNMATCHED_FILTER_IDS] += term_result.get(UNMATCHED_FILTER_IDS, [])
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] += term_result.get(NOT_EXACT_MATCH_FILTER_IDS, [])
+ dic_param[ACTUAL_RECORD_NUMBER] = term_result.get(ACTUAL_RECORD_NUMBER, 0)
+
+ term_results.append(term_result)
+
+ # rpl_array_data
+ dic_rlp, is_graph_limited = transform_data_to_rlp(term_results, max_graph)
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited
+ dic_param[ARRAY_PLOTDATA] = [plotdata for dic_cat_exp in dic_rlp.values() for plotdata in dic_cat_exp.values()]
+
+ # calculate emd data
+ cal_emd_data(dic_param)
+ gen_rlp_kde(dic_param)
+
+ return dic_param
+
+
+def transform_data_to_rlp(term_results, max_graph=None):
+ is_graph_limited = False
+ dic_plots = defaultdict(dict)
+ count = 0
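+    # build one plot per (sensor, category group); once max_graph plots exist, skip the rest
+    # and mark the output as limited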
+ for term_result in term_results:
+ time_range = term_result[TIME_CONDS][0][START_DT] + 'Z' + ' | ' + term_result[TIME_CONDS][0][END_DT] + 'Z'
+ for dic_plot in term_result[ARRAY_PLOTDATA]:
+ selected_sensor = int(dic_plot[END_COL_ID])
+ array_y = dic_plot[ARRAY_Y]
+ sensor_name = dic_plot[END_COL_NAME]
+ proc_name = dic_plot[END_PROC_NAME]
+ group_name = dic_plot.get(CAT_EXP_BOX) or ''
+
+ if selected_sensor in dic_plots:
+ if group_name in dic_plots[selected_sensor]:
+ plotdata = dic_plots[selected_sensor][group_name]
+ else:
+ if max_graph and count >= max_graph:
+ is_graph_limited = True
+ continue
+
+ plotdata = gen_blank_rlp_plot(proc_name=proc_name, sensor_name=sensor_name, group_name=group_name)
+ dic_plots[selected_sensor][group_name] = plotdata
+ count += 1
+ else:
+ if max_graph and count >= max_graph:
+ is_graph_limited = True
+ continue
+
+ plotdata = gen_blank_rlp_plot(proc_name=proc_name, sensor_name=sensor_name, group_name=group_name)
+ dic_plots[selected_sensor][group_name] = plotdata
+ count += 1
+
+ plotdata[RL_DATA].extend(array_y)
+ plotdata[RL_GROUPS].extend([time_range] * len(array_y))
+ rlpdata = dict(array_x=array_y, cate_name=time_range)
+ plotdata[RL_RIDGELINES].append(rlpdata)
+
+ return dic_plots, is_graph_limited
+
+
+def merge_dict(dict1, dict2):
+ """ Merge dictionaries and keep values of common keys in list"""
+ if not dict1:
+ dict1 = {}
+ if not dict2:
+ dict2 = {}
+
+ dict3 = {**dict1, **dict2}
+ for key, value in dict3.items():
+ if key in dict1 and key in dict2:
+ if isinstance(value, str) or isinstance(value, int):
+ dict3[key] = value
+ elif isinstance(value, list):
+ dict3[key] = value + dict1[key]
+ else:
+ dict3[key] = merge_dict(value, dict1[key])
+ return dict3
+
+
+@log_execution_time()
+def get_data(trace, dic_param):
+ """get data from database
+
+    Arguments:
+        trace -- tracing configuration (yaml based)
+        dic_param {dict} -- graph parameters from the client
+
+    Returns:
+        (dic_data, times, actual_record_number, is_res_limited)
+ """
+ db_code = trace.proc_yaml.get_db_id(dic_param[COMMON][START_PROC])
+ is_efa = trace.db_yaml.get_etl_func(db_code)
+ compare_type = dic_param[COMMON][COMPARE_TYPE]
+
+ dic_cate_var = None
+ if is_efa:
+ # get all checked cols
+ dic_non_sensor = get_checked_cols(trace, dic_param)
+ else:
+ # get serials + date
+ dic_non_sensor = get_non_sensor_cols(trace, dic_param)
+ # get category var and val
+
+ if compare_type == RL_CATEGORY:
+ dic_cate_var = get_cate_var(dic_param)
+
+ # edit dic_param
+ edited_dic_param = gen_new_dic_param(dic_param, dic_non_sensor)
+
+ # cate var and val
+ if dic_cate_var:
+ edited_dic_param = gen_new_dic_param(edited_dic_param, dic_cate_var)
+
+ if compare_type == RL_CATEGORY:
+ edited_dic_param = add_cond_to_dic_param(edited_dic_param)
+
+ # get data from database
+ dic_data, times, _, _, actual_record_number, is_res_limited = get_data_from_db(trace, edited_dic_param)
+
+ return dic_data, times, actual_record_number, is_res_limited
+
+
+@log_execution_time()
+def customize_dict_param(dic_param):
+ """ Combine start_time, end_time, start_date, end_date into one object
+
+ Arguments:
+        dic_param {dict} -- graph parameters from the client
+ """
+ # end_proc
+ dic_end_procs = customize_dict_param_common(dic_param)
+ dic_param[COMMON][END_PROC] = dic_end_procs
+ dic_param[COMMON][GET02_VALS_SELECT] = list(dic_end_procs)
+
+ # time
+ start_dates = dic_param.get(COMMON).get(START_DATE)
+ start_times = dic_param.get(COMMON).get(START_TM)
+ end_dates = dic_param.get(COMMON).get(END_DATE)
+ end_times = dic_param.get(COMMON).get(END_TM)
+
+    if not isinstance(start_dates, (list, tuple)):
+ start_dates = [start_dates]
+ start_times = [start_times]
+ end_dates = [end_dates]
+ end_times = [end_times]
+
+ if start_dates and start_times and end_dates and end_times \
+ and len(start_dates) == len(start_times) == len(end_dates) == len(end_times):
+ names = [START_DATE, START_TM, END_DATE, END_TM]
+ lst_datetimes = [dict(zip(names, row)) for row in zip(start_dates, start_times, end_dates, end_times)]
+ for idx, time_cond in enumerate(lst_datetimes):
+ start_dt = start_of_minute(time_cond.get(START_DATE), time_cond.get(START_TM))
+ end_dt = end_of_minute(time_cond.get(END_DATE), time_cond.get(END_TM))
+ lst_datetimes[idx][START_DT] = start_dt
+ lst_datetimes[idx][END_DT] = end_dt
+ dic_param[TIME_CONDS] = lst_datetimes
+ else:
+ dic_param[TIME_CONDS] = []
+
+
+def convert_end_cols_to_array(dic_param):
+ end_col_alias = dic_param[COMMON][GET02_VALS_SELECT]
+ if type(end_col_alias) == str:
+ dic_param[COMMON][GET02_VALS_SELECT] = [end_col_alias]
+
+ from_end_col_alias = dic_param[ARRAY_FORMVAL][0][GET02_VALS_SELECT]
+ if type(from_end_col_alias) == str:
+ dic_param[ARRAY_FORMVAL][0][GET02_VALS_SELECT] = [from_end_col_alias]
+
+
+@log_execution_time()
+def split_data_by_cyclic_terms(dic_data, times, dic_param):
+ """split data by condition
+
+ Arguments:
+ data {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ # proc_name = dic_param[COMMON][START_PROC]
+ # proc_yaml = ProcConfigYaml()
+ # checked_cols = proc_yaml.get_checked_columns(proc_name)
+
+ proc_id = dic_param.common.start_proc
+ # cate_col = dic_param.common.cate_procs[0].col_ids[0]
+ # cates = sorted(set(dic_data[proc_id][cate_col]))
+ # end_cols = dic_param.array_formval[0].col_ids
+
+ cyclic_terms = dic_param.cyclic_terms
+ dic_output = {}
+
+ # end_col_alias = dic_param[ARRAY_FORMVAL][0][GET02_VALS_SELECT]
+ # end_col_alias = dic_param.array_formval[0].col_names
+ end_col_ids = dic_param.array_formval[0].col_ids
+ for end_col in end_col_ids:
+ # end_col = checked_cols.get(end_col)[YAML_COL_NAMES]
+ # end_col_name = end_col_alias[k]
+ dic_output[end_col] = {term: [] for term in cyclic_terms}
+
+ for idx, t_cycle_time in enumerate(times):
+ for end_col in end_col_ids:
+ # end_col = checked_cols.get(end_col)[YAML_COL_NAMES]
+ # end_col_name = end_col_alias[k]
+ end_col_data = dic_data[proc_id][end_col]
+ for cyclic_term in cyclic_terms:
+ if cyclic_term[0] <= t_cycle_time <= cyclic_term[1]:
+ dic_output[end_col][cyclic_term].append(end_col_data[idx])
+
+ return dic_output
+
+
+# @log_execution_time()
+# def split_data_by_condition(dic_data, graph_param: DicParam):
+# """split data by condition
+#
+# Arguments:
+# data {[type]} -- [description]
+#
+# Returns:
+# [type] -- [description]
+# """
+# proc_id = graph_param.common.start_proc
+# cate_col = graph_param.common.cate_procs[0].col_ids[0]
+# no_null_data_set = set([x for x in set(dic_data[proc_id][cate_col]) if x is not None])
+# cates = sorted(no_null_data_set)
+# dic_output = {}
+#
+# end_cols = graph_param.array_formval[0].col_ids
+# for end_col in end_cols:
+# dic_output[end_col] = {cate: [] for cate in cates}
+#
+# for cate_val, end_val in zip(dic_data[proc_id][cate_col], dic_data[proc_id][end_col]):
+# if cate_val is not None:
+# dic_output[end_col][cate_val].append(end_val)
+#
+# return dic_output
+
+
+def get_cate_var(dic_param):
+ cate_vars = dic_param[COMMON].get(f'{CATE_VARIABLE}1', [])
+ if not isinstance(cate_vars, (list, tuple)):
+ cate_vars = [cate_vars]
+
+ return {dic_param[COMMON][START_PROC]: cate_vars}
+
+
+def add_cond_to_dic_param(dic_param):
+ cate_var = dic_param[COMMON].get(f'{CATE_VARIABLE}1')
+ cate_vals = dic_param[COMMON].get(f'{CATE_VALUE_MULTI}1', [])
+ if not isinstance(cate_vals, (list, tuple)):
+ cate_vals = [cate_vals]
+
+ edited_dic_param = deepcopy(dic_param)
+
+ # start proc
+ proc_name = dic_param[COMMON][START_PROC]
+
+ cond_proc = None
+ for ele in edited_dic_param[COMMON][COND_PROCS]:
+ if ele[COND_PROC] == proc_name:
+ cond_proc = ele
+ break
+
+ if cond_proc:
+ cond_proc.update({cate_var: cate_vals})
+ else:
+ cond_proc = {COND_PROC: proc_name, cate_var: cate_vals}
+ edited_dic_param[COMMON][COND_PROCS].append(cond_proc)
+
+ return edited_dic_param
+
+
+@log_execution_time()
+def cal_emd_data(dic_param):
+ array_plotdatas = dic_param.get(ARRAY_PLOTDATA) or {}
+ num_bins = 100
+ emds = []
+ for sensor_dat in array_plotdatas:
+ data = sensor_dat[RL_DATA]
+ if not len(data):
+ continue
+
+ group_ids = sensor_dat[RL_GROUPS]
+
+ # convert to dataframe
+ dic_emds = {RL_GROUPS: group_ids, 'data': data}
+ df = pd.DataFrame(dic_emds)
+
+ # dropna before calc emd
+ df = df.replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan)).dropna()
+ group_ids = df[RL_GROUPS]
+
+ # revert original dataframe without groups
+ df.drop(RL_GROUPS, inplace=True, axis=1)
+ emd_stacked_without_nan = df.to_numpy()
+ emd_array = calc_emd_for_ridgeline(emd_stacked_without_nan, np.array(group_ids), num_bins)
+
+ emds.append(np.stack(emd_array, axis=-1).tolist()[0])
+
+ dic_param[RL_EMD] = emds
+
+
+@log_execution_time()
+def calc_emd_for_ridgeline(data, group_id, num_bins, signed=True, diff=False):
+ """
+    Calculate Earth Mover's Distance (EMD) for each sensor's data
+
+ Inputs:
+ data [2d numpy array]
+ group_id [1d numpy array]
+ num_bins [integer]
+ signed [boolean] if True, return EMD without taking np.abs()
+ diff [boolean] if True, calculates EMD based on the diff of 1-step
+ Returns:
+ emd_mat [2d numpy array] (group_id x num of sensors)
+ """
+
+    # in case data is a 1d array (only 1 sensor selected)
+ if len(data.shape) == 1:
+ data = data.reshape(-1, 1)
+
+ num_groups = len(np.unique(group_id))
+ num_sensors = data.shape[1]
+ emd_mat = np.zeros((num_groups, num_sensors))
+
+ # calculate emd sequence in each sensor
+ for sensor in np.arange(num_sensors):
+ dens_mat = np.zeros((num_groups, num_bins))
+ x = data[:, sensor]
+
+ # generate bins for histograms
+ x_wo_none = x[x != None]
+ group_id_wo_none = np.delete(group_id, np.where(x == None))
+
+ x_min = np.nanmin(x_wo_none)
+ x_max = np.nanmax(x_wo_none)
+ # in case of 0 standard deviation
+ if x_min == x_max:
+ x_min -= 4
+ x_max += 4
+ bins = np.linspace(x_min, x_max, num=num_bins + 1)
+
+ # histogram for all group_ids
+ for g, grp in enumerate(np.unique(group_id_wo_none)):
+ bin_count, _ = np.histogram(x_wo_none[group_id_wo_none == grp], bins=bins)
+ dens_mat[g, :] = bin_count / np.sum(bin_count)
+
+ # reference density (first density or previous density)
+ if diff:
+ ref_density = np.vstack([dens_mat[0, :], dens_mat[:(num_groups - 1), :]])
+ else:
+ ref_density = np.tile(dens_mat[0, :], (num_groups, 1))
+
+ # calculate emd (matrix multiplication form)
+ if signed:
+ emd = (dens_mat - ref_density) @ np.arange(1, num_bins + 1).reshape(-1, 1)
+ else:
+ emd = np.zeros(num_groups)
+ for g, _ in enumerate(np.unique(group_id_wo_none)):
+ # exact 1D EMD
+ # https://en.wikipedia.org/wiki/Earth_mover%27s_distance#Computing_the_EMD
+ emd_1d = np.zeros(num_bins + 1)
+ for bin_idx in range(1, num_bins + 1):
+ emd_1d[bin_idx] = ref_density[g, bin_idx - 1] - dens_mat[g, bin_idx - 1] + emd_1d[bin_idx - 1]
+ emd[g] = np.sum(np.abs(emd_1d))
+
+ # scale emd to have original unit
+ emd = emd / (num_bins - 1) * (x_max - x_min)
+ emd_mat[:, sensor] = emd.reshape(-1)
+
+ return emd_mat
+
+
+@log_execution_time()
+def gen_cyclic_term_plotdata(dic_data, dic_param, max_graph=None):
+ is_graph_limited = False
+ plotdatas = []
+ sensors = dic_param[COMMON][GET02_VALS_SELECT]
+
+ for k, sensor in enumerate(sensors):
+ if max_graph and len(plotdatas) >= max_graph:
+ plotdatas = plotdatas[:max_graph]
+ is_graph_limited = True
+ break
+
+ dic_group_by_cat = {}
+ for dic_plot in dic_data[int(sensor)]:
+ array_y = dic_plot[ARRAY_Y]
+ if array_y:
+ term_obj = dic_param[TIME_CONDS][dic_plot['term_id']]
+ cate_name_str = f'{term_obj[START_DT]} | {term_obj[END_DT]}'
+ group_name = dic_plot.get(CAT_EXP_BOX) or ''
+
+ if group_name in dic_group_by_cat:
+ plotdata = dic_group_by_cat[group_name]
+ else:
+ plotdata = gen_blank_rlp_plot(proc_name=dic_plot[END_PROC_NAME], sensor_name=dic_plot[END_COL_NAME],
+ group_name=group_name)
+ dic_group_by_cat[group_name] = plotdata
+
+ plotdata[RL_DATA].extend(array_y)
+ plotdata[RL_GROUPS].extend([cate_name_str] * len(array_y))
+ rlpdata = dict(array_x=array_y, cate_name=cate_name_str)
+ plotdata[RL_RIDGELINES].append(rlpdata)
+
+ if dic_group_by_cat:
+ plotdatas += list(dic_group_by_cat.values())
+ return plotdatas, is_graph_limited
+
+
+@log_execution_time()
+def gen_custom_plotdata(dic_data, sensors):
+ plotdatas = []
+ dic_procs, dic_cols = get_cfg_proc_col_info(sensors)
+ for sensor_id in sensors:
+ cfg_col: CfgProcessColumn = dic_cols[sensor_id]
+ cfg_proc: CfgProcess = dic_procs[cfg_col.process_id]
+
+ plotdata = gen_blank_rlp_plot(proc_name=cfg_proc.name, sensor_name=cfg_col.name)
+ for cate_name, dic_plot in dic_data[sensor_id].items():
+ array_y = dic_plot[ARRAY_Y]
+ plotdata[RL_DATA].extend(array_y)
+ plotdata[RL_GROUPS].extend([cate_name] * len(array_y))
+ rlpdata = dict(array_x=array_y, cate_name=cate_name)
+ plotdata[RL_RIDGELINES].append(rlpdata)
+ plotdatas.append(plotdata)
+ return plotdatas
+
+
+def get_checked_cols(trace, dic_param):
+ dic_header = {}
+ for proc in dic_param[ARRAY_FORMVAL]:
+ proc_name = proc[END_PROC]
+ end_cols = proc[GET02_VALS_SELECT]
+ if isinstance(end_cols, str):
+ end_cols = [end_cols]
+
+ checked_cols = trace.proc_yaml.get_checked_columns(proc_name)
+ cols = []
+ for col, col_detail in checked_cols.items():
+ data_type = col_detail[YAML_DATA_TYPES]
+ # alias_name = col_detail[YAML_ALIASES]
+ if data_type == DataType.REAL.name or col in end_cols:
+ continue
+
+ cols.append(col)
+
+ dic_header[proc_name] = cols
+ return dic_header
+
+
+@log_execution_time()
+@notify_progress(75)
+def csv_export_dispatch(dic_param):
+ proc_name = dic_param.get(COMMON).get(END_PROC)
+ time_conds = dic_param.get(TIME_CONDS)
+ compare_type = dic_param.get(COMMON).get(COMPARE_TYPE)
+
+ if not proc_name or not time_conds:
+ return False
+
+ # convert to array to query data for many sensors
+ convert_end_cols_to_array(dic_param)
+ cate_var = None
+ if compare_type == RL_CATEGORY:
+ cate_var = dic_param[COMMON].get(f'{CATE_VARIABLE}1')
+ if not cate_var:
+ return False
+
+ if isinstance(cate_var, (list, tuple)):
+ cate_var = cate_var[0]
+
+ dic_param = gen_trace_data_by_categorical_var(dic_param)
+ if compare_type == RL_CYCLIC_TERM:
+ cate_var = RL_PERIOD
+ dic_param = gen_trace_data_by_cyclic(dic_param)
+ elif compare_type == RL_DIRECT_TERM:
+ cate_var = RL_PERIOD
+ dic_param = gen_rlp_data_by_term(dic_param)
+
+ # cate name for emd
+ cate_vals = [dic_ridge[RL_CATE_NAME] for dic_ridge in dic_param[ARRAY_PLOTDATA][0][RL_RIDGELINES]]
+ dic_data = {proc_name: {cate_var: cate_vals, **dict(zip(dic_param[COMMON][GET02_VALS_SELECT], dic_param[RL_EMD]))}}
+
+ return dic_data
+
+
+@log_execution_time()
+def gen_csv_data(dic_param, dic_data, sensors, client_tz, delimiter=None): # get the most cover flows
+ """tracing data to show csv
+ 1 start point x n end point
+ filter by condition points that between start point and end_point
+ """
+
+ if delimiter:
+ csv_data = to_csv(dic_param, dic_data, sensors, client_tz, delimiter=delimiter)
+ else:
+ csv_data = to_csv(dic_param, dic_data, sensors, client_tz)
+
+ return csv_data
+
+
+@log_execution_time()
+def to_csv(dic_param, dic_data, sensors, client_tz=None, newline='\n', delimiter=','):
+ """generate csv export string
+
+    Arguments:
+        dic_param {dict} -- graph parameters
+        dic_data {dict} -- export data
+        sensors {list} -- selected sensor column ids
+
+    Keyword Arguments:
+        client_tz -- client timezone name (default: {None})
+        newline {str} -- line separator (default: {'\n'})
+        delimiter {str} -- field separator (default: {','})
+
+    Returns:
+        str -- csv content
+ """
+ out_str = ''
+
+ graph_param = category_bind_dic_param_to_class(dic_param)
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # get columns
+ cols = []
+ for proc_id, data in dic_data.items():
+ # get master name of proc
+ proc_cfg: CfgProcess = dic_proc_cfgs[int(proc_id)]
+ proc_master_name = proc_cfg.name
+
+ cols = []
+ for col in data:
+ if col == RL_PERIOD:
+ dt_frm, dt_to = col.split('|')
+ cols.append(f'{proc_cfg.name}|{dt_frm}')
+ cols.append(f'{proc_cfg.name}|{dt_to}')
+ else:
+ column = CfgProcessColumn.query.get(int(col))
+ show_col = column.name
+ if col in sensors:
+ show_col += '|emd'
+
+ # col_name = CfgProcessColumn.get_by_col_name(proc_id,col)
+ cols.append(f'{proc_cfg.name}|{show_col}')
+
+ out_str += delimiter.join(cols)
+ out_str += newline
+
+ # get rows
+ merged_rows = []
+ for proc_id, proc_values in dic_data.items():
+ for col_name, col_values in proc_values.items():
+ if col_name == RL_PERIOD:
+ # get client timezone
+ client_timezone = pytz.timezone(client_tz) if client_tz else tz.tzlocal()
+ # client_timezone = tz.gettz(client_tz or None) or tz.tzlocal()
+
+ arr_from = []
+ arr_to = []
+ for val in col_values:
+ dt_frm, dt_to = val.split('|')
+ arr_from.append(convert_dt_str_to_timezone(client_timezone, dt_frm))
+ arr_to.append(convert_dt_str_to_timezone(client_timezone, dt_to))
+
+ merged_rows.append(arr_from)
+ merged_rows.append(arr_to)
+ else:
+ merged_rows.append(col_values)
+
+ for row in zip(*merged_rows):
+ if row[0] == '':
+ continue
+ out_str += delimiter.join([str(i) for i in row])
+ out_str += newline
+
+ return out_str
+
+
+@log_execution_time()
+def gen_rlp_kde(dic_param):
+ # retrieve the ridge-lines from array_plotdata
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+ for _, plotdata in enumerate(array_plotdata):
+ # plotdata[RL_KDE] = {}
+ plotdata_rlp = plotdata.get(RL_RIDGELINES)
+
+ bounds = get_bound(plotdata_rlp)
+ grid_points = get_grid_points(plotdata_rlp, bounds=bounds)
+ for num, ridgeline in enumerate(plotdata_rlp):
+ array_x = ridgeline.get(ARRAY_X)
+ ridgeline[RL_KDE] = calculate_kde_for_ridgeline(array_x, grid_points, height=3)
+
+ res = transform_rlp_kde(dic_param)
+ return res
+
+
+@log_execution_time()
+def transform_rlp_kde(dic_param):
+ default_hist_bins = 128
+ # retrieve the ridge-lines from array_plotdata
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+
+ # scale ratio from the maximum value of RLP chart's x-axis,
+ # RLP line height, default is 2% chart
+ scale_ratio = 0.02
+
+ for _, plotdata in enumerate(array_plotdata):
+ plotdata_rlp = plotdata.get(RL_RIDGELINES)
+
+ start_value = 0.1
+ # calculate the step value between 2 line
+ total_lines = len(plotdata_rlp)
+
+ if total_lines > 1:
+ line_steps = 1 / (total_lines - 1)
+ else:
+            # if the data has only one ridge line, it is drawn from x=0.1 on the x-axis
+ line_steps = 0.1
+
+ plotdata[RL_XAXIS] = []
+ # distinct groups
+ plotdata[RL_CATES] = list(dict.fromkeys(plotdata[RL_GROUPS]))
+ # plotdata['categories'] = distinct_rlp_groups(plotdata['groups'])
+ rlp_range_min = []
+ rlp_range_max = []
+
+ # get max value from kde, use to make new xaxis range
+ max_kde_list = []
+ tmp_histlabel = []
+ for num, ridgeline in enumerate(plotdata_rlp):
+ # calculate trans value from start_value and line_steps
+ trans_val = start_value + (num * line_steps)
+ kde_data = ridgeline.get(RL_KDE)
+
+ if kde_data[RL_DEN_VAL]:
+ max_value = max(kde_data[RL_DEN_VAL]) + trans_val
+ max_kde_list.append(max_value)
+
+ if len(kde_data[RL_HIST_LABELS]) > 1:
+ tmp_histlabel = kde_data[RL_HIST_LABELS]
+
+ for num, ridgeline in enumerate(plotdata_rlp):
+ kde_data = ridgeline.get(RL_KDE)
+
+ # calculate trans value from start_value and line_steps
+ trans_val = start_value + (num * line_steps)
+ trans_val_list = [trans_val] * len(kde_data[RL_DEN_VAL])
+ trans_obj = {RL_ORG_DEN: kde_data[RL_DEN_VAL], RL_TRANS_VAL: trans_val_list}
+ trans_val_df = pd.DataFrame(trans_obj)
+
+ # divide by the maximum density value, unless the maximum is 0
+ max_den_val = trans_val_df[RL_ORG_DEN].max()
+ if max_den_val:
+ trans_kde_val = trans_val_df[RL_ORG_DEN] / max_den_val
+ else:
+ trans_kde_val = trans_val_df[RL_ORG_DEN]
+
+ # convert to new value with line by steps and scale ratio
+ new_kde_df = (trans_kde_val * scale_ratio) + trans_val_df[RL_TRANS_VAL]
+ new_kde_val = new_kde_df.to_list()
+
+ ridgeline[RL_TRANS_DEN] = new_kde_val
+ if len(new_kde_val) == 1:
+ ridgeline[RL_TRANS_DEN] = trans_val_list * default_hist_bins
+ ridgeline[RL_KDE][RL_HIST_LABELS] = tmp_histlabel * default_hist_bins
+
+ plotdata[RL_XAXIS].append(trans_val)
+
+ # get min/max range from numpy array kde_data
+ if kde_data[RL_DEN_VAL]:
+ if kde_data[RL_HIST_LABELS]:
+ rlp_range_min.append(min(kde_data[RL_HIST_LABELS]))
+ rlp_range_max.append(max(kde_data[RL_HIST_LABELS]))
+
+ # delete unused params in the ridgeline node
+ ridgeline[RL_DATA_COUNTS] = len(ridgeline[ARRAY_X])
+ del ridgeline[ARRAY_X]
+ del ridgeline[RL_KDE][RL_HIST_COUNTS]
+ del ridgeline[RL_KDE][RL_DEN_VAL]
+ if rlp_range_min:
+ rlp_yaxis_min = round(min(rlp_range_min)) if len(rlp_range_min) > 1 else round(rlp_range_min[0])
+ else:
+ rlp_yaxis_min = 0
+
+ if rlp_range_max:
+ rlp_yaxis_max = round(max(rlp_range_max)) if len(rlp_range_max) > 1 else round(rlp_range_max[0])
+ else:
+ rlp_yaxis_max = 0
+ plotdata[RL_YAXIS] = [rlp_yaxis_min, rlp_yaxis_max]
+
+ # delete groups
+ del plotdata[RL_GROUPS]
+
+ return dic_param
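Each ridgeline is placed at a baseline offset trans_val = start_value + num * line_steps along the new x-axis, and its density is normalized and compressed to scale_ratio (2% of the chart) above that baseline. A short numeric illustration with assumed values:

```python
# Numeric illustration of the baseline offset and 2% scaling (assumed sample values).
import numpy as np

start_value, scale_ratio = 0.1, 0.02
total_lines = 5
line_steps = 1 / (total_lines - 1)              # 0.25 between neighbouring ridgelines

num = 2                                         # the third ridgeline
trans_val = start_value + num * line_steps      # baseline at x = 0.6

density = np.array([0.0, 0.4, 1.6, 0.8, 0.1])   # raw KDE values for this ridgeline
scaled = (density / density.max()) * scale_ratio + trans_val
print(trans_val)                                # 0.6
print(scaled)                                   # values between 0.6 and 0.62 (baseline + at most 2% of the axis)
```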
+
+
+def distinct_rlp_groups(groups):
+ unique_groups = []
+ for group_name in groups:
+ if group_name not in unique_groups:
+ unique_groups.append(group_name)
+ return unique_groups
+
+
+def merge_multiple_dic_params(dic_params):
+ if len(dic_params) > 1:
+ merged_dic_params = merge_dict(*dic_params)
+ return merged_dic_params
+ return dic_params[0]
+
+
+def gen_blank_rlp_plot(proc_name='', sensor_name='', group_name=''):
+ return {RL_DATA: [], RL_GROUPS: [], RL_RIDGELINES: [], RL_SENSOR_NAME: sensor_name, RL_PROC_NAME: proc_name,
+ CAT_EXP_BOX: group_name}
diff --git a/histview2/api/sankey_plot/controllers.py b/histview2/api/sankey_plot/controllers.py
new file mode 100644
index 0000000..f5880e9
--- /dev/null
+++ b/histview2/api/sankey_plot/controllers.py
@@ -0,0 +1,59 @@
+import json
+import timeit
+
+from flask import Blueprint, request
+
+from histview2.api.ridgeline_plot.services \
+ import customize_dict_param, convert_end_cols_to_array
+from histview2.api.sankey_plot.sankey_glasso.sankey_services import gen_graph_sankey_group_lasso
+from histview2.common.services import http_content
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import save_input_data_to_file, EventType
+from histview2.common.yaml_utils import *
+
+api_sankey_plot_blueprint = Blueprint(
+ 'api_sankey_plot',
+ __name__,
+ url_prefix='/histview2/api/skd'
+)
+
+
+@api_sankey_plot_blueprint.route('/index', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+
+ # save dic_form to pickle (for future debug)
+ save_input_data_to_file(dic_form, EventType.SKD)
+
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ customize_dict_param(dic_param)
+
+ proc_name = dic_param.get(COMMON).get(END_PROC)
+ time_conds = dic_param.get(TIME_CONDS)
+
+ if not proc_name or not time_conds:
+ return {}, 200
+
+ # convert to array to query data for many sensors
+ convert_end_cols_to_array(dic_param)
+
+ dic_param = gen_graph_sankey_group_lasso(dic_param)
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ out_dict = json.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial)
+ return out_dict, 200
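The endpoint above accepts the form-encoded GUI parameters via POST and returns the Sankey payload as a JSON string. A hedged sketch of calling it with requests; the base URL and form keys below are placeholders, not values defined by this patch:

```python
# Hypothetical client call; BASE_URL and the form keys are placeholders, not part of this patch.
import requests

BASE_URL = 'http://localhost:5000'              # depends on how the app is deployed
resp = requests.post(
    f'{BASE_URL}/histview2/api/skd/index',
    data={'end_proc1': '1', 'START_DATE': '2021-01-01'},   # placeholder GUI form fields
)
print(resp.status_code)
dic_param = resp.json()                         # payload produced by gen_graph_sankey_group_lasso
print(dic_param.get('backend_time'))
```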
diff --git a/histview2/api/sankey_plot/sankey_glasso/glasso.py b/histview2/api/sankey_plot/sankey_glasso/glasso.py
new file mode 100644
index 0000000..b172a37
--- /dev/null
+++ b/histview2/api/sankey_plot/sankey_glasso/glasso.py
@@ -0,0 +1,222 @@
+
+import numpy as np
+from sklearn.covariance import graphical_lasso, empirical_covariance
+from sklearn.preprocessing import StandardScaler
+
+
+class GaussianGraphicalModel():
+ '''
+ Calculate sparse partial correlation matrix with GraphicalLASSO
+ This implementation is not standard usage;
+ the penalty factor alpha is selected automatically based on the
+ number of variables directly connected with the target variable(s).
+
+ Starting from a high alpha and decreasing, the first (i.e. largest) alpha for which
+ every target has at least `num_directs` directly connected variables is selected.
+
+ For example, assume we have 2 targets and `num_directs` is set to 3.
+ We search for an alpha that gives at least 3 directly connected variables for both targets.
+
+ By restricting the number of connections,
+ we seek to obtain more interpretable results.
+ Tuning based on Extended BIC made it difficult to obtain the desired results.
+
+ Parameters:
+ ----------
+ alpha (optional): float
+ Shrinkage parameter alpha. A higher value gives a sparser result.
+ If this value is specified, no search is performed and the given value is used as-is.
+
+ num_directs (optional): int
+ Objective number of directly connected variables of target(s).
+ If not given, this will be automatically set.
+
+ Attributes:
+ ----------
+ scaler: StandardScaler object
+ Used for anomaly detection (when we want to use results for new data)
+
+ parcor: NumpyArray of shape (X.shape[1], X.shape[1])
+ Partial correlation matrix
+ '''
+
+ def __init__(self, alpha=None, num_directs=None):
+ self.alpha = alpha
+ self.num_directs = num_directs
+ self.scaler = None
+ self.pmat = None
+ self.parcor = None
+
+ def fit(self, X, idx_tgt):
+ '''
+ Fit GraphicalLASSO
+
+ Inputs:
+ ----------
+ X: 2d NumpyArray or pandas dataframe of size (sample_size, num_sensors)
+ sensor data
+ idx_tgt: list
+ column index of target variable(s)
+ '''
+
+ # scaling
+ scaler = StandardScaler().fit(X)
+ X = scaler.transform(X)
+
+ # covariance matrix
+ emp_cov = empirical_covariance(X)
+
+ # glasso (if alpha is given, force to use it)
+ if self.alpha is None:
+ parcor = self._calc_parcor_tuned(emp_cov, idx_tgt)
+ else:
+ parcor = self._calc_parcor(emp_cov, self.alpha)
+
+
+ self.scaler = scaler
+ self.parcor = parcor
+
+ def _calc_parcor(self, emp_cov, alpha):
+ '''
+ Calculate sparse partial correlation matrix with glasso
+
+ Inputs:
+ ----------
+ emp_cov: NumpyArray of shape (X.shape[1], X.shape[1])
+ alpha: float
+
+ Returns:
+ ----------
+ parcor: [NumpyArray] of shape (X.shape[1], X.shape[1])
+ '''
+ # precision matrix
+ pmat = graphical_lasso(emp_cov, alpha)[1]
+ # convert the precision matrix to a partial correlation matrix
+ parcor = - pmat / (np.sqrt(np.diag(pmat)).reshape(-1, 1) @ np.sqrt(np.diag(pmat)).reshape(1, -1))
+ # no self-loops
+ np.fill_diagonal(parcor, 0.0)
+ return parcor
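The conversion above follows parcor_ij = -p_ij / sqrt(p_ii * p_jj) applied to the precision matrix p, with the diagonal zeroed to remove self-loops. A tiny numeric check of that formula:

```python
# Tiny numeric check of the precision-matrix -> partial-correlation conversion above.
import numpy as np

pmat = np.array([[2.0, -0.8],
                 [-0.8, 1.0]])
d = np.sqrt(np.diag(pmat)).reshape(-1, 1)
parcor = -pmat / (d @ d.T)
np.fill_diagonal(parcor, 0.0)
print(np.round(parcor, 3))
# off-diagonal: -(-0.8) / sqrt(2 * 1) ~= 0.566 -> a positive partial correlation
```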
+
+ def _calc_parcor_tuned(self, emp_cov, idx_tgt):
+ '''
+ Automatic tuning of alpha based on the number of variables directly connected to the target variable(s).
+ Search from high alpha and stop once the number of connected variables reaches `num_directs`.
+
+ Inputs:
+ ----------
+ emp_cov: NumpyArray of shape (X.shape[1], X.shape[1])
+ idx_tgt: list
+
+ Returns:
+ ----------
+ parcor: [NumpyArray] of shape (X.shape[1], X.shape[1])
+ '''
+ # automatically set the target number of variables to extract
+ if self.num_directs is None:
+ num_sensors = emp_cov.shape[0]
+ num_targets = len(idx_tgt)
+ num_exploratory = num_sensors - num_targets
+ num_directs_cand = np.max([3, 1 + np.ceil(np.sqrt(num_targets))])
+ self.num_directs = int(np.min([num_exploratory, num_directs_cand]))
+
+ # search from high alpha
+ alphas = np.linspace(0.1, 1, 20)[::-1]
+
+ for i in range(len(alphas)):
+ # sparse partial correlation matrix
+ parcor = self._calc_parcor(emp_cov, alphas[i])
+ # count number of directly connected variables
+ num_dir_vars = self._count_direct_vars(parcor, idx_tgt)
+ # do we have enough variables?
+ if num_dir_vars >= self.num_directs:
+ print('Converged. alpha: {}, num_dir_vars: {}'.format(alphas[i], num_dir_vars))
+ break
+
+ self.alpha = alphas[i]
+ return parcor
+
+ def _count_direct_vars(self, parcor, idx_tgt):
+ '''
+ Calculate number of directly connected variables to targets.
+ If multiple targets are specified, return the minimum number of connections.
+
+ Inputs:
+ ----------
+ parcor: NumpyArray of shape (X.shape[1], X.shape[1])
+ idx_tgt: list
+
+ Returns:
+ ----------
+ int of number of directly connected variables
+ '''
+ num_directs = []
+ for i in range(len(idx_tgt)):
+ num_directs.append(len(np.where(np.abs(parcor[idx_tgt[i], :]) > 0)[0]))
+ return min(num_directs)
+
+
+def remove_unnecessary_edges(parcor, idx_tgt, num_layers=2):
+ '''
+ Remove unnecessary edges from partial correlation matrix
+
+ Inputs:
+ ----------
+ parcor : 2d NumpyArray
+ partial correlation matrix (num_cols x num_cols)
+
+ idx_tgt : list
+ index list of target column(s)
+
+ num_layers : int, default=2
+ number of layers (not including target variables) to extract
+
+ Returns:
+ ----------
+ adj_mat : 2d NumpyArray indicating the adjacency matrix.
+ This array is strictly upper triangular, and the
+ (j, i)-th element indicates the partial correlation between variables j and i.
+ '''
+
+ def extract_connected_idx(parcor, source_idx, from_idx):
+ ''' Extract index of directly connected nodes '''
+ idx_connected = []
+ for i in range(len(source_idx)):
+ idx_cand = np.where(np.abs(parcor[:, source_idx[i]]) > 0)[0]
+ idx_connected.extend(from_idx[idx_cand])
+ idx_connected = np.unique(idx_connected)
+ return idx_connected
+
+ def parcor2adj(parcor, dic_idx, num_layers):
+ ''' Convert partial correlation matrix to adjacency matrix '''
+ # remove all paths inside each layer (including target)
+ adj_mat = parcor.copy()
+ for i in range(num_layers + 1):
+ idx_curr_layer = dic_idx['idx_layer' + str(i)]
+ if len(idx_curr_layer) > 0:
+ adj_mat[np.ix_(idx_curr_layer, idx_curr_layer)] = 0.0
+
+ # remove all path from/to remaining nodes
+ if len(dic_idx['idx_remain']) > 0:
+ adj_mat[dic_idx['idx_remain'], :] = 0.0
+ adj_mat[:, dic_idx['idx_remain']] = 0.0
+
+ # we only need the upper triangular elements
+ adj_mat = np.triu(adj_mat)
+ return adj_mat
+
+ # recursively extract directly correlated variables starting from target variables
+ idx_all = np.arange(0, parcor.shape[0], 1)
+ dic_idx = {'idx_all': idx_all,
+ 'idx_layer0': idx_tgt,
+ 'idx_remain': np.setdiff1d(idx_all, idx_tgt)}
+
+ for i in range(num_layers):
+ parcor_ = parcor[dic_idx['idx_remain'], :]
+ prev_layer = 'idx_layer' + str(i)
+ curr_layer = 'idx_layer' + str(i + 1)
+ dic_idx[curr_layer] = extract_connected_idx(parcor_, dic_idx[prev_layer], dic_idx['idx_remain'])
+ dic_idx['idx_remain'] = np.setdiff1d(dic_idx['idx_remain'], dic_idx[curr_layer])
+
+ adj_mat = parcor2adj(parcor, dic_idx, num_layers)
+ return adj_mat
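Taken together, glasso.py fits the Gaussian graphical model and then prunes the partial-correlation matrix down to the layers reachable from the target column(s). A minimal sketch on synthetic data; treating column 0 as the target is an arbitrary choice made only for this illustration:

```python
# Minimal sketch on synthetic data; column 0 as the target is an arbitrary choice.
import numpy as np
from histview2.api.sankey_plot.sankey_glasso.glasso import GaussianGraphicalModel, remove_unnecessary_edges

rng = np.random.default_rng(0)
n, d = 500, 6
X = rng.normal(size=(n, d))
X[:, 0] = 0.7 * X[:, 1] + 0.3 * rng.normal(size=n)   # make column 0 depend on column 1

ggm = GaussianGraphicalModel()                       # alpha is tuned automatically
ggm.fit(X, idx_tgt=[0])
adj_mat = remove_unnecessary_edges(ggm.parcor, idx_tgt=[0], num_layers=2)
print(np.round(adj_mat, 2))                          # strictly upper-triangular adjacency matrix
```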
diff --git a/histview2/api/sankey_plot/sankey_glasso/grplasso.py b/histview2/api/sankey_plot/sankey_glasso/grplasso.py
new file mode 100644
index 0000000..687c42e
--- /dev/null
+++ b/histview2/api/sankey_plot/sankey_glasso/grplasso.py
@@ -0,0 +1,474 @@
+import numpy as np
+import colorsys
+from sklearn.linear_model import Ridge
+from sklearn.preprocessing import StandardScaler
+from group_lasso import GroupLasso
+
+# - preprocess_skdpage()
+# # group lasso
+# - calc_coef_and_group_order()
+# - fit_grplasso()
+# - calc_bic()
+# - determine_group_order()
+# - fit_ridge()
+# # generate dict for skd and barchart
+# - GroupSankeyDataProcessor
+
+
+def preprocess_skdpage(X,
+ y,
+ groups: list,
+ colnames_x: list,
+ colname_y: str,
+ penalty_factors=[0.0, 10.0, 20.0, 50.0],
+ max_datapoints=5000,
+ verbose=False):
+ """
+ Main function to generate data for SkD page (with group lasso)
+
+ Parameters
+ ----------
+ X : 2d NumpyArray
+ Explanatory variables.
+ y : 1d NumpyArray
+ Objective variable.
+ groups : list
+ A list of group names, one for each explanatory variable.
+ Example: ["process0", "process0", "process1", "process2", ...]
+ colnames_x : list
+ A list of sensor names of explanatory variables.
+ colname_y : str
+ The name of the objective variable.
+ penalty_factors : list
+ Regularization factors for group lasso.
+ max_datapoints : int
+ If X.shape[0] exceeds this value, take random samples from X to save time.
+ verbose : True/False
+ If True, print info
+
+ Returns
+ ----------
+ dic_skd : dict
+ A set of data used for sankey diagram
+ dic_bar : dict
+ A set of data used for barchart
+ """
+
+ # prepare group information
+ uniq_grps, idx_grps = np.unique(groups, return_inverse=True)
+ dic_groups = {'colnames_x': colnames_x,
+ 'colname_y': colname_y,
+ 'groups': groups, # group names (raw)
+ 'idx_grps': idx_grps, # group names (int)
+ 'uniq_grps': uniq_grps, # unique group names
+ 'num_grps': len(uniq_grps)} # number of unique groups
+
+ # resample data if exceed max_datapoints
+ if X.shape[0] > max_datapoints:
+ idx = np.random.choice(X.shape[0], size=max_datapoints, replace=False)
+ X = X[idx, :].copy()
+ y = y[idx].copy()
+ if verbose:
+ print("Number of data points exceeded {}. Data is automatically resampled. ".format(max_datapoints))
+
+ # group lasso and ridge regression
+ coef, group_order = calc_coef_and_group_order(X, y, dic_groups, penalty_factors, verbose=verbose)
+
+ # skd data
+ processor = GroupSankeyDataProcessor(coef, dic_groups, group_order, verbose=verbose)
+ dic_skd, dic_bar = processor.gen_dicts()
+ return dic_skd, dic_bar
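preprocess_skdpage is the entry point the SkD page uses: it groups sensors by process, runs group lasso with a ridge refit, and returns the Sankey and bar-chart dictionaries. A hedged sketch with synthetic data; the group and sensor names below are made up:

```python
# Hedged sketch with synthetic data; the group and sensor names are made up.
import numpy as np
from histview2.api.sankey_plot.sankey_glasso.grplasso import preprocess_skdpage

rng = np.random.default_rng(1)
n = 300
X = rng.normal(size=(n, 4))
y = 2.0 * X[:, 0] - 1.0 * X[:, 2] + 0.1 * rng.normal(size=n)

groups = ['proc_A', 'proc_A', 'proc_B', 'proc_B']                  # one group label per column of X
colnames_x = np.array(['sensor1', 'sensor2', 'sensor3', 'sensor4'])
dic_skd, dic_bar = preprocess_skdpage(X, y.reshape(-1, 1), groups, colnames_x, 'target',
                                      penalty_factors=[0.0, 0.1, 0.3, 1.0], verbose=False)
print(dic_skd['node_labels'])
print(dic_bar['coef'])
```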
+
+
+def calc_coef_and_group_order(X, y, dic_groups, penalty_factors=[0, 0.5, 1.0, 5.0], verbose=False):
+ """
+ Calculate connection strength from x to y, and importance order of groups.
+ If only single group is given, just fit with ridge regression.
+
+ Parameters
+ ----------
+ X : 2d NumpyArray
+ Explanatory variables.
+ y : 1d NumpyArray
+ Objective variable.
+ dic_groups : dict
+ A dictionary with group information
+ penalty_factors : list
+ Regularization factors for group lasso.
+ verbose : True/False
+
+ Returns
+ ----------
+ coef : 1d numpy array
+ regression coefficients.
+ group_order : list
+ A list of order of groups, where less important is on the left.
+ """
+
+ coef = np.zeros(X.shape[1])
+ group_order = np.arange(dic_groups["num_grps"])
+
+ X = StandardScaler().fit_transform(X)
+ y = StandardScaler().fit_transform(y)
+
+ # groups lasso (if 2 or more groups are given)
+ idx_for_ridge = np.arange(X.shape[1])
+ if dic_groups["num_grps"] > 1:
+ # fit group lasso with various penalty factors (no L1 penalty)
+ coef_history, bic = fit_grplasso(X, y.flatten(), dic_groups["idx_grps"], penalty_factors, verbose)
+ # determine order of groups
+ group_order = determine_group_order(coef_history, dic_groups)
+ # use selected columns for ridge regression
+ idx_for_ridge = np.where(np.abs(coef_history[np.argmin(bic), :]) > 0.0)[0]
+
+ # re-calculate coefficients with ridge regression
+ if len(idx_for_ridge) == 0:
+ # just in case when all columns are deleted
+ idx_for_ridge = np.arange(X.shape[1])
+ coef[idx_for_ridge] = fit_ridge(X[:, idx_for_ridge], y)
+
+ if verbose:
+ print("==========")
+ print('Group order: {}'.format(dic_groups["uniq_grps"][group_order]))
+ print('Index used for ridge: {}'.format(idx_for_ridge))
+ print('Coef: {}'.format(coef))
+
+ return coef, group_order
+
+
+def fit_grplasso(X, y, grps, penalty_factors=[0.01, 0.1, 1.0, 10.0, 100.0], verbose=False):
+ """
+ Fit group lasso in each penalty factor.
+
+ Parameters
+ ----------
+ X : 2d NumpyArray
+ Explanatory variables.
+ y : 1d NumpyArray
+ Objective variable.
+ grps : 1d NumpyArray
+ Group ID assigned to each explanatory variable.
+ penalty_factors : list
+ Group-regularization factors for group lasso.
+
+ Returns
+ ----------
+ coef_history: 2d NumpyArray
+ Regression coefficients. (len(penalty_factors) x X.shape[1]).
+ bic : 1d NumpyArray
+ BIC for each penalty factor. Smaller is better.
+ """
+
+ bic = np.empty(len(penalty_factors))
+ coef_history = np.empty((len(penalty_factors), X.shape[1]))
+
+ for i, rho in enumerate(penalty_factors):
+ if verbose:
+ print("==========")
+ print("Fitting with penalty: {}".format(rho))
+ gl = GroupLasso(
+ groups=grps,
+ group_reg=rho,
+ l1_reg=0.0,
+ frobenius_lipschitz=False,
+ scale_reg="inverse_group_size",
+ supress_warning=True,
+ n_iter=200,
+ tol=1e-2)
+ gl.fit(X, y)
+ coef_history[i, :] = gl.coef_.flatten()
+ bic[i] = calc_bic(gl.predict(X).flatten(), y, gl.coef_)
+ if verbose:
+ print("BIC={}".format(np.round(bic[i], 2)))
+ print("Number of dropped columns: {}".format(np.sum(coef_history[i, :] == 0.0)))
+
+ return coef_history, bic
+
+
+def calc_bic(y_est, y_true, coef):
+ """
+ Calculate the Bayesian Information Criterion (BIC).
+
+ Parameters
+ ----------
+ y_est : 1d NumpyArray
+ Predicted values.
+ y_true : 1d NumpyArray
+ Observed values.
+ coef : 1d NumpyArray
+ Coefficients of linear regression.
+
+ Returns
+ ----------
+ bic : float
+ Calculated BIC. Smaller is better.
+ """
+
+ # from sklearn
+ # https://github.com/scikit-learn/scikit-learn/blob/0d378913b/sklearn/linear_model/_least_angle.py#L1957
+ n_samples = len(y_est)
+ resid = y_est - y_true
+ mean_squared_error = np.mean(resid**2)
+ sigma2 = np.var(y_true)
+ eps64 = np.finfo("float64").eps
+ K = np.log(n_samples)
+ df = np.sum(np.abs(coef) > 0.0)
+ bic = n_samples * mean_squared_error / (sigma2 + eps64) + K * df
+ return bic
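calc_bic mirrors the sklearn LassoLarsIC-style criterion n * MSE / (sigma^2 + eps) + log(n) * df, where df is the number of non-zero coefficients. A quick numeric illustration with assumed values:

```python
# Quick numeric illustration of the BIC formula used above (assumed values).
import numpy as np

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_est = np.array([1.1, 1.9, 3.2, 3.8])
coef = np.array([0.9, 0.0, 0.4])                 # 2 non-zero coefficients -> df = 2

n = len(y_est)
mse = np.mean((y_est - y_true) ** 2)             # 0.025
sigma2 = np.var(y_true)                          # 1.25
bic = n * mse / (sigma2 + np.finfo('float64').eps) + np.log(n) * np.sum(np.abs(coef) > 0.0)
print(round(bic, 3))                             # ~2.853
```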
+
+
+def determine_group_order(coef_history, dic_groups):
+ """
+ Determine order of groups (from less important to important)
+
+ Parameters
+ ----------
+ coef_history: 2d NumpyArray
+ Regression coefficients. (len(penalty_factors) x X.shape[1]).
+ dic_groups : dict
+ A dictionary with group information
+
+ Returns
+ ----------
+ group_order : 1d NumpyArray
+ Order of groups, where less important is on the left.
+ """
+
+ group_order = []
+
+ num_groups = dic_groups["num_grps"]
+ num_penalties = coef_history.shape[0]
+
+ sum_coef_per_groups_old = np.zeros(num_groups)
+ # add to group_order if coefficients shrink to zero
+ for i in range(num_penalties):
+ abs_coef = np.abs(coef_history[i, :])
+ sum_coef_per_groups = np.bincount(dic_groups["idx_grps"], weights=abs_coef)
+ zero_coef_groups = np.where(sum_coef_per_groups == 0)[0]
+ new_zero_coef_groups = np.setdiff1d(zero_coef_groups, group_order)
+ ordered_new_zero_coef_groups = new_zero_coef_groups[np.argsort(sum_coef_per_groups_old[new_zero_coef_groups])]
+ group_order.extend(ordered_new_zero_coef_groups)
+ sum_coef_per_groups_old = sum_coef_per_groups
+
+ # add remaining groups (order by sum of coefficients)
+ if len(group_order) < num_groups:
+ remain_groups = np.setdiff1d(np.arange(num_groups), group_order)
+ idx_sort_desc = np.argsort(sum_coef_per_groups[remain_groups])[::-1]
+ remain_groups = remain_groups[idx_sort_desc]
+ group_order.extend(remain_groups)
+
+ return np.array(group_order)[::-1]
+
+
+def fit_ridge(X, y, alpha=0.1):
+ """
+ Fit ridge regression
+
+ Parameters
+ ----------
+ X : 2d NumpyArray
+ Explanatory variables.
+ y : 1d NumpyArray
+ Objective variable.
+ alpha: float
+ Regularization parameter for L2
+
+ Returns
+ ----------
+ coef : 1d numpy array
+ regression coefficients.
+ """
+
+ model = Ridge(alpha=alpha)
+ model.fit(X, y)
+ coef = model.coef_
+ return coef.flatten()
+
+
+class GroupSankeyDataProcessor():
+ def __init__(self,
+ coef,
+ dic_groups,
+ group_order,
+ color_y="lightgray",
+ color_link_positive='rgba(44, 160, 44, 0.4)', # green-like color
+ color_link_negative='rgba(214, 39, 40, 0.4)', # red-like color
+ limits_sensor={'xmin': 0.20, 'xmax': 0.00, 'ymin': 0.00, 'ymax': 1.00},
+ limits_groups={'xmin': 0.75, 'xmax': 0.40, 'ymin': 0.00, 'ymax': 1.00},
+ verbose=False
+ ):
+
+ # group info and coefficients
+ self.dic_groups = dic_groups
+ self.dic_groups["group_order"] = group_order
+ self.coef_raw = coef
+ self.coef_grps = np.bincount(self.dic_groups["idx_grps"], weights=np.abs(coef))
+ self.idx_grp_remained = np.where(np.abs(self.coef_grps) > 0.0)[0]
+ self.idx_col_remained = np.where(np.abs(coef) > 0.0)[0]
+ self.num_grp_remained = len(self.idx_grp_remained)
+ self.num_col_remained = len(self.idx_col_remained)
+ self.coef_remained = coef[self.idx_col_remained]
+
+ # parameters for visualization
+ self.color_y = color_y
+ self.color_link_positive = color_link_positive
+ self.color_link_negative = color_link_negative
+ self.limits_sensor = limits_sensor
+ self.limits_groups = limits_groups
+ self.verbose = verbose
+
+ def gen_dicts(self):
+ # generate dictionaries for skd and barchart
+ dic_skd = self._gen_sankey_data()
+ dic_bar = self._gen_barchart_data()
+ return dic_skd, dic_bar
+
+ def _gen_sankey_data(self):
+ # Sankey data. node positions are determined by group order.
+ # dictionary for sankey
+ self.dic_skd = {'node_labels': np.hstack([self.dic_groups["colnames_x"][self.idx_col_remained],
+ self.dic_groups["uniq_grps"][self.idx_grp_remained],
+ self.dic_groups["colname_y"]]),
+ 'source': [],
+ 'target': [],
+ 'node_color': [],
+ 'edge_value': [],
+ 'edge_color': []}
+
+ self._add_node_colors()
+ self._add_links_from_x_to_group()
+ self._add_links_from_group_to_y()
+ self._add_node_position()
+ return self.dic_skd
+
+ def _gen_barchart_data(self):
+ # Barchart data. y-axis corresponds to sankey diagram.
+ ord_sort = np.argsort(self.dic_skd['node_y'][:self.num_col_remained])
+ # ord_sort = np.concatenate([ord_sort[:np.sum(self.coef_raw == 0.0)], ord_sort[np.sum(self.coef_raw == 0.0):]])
+ colors = [self.color_link_negative if x < 0 else self.color_link_positive for x in self.coef_remained[ord_sort]]
+ dic_bar = {"coef": self.coef_remained[ord_sort],
+ "sensor_names": self.dic_groups["colnames_x"][self.idx_col_remained][ord_sort],
+ "bar_colors": colors}
+ return dic_bar
+
+ def _add_node_colors(self):
+ # Define node colors (x, groups, y)
+ palette = self._get_N_HexCol(len(self.dic_groups['uniq_grps']))
+ for i in self.idx_col_remained:
+ self.dic_skd["node_color"].append(palette[self._sensor_id_to_group_id(i)])
+ for i in self.idx_grp_remained:
+ self.dic_skd["node_color"].append(palette[i])
+ self.dic_skd["node_color"].append(self.color_y)
+
+ def _get_N_HexCol(self, N=5):
+ # Evenly spaced HSV-based color palette generator
+ # https://stackoverflow.com/questions/876853/generating-color-ranges-in-python
+ HSV_tuples = [(x * 1.0 / N, 0.5, 0.5) for x in range(N)]
+ hex_out = []
+ for rgb in HSV_tuples:
+ rgb = map(lambda x: int(x * 255), colorsys.hsv_to_rgb(*rgb))
+ hex_out.append('#%02x%02x%02x' % tuple(rgb))
+ return hex_out
+
+ def _add_links_from_x_to_group(self):
+ # Add links: x -> groups
+ edge_colors = [self.color_link_positive if x > 0 else self.color_link_negative for x in self.coef_remained]
+ for i in range(self.num_col_remained):
+ self.dic_skd['source'].append(i)
+ self.dic_skd['target'].append(self._sensor_node_id_to_group_node_id(i))
+ self.dic_skd['edge_value'].append(np.abs(self.coef_remained[i]))
+ self.dic_skd['edge_color'].append(edge_colors[i])
+
+ def _add_links_from_group_to_y(self):
+ # Add links: groups -> y
+ for i in range(self.num_grp_remained):
+ self.dic_skd['source'].append(self.num_col_remained + i)
+ self.dic_skd['target'].append(self.num_col_remained + self.num_grp_remained)
+ self.dic_skd['edge_value'].append(self.coef_grps[self.idx_grp_remained[i]])
+ self.dic_skd['edge_color'].append("#696969")
+
+ def _sensor_id_to_group_id(self, sensor_id):
+ group_id = self.dic_groups["idx_grps"][sensor_id]
+ return int(group_id)
+
+ def _sensor_id_to_sensor_node_id(self, sensor_id):
+ node_id = np.where(self.idx_col_remained == sensor_id)[0]
+ return int(node_id)
+
+ def _sensor_node_id_to_group_node_id(self, node_id):
+ group_id = self.dic_groups["idx_grps"][self.idx_col_remained[node_id]]
+ node_id = self._group_id_to_group_node_id(group_id)
+ return int(node_id)
+
+ def _group_id_to_group_node_id(self, group_id):
+ node_id = self.num_col_remained + np.where(self.idx_grp_remained == group_id)[0]
+ return int(node_id)
+
+ def _add_node_position(self):
+ # Add node positions
+ # Node positions of groups are generated according to the selection process of GroupLASSO.
+ # What makes this complicated is that we have to create a list whose length equals the number of nodes shown in the graph.
+ # Whether a node is shown or not is determined by the edge values.
+ num_nodes = len(self.dic_skd['node_labels'])
+ node_x = np.array([np.nan] * num_nodes)
+ node_y = np.array([np.nan] * num_nodes)
+
+ # generate node positions: groups
+ xvals_grp = np.linspace(self.limits_groups["xmin"], self.limits_groups["xmax"], self.num_grp_remained)
+ wt_groups = self.coef_grps
+ wt_sensor = np.abs(self.coef_raw)
+
+ groups_y = self.limits_groups["ymin"]
+ sensor_y = self.limits_sensor["ymin"]
+ cnt_grp = 0
+ # position of group nodes
+ for grp_idx in self.dic_groups["group_order"]:
+
+ if grp_idx not in self.idx_grp_remained:
+ continue
+
+ grp_name = self.dic_groups["uniq_grps"][grp_idx]
+ node_id = self._group_id_to_group_node_id(grp_idx)
+ node_label = self.dic_skd["node_labels"][node_id]
+ wt = np.max([0.05, wt_groups[grp_idx]])
+
+ node_x[node_id] = xvals_grp[cnt_grp]
+ node_y[node_id] = groups_y + (wt / 2)
+ groups_y += wt
+
+ # position of sensor nodes
+ idx_sensors_in_group = self.idx_col_remained[self.dic_groups['idx_grps'][self.idx_col_remained] == grp_idx]
+ for j in idx_sensors_in_group:
+ wt = wt_sensor[j]
+ if self.verbose:
+ print("j={}, name={}, wt={}".format(j, self.dic_skd["node_labels"][j], wt))
+ node_x[self._sensor_id_to_sensor_node_id(j)] = 0.05
+ node_y[self._sensor_id_to_sensor_node_id(j)] = sensor_y + (wt / 2)
+ sensor_y += wt
+ cnt_grp += 1
+
+ # normalize y positions
+ node_groups_on_graph = [self._group_id_to_group_node_id(x) for x in self.idx_grp_remained]
+ node_sensor_on_graph = [self._sensor_id_to_sensor_node_id(x) for x in self.idx_col_remained]
+ node_y[node_groups_on_graph] = node_y[node_groups_on_graph] / np.max(node_y[node_groups_on_graph])
+ node_y[node_sensor_on_graph] = node_y[node_sensor_on_graph] / np.max(node_y[node_sensor_on_graph])
+ if self.num_grp_remained == 1:
+ node_y[node_groups_on_graph] = 0.5
+ node_x[node_groups_on_graph] = 0.5
+
+ # position of objective variable
+ node_x[-1] = 0.90
+ node_y[-1] = 0.50
+ self.dic_skd['node_x'] = node_x
+ self.dic_skd['node_y'] = node_y
+
+ if self.verbose:
+ print('num_nodes: {}'.format(num_nodes))
+ print("Node x, y positions in Skd:\n{}".format(
+ np.vstack([self.dic_skd["node_labels"],
+ np.round(node_x, 2),
+ np.round(node_y, 2)]).T))
+
diff --git a/histview2/api/sankey_plot/sankey_glasso/sankey_services.py b/histview2/api/sankey_plot/sankey_glasso/sankey_services.py
new file mode 100644
index 0000000..88a843d
--- /dev/null
+++ b/histview2/api/sankey_plot/sankey_glasso/sankey_services.py
@@ -0,0 +1,287 @@
+# GraphicalLASSO is implemented in glasso.py
+# Let us define 2 more functions here
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+
+from histview2.api.sankey_plot.sankey_glasso import glasso
+from histview2.api.sankey_plot.sankey_glasso.grplasso import preprocess_skdpage
+from histview2.api.trace_data.services.time_series_chart import get_procs_in_dic_param, get_data_from_db, \
+ main_check_filter_detail_match_graph_data
+from histview2.common.common_utils import gen_sql_label, zero_variance
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.trace_data_log import TraceErrKey, EventType, EventAction, Target, trace_log
+from histview2.setting_module.models import CfgProcessColumn
+
+colors = [
+ '#ec654a', '#915558', '#02c39a', '#c3b281',
+ '#f15bb5', '#e3c20b', '#ab2f1e', '#024381',
+ '#750704', '#0d639d', '#db1168', '#4895ef',
+ '#00f5d4', '#037003', '#510ca0', '#ff7aa2',
+ '#985d02', '#642902', '#70e000', '#9c5fea',
+]
+
+
+@log_execution_time()
+def gen_sankeydata_from_adj(adj_mat):
+ '''
+ Helper function to generate source/target/value/color lists from adjacency matrix
+
+ Inputs:
+ ----------
+ adj_mat: NumpyArray of shape (num of sensors, num of sensors)
+ Adjacency matrix. This matrix is strictly upper triangular, and
+ the (j, i)-th element indicates the partial correlation between variables j and i
+
+ Returns:
+ ----------
+ see preprocess_for_sankey_glasso()
+ '''
+
+ # define colors for positive/negative correlation
+ color_positive = 'rgba(44, 160, 44, 0.2)' # green-like color
+ color_negative = 'rgba(214, 39, 40, 0.2)' # red-like color
+
+ d = adj_mat.shape[0]
+ source = []
+ target = []
+ value = []
+ color = []
+ for i in range(d):
+ for j in range(d):
+ source.append(i)
+ target.append(j)
+ value.append(np.abs(adj_mat[j, i])) # sankey can not handle negative valued links
+ color.append(color_positive if adj_mat[j, i] > 0 else color_negative)
+ return source, target, value, color
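gen_sankeydata_from_adj flattens the adjacency matrix into the parallel source/target/value/color lists a Sankey trace expects; every (i, j) pair is emitted, and zero-weight links simply carry value 0. A tiny worked example:

```python
# Tiny worked example of flattening an adjacency matrix into sankey link lists.
import numpy as np
from histview2.api.sankey_plot.sankey_glasso.sankey_services import gen_sankeydata_from_adj

adj_mat = np.array([[0.0, 0.5, 0.0],
                    [0.0, 0.0, -0.3],
                    [0.0, 0.0, 0.0]])
source, target, value, color = gen_sankeydata_from_adj(adj_mat)
print(list(zip(source, target, value))[:4])
# [(0, 0, 0.0), (0, 1, 0.0), (0, 2, 0.0), (1, 0, 0.5)]  -> the 0.5 link runs from node 1 to node 0
```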
+
+
+@log_execution_time()
+def preprocess_for_sankey_glasso(X, idx_target, num_layers=2):
+ '''
+ Preprocessing for Sankey Diagrams
+ Generate source/target/value/color lists from given sensor data
+
+ Inputs:
+ ----------
+ X: NumpyArray or pandas dataframe of shape (num of records, num of sensors)
+ sensor data
+
+ idx_target: list of integers
+ column index of target sensor(s)
+
+ num_layers: integer greater than 0. default=2
+ maximum number of layers to trace out from the target column(s)
+
+ Returns:
+ ----------
+ lists of integers to pass to sankey diagram (sankey diagram)
+ source, target, value, color
+ '''
+
+ # generate instance and fit glasso (large alpha returns more sparse result)
+ ggm = glasso.GaussianGraphicalModel()
+ ggm.fit(X, idx_target)
+
+ # extract column index of (target/direct/indirect) sensors,
+ # and remove unnecessary edges
+ adj_mat = glasso.remove_unnecessary_edges(ggm.parcor, idx_target, num_layers)
+
+ # convert data to pass to sankey
+ source, target, value, color = gen_sankeydata_from_adj(adj_mat)
+ return source, target, value, color
+
+
+@log_execution_time()
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.SKD, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_graph_sankey_group_lasso(dic_param):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition point
+ https://files.slack.com/files-pri/TJHPR9BN3-F01GG67J84C/image.pngnts that between start point and end_point
+ """
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add category
+ graph_param.add_cate_procs_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # get serials
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # sensors
+ dic_param['plotly_data'] = gen_plotly_data(dic_skd={}, dic_bar={})
+ if not df.empty:
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ dic_label_id, dic_id_name, dic_col_proc_id = get_sensors_objective_explanation(orig_graph_param)
+ df_sensors: pd.DataFrame = df[dic_label_id]
+ df_sensors = df_sensors.rename(columns=dic_label_id)
+ df_sensors, data_clean, errors = clean_input_data(df_sensors)
+ if data_clean and not errors:
+ # prepare column names and process names
+ y_id = graph_param.common.objective_var
+ y_col = (y_id, dic_id_name[y_id])
+ x_cols = {key: val for key, val in dic_id_name.items() if key != y_id}
+ groups = [dic_proc_cfgs.get(proc_id).name for key, proc_id in dic_col_proc_id.items() if key != y_id]
+
+ dic_skd, dic_bar = gen_sankey_grouplasso_plot_data(df_sensors, x_cols, y_col, groups)
+ dic_param['plotly_data'] = gen_plotly_data(dic_skd, dic_bar)
+ if errors:
+ dic_param['errors'] = errors
+
+ dic_param[DATA_SIZE] = df.memory_usage(deep=True).sum()
+ dic_param[IS_RES_LIMITED] = is_res_limited
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+ return dic_param
+
+
+def gen_sensor_headers(orig_graph_param):
+ target_sensor_ids = []
+ for proc in orig_graph_param.array_formval:
+ target_sensor_ids.extend(proc.col_ids)
+ return target_sensor_ids
+
+
+@log_execution_time()
+def clean_input_data(df: pd.DataFrame):
+ df = df.replace(dict.fromkeys([np.inf, -np.inf, np.nan], np.nan)).dropna(how='any')
+ print('shape: {}'.format(df.shape))
+ data_clean = True
+ errors = []
+ if zero_variance(df):
+ data_clean = False
+ errors.append(ErrorMsg.E_ZERO_VARIANCE.name)
+ if df.empty:
+ data_clean = False
+ errors.append(ErrorMsg.E_ALL_NA.name)
+
+ return df, data_clean, errors
+
+
+@log_execution_time()
+def gen_sankey_grouplasso_plot_data(df: pd.DataFrame, x_cols, y_col, groups):
+ # names
+ y_col_id, y_col_name = y_col
+ x_col_names = np.array(list(x_cols.values()))
+
+ # Inputs
+ x_2d = df[x_cols].values
+ y_1d = df[[y_col_id]].values
+
+ # please set verbose=False if info should not be printed
+ dic_skd, dic_bar = preprocess_skdpage(x_2d, y_1d, groups, x_col_names, y_col_name,
+ penalty_factors=[0.0, 0.1, 0.3, 1.0],
+ max_datapoints=10000,
+ verbose=True)
+
+ return dic_skd, dic_bar
+
+
+def gen_plotly_data(dic_skd: dict, dic_bar: dict):
+ return dict(
+ sankey_trace=plot_sankey_grplasso(defaultdict(list, dic_skd)),
+ bar_trace=plot_barchart_grplasso(defaultdict(list, dic_bar)),
+ )
+
+
+def plot_sankey_grplasso(dic_skd: defaultdict):
+ sankey_trace = dict(arrangement="snap",
+ node=dict(
+ pad=20,
+ thickness=20,
+ label=dic_skd["node_labels"],
+ color=dic_skd["node_color"],
+ x=dic_skd["node_x"],
+ y=dic_skd["node_y"]
+ ),
+ link=dict(
+ source=dic_skd["source"],
+ target=dic_skd["target"],
+ value=dic_skd["edge_value"],
+ color=dic_skd["edge_color"]
+ ))
+ return sankey_trace
+
+
+def plot_barchart_grplasso(dic_bar: defaultdict):
+ bar_trace = dict(
+ y=dic_bar["sensor_names"],
+ x=np.abs(dic_bar["coef"]),
+ name=None,
+ orientation="h",
+ marker_color=dic_bar["bar_colors"],
+ hovertemplate="%{text}",
+ text=np.round(dic_bar["coef"], 5)
+ )
+ return bar_trace
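gen_plotly_data returns plain dictionaries whose keys mirror the arguments of Plotly's Sankey and Bar traces; the platform renders them in the browser. As a hedged sketch for local inspection only, assuming the plotly package is available (this patch does not require it), the same structure can be fed to plotly.graph_objects with dummy values:

```python
# Hedged sketch: feeding the same dict structure to plotly.graph_objects with dummy values.
# The platform builds these figures in the browser; plotly is assumed here only for local inspection.
import plotly.graph_objects as go

sankey_trace = dict(arrangement='snap',
                    node=dict(pad=20, thickness=20,
                              label=['sensor1', 'proc_A', 'target'],
                              color=['#7f3f3f', '#3f7f3f', 'lightgray'],
                              x=[0.05, 0.6, 0.9], y=[0.5, 0.5, 0.5]),
                    link=dict(source=[0, 1], target=[1, 2],
                              value=[0.8, 0.8],
                              color=['rgba(44, 160, 44, 0.4)', '#696969']))
bar_trace = dict(y=['sensor1'], x=[0.8], orientation='h',
                 marker_color=['rgba(44, 160, 44, 0.4)'],
                 hovertemplate='%{text}', text=[0.8])

go.Figure(go.Sankey(**sankey_trace)).show()
go.Figure(go.Bar(**bar_trace)).show()
```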
+
+
+@log_execution_time()
+def gen_sankey_plot_data(x: pd.DataFrame, idx_tgt, num_layers, dic_label_id, dic_proc_cfgs, target_proc):
+ # preprocess
+ source, target, value, color = preprocess_for_sankey_glasso(x, idx_tgt, num_layers)
+
+ # sensor names are also required for sankey diagram
+ col_ids = [dic_label_id.get(c) for c in x.columns.values]
+ cols = CfgProcessColumn.get_by_ids(col_ids) or []
+ dic_cols = {col.id: '{} | {}'.format(dic_proc_cfgs.get(col.process_id).name, col.name) for col in cols}
+ node_labels = [dic_cols.get(col_id) for col_id in col_ids]
+ dic_proc_color = {}
+
+ for idx, proc_id in enumerate(dic_proc_cfgs.keys()):
+ dic_proc_color[proc_id] = SKD_TARGET_PROC_CLR if (proc_id == target_proc) else colors[idx % len(colors)]
+ dic_col_color = {col.id: dic_proc_color.get(col.process_id) for col in cols}
+ node_colors = [dic_col_color.get(col_id) for col_id in col_ids]
+
+ return {
+ 'source': source,
+ 'target': target,
+ 'value': value,
+ 'color': color,
+ 'node_labels': node_labels,
+ 'node_colors': node_colors,
+ }
+
+
+def get_sensors_objective_explanation(orig_graph_param):
+ dic_label_id = {}
+ dic_id_name = {}
+ dic_col_proc_id = {}
+ for proc in orig_graph_param.array_formval:
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ label = gen_sql_label(col_id, col_name)
+ dic_label_id[label] = col_id
+ dic_id_name[col_id] = col_name
+ dic_col_proc_id[col_id] = proc.proc_id
+
+ return dic_label_id, dic_id_name, dic_col_proc_id
diff --git a/histview2/api/sankey_plot/services.py b/histview2/api/sankey_plot/services.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/scatter_plot/controllers.py b/histview2/api/scatter_plot/controllers.py
new file mode 100644
index 0000000..80b5448
--- /dev/null
+++ b/histview2/api/scatter_plot/controllers.py
@@ -0,0 +1,55 @@
+import timeit
+
+import simplejson
+from flask import Blueprint, request
+
+from histview2.api.scatter_plot.services import gen_scatter_plot
+from histview2.common.pysize import get_size
+from histview2.common.services import http_content
+from histview2.common.services.form_env import parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import get_dic_form_from_debug_info, \
+ set_export_dataset_id_to_dic_param
+from histview2.common.trace_data_log import is_send_google_analytics, save_input_data_to_file, EventType
+
+api_scatter_blueprint = Blueprint(
+ 'api_scatter_module',
+ __name__,
+ url_prefix='/histview2/api/scp'
+)
+
+
+@api_scatter_blueprint.route('/plot', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+ save_input_data_to_file(dic_form, EventType.SCP)
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ # if universal call gen_dframe else gen_results
+ orig_send_ga_flg = is_send_google_analytics
+ dic_param = gen_scatter_plot(dic_param)
+
+ # send Google Analytics changed flag
+ if orig_send_ga_flg and not is_send_google_analytics:
+ dic_param.update({'is_send_ga_off': True})
+
+ # calculate data size to send gtag
+ data_size = get_size(dic_param)
+ dic_param['data_size'] = data_size
+
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ out_dict = simplejson.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial, ignore_nan=True)
+
+ return out_dict, 200
diff --git a/histview2/api/scatter_plot/services.py b/histview2/api/scatter_plot/services.py
new file mode 100644
index 0000000..ae9b378
--- /dev/null
+++ b/histview2/api/scatter_plot/services.py
@@ -0,0 +1,1101 @@
+import math
+import re
+from collections import Counter
+from copy import deepcopy
+from typing import List
+
+import numpy as np
+import pandas as pd
+from numpy import matrix
+from pandas import DataFrame, Series, RangeIndex, Index
+
+from histview2.api.categorical_plot.services import produce_cyclic_terms, gen_dic_param_terms, gen_time_conditions
+from histview2.api.trace_data.services.time_series_chart import (get_data_from_db,
+ main_check_filter_detail_match_graph_data,
+ calc_raw_common_scale_y,
+ calc_scale_info, get_procs_in_dic_param,
+ gen_unique_data, filter_df,
+ customize_dic_param_for_reuse_cache,
+ get_chart_info_detail)
+from histview2.common.common_utils import gen_sql_label
+from histview2.common.constants import ACTUAL_RECORD_NUMBER, \
+ IS_RES_LIMITED, ARRAY_Y, MATCHED_FILTER_IDS, UNMATCHED_FILTER_IDS, NOT_EXACT_MATCH_FILTER_IDS, ARRAY_X, \
+ TIMES, COLORS, H_LABEL, V_LABEL, DataType, CHART_TYPE, CYCLIC_DIV_NUM, COMMON, START_DATE, \
+ START_TM, END_DATE, END_TM, ELAPSED_TIME, ARRAY_Z, ChartType, SCALE_COLOR, END_COL_ID, END_PROC_ID, \
+ SCALE_COMMON, SCALE_THRESHOLD, SCALE_AUTO, SCALE_FULL, SCALE_Y, SCALE_X, TIME_MIN, TIME_MAX, \
+ ORIG_ARRAY_Z, SUMMARIES, N_TOTAL, UNIQUE_CATEGORIES, UNIQUE_DIV, UNIQUE_COLOR, CAT_EXP_BOX, X_THRESHOLD, \
+ Y_THRESHOLD, SCALE_SETTING, \
+ CHART_INFOS, X_SERIAL, Y_SERIAL, ARRAY_PLOTDATA, IS_DATA_LIMITED, ColorOrder, TIME_NUMBERINGS, SORT_KEY, \
+ VAR_TRACE_TIME
+from histview2.common.memoize import memoize
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.services.sse import notify_progress
+from histview2.common.services.statistics import calc_summary_elements
+from histview2.common.trace_data_log import *
+from histview2.setting_module.models import CfgProcessColumn
+from histview2.trace_data.models import Cycle
+
+DATA_COUNT_COL = '__data_count_col__'
+MATRIX = 7
+SCATTER_PLOT_MAX_POINT = 500_000
+HEATMAP_COL_ROW = 100
+TOTAL_VIOLIN_PLOT = 200
+
+
+@log_execution_time('[SCATTER PLOT]')
+@notify_progress(60)
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.SCP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_scatter_plot(dic_param):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition points that between start point and end_point
+ """
+ recent_flg = False
+ for key in dic_param[COMMON]:
+ if str(key).startswith(VAR_TRACE_TIME):
+ if dic_param[COMMON][key] == 'recent':
+ recent_flg = True
+ break
+
+ is_data_limited = False
+ # for caching
+ dic_param, cat_exp, _, dic_cat_filters, use_expired_cache, temp_serial_column, temp_serial_order, *_, matrix_col, \
+ color_order = customize_dic_param_for_reuse_cache(dic_param)
+ matrix_col = matrix_col if matrix_col else MATRIX
+
+ # cyclic
+ terms = None
+ if dic_param[COMMON].get(CYCLIC_DIV_NUM):
+ produce_cyclic_terms(dic_param)
+ terms = gen_dic_param_terms(dic_param)
+
+ # get x,y,color, levels, cat_div information
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ threshold_filter_detail_ids = orig_graph_param.common.threshold_boxes
+ dic_proc_cfgs = get_procs_in_dic_param(orig_graph_param)
+ scatter_xy_ids = []
+ scatter_xy_names = []
+ scatter_proc_ids = []
+ for proc in orig_graph_param.array_formval:
+ scatter_proc_ids.append(proc.proc_id)
+ scatter_xy_ids = scatter_xy_ids + proc.col_ids
+ scatter_xy_names = scatter_xy_names + proc.col_names
+
+ x_proc_id = scatter_proc_ids[0]
+ y_proc_id = scatter_proc_ids[-1]
+ x_id = scatter_xy_ids[0]
+ y_id = scatter_xy_ids[-1]
+ x_name = scatter_xy_names[0]
+ y_name = scatter_xy_names[-1]
+ x_label = gen_sql_label(x_id, x_name)
+ y_label = gen_sql_label(y_id, y_name)
+
+ color_id = orig_graph_param.common.color_var
+ cat_div_id = orig_graph_param.common.div_by_cat
+ level_ids = cat_exp if cat_exp else orig_graph_param.common.cat_exp
+ col_ids = [col for col in list(set([x_id, y_id, color_id, cat_div_id] + level_ids)) if col]
+ dic_cols = {cfg_col.id: cfg_col for cfg_col in CfgProcessColumn.get_by_ids(col_ids)}
+
+ color_label = gen_sql_label(color_id, dic_cols[color_id].column_name) if color_id else None
+ level_labels = [gen_sql_label(id, dic_cols[id].column_name) for id in level_ids]
+ cat_div_label = gen_sql_label(cat_div_id, dic_cols[cat_div_id].column_name) if cat_div_id else None
+ if orig_graph_param.common.compare_type == 'directTerm':
+ matched_filter_ids = []
+ unmatched_filter_ids = []
+ not_exact_match_filter_ids = []
+ actual_record_number = 0
+ is_res_limited = False
+
+ dic_dfs = {}
+ terms = gen_time_conditions(dic_param)
+ df = None
+ for term in terms:
+ # create dic_param for each term from original dic_param
+ term_dic_param = deepcopy(dic_param)
+ term_dic_param[COMMON][START_DATE] = term[START_DATE]
+ term_dic_param[COMMON][START_TM] = term[START_TM]
+ term_dic_param[COMMON][END_DATE] = term[END_DATE]
+ term_dic_param[COMMON][END_TM] = term[END_TM]
+ h_keys = (term[START_DATE], term[START_TM], term[END_DATE], term[END_TM])
+
+ # query data and gen df
+ df_term, graph_param, record_number, _is_res_limited = gen_df(term_dic_param,
+ _use_expired_cache=use_expired_cache)
+ if df is None:
+ df = df_term.copy()
+ else:
+ df = pd.concat([df, df_term])
+
+ # filter list
+ df_term = filter_df(df_term, dic_cat_filters)
+
+ if _is_res_limited:
+ is_res_limited = _is_res_limited
+
+ # check filter match or not ( for GUI show )
+ filter_ids = main_check_filter_detail_match_graph_data(graph_param, df_term)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids, actual records
+ actual_record_number += record_number
+ matched_filter_ids += filter_ids[0]
+ unmatched_filter_ids += filter_ids[1]
+ not_exact_match_filter_ids += filter_ids[2]
+
+ dic_dfs[h_keys] = df_term
+
+ # gen scatters
+ output_graphs, output_times = gen_scatter_by_direct_term(matrix_col, dic_dfs, x_proc_id, y_proc_id, x_label,
+ y_label, color_label, level_labels)
+ else:
+ # query data and gen df
+ df, graph_param, actual_record_number, is_res_limited = gen_df(dic_param,
+ _use_expired_cache=use_expired_cache)
+
+ # filter list
+ df_sub = filter_df(df, dic_cat_filters)
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = \
+ main_check_filter_detail_match_graph_data(graph_param, df_sub)
+
+ if orig_graph_param.common.div_by_data_number:
+ output_graphs, output_times = gen_scatter_data_count(matrix_col, df_sub, x_proc_id, y_proc_id, x_label,
+ y_label, orig_graph_param.common.div_by_data_number,
+ color_label, level_labels, recent_flg)
+ elif orig_graph_param.common.cyclic_div_num:
+ output_graphs, output_times = gen_scatter_by_cyclic(matrix_col, df_sub, x_proc_id, y_proc_id, x_label,
+ y_label, terms, color_label, level_labels)
+ else:
+ output_graphs, output_times = gen_scatter_cat_div(matrix_col, df_sub, x_proc_id, y_proc_id, x_label,
+ y_label, cat_div_label, color_label, level_labels)
+
+ # check graphs
+ if not output_graphs:
+ return dic_param
+
+ # chart type
+ series_keys = [ARRAY_X, ARRAY_Y, COLORS, TIMES]
+ chart_type = get_chart_type(x_id, y_id, dic_cols)
+ dic_param[CHART_TYPE] = chart_type
+ if chart_type == ChartType.HEATMAP.value:
+ # get unique data
+ dic_unique_cate = gen_unique_data(df, dic_proc_cfgs, [id for id in (x_id, y_id) if id])
+
+ # get color for filter
+ dic_unique_color = gen_unique_data(df, dic_proc_cfgs, [])
+
+ # get div for filter
+ dic_unique_div = gen_unique_data(df, dic_proc_cfgs, [id for id in [cat_div_id] if id])
+
+ # gen matrix
+ all_x, all_y = get_heatmap_distinct(output_graphs)
+ for graph in output_graphs:
+ # handle x, y, z data
+ array_x = graph[ARRAY_X]
+ array_y = graph[ARRAY_Y]
+ unique_x = set(array_x.drop_duplicates().tolist())
+ unique_y = set(array_y.drop_duplicates().tolist())
+
+ missing_x = all_x - unique_x
+ missing_y = all_y - unique_y
+ array_z = pd.crosstab(array_y, array_x)
+ for key in missing_x:
+ array_z[key] = None
+
+ sorted_cols = sorted(array_z.columns)
+ array_z = array_z[sorted_cols]
+
+ missing_data = [None] * len(missing_y)
+ df_missing = pd.DataFrame({col: missing_data for col in array_z.columns}, index=missing_y)
+ array_z = pd.concat([array_z, df_missing])
+ array_z.sort_index(inplace=True)
+
+ # limit 10K cells
+ if array_z.size > HEATMAP_COL_ROW * HEATMAP_COL_ROW:
+ array_z = array_z[:HEATMAP_COL_ROW][array_z.columns[:HEATMAP_COL_ROW]]
+
+ graph[ARRAY_X] = array_z.columns
+ graph[ARRAY_Y] = array_z.index
+ graph[ORIG_ARRAY_Z] = matrix(array_z)
+
+ # ratio
+ z_count = len(array_x)
+ array_z = array_z * 100 // z_count
+ graph[ARRAY_Z] = matrix(array_z)
+
+ # reduce sending data to browser
+ graph[COLORS] = []
+ graph[X_SERIAL] = []
+ graph[Y_SERIAL] = []
+ graph[TIMES] = []
+ graph[ELAPSED_TIME] = []
+
+ elif chart_type == ChartType.SCATTER.value:
+ # get unique data
+ dic_unique_cate = gen_unique_data(df, dic_proc_cfgs, [])
+
+ # get color for filter
+ dic_unique_color = gen_unique_data(df, dic_proc_cfgs, [id for id in {color_id} if id])
+
+ # get div for filter
+ dic_unique_div = gen_unique_data(df, dic_proc_cfgs, [id for id in {cat_div_id} if id])
+
+ # gen scatter matrix
+ data_per_graph = SCATTER_PLOT_MAX_POINT / len(output_graphs)
+ color_scale = []
+ for graph, (x_times, y_times) in zip(output_graphs, output_times):
+ # limit and sort by color
+ df_graph, _is_data_limited = gen_df_limit_data(graph, series_keys, data_per_graph)
+ if _is_data_limited:
+ is_data_limited = True
+
+ # sort by color (high frequency first)
+ color_col = ELAPSED_TIME
+ df_graph[color_col] = calc_elapsed_times(df_graph, TIMES)
+ if color_order is ColorOrder.DATA:
+ color_col = COLORS if COLORS in df_graph else ARRAY_X
+ df_graph = sort_df(df_graph, [color_col])
+ elif color_order is ColorOrder.TIME:
+ color_col = TIME_NUMBERINGS
+ df_graph[color_col] = pd.to_datetime(df_graph[TIMES]).rank().convert_dtypes()
+
+ color_scale += df_graph[color_col].tolist()
+
+ # group by and count frequency
+ df_graph['__count__'] = df_graph.groupby(color_col)[color_col].transform('count')
+ df_graph.sort_values('__count__', inplace=True, ascending=False)
+ df_graph.drop('__count__', axis=1, inplace=True)
+
+ for key in df_graph.columns:
+ graph[key] = df_graph[key].tolist()
+
+ # chart infos
+ x_chart_infos, _ = get_chart_info_detail(x_times or graph[TIMES], x_id, threshold_filter_detail_ids)
+ graph[X_THRESHOLD] = x_chart_infos[-1] if x_chart_infos else None
+ y_chart_infos, _ = get_chart_info_detail(y_times or graph[TIMES], y_id, threshold_filter_detail_ids)
+ graph[Y_THRESHOLD] = y_chart_infos[-1] if y_chart_infos else None
+ else:
+ group_by_cols = []
+ unique_data_cols = [cat_div_id, color_id]
+ if DataType[dic_cols[x_id].data_type] is DataType.TEXT:
+ str_col = ARRAY_X
+ number_col = ARRAY_Y
+ group_by_cols.append(str_col)
+ unique_data_cols.append(x_id)
+ dic_param['string_axis'] = 'x'
+ if color_id and color_id != x_id:
+ group_by_cols.append(COLORS)
+ else:
+ str_col = ARRAY_Y
+ number_col = ARRAY_X
+ group_by_cols.append(str_col)
+ unique_data_cols.append(y_id)
+ dic_param['string_axis'] = 'y'
+ if color_id and color_id != y_id:
+ group_by_cols.append(COLORS)
+
+ number_of_graph = min(len(output_graphs), matrix_col ** 2) or 1
+ limit_violin_per_graph = math.floor(TOTAL_VIOLIN_PLOT / number_of_graph)
+ most_vals, is_reduce_violin_number = get_most_common_in_graphs(output_graphs, group_by_cols,
+ limit_violin_per_graph)
+ number_of_violin = (len(most_vals) * number_of_graph) or 1
+ max_n_per_violin = math.floor(10_000 / number_of_violin)
+
+ # for show message reduced number of violin chart
+ dic_param['is_reduce_violin_number'] = is_reduce_violin_number
+
+ # get unique data
+ dic_unique_cate = gen_unique_data(df, dic_proc_cfgs, [])
+
+ # get color for filter
+ dic_unique_color = gen_unique_data(df, dic_proc_cfgs, [id for id in {color_id} if id])
+
+ # get div for filter
+ dic_unique_div = gen_unique_data(df, dic_proc_cfgs, [id for id in {cat_div_id} if id])
+
+ # gen violin data
+ for graph, (x_times, y_times) in zip(output_graphs, output_times):
+ # limit and sort by color
+ df_graph, _is_data_limited = gen_df_limit_data(graph, series_keys)
+ if _is_data_limited:
+ is_data_limited = True
+
+ df_graph = filter_violin_df(df_graph, group_by_cols, most_vals)
+ df_graph = sort_df(df_graph, group_by_cols)
+
+ # get hover information
+ dic_summaries = {}
+ str_col_vals = []
+ num_col_vals = []
+ for key, df_sub in df_graph.groupby(group_by_cols):
+ if isinstance(key, (list, tuple)):
+ key = '|'.join(key)
+
+ vals = df_sub[number_col].tolist()
+ dic_summaries[key] = calc_summary_elements(
+ {ARRAY_X: df_sub[TIMES].tolist(), ARRAY_Y: vals})
+
+ # resample_data = df_sub[number_col]
+ # resample_data = resample_by_sort(df_sub[number_col], max_n_per_violin)
+ resample_data = resample_preserve_min_med_max(df_sub[number_col], max_n_per_violin)
+ # todo: remove q2 computing after demonstration
+ # if df_sub[number_col].size:
+ # q2_raw_data = np.quantile(df_sub[number_col], [0.5])
+ # q2_new_data = np.quantile(resample_data, [0.5])
+ # q2_old_data = np.quantile(resample_data_old, [0.5])
+
+ if resample_data is not None:
+ vals = resample_data.tolist()
+ is_data_limited = True
+
+ str_col_vals.append(key)
+ num_col_vals.append(vals)
+
+ graph[str_col] = str_col_vals
+ graph[number_col] = num_col_vals
+ graph[SUMMARIES] = dic_summaries
+
+ # reduce sending data to browser
+ graph[COLORS] = []
+ graph[X_SERIAL] = []
+ graph[Y_SERIAL] = []
+ graph[TIMES] = []
+ graph[ELAPSED_TIME] = []
+
+ if number_col == ARRAY_X:
+ x_chart_infos, _ = get_chart_info_detail(x_times or graph[TIMES], x_id, threshold_filter_detail_ids)
+ graph[X_THRESHOLD] = x_chart_infos[-1] if x_chart_infos else None
+ else:
+ y_chart_infos, _ = get_chart_info_detail(y_times or graph[TIMES], y_id, threshold_filter_detail_ids)
+ graph[Y_THRESHOLD] = y_chart_infos[-1] if y_chart_infos else None
+
+ # TODO : we should calc box plot and kde before send to front end to improve performance
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # flag to show that trace result was limited
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+ dic_param[IS_RES_LIMITED] = is_res_limited
+
+ # check show col and row labels
+ is_show_h_label = False
+ is_show_v_label = False
+ is_show_first_h_label = False
+ v_labels = list({graph[V_LABEL] for graph in output_graphs})
+ h_labels = list({graph[H_LABEL] for graph in output_graphs})
+ if len(h_labels) > 1 or (h_labels and h_labels[0]):
+ is_show_h_label = True
+
+ if len(output_graphs) > len(h_labels):
+ is_show_first_h_label = True
+
+ if len(v_labels) > 1 or (v_labels and v_labels[0]):
+ is_show_v_label = True
+
+ # column names
+ dic_param['x_name'] = dic_cols[x_id].name if x_id else None
+ dic_param['y_name'] = dic_cols[y_id].name if y_id else None
+ dic_param['color_name'] = dic_cols[color_id].name if color_id else None
+ dic_param['color_type'] = dic_cols[color_id].data_type if color_id else None
+ dic_param['div_name'] = dic_cols[cat_div_id].name if cat_div_id else None
+ dic_param['div_data_type'] = dic_cols[cat_div_id].data_type if cat_div_id else None
+ dic_param['level_names'] = [dic_cols[level_id].name for level_id in level_ids] if level_ids else None
+ dic_param['is_show_v_label'] = is_show_v_label
+ dic_param['is_show_h_label'] = is_show_h_label
+ dic_param['is_show_first_h_label'] = is_show_first_h_label
+ dic_param['is_filtered'] = True if dic_cat_filters else False
+
+ # add proc name for x and y column
+ dic_param['x_proc'] = dic_proc_cfgs[dic_cols[x_id].process_id].name if x_id else None
+ dic_param['y_proc'] = dic_proc_cfgs[dic_cols[y_id].process_id].name if y_id else None
+
+ # min, max color
+ # TODO: maybe we need to get chart infor for color to get ymax ymin of all chart infos
+ if color_order is ColorOrder.DATA:
+ dic_scale_color = calc_scale(df, color_id, color_label, dic_cols)
+ else:
+ df_color_scale = pd.DataFrame({color_col: color_scale})
+ dic_scale_color = calc_scale(df_color_scale, None, color_col, dic_cols)
+
+ dic_param[SCALE_COLOR] = dic_scale_color
+
+ # y scale
+ y_chart_configs = [graph[Y_THRESHOLD] for graph in output_graphs if graph.get(Y_THRESHOLD)]
+ dic_scale_y = calc_scale(df, y_id, y_label, dic_cols, y_chart_configs)
+ dic_param[SCALE_Y] = dic_scale_y
+
+ # x scale
+ x_chart_configs = [graph[X_THRESHOLD] for graph in output_graphs if graph.get(X_THRESHOLD)]
+ dic_scale_x = calc_scale(df, x_id, x_label, dic_cols, x_chart_configs)
+ dic_param[SCALE_X] = dic_scale_x
+
+ # output graphs
+ dic_param[ARRAY_PLOTDATA] = [convert_series_to_list(graph) for graph in output_graphs]
+
+ dic_cat_exp_unique = gen_unique_data(df, dic_proc_cfgs, level_ids)
+ dic_param[CAT_EXP_BOX] = list(dic_cat_exp_unique.values())
+ dic_param[UNIQUE_CATEGORIES] = list(dic_unique_cate.values())
+ dic_param[UNIQUE_DIV] = list(dic_unique_div.values())
+ dic_param[UNIQUE_COLOR] = list(dic_unique_color.values())
+ dic_param[IS_DATA_LIMITED] = is_data_limited
+ return dic_param
+
+
+@log_execution_time()
+def calc_scale(df, col_id, col_label, dic_cols, chart_configs=None):
+ if not col_id and not col_label:
+ return None
+
+ if col_id:
+ cfg_col = dic_cols.get(col_id)
+ if not cfg_col:
+ return None
+
+ if df is None or not len(df):
+ return None
+
+ if DataType[cfg_col.data_type] not in (DataType.REAL, DataType.INTEGER):
+ return None
+
+ plot = {END_PROC_ID: cfg_col.process_id, END_COL_ID: col_id, ARRAY_X: df[Cycle.time.key],
+ ARRAY_Y: df[col_label]}
+ else:
+ plot = {END_PROC_ID: None, END_COL_ID: None, ARRAY_X: [None],
+ ARRAY_Y: df[col_label]}
+
+ if chart_configs:
+ plot[CHART_INFOS] = chart_configs
+
+ min_max_list, all_min, all_max = calc_raw_common_scale_y([plot])
+ calc_scale_info([plot], min_max_list, all_min, all_max)
+
+ dic_scale = {scale_name: plot.get(scale_name) for scale_name in
+ (SCALE_SETTING, SCALE_COMMON, SCALE_THRESHOLD, SCALE_AUTO, SCALE_FULL)}
+ return dic_scale
+
+
+@log_execution_time()
+def convert_series_to_list(graph):
+ for key, series in graph.items():
+ if isinstance(series, (Series, np.ndarray, RangeIndex, Index)):
+ graph[key] = series.tolist()
+
+ return graph
+
+
+@log_execution_time()
+@memoize(is_save_file=True)
+def gen_df(dic_param, _use_expired_cache=False):
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ # target procs
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+ # add level
+ graph_param.add_cat_exp_to_array_formval()
+ # add color, cat_div
+ graph_param.add_column_to_array_formval([graph_param.common.color_var, graph_param.common.div_by_cat])
+
+ # get serials
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+ return df, graph_param, actual_record_number, is_res_limited
+
+
+@log_execution_time()
+def get_chart_type(x_id, y_id, dic_cols):
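+    # numeric x numeric -> scatter, text x text -> heatmap, anything else -> violin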
+ number_types = (DataType.INTEGER, DataType.REAL)
+ x_type = DataType[dic_cols[x_id].data_type]
+ y_type = DataType[dic_cols[y_id].data_type]
+ if x_type in number_types and y_type in number_types:
+ return ChartType.SCATTER.value
+ elif x_type is DataType.TEXT and y_type is DataType.TEXT:
+ return ChartType.HEATMAP.value
+ else:
+ return ChartType.VIOLIN.value
+
+
+@log_execution_time()
+def split_data_by_number(df: DataFrame, count):
+ df[DATA_COUNT_COL] = df.reset_index().index // count
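+    # e.g. count=3 on a 7-row df yields DATA_COUNT_COL = [0, 0, 0, 1, 1, 1, 2],
+    # i.e. consecutive buckets of `count` records by row position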
+ return df
+
+
+def sort_data_count_key(key):
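+    # sort helper: a bucket label such as '101 – 200' sorts by its leading integer
+    # (101); keys without leading digits are returned unchanged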
+ new_key = re.match(r'^\d+', str(key))
+ if new_key is not None:
+ return int(new_key[0])
+
+ return key
+
+
+@log_execution_time()
+def group_by_df(df: DataFrame, cols, max_group=None, max_record_per_group=None, sort_key_func=None, reverse=True,
+ get_from_last=None):
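+    # Groups df by `cols`, keeps at most `max_group` groups (sorted by key, descending
+    # when reverse=True) and at most `max_record_per_group` rows per group.
+    # Illustrative call (column name assumed): group_by_df(df, ['LINE'], max_group=7,
+    # max_record_per_group=10000) -> {line_value: df_head, ...}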
+ dic_groups = {}
+ if not len(df):
+ return dic_groups
+
+ if not cols:
+ dic_groups[None] = df.head(max_record_per_group)
+ return dic_groups
+
+ df_groups = df.groupby(cols)
+ max_group = max_group or len(df_groups.groups)
+
+ # sort desc
+ if sort_key_func:
+ sort_func = lambda x: sort_key_func(x[0])
+ else:
+ sort_func = lambda x: int(x[0]) if str(x[0]).isnumeric() else x[0]
+
+ groups = sorted([(key, df_group) for key, df_group in df_groups], key=sort_func, reverse=reverse)
+ groups = groups[:max_group]
+ if get_from_last:
+ groups.reverse()
+
+ for key, df_group in groups:
+ dic_groups[key] = df_group.head(max_record_per_group)
+
+ return dic_groups
+
+
+@log_execution_time()
+def split_df_by_time_range(dic_df_chunks, max_group=None, max_record_per_group=None):
+ dic_groups = {}
+ max_group = max_group or len(dic_df_chunks)
+
+ count = 0
+ for key, df_group in dic_df_chunks.items():
+ # for key, df_group in df_groups.groups.items():
+ if count >= max_group:
+ break
+
+ dic_groups[key] = df_group.head(max_record_per_group)
+ count += 1
+
+ return dic_groups
+
+
+@log_execution_time()
+def drop_missing_data(df: DataFrame, cols):
+ if len(df):
+ df.dropna(subset=[col for col in cols if col], inplace=True)
+ df = df.convert_dtypes()
+ return df
+
+
+@log_execution_time()
+def get_v_keys_str(v_keys):
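+    # e.g. ('LINE1', 'MC05') -> 'LINE1|MC05'; None stays None; a scalar key is returned as-is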
+ if v_keys is None:
+ v_keys_str = None
+ elif isinstance(v_keys, (list, tuple)):
+ v_keys_str = '|'.join([str(key) for key in v_keys])
+ else:
+ v_keys_str = v_keys
+ return v_keys_str
+
+
+@log_execution_time()
+def calc_elapsed_times(df_data, time_col):
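+    # seconds elapsed between consecutive records in time order, returned in the original
+    # row order; e.g. times 10:00:00, 10:00:05, 10:00:12 (already ordered) -> [0.0, 5.0, 7.0]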
+ elapsed_times = pd.to_datetime(df_data[time_col]).sort_values()
+ elapsed_times = elapsed_times.diff().dt.total_seconds().fillna(0)
+ elapsed_times = elapsed_times.sort_index()
+ elapsed_times = elapsed_times.convert_dtypes()
+ return elapsed_times
+
+
+@log_execution_time()
+def gen_scatter_data_count(matrix_col, df: DataFrame, x_proc_id, y_proc_id, x, y, data_count_div, color=None,
+ levels=None, recent_flg=None):
+ """
+    split by data count
+ :param matrix_col:
+ :param df:
+ :param x_proc_id:
+ :param y_proc_id:
+ :param x:
+ :param y:
+ :param data_count_div:
+ :param color:
+ :param levels:
+ :return:
+ """
+ if levels is None:
+ levels = []
+
+ # time column
+ time_col = Cycle.time.key
+ # time_col = [col for col in df.columns if col.startswith(Cycle.time.key)][0]
+
+ # remove missing data
+ df = drop_missing_data(df, [x, y, color] + levels)
+
+ h_group_col = DATA_COUNT_COL
+
+ v_group_cols = [col for col in levels if col and col != h_group_col]
+
+    # the number of graphs depends on whether a facet is used
+ max_graph = matrix_col if v_group_cols else matrix_col * matrix_col
+
+ # facet
+ dic_groups = {}
+ dic_temp_groups = group_by_df(df, v_group_cols)
+ facet_keys = [key for key, _ in Counter(dic_temp_groups.keys()).most_common(matrix_col)]
+ for key, df_group in dic_temp_groups.items():
+ if key not in facet_keys:
+ continue
+
+ df_group = split_data_by_number(df_group, data_count_div)
+ df_group = reduce_data_by_number(df_group, max_graph, recent_flg)
+
+ dic_groups[key] = group_by_df(df_group, h_group_col, max_graph, sort_key_func=sort_data_count_key,
+ reverse=recent_flg, get_from_last=recent_flg)
+
+ facet_keys.append(key)
+
+ # serials
+ x_serial_cols = CfgProcessColumn.get_serials(x_proc_id)
+ if y_proc_id == x_proc_id:
+ y_serial_cols = None
+ else:
+ y_serial_cols = CfgProcessColumn.get_serials(y_proc_id)
+
+ output_graphs = []
+ output_times = []
+ for v_keys, dic_group in dic_groups.items():
+ if v_keys not in facet_keys:
+ continue
+
+ for idx, (h_key, df_data) in enumerate(dic_group.items()):
+ if recent_flg:
+ h_key = idx
+
+ h_keys_str = f'{data_count_div * h_key + 1} – {data_count_div * (h_key + 1)}'
+
+ v_keys_str = get_v_keys_str(v_keys)
+ # elapsed_times = calc_elapsed_times(df_data, time_col)
+
+ # v_label : name ( not id )
+ dic_data = gen_dic_graphs(df_data, x, y, h_keys_str, v_keys_str, color, time_col, sort_key=h_key)
+
+ # serial
+ dic_data[X_SERIAL] = get_proc_serials(df_data, x_serial_cols)
+ dic_data[Y_SERIAL] = get_proc_serials(df_data, y_serial_cols)
+
+ output_times.append((get_proc_times(df_data, x_proc_id), get_proc_times(df_data, y_proc_id)))
+ output_graphs.append(dic_data)
+
+ return output_graphs, output_times
+
+
+@log_execution_time()
+def get_proc_times(df, proc_id):
+ col_name = f'{Cycle.time.key}_{proc_id}'
+ if col_name in df.columns:
+ return df[col_name].tolist()
+
+ return []
+
+
+@log_execution_time()
+def gen_scatter_by_cyclic(matrix_col, df: DataFrame, x_proc_id, y_proc_id, x, y, terms, color=None, levels=None):
+ """
+ split by terms
+ :param matrix_col:
+ :param df:
+ :param x_proc_id:
+ :param y_proc_id:
+ :param x:
+ :param y:
+ :param terms:
+ :param color:
+ :param levels:
+ :return:
+ """
+ if levels is None:
+ levels = []
+
+ # time column
+ time_col = Cycle.time.key
+ # time_col = [col for col in df.columns if col.startswith(Cycle.time.key)][0]
+
+ # remove missing data
+ df = drop_missing_data(df, [x, y, color] + levels)
+
+ dic_df_chunks = {}
+ df.set_index(Cycle.time.key, inplace=True, drop=False)
+ for term_id, term in enumerate(terms):
+ start_dt = term['start_dt']
+ end_dt = term['end_dt']
+ df_chunk = df[(df.index >= start_dt) & (df.index < end_dt)]
+ dic_df_chunks[(start_dt, end_dt)] = df_chunk
+
+ v_group_cols = [col for col in levels if col]
+
+    # the number of graphs depends on whether a facet is used
+ max_graph = matrix_col if v_group_cols else matrix_col * matrix_col
+ dic_groups = split_df_by_time_range(dic_df_chunks, max_graph)
+
+ # facet
+ dic_groups = {key: group_by_df(df_group, v_group_cols) for key, df_group in dic_groups.items()}
+ facet_keys = [key for key, _ in
+ Counter([val for vals in dic_groups.values() for val in vals]).most_common(matrix_col)]
+
+ # serials
+ x_serial_cols = CfgProcessColumn.get_serials(x_proc_id)
+ if y_proc_id == x_proc_id:
+ y_serial_cols = None
+ else:
+ y_serial_cols = CfgProcessColumn.get_serials(y_proc_id)
+
+ output_graphs = []
+ output_times = []
+ for h_key, dic_group in dic_groups.items():
+ h_keys_str = f'{h_key[0]} – {h_key[1]}'
+
+ for v_keys, df_data in dic_group.items():
+ if v_keys not in facet_keys:
+ continue
+
+ v_keys_str = get_v_keys_str(v_keys)
+ # elapsed_times = calc_elapsed_times(df_data, time_col)
+
+ # v_label : name ( not id )
+ dic_data = gen_dic_graphs(df_data, x, y, h_keys_str, v_keys_str, color, time_col)
+
+ # serial
+ dic_data[X_SERIAL] = get_proc_serials(df_data, x_serial_cols)
+ dic_data[Y_SERIAL] = get_proc_serials(df_data, y_serial_cols)
+
+ output_times.append((get_proc_times(df_data, x_proc_id), get_proc_times(df_data, y_proc_id)))
+ output_graphs.append(dic_data)
+
+ return output_graphs, output_times
+
+
+@log_execution_time()
+def gen_scatter_cat_div(matrix_col, df: DataFrame, x_proc_id, y_proc_id, x, y, cat_div=None, color=None, levels=None):
+ """
+ category divide
+ :param matrix_col:
+ :param df:
+ :param x_proc_id:
+ :param y_proc_id:
+ :param x:
+ :param y:
+ :param cat_div:
+ :param color:
+ :param levels:
+ :return:
+ """
+ if levels is None:
+ levels = []
+
+ # time column
+ time_col = Cycle.time.key
+ # time_col = [col for col in df.columns if col.startswith(Cycle.time.key)][0]
+
+ # remove missing data
+ df = drop_missing_data(df, [x, y, cat_div, color] + levels)
+
+ h_group_col = cat_div
+ if not h_group_col:
+ if len(levels) > 1:
+ h_group_col = levels[-1]
+
+ v_group_cols = [col for col in levels if col and col != h_group_col]
+
+    # the number of graphs depends on whether a facet is used
+ max_graph = matrix_col if v_group_cols else matrix_col * matrix_col
+
+ dic_groups = group_by_df(df, h_group_col, max_graph)
+
+ # facet
+ dic_groups = {key: group_by_df(df_group, v_group_cols) for key, df_group in dic_groups.items()}
+ facet_keys = [key for key, _ in
+ Counter([val for vals in dic_groups.values() for val in vals]).most_common(matrix_col)]
+
+ # serials
+ x_serial_cols = CfgProcessColumn.get_serials(x_proc_id)
+ if y_proc_id == x_proc_id:
+ y_serial_cols = None
+ else:
+ y_serial_cols = CfgProcessColumn.get_serials(y_proc_id)
+
+ output_graphs = []
+ output_times = []
+ for h_key, dic_group in dic_groups.items():
+ h_keys_str = h_key
+
+ for v_keys, df_data in dic_group.items():
+ if v_keys not in facet_keys:
+ continue
+
+ v_keys_str = get_v_keys_str(v_keys)
+ # elapsed_times = calc_elapsed_times(df_data, time_col)
+
+ # v_label : name ( not id )
+ dic_data = gen_dic_graphs(df_data, x, y, h_keys_str, v_keys_str, color, time_col)
+
+ # serial
+ dic_data[X_SERIAL] = get_proc_serials(df_data, x_serial_cols)
+ dic_data[Y_SERIAL] = get_proc_serials(df_data, y_serial_cols)
+
+ output_times.append((get_proc_times(df_data, x_proc_id), get_proc_times(df_data, y_proc_id)))
+ output_graphs.append(dic_data)
+
+ return output_graphs, output_times
+
+
+@log_execution_time()
+def gen_scatter_by_direct_term(matrix_col, dic_df_chunks, x_proc_id, y_proc_id, x, y, color=None, levels=None):
+ """
+ split by terms
+ :param matrix_col:
+ :param dic_df_chunks
+ :param x_proc_id:
+ :param y_proc_id:
+ :param x:
+ :param y:
+ :param color:
+ :param levels:
+ :return:
+ """
+ if levels is None:
+ levels = []
+
+ # time column
+ time_col = Cycle.time.key
+
+ # remove missing data
+ for key, df in dic_df_chunks.items():
+ dic_df_chunks[key] = drop_missing_data(df, [x, y, color] + levels)
+
+ v_group_cols = [col for col in levels if col]
+
+    # the number of graphs depends on whether a facet is used
+ max_graph = matrix_col if v_group_cols else matrix_col * matrix_col
+ dic_groups = split_df_by_time_range(dic_df_chunks, max_graph)
+
+ # facet
+ dic_groups = {key: group_by_df(df_group, v_group_cols) for key, df_group in dic_groups.items()}
+ facet_keys = [key for key, _ in
+ Counter([val for vals in dic_groups.values() for val in vals]).most_common(matrix_col)]
+
+ # serials
+ x_serial_cols = CfgProcessColumn.get_serials(x_proc_id)
+ if y_proc_id == x_proc_id:
+ y_serial_cols = None
+ else:
+ y_serial_cols = CfgProcessColumn.get_serials(y_proc_id)
+
+ output_graphs = []
+ output_times = []
+ for h_key, dic_group in dic_groups.items():
+ h_keys_str = f'{h_key[0]} {h_key[1]} – {h_key[2]} {h_key[3]}'
+
+ for v_keys, df_data in dic_group.items():
+ if v_keys not in facet_keys:
+ continue
+
+ v_keys_str = get_v_keys_str(v_keys)
+ # elapsed_times = calc_elapsed_times(df_data, time_col)
+
+ # v_label : name ( not id )
+ dic_data = gen_dic_graphs(df_data, x, y, h_keys_str, v_keys_str, color, time_col)
+
+ # serial
+ dic_data[X_SERIAL] = get_proc_serials(df_data, x_serial_cols)
+ dic_data[Y_SERIAL] = get_proc_serials(df_data, y_serial_cols)
+
+ output_times.append((get_proc_times(df_data, x_proc_id), get_proc_times(df_data, y_proc_id)))
+ output_graphs.append(dic_data)
+
+ return output_graphs, output_times
+
+
+@log_execution_time()
+def gen_dic_graphs(df_data, x, y, h_keys_str, v_keys_str, color, time_col, sort_key=None):
+ times = df_data[time_col]
+ n = times.dropna().size
+ time_min = np.nanmin(times) if n else None
+ time_max = np.nanmax(times) if n else None
+
+ dic_data = {H_LABEL: h_keys_str,
+ V_LABEL: v_keys_str,
+ ARRAY_X: df_data[x],
+ ARRAY_Y: df_data[y],
+ COLORS: df_data[color] if color else [],
+ TIMES: times,
+ TIME_MIN: time_min,
+ TIME_MAX: time_max,
+ N_TOTAL: n,
+ SORT_KEY: h_keys_str if sort_key is None else sort_key,
+ }
+ return dic_data
+
+
+@log_execution_time()
+def gen_df_limit_data(graph, keys, limit=None):
+ is_limit = False
+ dic_data = {}
+ for key in keys:
+ count = len(graph[key])
+ if not count:
+ continue
+
+ if limit is None or count <= limit:
+ dic_data[key] = graph[key]
+ else:
+ is_limit = True
+ dic_data[key] = graph[key][:limit]
+
+ return pd.DataFrame(dic_data), is_limit
+
+
+@log_execution_time()
+def sort_df(df, columns):
+ cols = [col for col in columns if col in df.columns]
+ df.sort_values(by=cols, inplace=True)
+
+ return df
+
+
+@log_execution_time()
+def get_most_common_in_graphs(graphs, columns, first_most_common):
+ data = []
+ for graph in graphs:
+ vals = pd.DataFrame({col: graph[col] for col in columns}).drop_duplicates().to_records(index=False).tolist()
+ data += vals
+
+ original_vals = [key for key, _ in Counter(data).most_common(None)]
+ most_vals = [key for key, _ in Counter(data).most_common(first_most_common)]
+ if len(columns) == 1:
+ most_vals = [vals[0] for vals in most_vals]
+
+ is_reduce_violin_number = len(original_vals) > len(most_vals)
+
+ return most_vals, is_reduce_violin_number
+
+
+@log_execution_time()
+def filter_violin_df(df, cols, most_vals):
+ df_result = df[df.set_index(cols).index.isin(most_vals)]
+ return df_result
+
+
+@log_execution_time()
+def get_heatmap_distinct(graphs):
+ array_x = []
+ array_y = []
+ for graph in graphs:
+ array_x += graph[ARRAY_X].drop_duplicates().tolist()
+ array_y += graph[ARRAY_Y].drop_duplicates().tolist()
+
+ return set(array_x), set(array_y)
+
+
+@log_execution_time()
+def get_proc_serials(df: DataFrame, serial_cols: List[CfgProcessColumn]):
+ if not serial_cols:
+ return None
+
+ if df is None or len(df) == 0:
+ return None
+
+ # serials
+ serials = []
+ for col in serial_cols:
+ sql_label = gen_sql_label(col.id, col.column_name)
+ if sql_label in df.columns:
+ dic_serial = {'col_name': col.name, 'data': df[sql_label].tolist()}
+ serials.append(dic_serial)
+
+ return serials
+
+
+@log_execution_time()
+def resample_by_sort(x, max_n=10):
+ """
+ Sort data first, then extract rows (equal interval)
+ Inputs:
+ x: Pandas series
+ max_n: Maximum number of rows
+ Returns:
+        x: 1D numpy array (or the original series when no resampling is needed) of length min(x.shape[0], max_n)
+ """
+ if x.shape[0] > max_n:
+ x = np.sort(x)
+ idx = np.linspace(0, len(x) - 1, max_n, dtype=int)
+ x = x[idx]
+ return x
+
+
+@log_execution_time()
+def reduce_data_by_number(df, max_graph, recent_flg=None):
+ if not len(df):
+ return df
+
+ if recent_flg:
+ first_num = df[DATA_COUNT_COL].iloc[-1] - max_graph
+ if first_num >= 0:
+ df = df[df[DATA_COUNT_COL] > first_num]
+ else:
+ first_num = df[DATA_COUNT_COL].iloc[0] + max_graph
+ if first_num >= 0:
+ df = df[df[DATA_COUNT_COL] < first_num]
+
+ return df
+
+
+@log_execution_time()
+def resample_preserve_min_med_max(x, n_after: int):
+ """ Resample x, but preserve (minimum, median, and maximum) values
+ Inputs:
+ x (1D-NumpyArray or a list)
+ n_after (int) Length of x after resampling. Must be < len(x)
+ Return:
+ x (1D-NumpyArray) Resampled data
+ """
+ if x.shape[0] > n_after:
+        # workaround: an odd n_after is easier to handle
+ if n_after % 2 == 0:
+ n_after += 1
+
+ n = len(x)
+ n_half = int((n_after - 1) / 2)
+
+ # index around median
+ x = np.sort(x)
+ idx_med = (n + 1) / 2 - 1 # median
+ idx_med_l = int(np.ceil(idx_med - 1)) # left of median
+ idx_med_r = int(np.floor(idx_med + 1)) # right of median
+
+ # resampled index
+ idx_low = np.linspace(0, idx_med_l - 1, num=n_half, dtype=int)
+ idx_upp = np.linspace(idx_med_r, n - 1, num=n_half, dtype=int)
+
+ # resampling
+ if n % 2 == 1:
+ med = x[int(idx_med)]
+ x = np.concatenate((x[idx_low], [med], x[idx_upp]))
+ else:
+ med = 0.5 * (x[idx_med_l] + x[idx_med_r])
+ x = np.concatenate((x[idx_low], [med], x[idx_upp]))
+ return x
\ No newline at end of file
diff --git a/histview2/api/setting_module/__init__.py b/histview2/api/setting_module/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/setting_module/controllers.py b/histview2/api/setting_module/controllers.py
new file mode 100644
index 0000000..c2b57a0
--- /dev/null
+++ b/histview2/api/setting_module/controllers.py
@@ -0,0 +1,767 @@
+import json
+import os
+import traceback
+from datetime import datetime
+
+from apscheduler.triggers.date import DateTrigger
+from flask import Blueprint, request, jsonify, Response
+from flask_babel import gettext as _
+from loguru import logger
+from pytz import utc
+
+from histview2 import background_jobs
+from histview2.api.setting_module.services.common import is_local_client, save_user_settings, get_all_user_settings, \
+ delete_user_setting_by_id, get_setting, get_page_top_setting, is_title_exist, parse_user_setting
+from histview2.api.setting_module.services.data_import import add_shutdown_app_job
+from histview2.api.setting_module.services.data_import import update_or_create_constant_by_type, check_db_con
+from histview2.api.setting_module.services.filter_settings import save_filter_config, delete_cfg_filter_from_db
+from histview2.api.setting_module.services.polling_frequency import change_polling_all_interval_jobs, add_import_job
+from histview2.api.setting_module.services.process_delete import delete_proc_cfg_and_relate_jobs, del_data_source
+from histview2.api.setting_module.services.save_load_user_setting import map_form, transform_settings
+from histview2.api.setting_module.services.show_latest_record import get_latest_records, save_master_vis_config, \
+ get_last_distinct_sensor_values, preview_csv_data, gen_preview_data_check_dict
+from histview2.api.trace_data.services.proc_link import gen_global_id_job, show_proc_link_info
+from histview2.api.trace_data.services.proc_link_simulation import sim_gen_global_id
+from histview2.common.backup_db import add_backup_dbs_job
+from histview2.common.common_utils import is_empty, \
+ parse_int_value
+from histview2.common.constants import WITH_IMPORT_OPTIONS, CfgConstantType, RelationShip, ProcessCfgConst, UI_ORDER_DB, \
+ Action, appENV
+from histview2.common.cryptography_utils import encrypt
+from histview2.common.scheduler import JobType, scheduler
+from histview2.common.services import http_content
+from histview2.common.services.jp_to_romaji_utils import to_romaji
+from histview2.common.services.sse import background_announcer
+from histview2.common.yaml_utils import BasicConfigYaml
+from histview2.setting_module.models import AppLog, CfgDataSource, make_session, \
+ insert_or_update_config, crud_config, CfgCsvColumn, CfgDataSourceCSV, CfgProcess, CfgUserSetting
+from histview2.setting_module.schemas import DataSourceSchema, ProcessSchema, CfgUserSettingSchema
+from histview2.setting_module.services.background_process import get_job_detail_service
+from histview2.setting_module.services.process_config import create_or_update_process_cfg, get_process_cfg, \
+ query_database_tables, get_process_columns, get_process_filters, get_process_visualizations
+from histview2.setting_module.services.trace_config import get_all_processes_traces_info, save_trace_config_to_db, \
+ gen_cfg_trace
+
+api_setting_module_blueprint = Blueprint(
+ 'api_setting_module',
+ __name__,
+ url_prefix='/histview2/api/setting'
+)
+
+
+@api_setting_module_blueprint.route('/update_polling_freq', methods=['POST'])
+def update_polling_freq():
+ data_update = json.loads(request.data)
+ with_import_option = data_update.get(WITH_IMPORT_OPTIONS)
+ freq_min = parse_int_value(data_update.get(CfgConstantType.POLLING_FREQUENCY.name)) or 0
+
+ # save/update POLLING_FREQUENCY to db
+ freq_sec = freq_min * 60
+ update_or_create_constant_by_type(const_type=CfgConstantType.POLLING_FREQUENCY.name, value=freq_sec)
+
+ # re-set trigger time for all jobs
+ change_polling_all_interval_jobs(interval_sec=freq_sec, run_now=with_import_option)
+
+ message = {'message': _('Database Setting saved.'), 'is_error': False}
+
+ return jsonify(flask_message=message), 200
+
+
+@api_setting_module_blueprint.route('/data_source_save', methods=['POST'])
+def save_datasource_cfg():
+ """
+ Expected: ds_config = {"db_0001": {"master-name": name, "host": localhost, ...}}
+ """
+ try:
+ data_src: CfgDataSource = DataSourceSchema().load(request.json)
+
+ with make_session() as meta_session:
+ # data source
+ data_src_rec = insert_or_update_config(meta_session, data_src, exclude_columns=[CfgDataSource.order.key])
+
+ # csv detail
+ csv_detail = data_src.csv_detail
+ if csv_detail:
+ csv_columns = data_src.csv_detail.csv_columns
+ csv_columns = [col for col in csv_columns if not is_empty(col.column_name)]
+ data_src.csv_detail.csv_columns = csv_columns
+ csv_detail_rec = insert_or_update_config(meta_session, csv_detail,
+ parent_obj=data_src_rec,
+ parent_relation_key=CfgDataSource.csv_detail.key,
+ parent_relation_type=RelationShip.ONE)
+
+ # CRUD
+ csv_columns = csv_detail.csv_columns
+ crud_config(meta_session, csv_columns, CfgCsvColumn.data_source_id.key,
+ CfgCsvColumn.column_name.key,
+ parent_obj=csv_detail_rec,
+ parent_relation_key=CfgDataSourceCSV.csv_columns.key,
+ parent_relation_type=RelationShip.MANY)
+
+ # db detail
+ db_detail = data_src.db_detail
+ if db_detail:
+ # encrypt password
+ db_detail.password = encrypt(db_detail.password)
+ db_detail.hashed = True
+ # avoid blank string
+ db_detail.port = db_detail.port or None
+ db_detail.schema = db_detail.schema or None
+ insert_or_update_config(meta_session, db_detail,
+ parent_obj=data_src_rec,
+ parent_relation_key=CfgDataSource.db_detail.key,
+ parent_relation_type=RelationShip.ONE)
+ except Exception as e:
+ logger.exception(e)
+ message = {'message': _('Database Setting failed to save'), 'is_error': True}
+ return jsonify(flask_message=message), 500
+
+ message = {'message': _('Database Setting saved.'), 'is_error': False}
+ ds = None
+ if data_src_rec and data_src_rec.id:
+ ds_schema = DataSourceSchema()
+ ds = CfgDataSource.get_ds(data_src_rec.id)
+ ds = ds_schema.dumps(ds)
+ return jsonify(id=data_src_rec.id, data_source=ds, flask_message=message), 200
+
+
+# TODO: refactoring check connection without this function
+@api_setting_module_blueprint.route('/database_tables', methods=['GET'])
+def get_database_tables():
+ db_tables = CfgDataSource.get_all()
+ ds_schema = DataSourceSchema(many=True)
+ dump_data = ds_schema.dumps(db_tables)
+ return dump_data, 200 if db_tables else 500
+
+
+@api_setting_module_blueprint.route('/database_tables_source', methods=['GET'])
+def get_database_tables_source():
+ db_source = CfgDataSource.get_all_db_source()
+ ds_schema = DataSourceSchema(many=True)
+ dump_data = ds_schema.dumps(db_source)
+ return dump_data, 200 if db_source else 500
+
+
+@api_setting_module_blueprint.route('/database_table/<db_id>', methods=['GET'])
+def get_database_table(db_id):
+ if not db_id:
+ return jsonify({'tables': [], 'msg': 'Invalid data source id'}), 400
+
+ tables = query_database_tables(db_id)
+
+ if tables is None:
+ return jsonify({'tables': [], 'msg': 'Invalid data source id'}), 400
+ else:
+ return jsonify(tables), 200
+
+
+@api_setting_module_blueprint.route('/register_basic_config', methods=['POST'])
+def regist_basic_config():
+ params = json.loads(request.data)
+
+ basic_config_yaml = BasicConfigYaml()
+
+ # Get Port number from YML Config
+ if params["info"]["port-no"] is None:
+ params["info"]["port-no"] = basic_config_yaml.dic_config['info'].get("port-no", 80)
+
+ # Set to show setting page as default
+ if params["info"]["hide-setting-page"] is None:
+ params["info"]["hide-setting-page"] = basic_config_yaml.dic_config['info'].get("hide-setting-page", False)
+
+ result = basic_config_yaml.write_json_to_yml_file(params)
+
+ if result:
+ message = {'message': _('Basic Config saved'), 'is_error': False}
+ else:
+ message = {'message': _('Basic Config failed to save'), 'is_error': True}
+
+ return jsonify(flask_message=message), 200
+
+
+@api_setting_module_blueprint.route('/check_db_connection', methods=['POST'])
+def check_db_connection():
+ """Check if we can connect to database. Supported databases: SQLite, PostgreSQL, MSSQLServer.
+ Returns:
+ HTTP Response - (True + OK message) if connection can be established, return (False + NOT OK message) otherwise.
+ """
+ params = json.loads(request.data).get("db")
+ db_type = params.get("db_type")
+ host = params.get("host")
+ port = params.get("port")
+ dbname = params.get("dbname")
+ schema = params.get("schema")
+ username = params.get("username")
+ password = params.get("password")
+
+ result = check_db_con(db_type, host, port, dbname, schema, username, password)
+
+ if result:
+ message = {"db_type": db_type, 'message': _("Connected"), 'connected': True}
+ else:
+ message = {"db_type": db_type, 'message': _("Failed to connect"), 'connected': False}
+
+ return jsonify(flask_message=message), 200
+
+
+@api_setting_module_blueprint.route('/show_latest_records', methods=['POST'])
+def show_latest_records():
+ """[summary]
+ Show 5 latest records
+ Returns:
+ [type] -- [description]
+ """
+ dic_form = request.form.to_dict()
+ data_source_id = dic_form.get("databaseName")
+ table_name = dic_form.get("tableName")
+ limit = parse_int_value(dic_form.get("limit")) or 5
+ cols_with_types, rows, cols_duplicated, previewed_files = get_latest_records(data_source_id, table_name, limit)
+ dic_preview_limit = gen_preview_data_check_dict(rows, previewed_files)
+ result = {'cols': cols_with_types,
+ 'rows': rows,
+ 'cols_duplicated': cols_duplicated,
+ 'fail_limit': dic_preview_limit,
+ }
+
+ return json.dumps(result, ensure_ascii=False, default=http_content.json_serial)
+
+
+@api_setting_module_blueprint.route('/get_csv_resources', methods=['POST'])
+def get_csv_resources():
+ folder_url = request.json.get('url')
+ etl_func = request.json.get('etl_func')
+ csv_delimiter = request.json.get('delimiter')
+
+ dic_output = preview_csv_data(folder_url, etl_func, csv_delimiter, 5)
+ rows = dic_output['content']
+ previewed_files = dic_output['previewed_files']
+ dic_preview_limit = gen_preview_data_check_dict(rows, previewed_files)
+ dic_output['fail_limit'] = dic_preview_limit
+
+ return jsonify(dic_output)
+
+
+@api_setting_module_blueprint.route('/job', methods=['POST'])
+def get_background_jobs():
+ return jsonify(background_jobs), 200
+
+
+@api_setting_module_blueprint.route('/listen_background_job', methods=['GET'])
+def listen_background_job():
+ def stream():
+ messages = background_announcer.listen() # returns a queue.Queue
+ while True:
+ msg = messages.get()
+ yield msg
+
+ return Response(stream(), mimetype='text/event-stream')
+
+
+@api_setting_module_blueprint.route('/check_folder', methods=['POST'])
+def check_folder():
+ try:
+ data = request.json.get('url')
+ return jsonify({
+ 'status': 200,
+ 'url': data,
+ 'is_exists': os.path.isdir(data) and os.path.exists(data),
+ 'dir': os.path.dirname(data)
+ })
+ except Exception:
+ # raise
+ return jsonify({
+ 'status': 500,
+ })
+
+
+@api_setting_module_blueprint.route('/job_detail/<job_id>', methods=['GET'])
+def get_job_detail(job_id):
+ """[Summary] Get get job details
+ Returns:
+ [json] -- [job details content]
+ """
+ job_details = get_job_detail_service(job_id=job_id)
+ return jsonify(job_details), 200
+
+
+@api_setting_module_blueprint.route('/delete_process', methods=['POST'])
+def delete_proc_from_db():
+ # get proc_id
+ params = json.loads(request.data)
+ proc_id = params.get('proc_id')
+
+ # delete config and add job to delete data
+ delete_proc_cfg_and_relate_jobs(proc_id)
+
+ return jsonify(result=dict()), 200
+
+
+@api_setting_module_blueprint.route('/save_order/<order_name>', methods=['POST'])
+def save_order(order_name):
+ """[Summary] Save orders to DB
+ Returns: 200/500
+ """
+ try:
+ orders = json.loads(request.data)
+ with make_session() as meta_session:
+ if order_name == UI_ORDER_DB:
+ for key, val in orders.items():
+ CfgDataSource.update_order(meta_session, key, val)
+ else:
+ for key, val in orders.items():
+ CfgProcess.update_order(meta_session, key, val)
+
+ except Exception:
+ traceback.print_exc()
+ return jsonify({}), 500
+
+ return jsonify({}), 200
+
+
+@api_setting_module_blueprint.route('/delete_datasource_cfg', methods=['POST'])
+def delete_datasource_cfg():
+ params = json.loads(request.data)
+ data_source_id = params.get('db_code')
+ if data_source_id:
+ del_data_source(data_source_id)
+
+ return jsonify(id=data_source_id), 200
+
+
+@api_setting_module_blueprint.route('/stop_job', methods=['POST'])
+def stop_jobs():
+ try:
+ if not is_local_client(request):
+ return jsonify({}), 403
+
+ # save log to db
+ with make_session() as meta_session:
+ t_app_log = AppLog()
+ t_app_log.ip = request.environ.get('X-Forwarded-For') or request.remote_addr
+ t_app_log.action = Action.SHUTDOWN_APP.name
+ t_app_log.description = request.user_agent.string
+ meta_session.add(t_app_log)
+ except Exception as ex:
+ traceback.print_exc()
+ logger.error(ex)
+
+ # backup database now
+ add_backup_dbs_job(True)
+
+ # add a job to check for shutdown time
+ add_shutdown_app_job()
+
+ return jsonify({}), 200
+
+
+@api_setting_module_blueprint.route('/shutdown', methods=['POST'])
+def shutdown():
+ if not is_local_client(request):
+ return jsonify({}), 403
+
+ logger.info('SHUTTING DOWN...')
+ os._exit(14)
+
+ return jsonify({}), 200
+
+
+@api_setting_module_blueprint.route('/proc_config', methods=['POST'])
+def post_proc_config():
+ process_schema = ProcessSchema()
+ proc_data = process_schema.load(request.json.get('proc_config'))
+ should_import_data = request.json.get('import_data')
+
+ try:
+ # get exists process from id
+ proc_id = proc_data.get(ProcessCfgConst.PROC_ID.value)
+ if proc_id:
+ process = get_process_cfg(int(proc_id))
+ if not process:
+ return jsonify({
+ 'status': 404,
+ 'message': 'Not found {}'.format(proc_id),
+ }), 200
+
+ process = create_or_update_process_cfg(proc_data)
+
+ # create process json
+ process_schema = ProcessSchema()
+ process_json = process_schema.dump(process) or {}
+
+ # import data
+ if should_import_data:
+ add_import_job(process, run_now=True)
+
+ return jsonify({
+ 'status': 200,
+ 'data': process_json,
+ }), 200
+ except Exception as ex:
+ traceback.print_exc()
+ return jsonify({
+ 'status': 500,
+ 'message': str(ex),
+ }), 500
+
+
+@api_setting_module_blueprint.route('/trace_config', methods=['GET'])
+def get_trace_configs():
+ """[Summary] Save orders to DB
+ Returns: 200/500
+ """
+ try:
+ procs = get_all_processes_traces_info()
+ return {'trace_config': json.dumps({'procs': procs})}, 200
+ except Exception:
+ traceback.print_exc()
+ return jsonify({}), 500
+
+
+@api_setting_module_blueprint.route('/trace_config', methods=['POST'])
+def save_trace_configs():
+ """[Summary] Save trace_configs to DB
+ Returns: 200/500
+ """
+ try:
+ params = json.loads(request.data)
+ save_trace_config_to_db(params)
+
+ job_id = JobType.GEN_GLOBAL.name
+ scheduler.add_job(job_id, gen_global_id_job, replace_existing=True,
+ trigger=DateTrigger(datetime.now().astimezone(utc), timezone=utc),
+ kwargs=dict(_job_id=job_id, _job_name=job_id, is_new_data_check=False))
+ except Exception:
+ traceback.print_exc()
+ return jsonify({}), 500
+
+ return jsonify({}), 200
+
+
+@api_setting_module_blueprint.route('/ds_load_detail/<ds_id>', methods=['GET'])
+def ds_load_detail(ds_id):
+ ds_schema = DataSourceSchema()
+ ds = CfgDataSource.get_ds(ds_id)
+ return ds_schema.dumps(ds), 200
+
+
+@api_setting_module_blueprint.route('/proc_config/<proc_id>', methods=['DELETE'])
+def del_proc_config(proc_id):
+ return jsonify({
+ 'status': 200,
+ 'data': {
+ 'proc_id': proc_id,
+ }
+ }), 200
+
+
+@api_setting_module_blueprint.route('/proc_config/<proc_id>', methods=['GET'])
+def get_proc_config(proc_id):
+ process = get_process_cfg(proc_id)
+ if process:
+ tables = query_database_tables(process['data_source_id'])
+ return jsonify({
+ 'status': 200,
+ 'data': process,
+ 'tables': tables
+ }), 200
+ else:
+ return jsonify({
+ 'status': 404,
+ 'data': 'Not found'
+ }), 200
+
+
+@api_setting_module_blueprint.route('/proc_filter_config/<proc_id>', methods=['GET'])
+def get_proc_config_filter_data(proc_id):
+ process = get_process_cfg(proc_id)
+ # filter_col_data = get_filter_col_data(process) or {}
+ filter_col_data = {}
+ if process:
+ return jsonify({
+ 'status': 200,
+ 'data': process,
+ 'filter_col_data': filter_col_data,
+ }), 200
+ else:
+ return jsonify({
+ 'status': 404,
+ 'data': {},
+ 'filter_col_data': {},
+ }), 200
+
+
+@api_setting_module_blueprint.route('/proc_config/<proc_id>/columns', methods=['GET'])
+def get_proc_column_config(proc_id):
+ columns = get_process_columns(proc_id)
+ if columns:
+ return jsonify({
+ 'status': 200,
+ 'data': columns,
+ }), 200
+ else:
+ return jsonify({
+ 'status': 404,
+ 'data': []
+ }), 200
+
+
+@api_setting_module_blueprint.route('/proc_config/<proc_id>/filters', methods=['GET'])
+def get_proc_filter_config(proc_id):
+ filters = get_process_filters(proc_id)
+ if filters:
+ return jsonify({
+ 'status': 200,
+ 'data': filters,
+ }), 200
+ else:
+ return jsonify({
+ 'status': 404,
+ 'data': []
+ }), 200
+
+
+@api_setting_module_blueprint.route('/proc_config/<proc_id>/visualizations', methods=['GET'])
+def get_proc_visualization_config(proc_id):
+ proc_with_visual_settings = get_process_visualizations(proc_id)
+ if proc_with_visual_settings:
+ return jsonify({
+ 'status': 200,
+ 'data': proc_with_visual_settings,
+ }), 200
+ else:
+ return jsonify({
+ 'status': 404,
+ 'data': []
+ }), 200
+
+
+@api_setting_module_blueprint.route('/filter_config', methods=['POST'])
+def save_filter_config_configs():
+ """[Summary] Save filter_config to DB
+ Returns: 200/500
+ """
+ try:
+ params = json.loads(request.data)
+ filter_id = save_filter_config(params)
+
+ proc_id = params.get('processId')
+ process = get_process_cfg(proc_id)
+ except Exception:
+ traceback.print_exc()
+ return jsonify({}), 500
+
+ return jsonify({'proc': process, 'filter_id': filter_id}), 200
+
+
+@api_setting_module_blueprint.route('/filter_config/<filter_id>', methods=['DELETE'])
+def delete_filter_config(filter_id):
+ """[Summary] delete filter_config from DB
+ Returns: 200/500
+ """
+ try:
+ delete_cfg_filter_from_db(filter_id)
+ except Exception:
+ traceback.print_exc()
+ return jsonify({}), 500
+
+ return jsonify({}), 200
+
+
+@api_setting_module_blueprint.route('/distinct_sensor_values/<cfg_col_id>', methods=['GET'])
+def get_sensor_distinct_values(cfg_col_id):
+ sensor_data = get_last_distinct_sensor_values(cfg_col_id)
+ if sensor_data:
+ return jsonify({
+ 'data': sensor_data,
+ }), 200
+ else:
+ return jsonify({
+ 'data': []
+ }), 200
+
+
+@api_setting_module_blueprint.route('/proc_config/<proc_id>/visualizations', methods=['POST'])
+def post_master_visualizations_config(proc_id):
+ try:
+ save_master_vis_config(proc_id, request.json)
+ proc_with_visual_settings = get_process_visualizations(proc_id)
+ return jsonify({
+ 'status': 200,
+ 'data': proc_with_visual_settings,
+ }), 200
+ except Exception as ex:
+ traceback.print_exc()
+ return jsonify({
+ 'status': 500,
+ 'message': str(ex),
+ }), 500
+
+
+@api_setting_module_blueprint.route('/simulate_proc_link', methods=['POST'])
+def simulate_proc_link():
+ """[Summary] simulate proc link id
+ Returns: 200/500
+ """
+ traces = json.loads(request.data)
+ cfg_traces = [gen_cfg_trace(trace) for trace in traces]
+
+ dic_proc_cnt, dic_edge_cnt = sim_gen_global_id(cfg_traces)
+
+    # if a process or edge key is missing from the result dicts, set its count to zero
+ for cfg_trace in cfg_traces:
+
+ self_proc_id = cfg_trace.self_process_id
+ target_proc_id = cfg_trace.target_process_id
+ edge_id = f'{self_proc_id}-{target_proc_id}'
+
+ if dic_proc_cnt.get(self_proc_id) is None:
+ dic_proc_cnt[self_proc_id] = 0
+
+ if dic_proc_cnt.get(target_proc_id) is None:
+ dic_proc_cnt[target_proc_id] = 0
+
+ if dic_edge_cnt.get(edge_id) is None:
+ dic_edge_cnt[edge_id] = 0
+
+ return jsonify(nodes=dic_proc_cnt, edges=dic_edge_cnt), 200
+
+
+@api_setting_module_blueprint.route('/count_proc_link', methods=['POST'])
+def count_proc_link():
+ """[Summary] count proc link id
+ Returns: 200/500
+ """
+ dic_proc_cnt, dic_edge_cnt = show_proc_link_info()
+ return jsonify(nodes=dic_proc_cnt, edges=dic_edge_cnt), 200
+
+
+@api_setting_module_blueprint.route('/to_eng', methods=['POST'])
+def to_eng():
+ request_col = request.json
+ col_english_name = to_romaji(request_col['colname'])
+ return jsonify({'status': 200, 'data': col_english_name}), 200
+
+
+@api_setting_module_blueprint.route('/list_to_english', methods=['POST'])
+def list_to_english():
+ request_json = request.json
+ raw_english_names = request_json.get('english_names') or []
+ romaji_english_names = [to_romaji(raw_name) for raw_name in raw_english_names]
+
+ return jsonify({'status': 200, 'data': romaji_english_names}), 200
+
+
+@api_setting_module_blueprint.route('/user_setting', methods=['POST'])
+def save_user_setting():
+ """[Summary] Save user settings to DB
+ Returns: 200/500
+ """
+ try:
+ params = json.loads(request.data)
+ save_user_settings(params)
+
+ # find setting id after creating a new setting
+ setting = parse_user_setting(params)
+ if not setting.id:
+ setting = CfgUserSetting.get_by_title(setting.title)[0]
+ setting = CfgUserSettingSchema().dump(setting)
+ except Exception as ex:
+ logger.exception(ex)
+ return jsonify({'status': 'error'}), 500
+
+ return jsonify({'status': 200, 'data': setting}), 200
+
+
+@api_setting_module_blueprint.route('/user_settings', methods=['GET'])
+def get_user_settings():
+ settings = get_all_user_settings()
+ return jsonify({'status': 200, 'data': settings}), 200
+
+
+@api_setting_module_blueprint.route('/user_setting/<setting_id>', methods=['GET'])
+def get_user_setting(setting_id):
+ setting_id = parse_int_value(setting_id)
+ setting = get_setting(setting_id)
+ if not setting:
+ return jsonify({}), 404
+
+ return jsonify({'status': 200, 'data': setting}), 200
+
+
+@api_setting_module_blueprint.route('/user_setting_page_top', methods=['GET'])
+def get_user_setting_page_top():
+ page = request.args.get("page")
+ if not page:
+ return jsonify({}), 400
+
+ setting = get_page_top_setting(page) or {}
+
+ return jsonify({'status': 200, 'data': setting}), 200
+
+
+@api_setting_module_blueprint.route('/user_setting/<setting_id>', methods=['DELETE'])
+def delete_user_setting(setting_id):
+ """[Summary] delete user_setting from DB
+ Returns: 200/500
+ """
+ try:
+ setting_id = parse_int_value(setting_id)
+ if not setting_id:
+ return jsonify({}), 400
+
+ delete_user_setting_by_id(setting_id)
+
+ except Exception as ex:
+ logger.exception(ex)
+ return jsonify({}), 500
+
+ return jsonify({}), 200
+
+
+@api_setting_module_blueprint.route('/get_env', methods=['GET'])
+def get_current_env():
+ current_env = os.environ.get('ANALYSIS_INTERFACE_ENV', appENV.DEVELOPMENT.value)
+ return jsonify({'status': 200, 'env': current_env}), 200
+
+
+@api_setting_module_blueprint.route('/load_user_setting', methods=['POST'])
+def load_user_setting():
+ request_data = json.loads(request.data)
+ setting_id = request_data.get('setting_id')
+ dic_orig_settings = request_data.get('dic_original_setting')
+ active_form = request_data.get('active_form')
+ shared_setting = request_data.get('shared_user_setting')
+ if setting_id:
+ setting_id = parse_int_value(setting_id)
+ dic_setting = get_setting(setting_id)
+ if not dic_setting:
+ return jsonify({}), 404
+
+ else:
+ dic_setting = {}
+ dic_src_settings = {'dataForm': shared_setting}
+
+ dic_des_setting = dic_orig_settings
+ if active_form and active_form in dic_orig_settings:
+ dic_des_setting = {active_form: dic_orig_settings[active_form]}
+
+ mapping_groups = map_form(dic_src_settings, dic_des_setting)
+
+ dic_setting['settings'] = transform_settings(mapping_groups)
+
+ return jsonify({'status': 200, 'data': dic_setting}), 200
+
+
+@api_setting_module_blueprint.route('/check_exist_title_setting', methods=['POST'])
+def check_exist_title_setting():
+ """[Summary] Check input title setting is exist on DB or not
+ Returns: status: 200/500 and is_exist: True/False
+ """
+ try:
+ params = json.loads(request.data)
+ is_exist = is_title_exist(params.get('title'))
+ except Exception as ex:
+ logger.exception(ex)
+ return jsonify({'status': 'error'}), 500
+
+ return jsonify({'status': 'ok', 'is_exist': is_exist}), 200
diff --git a/histview2/api/setting_module/services/__init__.py b/histview2/api/setting_module/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/setting_module/services/common.py b/histview2/api/setting_module/services/common.py
new file mode 100644
index 0000000..931d954
--- /dev/null
+++ b/histview2/api/setting_module/services/common.py
@@ -0,0 +1,83 @@
+from loguru import logger
+
+from histview2.common.common_utils import parse_int_value
+from histview2.setting_module.models import make_session, CfgUserSetting, insert_or_update_config
+from histview2.setting_module.schemas import CfgUserSettingSchema
+
+
+def is_local_client(req):
+ try:
+ client_ip = req.environ.get('X-Forwarded-For') or req.remote_addr
+ accepted_ips = ['127.0.0.1', 'localhost']
+ if client_ip in accepted_ips:
+ return True
+ except Exception as ex:
+ logger.exception(ex)
+
+ return False
+
+
+def save_user_settings(request_params):
+ with make_session() as meta_session:
+ cfg_user_setting = parse_user_setting(request_params)
+ insert_or_update_config(meta_session, cfg_user_setting)
+ meta_session.commit()
+ return True
+
+
+def parse_user_setting(params):
+ setting_id = parse_int_value(params.get('id'))
+ title = params.get('title') or ''
+ page = params.get('page') or ''
+ key = '{}|{}'.format(page, title) # TODO use page + title for now
+ created_by = params.get('created_by') or ''
+ priority = parse_int_value(params.get('priority')) or 1
+ use_current_time = bool(params.get('use_current_time'))
+ description = params.get('description') or ''
+ share_info = bool(params.get('share_info'))
+ settings = params.get('settings') or '[]'
+
+ cfg_user_setting = CfgUserSetting(**{
+ 'id': setting_id,
+ 'key': key,
+ 'title': title,
+ 'page': page,
+ 'created_by': created_by,
+ 'priority': priority,
+ 'use_current_time': use_current_time,
+ 'description': description,
+ 'share_info': share_info,
+ 'settings': settings,
+ })
+
+ return cfg_user_setting
+
+
+def get_all_user_settings():
+ user_setting_schema = CfgUserSettingSchema(exclude=[CfgUserSetting.settings.key])
+ user_settings = CfgUserSetting.get_all() or []
+ # TODO push current page setting to top
+ return [user_setting_schema.dump(user_setting) for user_setting in user_settings]
+
+
+def get_setting(setting_id):
+ user_setting_schema = CfgUserSettingSchema()
+ user_setting = CfgUserSetting.get_by_id(setting_id) or []
+ # TODO push current page setting to top
+ return user_setting_schema.dump(user_setting)
+
+
+def get_page_top_setting(page):
+ user_setting_schema = CfgUserSettingSchema()
+ user_setting = CfgUserSetting.get_top(page) or []
+ return user_setting_schema.dump(user_setting)
+
+
+def delete_user_setting_by_id(setting_id):
+ with make_session() as mss:
+ CfgUserSetting.delete_by_id(mss, setting_id)
+
+
+def is_title_exist(title):
+ user_settings = CfgUserSetting.get_by_title(title)
+ return True if user_settings else False
diff --git a/histview2/api/setting_module/services/csv_import.py b/histview2/api/setting_module/services/csv_import.py
new file mode 100644
index 0000000..b383499
--- /dev/null
+++ b/histview2/api/setting_module/services/csv_import.py
@@ -0,0 +1,595 @@
+import math
+from datetime import datetime
+from io import BytesIO
+from typing import List
+
+import pandas as pd
+from dateutil import tz
+from pandas import DataFrame
+
+from histview2.api.efa.services.etl import csv_transform, detect_file_delimiter
+from histview2.api.parallel_plot.services import gen_dic_sensors
+from histview2.api.setting_module.services.data_import import csv_data_with_headers, import_data, \
+ save_sensors, \
+ RECORD_PER_COMMIT, get_new_adding_columns, gen_import_job_info, NA_VALUES, \
+ data_pre_processing, gen_substring_column_info, gen_dic_sensor_n_cls, add_new_col_to_df, convert_df_col_to_utc, \
+ convert_df_datetime_to_str, validate_datetime, get_sensor_values, INDEX_COL, \
+ gen_error_output_df, get_df_first_n_last, write_error_trace, write_error_import, get_latest_records, FILE_IDX_COL, \
+ gen_duplicate_output_df, write_duplicate_import
+from histview2.api.trace_data.services.proc_link import add_gen_proc_link_job
+from histview2.common.common_utils import get_files, get_csv_delimiter, detect_encoding, get_file_modify_time, chunks, \
+ get_current_timestamp, detect_file_encoding, convert_time, DATE_FORMAT_STR_ONLY_DIGIT, get_ip_address, get_basename
+from histview2.common.constants import JobStatus, DataType
+from histview2.common.logger import log_execution_time
+from histview2.common.scheduler import scheduler_app_context, JobType
+from histview2.common.services.csv_content import read_data, is_normal_csv
+from histview2.common.services.normalization import normalize_str, normalize_list
+from histview2.common.timezone_utils import detect_timezone, get_utc_offset
+from histview2.setting_module.models import CsvImport, CfgProcess, CfgDataSourceCSV, CfgProcessColumn, JobManagement
+from histview2.setting_module.services.background_process import send_processing_info, JobInfo
+from histview2.trace_data.models import Process, find_cycle_class
+
+pd.options.mode.chained_assignment = None # default='warn'
+
+
+@scheduler_app_context
+def import_csv_job(_job_id, _job_name, _db_id, _proc_id, _proc_name, *args, **kwargs):
+ """ scheduler job import csv
+
+ Keyword Arguments:
+ _job_id {[type]} -- [description] (default: {None})
+ _job_name {[type]} -- [description] (default: {None})
+ """
+ gen = import_csv(*args, **kwargs)
+ send_processing_info(gen, JobType.CSV_IMPORT, db_code=_db_id, process_id=_proc_id, process_name=_proc_name,
+ after_success_func=add_gen_proc_link_job)
+
+
+@log_execution_time()
+def import_csv(proc_id, record_per_commit=RECORD_PER_COMMIT):
+ """ csv files import
+
+ Keyword Arguments:
+        proc_id -- id of the process to import
+        record_per_commit -- number of records to accumulate per commit chunk
+
+ Raises:
+ e: [description]
+
+ Yields:
+ [type] -- [description]
+ """
+ # start job
+ yield 0
+
+ # get db info
+ proc_cfg: CfgProcess = CfgProcess.query.get(proc_id)
+ data_src: CfgDataSourceCSV = CfgDataSourceCSV.query.get(proc_cfg.data_source_id)
+
+ # create or get process
+ proc = Process.get_or_create_proc(proc_id=proc_id, proc_name=proc_cfg.name)
+
+ # get import files
+ import_targets, no_data_files = get_import_target_files(proc_id, data_src)
+
+ # csv delimiter
+ csv_delimiter = get_csv_delimiter(data_src.delimiter)
+
+ # get header
+ headers = data_src.get_column_names_with_sorted()
+ dic_use_cols = {col.column_name: col.data_type for col in proc_cfg.columns}
+
+    # report 100% and finish when there are no files to import
+ if not import_targets:
+ yield 100
+ return
+
+ # cycle class
+ cycle_cls = find_cycle_class(proc_id)
+
+ # check new adding column, save.
+ missing_sensors = get_new_adding_columns(proc, dic_use_cols)
+ save_sensors(missing_sensors)
+
+ # sensor classes
+ dic_sensor, dic_sensor_cls = gen_dic_sensor_n_cls(proc_id, dic_use_cols)
+
+ # substring sensors info
+ dic_substring_sensors = gen_substring_column_info(proc_id, dic_sensor)
+
+ # get GET_DATE
+ get_date_col = proc_cfg.get_date_col()
+
+    # depending on the file type (EFA 1,2,3,4 or normal CSV), choose the right header
+ default_csv_param = {}
+ use_col_names = []
+ if not data_src.etl_func and import_targets and not is_normal_csv(import_targets[-1][0], csv_delimiter):
+ is_abnormal = True
+ default_csv_param['names'] = headers
+ use_col_names = headers
+ data_first_row = data_src.skip_head + 1
+ head_skips = list(range(0, data_first_row))
+ else:
+ is_abnormal = False
+ data_first_row = data_src.skip_head + 1
+ head_skips = list(range(0, data_src.skip_head))
+
+ total_percent = 0
+ percent_per_file = 100 / len(import_targets)
+ dic_imported_row = {}
+ df = pd.DataFrame()
+
+ # init job information object
+ job_info = JobInfo()
+ job_info.empty_files = []
+
+    # files that could not be transformed by the R script
+ transformed_file_delimiter = csv_delimiter
+ for csv_file_name in no_data_files:
+ job_info.status = JobStatus.DONE
+ job_info.empty_files = [csv_file_name]
+ yield from yield_job_info(job_info, csv_file_name)
+ job_info.empty_files = []
+
+ # get current job id
+ t_job_management: JobManagement = JobManagement.get_last_job_of_process(proc_id, JobType.CSV_IMPORT.name)
+ job_id = str(t_job_management.id) if t_job_management else ''
+
+ df_db_latest_records = None
+ for idx, (csv_file_name, transformed_file) in enumerate(import_targets):
+ job_info.target = csv_file_name
+
+ if not dic_imported_row:
+ job_info.start_tm = get_current_timestamp()
+
+ # R error check
+ if isinstance(transformed_file, Exception):
+ yield from yield_job_info(job_info, csv_file_name, err_msgs=str(transformed_file))
+ continue
+
+ # delimiter check
+ transformed_file_delimiter = detect_file_delimiter(transformed_file, csv_delimiter)
+
+ # check missing columns
+ if is_abnormal is False:
+ # check missing columns
+ check_file = read_data(transformed_file, skip_head=data_src.skip_head, end_row=1,
+ delimiter=transformed_file_delimiter,
+ do_normalize=False)
+ csv_cols = next(check_file)
+ csv_cols_normalized = normalize_list(csv_cols)
+
+ check_file.close()
+ missing_cols = set(dic_use_cols).difference(csv_cols_normalized)
+ if missing_cols:
+ err_msg = f"File {transformed_file} doesn't contain expected columns: {missing_cols}"
+
+ df_one_file = csv_to_df(transformed_file, data_src, head_skips, data_first_row, 0,
+ transformed_file_delimiter)
+
+ if df_db_latest_records is None:
+ df_db_latest_records = get_latest_records(proc_id, dic_sensor, get_date_col)
+ df_error_trace = gen_error_output_df(csv_file_name, dic_sensor, get_df_first_n_last(df_one_file),
+ df_db_latest_records, err_msg)
+
+ write_error_trace(df_error_trace, proc_cfg.name, csv_file_name)
+ write_error_import(df_one_file, proc_cfg.name, csv_file_name, transformed_file_delimiter,
+ data_src.directory)
+
+ yield from yield_job_info(job_info, csv_file_name, err_msgs=err_msg)
+ continue
+
+ default_csv_param['usecols'] = [col for col in csv_cols if col]
+ use_col_names = csv_cols
+
+ # read csv file
+ default_csv_param['dtype'] = {col: 'string' for col, data_type in dic_use_cols.items() if
+ col in use_col_names and data_type == DataType.TEXT.name}
+
+ df_one_file = csv_to_df(transformed_file, data_src, head_skips, data_first_row, 0, transformed_file_delimiter,
+ default_csv_param=default_csv_param)
+
+ # validate column name
+ validate_columns(dic_use_cols, df_one_file.columns)
+
+ file_record_count = len(df_one_file)
+
+ # no records
+ if not file_record_count:
+ job_info.status = JobStatus.DONE
+ job_info.empty_files = [csv_file_name]
+ yield from yield_job_info(job_info, csv_file_name)
+ job_info.empty_files = []
+ continue
+
+ dic_imported_row[idx] = (csv_file_name, file_record_count)
+
+ # add 3 columns machine, line, process for efa 1,2,4
+ if is_abnormal:
+ cols, vals = csv_data_with_headers(csv_file_name, data_src)
+ df_one_file[cols] = vals
+
+ # remove unused columns
+ df_one_file = df_one_file[list(dic_use_cols)]
+
+ # mark file
+ df_one_file[FILE_IDX_COL] = idx
+
+ # merge df
+ df = df.append(df_one_file, ignore_index=True)
+
+        # keep accumulating until the buffered dataframe reaches about record_per_commit * 100 cells (rows x columns)
+ if len(df) * len(df.columns) < record_per_commit * 100:
+ continue
+
+ # calc percent
+ percent_per_commit = percent_per_file * len(dic_imported_row)
+
+ # do import
+ save_res, df_error, df_duplicate = import_df(proc_id, df, dic_use_cols, get_date_col, cycle_cls, dic_sensor,
+ dic_sensor_cls, dic_substring_sensors)
+
+ df_error_cnt = len(df_error)
+ if df_error_cnt:
+ if df_db_latest_records is None:
+ df_db_latest_records = get_latest_records(proc_id, dic_sensor, get_date_col)
+ write_invalid_records_to_file(df_error, dic_imported_row, dic_sensor, df_db_latest_records,
+ proc_cfg, transformed_file_delimiter, data_src.directory)
+
+ if df_duplicate is not None and len(df_duplicate):
+ write_duplicate_records_to_file(df_duplicate, dic_imported_row, dic_use_cols, proc_cfg.name, job_id)
+
+ total_percent = set_csv_import_percent(job_info, total_percent, percent_per_commit)
+ for _idx, (_csv_file_name, _imported_row) in dic_imported_row.items():
+ yield from yield_job_info(job_info, _csv_file_name, _imported_row, save_res, df_error_cnt)
+
+ # reset df (important!!!)
+ df = pd.DataFrame()
+ dic_imported_row = {}
+
+ # do last import
+ if len(df):
+ save_res, df_error, df_duplicate = import_df(proc_id, df, dic_use_cols, get_date_col, cycle_cls, dic_sensor,
+ dic_sensor_cls, dic_substring_sensors)
+
+ df_error_cnt = len(df_error)
+ if df_error_cnt:
+ if df_db_latest_records is None:
+ df_db_latest_records = get_latest_records(proc_id, dic_sensor, get_date_col)
+ write_invalid_records_to_file(df_error, dic_imported_row, dic_sensor, df_db_latest_records,
+ proc_cfg, transformed_file_delimiter, data_src.directory)
+
+ if df_duplicate is not None and len(df_duplicate):
+ write_duplicate_records_to_file(df_duplicate, dic_imported_row, dic_use_cols, proc_cfg.name, job_id)
+
+ for _idx, (_csv_file_name, _imported_row) in dic_imported_row.items():
+ yield from yield_job_info(job_info, _csv_file_name, _imported_row, save_res, df_error_cnt)
+
+ yield 100
+
+
+def set_csv_import_percent(job_info, total_percent, percent_per_chunk):
+ total_percent += percent_per_chunk
+ job_info.percent = math.floor(total_percent)
+ if job_info.percent >= 100:
+ job_info.percent = 99
+
+ return total_percent
+
+
+@log_execution_time()
+def get_last_csv_import_info(process_id):
+ """ get latest csv import info
+ """
+
+ latest_import_files = CsvImport.get_latest_done_files(process_id)
+ dic_imported_file = {rec.file_name: rec.start_tm for rec in latest_import_files}
+ csv_fatal_imports = CsvImport.get_last_fatal_import(process_id)
+ dic_fatal_file = {rec.file_name: rec.start_tm for rec in csv_fatal_imports}
+
+ return dic_imported_file, dic_fatal_file
+
+
+@log_execution_time()
+def filter_import_target_file(proc_id, all_files, dic_success_file: dict, dic_error_file: dict, is_transform=False):
+ """filter import target file base on last import job
+
+ Arguments:
+ all_files {[type]} -- [description]
+ dic_success_file {dict} -- [description]
+ dic_error_file {dict} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+
+ has_transform_targets = []
+ no_transform_targets = []
+ for file_name in all_files:
+ if file_name in dic_error_file:
+ pass
+ elif file_name in dic_success_file:
+ modified_date = get_file_modify_time(file_name)
+ imported_datetime = dic_success_file[file_name]
+ if modified_date <= imported_datetime:
+ continue
+
+        # transform the file (ETL pre-processing) when an etl_func is configured
+ transformed_file = file_name
+ if is_transform:
+ transformed_file = csv_transform(proc_id, file_name)
+
+ if transformed_file:
+ has_transform_targets.append((file_name, transformed_file))
+ else:
+ no_transform_targets.append(file_name)
+
+ return has_transform_targets, no_transform_targets
+
+
+def validate_columns(checked_cols, csv_cols):
+ """
+ check if checked column exists in csv file
+ :param checked_cols:
+ :param csv_cols:
+ :return:
+ """
+ ng_cols = set(checked_cols) - set(csv_cols)
+ if ng_cols:
+ raise Exception('CSVファイルの列名・列数が正しくないです。')
+
+
+@log_execution_time()
+def csv_to_df(transformed_file, data_src, head_skips, data_first_row, skip_row, csv_delimiter, default_csv_param=None,
+ from_file=False):
+ # read csv file
+ read_csv_param = {}
+ if default_csv_param:
+ read_csv_param.update(default_csv_param)
+
+ read_csv_param.update(dict(skiprows=head_skips + list(range(data_first_row, skip_row + data_first_row))))
+
+ # get encoding
+ if from_file:
+ encoding = detect_file_encoding(transformed_file)
+ transformed_file = BytesIO(transformed_file)
+ else:
+ encoding = detect_encoding(transformed_file)
+
+ # load csv data to dataframe
+ df = pd.read_csv(transformed_file, sep=csv_delimiter, skipinitialspace=True, na_values=NA_VALUES,
+ error_bad_lines=False, encoding=encoding, skip_blank_lines=True, **read_csv_param)
+ df.dropna(how='all', inplace=True)
+ col_names = {col: normalize_str(col) for col in df.columns}
+ df = df.rename(columns=col_names)
+
+ # skip tail
+ if data_src.skip_tail and len(df):
+ df.drop(df.tail(data_src.skip_tail).index, inplace=True)
+
+ return df
+
+
+@log_execution_time()
+def get_import_target_files(proc_id, data_src):
+ dic_success_file, dic_error_file = get_last_csv_import_info(proc_id)
+ csv_files = get_files(data_src.directory, depth_from=1, depth_to=100, extension=['csv', 'tsv'])
+
+ # transform csv files (pre-processing)
+ is_transform = False
+ if data_src.etl_func:
+ is_transform = True
+
+ # filter target files
+ has_trans_targets, no_trans_targets = filter_import_target_file(proc_id, csv_files, dic_success_file,
+ dic_error_file, is_transform)
+ return has_trans_targets, no_trans_targets
+
+
+def strip_quote(val):
+ try:
+ return val.strip("'").strip()
+ except AttributeError:
+ return val
+
+
+@log_execution_time()
+def strip_quote_in_df(df: DataFrame):
+ """
+ strip quote and space
+ :param df:
+ :return:
+ """
+    # strip quotes element-wise (DataFrame.apply would hand whole columns to strip_quote)
+    cols = df.select_dtypes(include=['string', 'object']).columns.tolist()
+    df[cols] = df[cols].applymap(strip_quote)
+
+ return df
+
+
+@log_execution_time()
+def get_datetime_val(datetime_col):
+ """
+    Get the first valid datetime value; it is used to decide how to convert the column to UTC
+    :return:
+    """
+    # take the first non-null value as a representative datetime
+ valid_datetime_idx = datetime_col.first_valid_index()
+ datetime_val = datetime_col.loc[valid_datetime_idx] if valid_datetime_idx is not None else None
+ return datetime_val
+
+
+@log_execution_time()
+def copy_df(df):
+ orig_df = df.copy()
+ return orig_df
+
+
+@log_execution_time()
+def remove_duplicates(df: DataFrame, df_origin: DataFrame, proc_id, get_date_col):
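+    # drop rows duplicated inside the csv itself, then compare against DB records in the same
+    # time range and drop rows that already exist there; the dropped rows are returned for reporting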
+ # get columns that use to check duplicate
+ # df_cols = list(set(df.columns.tolist()) - set([INDEX_COL, FILE_IDX_COL]))
+
+ # remove duplicate in csv files
+ # df.drop_duplicates(subset=df_cols, keep='last', inplace=True)
+ df.drop_duplicates(keep='last', inplace=True)
+ index_col = add_new_col_to_df(df, '__df_index_column__', df.index)
+
+ # get min max time of df
+ start_tm, end_tm = get_min_max_date(df, get_date_col)
+
+ # get sensors
+ cfg_columns: List[CfgProcessColumn] = CfgProcessColumn.get_all_columns(proc_id)
+ cfg_columns.sort(key=lambda c: c.is_serial_no + c.is_get_date + c.is_auto_increment, reverse=True)
+
+ col_names = [cfg_col.column_name for cfg_col in cfg_columns]
+ dic_sensors = gen_dic_sensors(proc_id, col_names)
+
+ cycle_cls = find_cycle_class(proc_id)
+ idxs = None
+ for cols in chunks(col_names, 10):
+ # get data from database
+ records = get_sensor_values(proc_id, cols, dic_sensors, cycle_cls, start_tm=start_tm, end_tm=end_tm)
+ if not records:
+ break
+
+ df_db = pd.DataFrame(records)
+ df_db.drop(INDEX_COL, axis=1, inplace=True)
+ df_db.drop_duplicates(inplace=True)
+
+ # remove duplicate df vs df_db
+ _idxs = get_duplicate_info(df, df_db, index_col, idxs)
+
+        # duplicates could not be checked with these columns (no common column to compare on),
+        # so treat this chunk as having no duplicates
+ if _idxs is None:
+ continue
+
+ # filter idxs
+ idxs = _idxs
+
+ # no duplicate
+ if not len(idxs):
+ break
+
+ if idxs:
+ df.drop(idxs, inplace=True)
+ df.drop(index_col, axis=1, inplace=True)
+
+ # duplicate data
+ df_duplicate = df_origin[~df_origin.index.isin(df.index)]
+
+ return df_duplicate
+
+
+@log_execution_time()
+def get_min_max_date(df: DataFrame, get_date_col):
+ return df[get_date_col].min(), df[get_date_col].max()
+
+
+@log_execution_time()
+def get_duplicate_info(df_csv: DataFrame, df_db: DataFrame, df_index_col, idxs):
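+    # align dtypes of the shared columns, then inner-merge the csv rows with the DB rows;
+    # rows present on both sides are duplicates and their csv indices are returned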
+ col_names = df_db.columns.tolist()
+ all_cols = col_names + [df_index_col]
+ if idxs:
+ df = df_csv.loc[idxs][all_cols].copy()
+ else:
+ df = df_csv[all_cols].copy()
+
+ for col in col_names:
+ if df[col].dtype.name != df_db[col].dtype.name:
+ df[col] = df[col].astype(object)
+ df_db[col] = df_db[col].astype(object)
+
+ df_merged = pd.merge(df, df_db, on=col_names)
+ idxs = df_merged[df_index_col].to_list()
+ return idxs
+
+
+@log_execution_time()
+def import_df(proc_id, df, dic_use_cols, get_date_col, cycle_cls, dic_sensor, dic_sensor_cls, dic_substring_sensors):
+ if not len(df):
+ return 0, None, None
+
+ # convert types
+ df = df.convert_dtypes()
+
+ # original df
+ orig_df = copy_df(df)
+
+ # remove FILE INDEX col
+ if FILE_IDX_COL in df.columns:
+ df.drop(FILE_IDX_COL, axis=1, inplace=True)
+
+ # Convert UTC time
+ for col, dtype in dic_use_cols.items():
+ if DataType[dtype] is not DataType.DATETIME:
+ continue
+
+ null_is_error = False
+ if col == get_date_col:
+ null_is_error = True
+
+ validate_datetime(df, col, null_is_error=null_is_error)
+ convert_csv_timezone(df, col)
+
+ # data pre-processing
+ df_error = data_pre_processing(df, orig_df, dic_use_cols, exclude_cols=[get_date_col, FILE_IDX_COL, INDEX_COL])
+
+ # no records
+ if not len(df):
+ return 0, df_error, None
+
+ # remove duplicate records in csv file which exists in csv or DB
+ df_duplicate = remove_duplicates(df, orig_df, proc_id, get_date_col)
+
+ save_res = import_data(df, proc_id, get_date_col, cycle_cls, dic_sensor, dic_sensor_cls, dic_substring_sensors)
+ return save_res, df_error, df_duplicate
+
+
+def yield_job_info(job_info, csv_file_name, imported_row=0, save_res=0, df_error_cnt=0, err_msgs=None):
+ job_info.target = csv_file_name
+ job_info.err_msg = None
+ job_info.status = JobStatus.DONE
+ gen_import_job_info(job_info, save_res, end_time=get_current_timestamp(), imported_count=imported_row,
+ err_cnt=df_error_cnt, err_msgs=err_msgs)
+ yield job_info
+
+
+@log_execution_time()
+def convert_csv_timezone(df, get_date_col):
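+    # if a sample value already carries timezone info, convert it directly to UTC;
+    # otherwise assume the OS local timezone and shift by its UTC offset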
+ datetime_val = get_datetime_val(df[get_date_col])
+ is_tz_inside = bool(detect_timezone(datetime_val))
+ time_offset = get_utc_offset(tz.tzlocal()) if not is_tz_inside else None
+ df[get_date_col] = convert_df_col_to_utc(df, get_date_col, is_tz_inside, time_offset)
+ df[get_date_col] = convert_df_datetime_to_str(df, get_date_col)
+
+
+def write_invalid_records_to_file(df_error: DataFrame, dic_imported_row, dic_sensor, df_db, proc_cfg,
+ transformed_file_delimiter, data_src_folder, err_msg=None):
+ idxs = df_error[FILE_IDX_COL].unique()
+ for idx in idxs:
+ csv_file_name, *_ = dic_imported_row[idx]
+ df_error_one_file = df_error[df_error[FILE_IDX_COL] == idx]
+ df_error_one_file.drop(FILE_IDX_COL, axis=1, inplace=True)
+ df_error_trace = gen_error_output_df(csv_file_name, dic_sensor,
+ get_df_first_n_last(df_error_one_file), df_db, err_msg)
+ write_error_trace(df_error_trace, proc_cfg.name, csv_file_name)
+ write_error_import(df_error_one_file, proc_cfg.name, csv_file_name, transformed_file_delimiter, data_src_folder)
+ return True
+
+
+def write_duplicate_records_to_file(df_duplicate: DataFrame, dic_imported_row, dic_use_cols, proc_name, job_id=None):
+ error_msg = 'Duplicate Record'
+ time_str = convert_time(datetime.now(), format_str=DATE_FORMAT_STR_ONLY_DIGIT)[4:-3]
+ ip_address = get_ip_address()
+
+ for idx, df in df_duplicate.groupby(FILE_IDX_COL):
+ csv_file_path_name, *_ = dic_imported_row[idx]
+ csv_file_name = get_basename(csv_file_path_name) if csv_file_path_name else ''
+
+ df.drop(FILE_IDX_COL, axis=1, inplace=True)
+ df_output = gen_duplicate_output_df(dic_use_cols, get_df_first_n_last(df),
+ csv_file_name=csv_file_path_name, error_msgs=error_msg)
+
+ write_duplicate_import(df_output, [proc_name, csv_file_name, 'Duplicate', job_id, time_str, ip_address])
diff --git a/histview2/api/setting_module/services/data_import.py b/histview2/api/setting_module/services/data_import.py
new file mode 100644
index 0000000..733bdb1
--- /dev/null
+++ b/histview2/api/setting_module/services/data_import.py
@@ -0,0 +1,1269 @@
+import os.path
+import re
+import traceback
+from collections import defaultdict
+from datetime import datetime
+from typing import List
+
+import numpy as np
+import pandas as pd
+from apscheduler.triggers.date import DateTrigger
+from dateutil import tz
+from loguru import logger
+from pandas import DataFrame
+from pytz import utc
+from sqlalchemy import and_
+
+from histview2 import db, scheduler
+from histview2.common.common_utils import (
+ parse_int_value,
+ make_dir_from_file_path, get_current_timestamp, get_csv_delimiter, DATE_FORMAT_STR, convert_time,
+ DATE_FORMAT_STR_ONLY_DIGIT, split_path_to_list, get_error_trace_path, get_error_import_path, get_basename,
+ get_ip_address, chunks
+)
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.pydn.dblib.db_proxy import DbProxy, gen_data_source_of_universal_db
+from histview2.common.memoize import set_all_cache_expired
+from histview2.common.scheduler import scheduler_app_context, JobType, lock
+from histview2.common.services import csv_header_wrapr as chw
+from histview2.common.services.csv_content import read_data
+from histview2.common.services.normalization import normalize_df, normalize_str
+from histview2.common.services.sse import background_announcer, AnnounceEvent
+from histview2.common.timezone_utils import calc_offset_between_two_tz
+from histview2.setting_module.models import CfgConstant, CfgDataSource, CfgDataSourceDB, CfgProcess
+from histview2.setting_module.services.background_process import send_processing_info
+from histview2.trace_data.models import Sensor, find_sensor_class, SensorType, find_cycle_class, CYCLE_CLASSES, Cycle
+
+# csv_import : max id of cycles
+# ( because of csv import performance, we make a deposit/a guess of cycle id number
+# to avoid conflict of other csv import thread/job )
+csv_import_cycle_max_id = None
+
+# index column in df
+INDEX_COL = '__INDEX__'
+CYCLE_TIME_COL = '__time__'
+
+# file index col in df
+FILE_IDX_COL = '__FILE_INDEX__'
+
+# max insert record per job
+RECORD_PER_COMMIT = 10_000
+
+# range of time per sql
+
+# N/A value lists
+PANDAS_DEFAULT_NA = {'#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', '',
+ 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'}
+NA_VALUES = {'na', '-', '--', '---', '#NULL!', '#REF!', '#VALUE!', '#NUM!', '#NAME?', '0/0'}
+INF_VALUES = {'Inf', 'Infinity', '1/0', '#DIV/0!'}
+INF_NEG_VALUES = {'-Inf', '-Infinity', '-1/0'}
+
+ALL_SYMBOLS = set(PANDAS_DEFAULT_NA | NA_VALUES | INF_VALUES | INF_NEG_VALUES)
+SPECIAL_SYMBOLS = ALL_SYMBOLS - {'-'}
+IS_ERROR_COL = '___ERR0R___'
+
+
+@log_execution_time('[DATA IMPORT]')
+def import_data(df, proc_id, get_date_col, cycle_cls, dic_sensor, dic_sensor_cls, dic_substring_sensors):
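+    # flow: reserve a block of cycle ids, write them into the df index, then bulk-insert the
+    # cycle rows and all per-sensor value rows in one transaction; on failure the id reservation
+    # is rolled back and the exception is returned to the caller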
+ cycles_len = len(df)
+ if not cycles_len:
+ return 0
+
+ # get available cycle ids from db
+ current_id = set_cycle_max_id(cycles_len)
+
+ # set ids for df
+ start_cycle_id = current_id + 1
+ df = set_cycle_ids_to_df(df, start_cycle_id)
+
+ cycle_vals = gen_insert_cycle_values(df, proc_id, cycle_cls, get_date_col)
+
+ # insert cycles
+ # get cycle and sensor columns for insert sql
+ cycle_sql_params = get_insert_params(get_cycle_columns())
+ sql_insert_cycle = gen_bulk_insert_sql(cycle_cls.__table__.name, *cycle_sql_params)
+
+ # run in threads
+ # pipeline = queue.Queue()
+ # q_output = queue.Queue()
+ # with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+ # executor.submit(gen_sensor_data_in_thread, pipeline, df, dic_sensor, dic_sensor_cls, dic_substring_sensors)
+ # executor.submit(insert_data_in_thread, pipeline, q_output, cycle_vals, sql_insert_cycle)
+ #
+ # commit_error = q_output.get()
+
+ # run in main thread
+ sensor_vals = []
+ for col_name, sensor in dic_sensor.items():
+ sensor_vals += gen_sensor_data(df, sensor, col_name, dic_sensor_cls, dic_substring_sensors)
+
+ commit_error = insert_data_to_db(sensor_vals, cycle_vals, sql_insert_cycle)
+
+ if isinstance(commit_error, Exception):
+ set_cycle_max_id(-cycles_len)
+ return commit_error
+
+ return cycles_len
+
+
+@log_execution_time()
+def gen_dic_sensor_n_cls(proc_id, dic_use_cols):
+ # sensor classes
+ dic_sensor = {}
+ dic_sensor_cls = {}
+ sensors = Sensor.get_sensor_by_col_names(proc_id, dic_use_cols)
+ for sensor in sensors:
+ dic_sensor[sensor.column_name] = sensor
+ dic_sensor_cls[sensor.column_name] = find_sensor_class(sensor.id, sensor.type)
+
+ return dic_sensor, dic_sensor_cls
+
+
+@log_execution_time()
+def gen_substring_column_info(proc_id, dic_sensor):
+ # substring dic
+ dic_substring_sensors = defaultdict(list)
+ for col_name, sensor in dic_sensor.items():
+ candidates = Sensor.get_substring_sensors(proc_id, sensor.id, col_name)
+ for candidate in candidates:
+ substr_check_res = get_from_to_substring_col(candidate)
+ if substr_check_res:
+ dic_substring_sensors[col_name].append(candidate)
+
+ return dic_substring_sensors
+
+
+@log_execution_time()
+def gen_data_type_list(columns, data_types, get_date_col, auto_increment_col=None):
+ ints = []
+ reals = []
+ dates = []
+ texts = []
+ for col, data_type in zip(columns, data_types):
+ if DataType[data_type] is DataType.INTEGER:
+ ints.append(col)
+ elif DataType[data_type] is DataType.REAL:
+ reals.append(col)
+ elif DataType[data_type] is DataType.DATETIME:
+ dates.append(col)
+ else:
+ texts.append(col)
+
+ return {'get_date_col': get_date_col,
+ 'auto_increment_col': auto_increment_col or get_date_col,
+ 'int_type_cols': ints,
+ 'real_type_cols': reals,
+ 'date_type_cols': dates,
+ 'text_type_cols': texts,
+ }
+
+
+# -------------------------- Factory data import -----------------------------
+
+
+@log_execution_time()
+def gen_cols_info(proc_cfg: CfgProcess):
+ """generate import columns, data types prepare for factory data import
+ :rtype: dict
+ :param proc_cfg:
+ :return:
+ """
+
+ columns = []
+ data_types = []
+ get_date_col = None
+ auto_increment_col = None
+ for col in proc_cfg.columns:
+ columns.append(col.column_name)
+ data_types.append(col.data_type)
+
+ # get date
+ if col.is_get_date:
+ get_date_col = col.column_name
+
+ # auto incremental column
+ if col.is_auto_increment:
+ auto_increment_col = col.column_name
+
+ # generate data type list
+ dic_cols_info = gen_data_type_list(columns, data_types, get_date_col, auto_increment_col)
+ return dic_cols_info
+
+
+@log_execution_time()
+def update_or_create_constant_by_type(const_type, value=0):
+ try:
+ CfgConstant.create_or_update_by_type(const_type=const_type, const_value=value)
+ except Exception:
+ traceback.print_exc()
+ return False
+
+ return True
+
+
+# -------------------------- Factory past 1 days data import -----------------------------
+
+
+@log_execution_time()
+def check_db_con(db_type, host, port, dbname, schema, username, password):
+ parsed_int_port = parse_int_value(port)
+ if parsed_int_port is None and db_type.lower() != DBType.SQLITE.name.lower():
+ return False
+
+    # initialize the data source objects
+ db_source_detail = CfgDataSourceDB()
+ db_source_detail.host = host
+ db_source_detail.port = parsed_int_port
+ db_source_detail.dbname = dbname
+ db_source_detail.schema = schema
+ db_source_detail.username = username
+ db_source_detail.password = password
+
+ db_source = CfgDataSource()
+ db_source.db_detail = db_source_detail
+ db_source.type = db_type
+
+    # initialize the return value
+ result = False
+
+    # check the connection
+ with DbProxy(db_source) as db_instance:
+ if db_instance.is_connected:
+ result = True
+
+ return result
+
+
+@log_execution_time()
+def write_error_trace(df_error: DataFrame, proc_name, file_path=None, ip_address=None):
+ if not len(df_error):
+ return df_error
+
+ time_str = convert_time(datetime.now(), format_str=DATE_FORMAT_STR_ONLY_DIGIT)[4:-3]
+ ip_address = get_ip_address()
+ ip_address = f'_{ip_address}' if ip_address else ''
+
+ base_name = f'_{get_basename(file_path)}' if file_path else ''
+
+ file_name = f'{proc_name}{base_name}_{time_str}{ip_address}.txt'
+ full_path = os.path.join(get_error_trace_path(), file_name)
+ make_dir_from_file_path(full_path)
+
+ df_error.to_csv(full_path, sep=CsvDelimiter.TSV.value, header=None, index=False)
+
+ return df_error
+
+
+@log_execution_time()
+def write_duplicate_import(df: DataFrame, file_name_elements: List):
+ if not len(df):
+ return df
+
+ file_name = '_'.join([element for element in file_name_elements if element])
+ export_file_name = f'{file_name}.txt'
+ full_path = os.path.join(get_error_trace_path(), export_file_name)
+ # make folder
+ make_dir_from_file_path(full_path)
+
+ df.to_csv(full_path, sep=CsvDelimiter.TSV.value, header=None, index=False)
+
+ return df
+
+
+@log_execution_time()
+def write_error_import(df_error: DataFrame, proc_name, file_path=None, error_file_delimiter=CsvDelimiter.CSV.value,
+ csv_directory=None):
+ if not len(df_error):
+ return df_error
+
+ if csv_directory:
+ file_paths = split_path_to_list(file_path)
+ csv_directories = split_path_to_list(csv_directory)
+ file_name = file_paths[-1]
+ folders = file_paths[len(csv_directories):-1]
+ else:
+ time_str = convert_time(format_str=DATE_FORMAT_STR_ONLY_DIGIT)[4:-3]
+ file_name = time_str + error_file_delimiter
+ folders = []
+
+ full_path = os.path.join(get_error_import_path(), proc_name, *folders, file_name)
+ make_dir_from_file_path(full_path)
+
+ df_error.to_csv(full_path, sep=error_file_delimiter, index=False)
+
+ return df_error
+
+
+def get_latest_records(proc_id, dic_sensors, get_date_col):
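+    # fetch the latest cycles and join their sensor values in chunks of 50 columns,
+    # then concatenate the chunks into one dataframe sorted by cycle time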
+ cycle_cls = find_cycle_class(proc_id)
+ cycle_ids = cycle_cls.get_latest_cycle_ids(proc_id)
+ df_blank = pd.DataFrame({col: [] for col in [INDEX_COL] + list(dic_sensors)})
+ if not cycle_ids:
+ return df_blank
+
+ is_first = True
+ col_names = list(dic_sensors)
+ dfs = []
+ for cols in chunks(col_names, 50):
+ records = get_sensor_values(proc_id, cols, dic_sensors, cycle_cls, cycle_ids=cycle_ids, get_time_col=is_first)
+ is_first = False
+
+ if not records:
+ return df_blank
+
+ df = pd.DataFrame(records)
+ dfs.append(df)
+
+ dfs = [_df.set_index(INDEX_COL) for _df in dfs]
+ df = pd.concat(dfs, ignore_index=False, axis=1)
+ df.sort_values(CYCLE_TIME_COL, inplace=True)
+ if get_date_col in col_names:
+ df.drop(CYCLE_TIME_COL, axis=1, inplace=True)
+ else:
+        df.rename(columns={CYCLE_TIME_COL: get_date_col}, inplace=True)
+
+ return df
+
+
+def gen_error_output_df(csv_file_name, dic_sensors, df_error, df_db, error_msgs=None):
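+    # build a human-readable error report: error type and data-type rows on top, the latest DB
+    # records as a reference section, then the erroneous csv rows (with their source file) below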
+ db_len = len(df_db)
+ df_db = df_db.append(df_error, ignore_index=True)
+ columns = df_db.columns.tolist()
+
+ # error data
+ new_row = columns
+ df_db = add_row_to_df(df_db, columns, new_row, db_len)
+
+ new_row = ('column name/sample data (first 10 & last 10)',)
+ df_db = add_row_to_df(df_db, columns, new_row, db_len)
+
+ new_row = ('Data File', csv_file_name)
+ df_db = add_row_to_df(df_db, columns, new_row, db_len)
+
+ new_row = ('',)
+ df_db = add_row_to_df(df_db, columns, new_row, db_len)
+
+ # data in db
+ new_row = columns
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ new_row = ('column name/sample data (latest 5)',)
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ new_row = [DataType(dic_sensors[col_name].type).name for col_name in columns if col_name in dic_sensors]
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ new_row = ('data type',)
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ new_row = ('Database',)
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ new_row = ('',)
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ new_row = ('',)
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ if isinstance(error_msgs, (list, tuple)):
+ error_msg = '|'.join(error_msgs)
+ else:
+ error_msg = error_msgs
+
+ new_row = ('Error Type', error_msg or DATA_TYPE_ERROR_MSG)
+ df_db = add_row_to_df(df_db, columns, new_row)
+
+ return df_db
+
+
+def gen_duplicate_output_df(dic_use_cols, df_duplicate, csv_file_name=None, table_name=None, error_msgs=None):
+ # db_name: if factory db -> db name
+ # else if csv -> file name
+ columns = df_duplicate.columns.tolist()
+
+ # duplicate data
+ new_row = columns
+ df_output = add_row_to_df(df_duplicate, columns, new_row)
+
+ new_row = (f'column name/duplicate data (total: {len(df_duplicate)} rows)',)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ new_row = ('',)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ new_row = [dic_use_cols[col_name] for col_name in columns if col_name in dic_use_cols]
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ new_row = ('data type',)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ new_row = ('',)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ if csv_file_name:
+ new_row = ('Data File', csv_file_name)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ if table_name:
+ new_row = ('Table name', table_name)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ new_row = ('',)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ new_row = ('',)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ if isinstance(error_msgs, (list, tuple)):
+ error_msg = '|'.join(error_msgs)
+ else:
+ error_msg = error_msgs
+
+ new_row = ('Error Type', error_msg or DATA_TYPE_DUPLICATE_MSG)
+ df_output = add_row_to_df(df_output, columns, new_row)
+
+ return df_output
+
+
+def add_row_to_df(df, columns, new_row, pos=0):
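+    # insert new_row as a single row at position pos; its values fill the first len(new_row) columns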
+ df_temp = pd.DataFrame({columns[i]: new_row[i] for i in range(len(new_row))}, index=[pos])
+ df = pd.concat([df.iloc[0:pos], df_temp, df.iloc[pos:]]).reset_index(drop=True)
+
+ return df
+
+
+@log_execution_time()
+def get_new_adding_columns(proc, dic_use_cols):
+ proc_id = proc.id
+
+ # exist sensors
+ created_at = get_current_timestamp()
+ dic_exist_sensor = {s.column_name: s for s in proc.sensors}
+ missing_sensors = []
+ for col_name, data_type in dic_use_cols.items():
+ # already exist
+ if dic_exist_sensor.get(col_name):
+ continue
+
+ data_type_obj = DataType[data_type]
+ if data_type_obj is DataType.DATETIME:
+ data_type_obj = DataType.TEXT
+
+ sensor = dict(process_id=proc_id, column_name=col_name, type=data_type_obj.value, created_at=created_at)
+ missing_sensors.append(sensor)
+
+ return missing_sensors
+
+
+@log_execution_time()
+def commit_db_instance(db_instance):
+ # commit changes to db
+ db_instance.connection.commit()
+
+ # clear cache
+ set_all_cache_expired()
+
+
+def csv_data_with_headers(csv_file_name, data_src):
+ efa_header_exists = CfgConstant.get_efa_header_flag(data_src.id)
+ read_directly_ok = True
+ if efa_header_exists:
+ try:
+ # csv delimiter
+ csv_delimiter = get_csv_delimiter(data_src.delimiter)
+
+ # read file directly to get Line, Machine, Process
+ csv_reader = read_data(csv_file_name, end_row=5, delimiter=csv_delimiter, do_normalize=False)
+ next(csv_reader)
+
+ row_line = next(csv_reader) # 2nd row
+ line = normalize_str(row_line[1]) # 2nd cell
+
+ row_process = next(csv_reader) # 3rd row
+ process = normalize_str(row_process[1]) # 2nd cell
+
+ row_machine = next(csv_reader) # 4th row
+ machine = normalize_str(row_machine[1]) # 2nd cell
+
+ etl_headers = {
+ WR_HEADER_NAMES: [EFAColumn.Line.name, EFAColumn.Process.name, EFAColumn.Machine.name],
+ WR_VALUES: [line, process, machine],
+ }
+ return etl_headers[WR_HEADER_NAMES], etl_headers[WR_VALUES]
+ except Exception:
+ read_directly_ok = False
+ traceback.print_exc()
+
+ # if there is no flag in DB or failed to read file directly -> call R script + save flag
+ if not efa_header_exists or not read_directly_ok:
+ csv_inst, _ = chw.get_file_info_py(csv_file_name)
+ if isinstance(csv_inst, Exception):
+ return csv_inst
+
+ if csv_inst is None:
+ return [], []
+
+ etl_headers = chw.get_etl_headers(csv_inst)
+
+ # save flag to db if header exists
+ efa_header_exists = chw.get_efa_header_flag(csv_inst)
+ if efa_header_exists:
+ CfgConstant.create_or_update_by_type(const_type=CfgConstantType.EFA_HEADER_EXISTS.name,
+ const_name=data_src.id,
+ const_value=EFA_HEADER_FLAG)
+
+ return etl_headers[WR_HEADER_NAMES], etl_headers[WR_VALUES]
+
+
+# -------------------------- shutdown app job -----------------------------
+@log_execution_time()
+def add_shutdown_app_job():
+ # delete process data from universal db
+ shutdown_app_job_id = JobType.SHUTDOWN_APP.name
+ scheduler.add_job(
+ shutdown_app_job_id, shutdown_app_job,
+ trigger=DateTrigger(run_date=datetime.now().astimezone(utc), timezone=utc),
+ replace_existing=True,
+ kwargs=dict(
+ _job_id=shutdown_app_job_id,
+ _job_name=JobType.SHUTDOWN_APP.name,
+ )
+ )
+
+
+@scheduler_app_context
+def shutdown_app_job(_job_id=None, _job_name=None, *args, **kwargs):
+ """ scheduler job to shutdown app
+
+ Keyword Arguments:
+        _job_id {str} -- scheduler job id (default: {None})
+        _job_name {str} -- scheduler job name (default: {None})
+ """
+ gen = waiting_for_job_done(*args, **kwargs)
+ send_processing_info(gen, JobType.SHUTDOWN_APP, is_check_disk=False)
+
+
+@log_execution_time()
+def waiting_for_job_done():
+ """pause scheduler and wait for all other jobs done.
+
+ Arguments:
+ proc_id {[type]} -- [description]
+
+ Keyword Arguments:
+ db_id {[type]} -- [description] (default: {None})
+
+ Yields:
+ [type] -- [description]
+ """
+ yield 0
+
+ from histview2.common.scheduler import dic_running_job, scheduler
+ import time
+
+ with lock:
+ try:
+ scheduler.pause()
+ except Exception:
+ pass
+
+ start_time = time.time()
+ percent = 0
+ shutdown_job = {JobType.SHUTDOWN_APP.name}
+ while True:
+ running_jobs = set(dic_running_job.keys())
+ if not running_jobs.difference(shutdown_job):
+ print('///////////// ELIGIBLE TO SHUTDOWN APP ///////////')
+ # notify frontend to stop main thread
+ background_announcer.announce(True, AnnounceEvent.SHUT_DOWN.name)
+ break
+
+ # show progress
+ percent = min(percent + 5, 99)
+ yield percent
+
+ # check timeout: 2 hours
+ if time.time() - start_time > 7200:
+ break
+
+ # sleep 5 seconds and wait
+ time.sleep(5)
+
+ yield 100
+
+
+@log_execution_time()
+def save_sensors(sensors):
+ # sensor commit
+ if not sensors:
+ return
+
+ with lock:
+ db.session.execute(Sensor.__table__.insert(), sensors)
+ # db.session.bulk_insert_mappings(Sensor, sensors)
+ db.session.commit()
+
+
+@log_execution_time()
+def strip_special_symbol(data, is_dict=False):
+    # TODO: converting to a dataframe and filtering would be faster, but this is kept as a
+    #  generator on purpose: we may only need to read a few rows
+ iter_func = lambda x: x
+ if is_dict:
+ iter_func = lambda x: x.values()
+
+ for row in data:
+ is_ng = False
+ if not row:
+ continue
+ for val in iter_func(row):
+ if str(val).lower() in SPECIAL_SYMBOLS:
+ is_ng = True
+ break
+
+ if not is_ng:
+ yield row
+
+
+@log_execution_time()
+def set_cycle_ids_to_df(df: DataFrame, start_cycle_id):
+ """
+ reset new cycle id to save to db
+ :param df:
+ :param start_cycle_id:
+ :return:
+ """
+ df.reset_index(drop=True, inplace=True)
+ df.index = df.index + start_cycle_id
+ return df
+
+
+def gen_cycle_data(cycle_id, proc_id, cycle_time, created_at):
+ """
+ vectorize function , do not use decorator
+ :param cycle_id:
+ :param cycle_time:
+ :param proc_id:
+ :param created_at:
+ :return:
+ """
+ return dict(id=cycle_id, process_id=proc_id, time=cycle_time, created_at=created_at)
+
+
+def gen_sensors_data(cycle_id, sensor_id, sensor_val, created_at):
+ """
+ vectorize function , do not use decorator
+ :param cycle_id:
+ :param sensor_id:
+ :param sensor_val:
+ :param created_at:
+ :return:
+ """
+ return dict(cycle_id=cycle_id, sensor_id=sensor_id, value=sensor_val, created_at=created_at)
+
+
+@log_execution_time()
+def gen_import_job_info(job_info, save_res, start_time=None, end_time=None, imported_count=0, err_cnt=0, err_msgs=None):
+ # start time
+ if job_info.last_cycle_time:
+ job_info.first_cycle_time = job_info.last_cycle_time
+ else:
+ job_info.first_cycle_time = start_time
+
+ # end time
+ job_info.last_cycle_time = end_time
+
+ if isinstance(save_res, Exception):
+ job_info.exception = save_res
+ job_info.status = JobStatus.FATAL
+ else:
+ if imported_count:
+ job_info.row_count = imported_count
+ job_info.committed_count = imported_count
+ else:
+ job_info.row_count = save_res
+ job_info.committed_count = save_res
+
+ if job_info.err_msg or err_cnt > 0 or err_msgs:
+ job_info.status = JobStatus.FAILED
+ else:
+ job_info.status = JobStatus.DONE
+
+ # set msg
+ if job_info.status == JobStatus.FAILED:
+ if not err_msgs:
+ msg = DATA_TYPE_ERROR_MSG
+ job_info.data_type_error_cnt += err_cnt
+ elif isinstance(err_msgs, (list, tuple)):
+ msg = ','.join(err_msgs)
+ else:
+ msg = err_msgs
+
+ if job_info.err_msg:
+ job_info.err_msg += msg
+ else:
+ job_info.err_msg = msg
+
+ return job_info
+
+
+@log_execution_time()
+def validate_data(df: DataFrame, dic_use_cols, na_vals, exclude_cols=None):
+ """
+ validate data type, NaN values...
+ :param df:
+ :param dic_use_cols:
+ :param na_vals:
+ :param exclude_cols:
+ :return:
+ """
+
+ init_is_error_col(df)
+
+ if exclude_cols is None:
+ exclude_cols = []
+
+ exclude_cols.append(IS_ERROR_COL)
+
+ # string + object + category
+ float_cols = df.select_dtypes(include=['float']).columns.tolist()
+ int_cols = df.select_dtypes(include=['integer']).columns.tolist()
+ for col_name in df.columns:
+ if col_name in exclude_cols:
+ continue
+
+ if col_name not in dic_use_cols:
+ continue
+
+ # do nothing with int column
+ if col_name in int_cols:
+ continue
+
+ # data type that user chose
+ user_data_type = dic_use_cols[col_name]
+
+ # do nothing with float column
+ if col_name in float_cols and user_data_type != DataType.INTEGER.name:
+ continue
+
+ # convert inf , -inf to Nan
+ nan, inf_neg_val, inf_val = return_inf_vals(user_data_type)
+ if col_name in float_cols and user_data_type == DataType.INTEGER.name:
+ df.loc[df[col_name].isin([float('inf'), float('-inf')]), col_name] = nan
+ non_na_vals = df[col_name].dropna()
+ if len(non_na_vals):
+ df.loc[non_na_vals.index, col_name] = df.loc[non_na_vals.index, col_name].astype('Int64')
+
+ continue
+
+ # strip quotes and spaces
+ dtype_name = df[col_name].dtype.name
+ if user_data_type in [DataType.INTEGER.name, DataType.REAL.name]:
+ vals = df[col_name].copy()
+
+ # convert numeric values
+ numerics = pd.to_numeric(vals, errors='coerce')
+ df[col_name] = numerics
+
+ # strip quote space then convert non numeric values
+ non_num_idxs = numerics.isna()
+ non_numerics = vals.loc[non_num_idxs].dropna()
+ if len(non_numerics):
+ non_num_idxs = non_numerics.index
+ non_numerics = non_numerics.astype(str).str.strip("'").str.strip()
+
+ # convert non numeric again
+ numerics = pd.to_numeric(non_numerics, errors='coerce')
+ df.loc[non_num_idxs, col_name] = numerics
+
+ # set error for non numeric values
+ non_num_idxs = numerics.isna()
+ for idx, is_true in non_num_idxs.items():
+ if not is_true:
+ continue
+
+ if vals.at[idx] in na_vals:
+ df.at[idx, col_name] = nan
+ elif vals.at[idx] in INF_VALUES:
+ df.at[idx, col_name] = inf_val
+ elif vals.at[idx] in INF_NEG_VALUES:
+ df.at[idx, col_name] = inf_neg_val
+ else:
+ df.at[idx, IS_ERROR_COL] = 1
+
+ try:
+ if len(non_num_idxs):
+ pd.to_numeric(df.loc[non_num_idxs.index, col_name], errors='raise')
+ except Exception as ex:
+ logger.exception(ex)
+
+ # replace Inf --> None
+ if user_data_type == DataType.INTEGER.name:
+ df.loc[df[col_name].isin([float('inf'), float('-inf')]), col_name] = nan
+
+ elif user_data_type == DataType.TEXT.name:
+ idxs = df[col_name].dropna().index
+ if dtype_name == 'object':
+ df.loc[idxs, col_name] = df.loc[idxs, col_name].astype(str).str.strip("'").str.strip()
+ elif dtype_name == 'string':
+ df.loc[idxs, col_name] = df.loc[idxs, col_name].str.strip("'").str.strip()
+ else:
+ # convert to string before insert to database
+ df.loc[idxs, col_name] = df.loc[idxs, col_name].astype(str)
+ continue
+
+ if len(idxs):
+ conditions = [df[col_name].isin(na_vals),
+ df[col_name].isin(INF_VALUES),
+ df[col_name].isin(INF_NEG_VALUES)]
+ return_vals = [nan, inf_val, inf_neg_val]
+
+ df[col_name] = np.select(conditions, return_vals, df[col_name])
+
+
+@log_execution_time()
+def add_new_col_to_df(df: DataFrame, col_name, value):
+ """
+ add new value as a new column in dataframe , but avoid duplicate column name.
+ :param df:
+ :param col_name:
+ :param value:
+ :return:
+ """
+ columns = list(df.columns)
+ # avoid duplicate column name
+ while col_name in columns:
+ col_name = '_' + col_name
+
+ df[col_name] = value
+
+ return col_name
+
+
+def return_inf_vals(data_type):
+ if data_type == DataType.REAL.name:
+ return np.nan, float('-inf'), float('inf')
+ elif data_type == DataType.INTEGER.name:
+ return pd.NA, pd.NA, pd.NA
+
+ return None, '-inf', 'inf'
+
+
+@log_execution_time()
+def data_pre_processing(df, orig_df, dic_use_cols, na_values=None, exclude_cols=None):
+ if na_values is None:
+ na_values = PANDAS_DEFAULT_NA | NA_VALUES
+
+ # string parse
+ cols = get_object_cols(df)
+ df[cols] = df[cols].astype(str)
+ cols += get_string_cols(df)
+
+ # normalization
+ for col in cols:
+ normalize_df(df, col)
+
+ # parse data type
+ validate_data(df, dic_use_cols, na_values, exclude_cols)
+
+ # write to file
+ df_error = orig_df.loc[df.eval(f'{IS_ERROR_COL} == 1')]
+
+ # remove status column ( no need anymore )
+ df.drop(df[df[IS_ERROR_COL] == 1].index, inplace=True)
+ df.drop(IS_ERROR_COL, axis=1, inplace=True)
+
+ return df_error
+
+
+@log_execution_time()
+def get_from_to_substring_col(sensor):
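+    # the substring column name encodes a 1-based from/to character range (see SUB_STRING_REGEX);
+    # it is converted here to a 0-based slice usable as value[from_char:to_char]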
+ substr_regex = re.compile(SUB_STRING_REGEX)
+ from_to_pos = substr_regex.match(sensor.column_name)
+ if not from_to_pos:
+ return None
+
+ from_char = int(from_to_pos[1]) - 1
+ to_char = int(from_to_pos[2])
+ substr_cls = find_sensor_class(sensor.id, DataType(sensor.type))
+
+ return substr_cls, from_char, to_char
+
+
+@log_execution_time()
+def gen_substr_data(substr_sensor, df, col_name):
+ """
+ generate data for sub string column from original data
+ :param substr_sensor:
+ :param df:
+ :param col_name:
+ :return:
+ """
+ substr_check_res = get_from_to_substring_col(substr_sensor)
+ if not substr_check_res:
+ return None, None
+
+ substr_cls, from_char, to_char = substr_check_res
+
+ sub_col_name = add_new_col_to_df(df, f'{col_name}_{from_char}_{to_char}', df[col_name].str[from_char:to_char])
+
+ # # remove blank values (we need to insert the same with proclink, so do not move blank)
+ # df_insert = df[df[sub_col_name] != '']
+ # if not len(df_insert):
+ # return None, None
+
+ # gen insert data
+ sensor_vals = gen_insert_sensor_values(df, substr_sensor.id, sub_col_name)
+
+ return substr_cls, sensor_vals
+
+
+@log_execution_time()
+def get_data_without_na(data):
+ valid_rows = []
+ for row in data:
+ # exclude_na = False not in [col not in ALL_SYMBOLS for col in row]
+ if any([val in ALL_SYMBOLS for val in row]):
+ continue
+
+ valid_rows.append(row)
+ return valid_rows
+
+
+def get_string_cols(df: DataFrame):
+ return [col for col in df.columns if df[col].dtype.name.lower() == 'string']
+
+
+def get_object_cols(df: DataFrame):
+ return [col for col in df.columns if df[col].dtype.name.lower() == 'object']
+
+
+@log_execution_time('[CONVERT DATE TIME TO UTC]')
+def convert_df_col_to_utc(df, get_date_col, is_tz_inside, utc_time_offset):
+ if is_tz_inside:
+ return df[get_date_col].dt.tz_convert('UTC')
+
+ return df[get_date_col] - utc_time_offset
+
+
+@log_execution_time()
+def convert_df_datetime_to_str(df: DataFrame, get_date_col):
+ return df[get_date_col].dt.strftime(DATE_FORMAT_STR)
+
+
+@log_execution_time()
+def validate_datetime(df: DataFrame, date_col, is_strip=True, add_is_error_col=True, null_is_error=True):
+ dtype_name = df[date_col].dtype.name
+ if dtype_name == 'object':
+ df[date_col] = df[date_col].astype(str)
+ elif dtype_name != 'string':
+ return
+
+ # for csv data
+ if is_strip:
+ df[date_col] = df[date_col].str.strip("'").str.strip()
+
+ # convert to datetime value
+ if not null_is_error:
+ idxs = df[date_col].notna()
+
+ df[date_col] = pd.to_datetime(df[date_col], errors='coerce') # failed records -> pd.NaT
+
+ # mark error records
+ if add_is_error_col:
+ init_is_error_col(df)
+
+ if null_is_error:
+ df[IS_ERROR_COL] = np.where(pd.isna(df[date_col]), 1, df[IS_ERROR_COL])
+ else:
+ df_temp = df.loc[idxs, [date_col, IS_ERROR_COL]]
+ # df.loc[idxs, IS_ERROR_COL] = np.where(pd.isna(df.loc[idxs, date_col]), 1, df.loc[idxs, IS_ERROR_COL])
+ df_temp[IS_ERROR_COL] = np.where(pd.isna(df_temp[date_col]), 1, df_temp[IS_ERROR_COL])
+ df.loc[idxs, IS_ERROR_COL] = df_temp
+
+
+def init_is_error_col(df: DataFrame):
+ if IS_ERROR_COL not in df.columns:
+ df[IS_ERROR_COL] = 0
+
+
+@log_execution_time()
+def set_cycle_max_id(next_use_id_count):
+ """ get cycle max id to avoid conflict cycle id
+ """
+ global csv_import_cycle_max_id
+ with lock:
+ # when app start get max id of all tables
+ if csv_import_cycle_max_id is None:
+ csv_import_cycle_max_id = 0
+ max_id = max([cycle_cls.get_max_id() for cycle_cls in CYCLE_CLASSES])
+ else:
+ max_id = csv_import_cycle_max_id
+
+ csv_import_cycle_max_id = max_id + next_use_id_count
+ return max_id
+
+
+@log_execution_time()
+def check_update_time_by_changed_tz(proc_cfg: CfgProcess, time_zone=None):
+ if time_zone is None:
+ time_zone = tz.tzutc()
+
+ use_os_tz = proc_cfg.data_source.db_detail.use_os_timezone
+    # check the "use OS timezone" flag
+ if check_timezone_changed(proc_cfg.id, use_os_tz):
+ # convert to local or convert from local
+ if use_os_tz:
+ # calculate offset +/-HH:MM
+ tz_offset = calc_offset_between_two_tz(time_zone, tz.tzlocal())
+ else:
+ tz_offset = calc_offset_between_two_tz(tz.tzlocal(), time_zone)
+
+ if tz_offset is None:
+ return None
+
+ # update time to new time zone
+ cycle_cls = find_cycle_class(proc_cfg.id)
+ with lock:
+ cycle_cls.update_time_by_tzoffset(proc_cfg.id, tz_offset)
+ date_sensor = Sensor.get_sensor_by_col_name(proc_cfg.id, proc_cfg.get_date_col())
+
+ sensor_cls = find_sensor_class(date_sensor.id, DataType(date_sensor.type))
+ sensor_cls.update_time_by_tzoffset(proc_cfg.id, date_sensor.id, tz_offset)
+ db.session.commit()
+
+ # save latest use os time zone flag to db
+ save_use_os_timezone_to_db(proc_cfg.id, use_os_tz)
+
+ return True
+
+
+@log_execution_time()
+def check_timezone_changed(proc_id, yml_use_os_timezone):
+ """check if use os timezone was changed by user
+
+ Args:
+ proc_id ([type]): [description]
+ yml_use_os_timezone ([type]): [description]
+
+ Returns:
+ [type]: [description]
+ """
+ if yml_use_os_timezone is None:
+ return False
+
+ db_use_os_tz = CfgConstant.get_value_by_type_name(
+ CfgConstantType.USE_OS_TIMEZONE.name, proc_id, lambda x: bool(int(x)))
+ if db_use_os_tz is None:
+ return False
+
+ if db_use_os_tz == yml_use_os_timezone:
+ return False
+
+ return True
+
+
+@log_execution_time()
+def save_use_os_timezone_to_db(proc_id, yml_use_os_timezone):
+ """save os timezone to constant table
+
+ Args:
+ proc_id ([type]): [description]
+ yml_use_os_timezone ([type]): [description]
+
+ Returns:
+ [type]: [description]
+ """
+ if not yml_use_os_timezone:
+ yml_use_os_timezone = False
+
+ CfgConstant.create_or_update_by_type(
+ const_type=CfgConstantType.USE_OS_TIMEZONE.name,
+ const_value=yml_use_os_timezone,
+ const_name=proc_id)
+
+ return True
+
+
+@log_execution_time()
+def gen_insert_cycle_values(df, proc_id, cycle_cls, get_date_col):
+ # created time
+ created_at = get_current_timestamp()
+ created_at_col_name = add_new_col_to_df(df, cycle_cls.created_at.key, created_at)
+
+ proc_id_col_name = add_new_col_to_df(df, cycle_cls.process_id.key, proc_id)
+ is_outlier_col_name = add_new_col_to_df(df, cycle_cls.is_outlier.key, 0)
+ cycle_vals = df[[proc_id_col_name, get_date_col, is_outlier_col_name, created_at_col_name]].to_records().tolist()
+ return cycle_vals
+
+
+@log_execution_time()
+def insert_data(db_instance, sql, vals):
+ db_instance.execute_sql_in_transaction(sql, vals)
+ return True
+
+
+@log_execution_time()
+def gen_insert_sensor_values(df_insert, sensor_id, col_name):
+ sensor_id_col = add_new_col_to_df(df_insert, SensorType.sensor_id.key, sensor_id)
+ sensor_vals = df_insert[[sensor_id_col, col_name]].to_records().tolist()
+
+ return sensor_vals
+
+
+@log_execution_time()
+def gen_bulk_insert_sql(tblname, cols_str, params_str):
+ sql = f'INSERT INTO {tblname} ({cols_str}) VALUES ({params_str})'
+
+ return sql
+
+
+@log_execution_time()
+def get_cycle_columns():
+ return Cycle.id.key, Cycle.process_id.key, Cycle.time.key, Cycle.is_outlier.key, Cycle.created_at.key
+
+
+@log_execution_time()
+def get_sensor_columns():
+ return SensorType.cycle_id.key, SensorType.sensor_id.key, SensorType.value.key
+
+
+@log_execution_time()
+def get_insert_params(columns):
+ cols_str = ','.join(columns)
+ params_str = ','.join(['?'] * len(columns))
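+    # e.g. columns ('id', 'process_id', 'time') -> ('id,process_id,time', '?,?,?'), which
+    # gen_bulk_insert_sql turns into INSERT INTO <table> (id,process_id,time) VALUES (?,?,?)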
+
+ return cols_str, params_str
+
+
+@log_execution_time()
+def gen_sensor_data(df, sensor, col_name, dic_sensor_cls, dic_substring_sensors):
+ data = []
+ sensor_cls = dic_sensor_cls[col_name]
+
+ df_insert = df.dropna(subset=[col_name])[[col_name]]
+ if not df_insert.size:
+ return data
+
+ sensor_vals = gen_insert_sensor_values(df_insert, sensor.id, col_name)
+ data.append((sensor_cls.__table__.name, sensor_vals))
+
+ # insert substring columns
+ substr_sensors = dic_substring_sensors.get(col_name, [])
+ if substr_sensors:
+ df_insert[col_name] = df_insert[col_name].astype(str)
+
+ for substr_sensor in substr_sensors:
+ substr_cls, substr_rows = gen_substr_data(substr_sensor, df_insert, col_name)
+ if substr_cls and substr_rows:
+ data.append((substr_cls.__table__.name, substr_rows))
+
+ return data
+
+
+@log_execution_time()
+def insert_data_to_db(sensor_values, cycle_vals, sql_insert_cycle):
+ try:
+ with lock:
+ with DbProxy(gen_data_source_of_universal_db(), True) as db_instance:
+ # insert cycle
+ insert_data(db_instance, sql_insert_cycle, cycle_vals)
+
+ # insert sensor
+ sensor_sql_params = get_insert_params(get_sensor_columns())
+ for tblname, vals in sensor_values:
+ sql_insert_sensor = gen_bulk_insert_sql(tblname, *sensor_sql_params)
+ insert_data(db_instance, sql_insert_sensor, vals)
+
+ # commit data to database
+ commit_db_instance(db_instance)
+
+ return None
+ except Exception as e:
+ return e
+
+
+# def gen_sensor_data_in_thread(pipeline: Queue, df, dic_sensor, dic_sensor_cls, dic_substring_sensors):
+# for col_name, sensor in dic_sensor.items():
+# sensor_cls = dic_sensor_cls[col_name]
+# sensor_vals = gen_insert_sensor_values(df, sensor, col_name)
+# if sensor_vals:
+# pipeline.put((sensor_cls.__table__.name, sensor_vals))
+# # insert substring columns
+# for substr_sensor in dic_substring_sensors.get(col_name, []):
+# substr_cls, substr_rows = gen_substr_data(substr_sensor, sensor_vals)
+# if substr_cls and substr_rows:
+# pipeline.put((sensor_cls.__table__.name, sensor_vals))
+#
+# # Stop flag
+# pipeline.put(None)
+#
+#
+# def insert_data_in_thread(pipeline: Queue, q_output: Queue, cycle_vals, sql_insert_cycle):
+# try:
+# with lock:
+# with DbProxy(gen_data_source_of_universal_db(), True) as db_instance:
+# # insert cycle
+# insert_data(db_instance, sql_insert_cycle, cycle_vals)
+#
+# # insert sensor
+# sensor_sql_params = get_insert_params(get_sensor_columns())
+# while True:
+# data = pipeline.get()
+# if data is None:
+# break
+#
+# tblname, vals = data
+#
+# sql_insert_sensor = gen_bulk_insert_sql(tblname, *sensor_sql_params)
+# insert_data(db_instance, sql_insert_sensor, vals)
+#
+# # commit data to database
+# commit_db_instance(db_instance)
+#
+# q_output.put(None)
+# except Exception as e:
+# q_output.put(e)
+
+@log_execution_time()
+def get_sensor_values(proc_id, col_names, dic_sensors, cycle_cls, start_tm=None, end_tm=None, cycle_ids=None,
+ sort_by_time=False, get_time_col=None):
+ cols = [cycle_cls.id.label(INDEX_COL)]
+ if get_time_col:
+ cols.append(cycle_cls.time.label(CYCLE_TIME_COL))
+
+ data_query = db.session.query(*cols)
+ data_query = data_query.filter(cycle_cls.process_id == proc_id)
+
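+    # LEFT JOIN each requested sensor's value table (matched on cycle_id and sensor_id)
+    # and expose its value under the original column name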
+ for col_name in col_names:
+ sensor = dic_sensors[col_name]
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+ sensor_val = sensor_val_cls.value.label(col_name)
+
+ data_query = data_query.outerjoin(
+ sensor_val_cls,
+ and_(sensor_val_cls.cycle_id == cycle_cls.id, sensor_val_cls.sensor_id == sensor.id)
+ )
+
+ data_query = data_query.add_columns(sensor_val)
+
+ # chunk
+ if cycle_ids:
+ data_query = data_query.filter(cycle_cls.id.in_(cycle_ids))
+ else:
+ data_query = data_query.filter(cycle_cls.time >= start_tm)
+ data_query = data_query.filter(cycle_cls.time <= end_tm)
+
+ if sort_by_time:
+ data_query = data_query.order_by(cycle_cls.time)
+
+ records = data_query.all()
+ return records
+
+
+def get_df_first_n_last(df: DataFrame, first_count=10, last_count=10):
+ if len(df) <= first_count + last_count:
+ return df
+
+ return df.loc[df.head(first_count).index.append(df.tail(last_count).index)]
diff --git a/histview2/api/setting_module/services/factory_import.py b/histview2/api/setting_module/services/factory_import.py
new file mode 100644
index 0000000..12528de
--- /dev/null
+++ b/histview2/api/setting_module/services/factory_import.py
@@ -0,0 +1,663 @@
+from datetime import datetime
+
+import pandas as pd
+from loguru import logger
+from pandas import DataFrame
+
+from histview2.api.setting_module.services.csv_import import remove_duplicates
+from histview2.api.setting_module.services.data_import import import_data, save_sensors, get_new_adding_columns, \
+ gen_import_job_info, \
+ data_pre_processing, convert_df_col_to_utc, convert_df_datetime_to_str, validate_datetime, \
+ gen_dic_sensor_n_cls, gen_substring_column_info, check_update_time_by_changed_tz, gen_error_output_df, \
+ get_df_first_n_last, write_error_import, write_error_trace, gen_duplicate_output_df, write_duplicate_import
+from histview2.api.trace_data.services.proc_link import add_gen_proc_link_job
+from histview2.common.common_utils import add_days, convert_time, DATE_FORMAT_STR_FACTORY_DB, add_double_quotes, \
+ add_years, DATE_FORMAT_STR_ONLY_DIGIT, get_ip_address
+from histview2.common.constants import MSG_DB_CON_FAILED, JobStatus, DataType
+from histview2.common.logger import log_execution_time
+from histview2.common.pydn.dblib import mysql, mssqlserver, oracle
+from histview2.common.pydn.dblib.db_proxy import DbProxy
+from histview2.common.scheduler import scheduler_app_context, JobType
+from histview2.common.timezone_utils import get_db_timezone, gen_sql, get_time_info, detect_timezone
+from histview2.setting_module.models import FactoryImport, CfgProcess, CfgDataSourceDB, JobManagement
+from histview2.setting_module.services.background_process import send_processing_info, JobInfo, \
+ format_factory_date_to_meta_data
+from histview2.trace_data.models import Process, find_cycle_class
+
+MAX_RECORD = 1_000_000
+SQL_FACTORY_LIMIT = 5_000_000
+SQL_DAY = 8
+SQL_DAYS_AGO = 30
+FETCH_MANY_SIZE = 10_000
+
+pd.options.mode.chained_assignment = None # default='warn'
+
+
+@scheduler_app_context
+def import_factory_job(_job_id, _job_name, _db_id, _proc_id, _proc_name, *args, **kwargs):
+ """ scheduler job import factory data
+
+ Keyword Arguments:
+ _job_id {[type]} -- [description] (default: {None})
+ _job_name {[type]} -- [description] (default: {None})
+ """
+ gen = import_factory(*args, **kwargs)
+ send_processing_info(gen, JobType.FACTORY_IMPORT, db_code=_db_id, process_id=_proc_id, process_name=_proc_name,
+ after_success_func=add_gen_proc_link_job)
+
+
+@log_execution_time()
+def import_factory(proc_id):
+ """transform data and then import from factory db to universal db
+
+ Arguments:
+        proc_id {int} -- process id
+
+    Yields:
+        int | JobInfo -- progress / job info of the running import
+ """
+ # start job
+ yield 0
+
+ # get process id in edge db
+ proc_cfg: CfgProcess = CfgProcess.query.get(proc_id)
+ data_src: CfgDataSourceDB = CfgDataSourceDB.query.get(proc_cfg.data_source_id)
+
+ # check db connection
+ check_db_connection(data_src)
+
+ # process info
+ proc = Process.get_or_create_proc(proc_id=proc_id, proc_name=proc_cfg.name)
+
+ # columns info
+ proc_name = proc_cfg.name
+ column_names = [col.column_name for col in proc_cfg.columns]
+ auto_increment_col = proc_cfg.get_auto_increment_col_else_get_date()
+ auto_increment_idx = column_names.index(auto_increment_col)
+ dic_use_cols = {col.column_name: col.data_type for col in proc_cfg.columns}
+
+ # cycle class
+ cycle_cls = find_cycle_class(proc_id)
+
+ # check new adding column, save.
+ missing_sensors = get_new_adding_columns(proc, dic_use_cols)
+ save_sensors(missing_sensors)
+
+ # sensor classes
+ dic_sensor, dic_sensor_cls = gen_dic_sensor_n_cls(proc_id, dic_use_cols)
+
+ # substring sensors info
+ dic_substring_sensors = gen_substring_column_info(proc_id, dic_sensor)
+
+ # get date time column
+ get_date_col = proc_cfg.get_date_col()
+
+ # last import date
+ last_import = FactoryImport.get_last_import(proc.id, JobType.FACTORY_IMPORT.name)
+
+ if last_import:
+ filter_time = last_import.import_to
+ else:
+ # the first time import data : get minimum time of factory db
+ filter_time = None
+
+ # convert utc function
+ dic_tz_info = {col: handle_time_zone(proc_cfg, col)
+ for col, dtype in dic_use_cols.items() if DataType[dtype] is DataType.DATETIME}
+
+ # get factory max date
+ fac_max_date, is_tz_col = get_factory_max_date(proc_cfg)
+
+ inserted_row_count = 0
+ calc_range_days_func = calc_sql_range_days()
+ sql_day = SQL_DAY
+ is_import = True
+ end_time = None
+ total_row = 0
+ job_info = JobInfo()
+ job_info.auto_increment_col_timezone = is_tz_col
+ job_info.target = proc_cfg.name
+
+ # get current job id
+ t_job_management: JobManagement = JobManagement.get_last_job_of_process(proc_id, JobType.FACTORY_IMPORT.name)
+ job_id = str(t_job_management.id) if t_job_management else ''
+ data_source_name = proc_cfg.data_source.name
+ table_name = proc_cfg.table_name
+
+ while inserted_row_count < MAX_RECORD and is_import:
+ # get sql range
+ if end_time:
+ if total_row:
+ sql_day = calc_range_days_func(sql_day, total_row)
+
+ start_time, end_time, filter_time = get_sql_range_time(end_time, range_day=sql_day, is_tz_col=is_tz_col)
+ else:
+ start_time, end_time, filter_time = get_sql_range_time(filter_time, is_tz_col=is_tz_col)
+
+ # no data in range, stop
+ if start_time > fac_max_date:
+ break
+
+ # validate import date range
+ if end_time >= fac_max_date:
+ end_time = fac_max_date
+ is_import = False
+
+ # get data from factory
+ data = get_factory_data(proc_cfg, column_names, auto_increment_col, start_time, end_time)
+ if not data:
+ break
+
+ cols = next(data)
+ remain_rows = tuple()
+ for rows in data:
+ is_import, rows, remain_rows = gen_import_data(rows, remain_rows, auto_increment_idx)
+ if not is_import:
+ continue
+
+ # dataframe
+ df = pd.DataFrame(rows, columns=cols)
+
+ # no records
+ if not len(df):
+ continue
+
+ # Convert UTC time
+ for col, dtype in dic_use_cols.items():
+ if DataType[dtype] is not DataType.DATETIME:
+ continue
+
+ null_is_error = False
+ if col == get_date_col:
+ null_is_error = True
+
+ validate_datetime(df, col, is_strip=False, null_is_error=null_is_error)
+ df[col] = convert_df_col_to_utc(df, col, *dic_tz_info[col])
+ df[col] = convert_df_datetime_to_str(df, col)
+
+ # convert types
+ df = df.convert_dtypes()
+
+ # original df
+ orig_df = df.copy()
+
+ # data pre-processing
+ df_error = data_pre_processing(df, orig_df, dic_use_cols, exclude_cols=[get_date_col])
+ df_error_cnt = len(df_error)
+ if df_error_cnt:
+ df_error_trace = gen_error_output_df(proc_id, proc_name, dic_sensor, get_df_first_n_last(df_error))
+ write_error_trace(df_error_trace, proc_cfg.name)
+ write_error_import(df_error, proc_cfg.name)
+
+ # no records
+ if not len(df):
+ continue
+
+ # remove duplicate records which exists DB
+ df_duplicate = remove_duplicates(df, orig_df, proc_id, get_date_col)
+ df_duplicate_cnt = len(df_duplicate)
+ if df_duplicate_cnt:
+ write_duplicate_records_to_file_factory(df_duplicate, data_source_name, table_name, dic_use_cols,
+ proc_cfg.name, job_id)
+
+ # import data
+ save_res = import_data(df, proc_id, get_date_col, cycle_cls, dic_sensor, dic_sensor_cls,
+ dic_substring_sensors)
+
+ # update job info
+ imported_end_time = rows[-1][auto_increment_idx]
+ gen_import_job_info(job_info, save_res, start_time, imported_end_time, err_cnt=df_error_cnt)
+
+ # total row of one job
+ total_row = job_info.row_count
+ inserted_row_count += total_row
+
+ job_info.calc_percent(inserted_row_count, MAX_RECORD)
+
+ yield job_info
+
+ # raise exception if FATAL error happened
+ if job_info.status is JobStatus.FATAL:
+ raise job_info.exception
+
+ # calc range of days to gen sql
+ logger.info(
+ f'FACTORY DATA IMPORT SQL(days = {sql_day}, records = {total_row}, range = {start_time} - {end_time})')
+
+
+
+@log_execution_time()
+def calc_sql_range_days():
+ """
+ calculate range of days for 1 sql sentence
+ """
+ limit_record = 100_000
+ limit_max_day = 256
+ limit_min_day = 1
+
+ prev_day_cnt = 1
+ prev_record_cnt = 0
+
+ def _calc_sql_range_days(cur_day_cnt, cur_record_cnt):
+ nonlocal prev_day_cnt, prev_record_cnt
+
+ # compare current to previous, get max
+ if cur_record_cnt >= prev_record_cnt:
+ rec_cnt = cur_record_cnt
+ day_cnt = cur_day_cnt
+ else:
+ rec_cnt = prev_record_cnt
+ day_cnt = prev_day_cnt
+
+ # set previous sql
+ prev_day_cnt = day_cnt
+ prev_record_cnt = rec_cnt
+
+ # adjust number of days to get data
+ if rec_cnt > limit_record * 2:
+ day_cnt //= 2
+ elif rec_cnt < limit_record:
+ day_cnt *= 2
+
+ # make sure range is 1 ~ 256 days
+ day_cnt = min(day_cnt, limit_max_day)
+ day_cnt = max(day_cnt, limit_min_day)
+
+ return day_cnt
+
+ return _calc_sql_range_days
+
+
+@log_execution_time()
+def get_sql_range_time(filter_time=None, range_day=SQL_DAY, start_days_ago=SQL_DAYS_AGO, is_tz_col=False):
+    # if there is no data, this polling is the first time, so get data from n days ago.
+ limit_date = add_days(days=-start_days_ago)
+ limit_date = limit_date.replace(hour=0, minute=0, second=0, microsecond=0)
+
+ if filter_time:
+ filter_time = max(convert_time(filter_time), convert_time(limit_date))
+ else:
+ filter_time = convert_time(limit_date)
+
+ # start time
+ start_time = convert_time(filter_time, return_string=False)
+
+    # range_day days after the start time
+ end_time = add_days(start_time, days=range_day)
+
+ # convert to string
+ start_time = convert_time(start_time, format_str=DATE_FORMAT_STR_FACTORY_DB, only_milisecond=True)
+ end_time = convert_time(end_time, format_str=DATE_FORMAT_STR_FACTORY_DB, only_milisecond=True)
+ filter_time = convert_time(filter_time, format_str=DATE_FORMAT_STR_FACTORY_DB, only_milisecond=True)
+
+ if is_tz_col:
+ start_time += 'Z'
+ end_time += 'Z'
+ filter_time += 'Z'
+
+ return start_time, end_time, filter_time
+
+
+@log_execution_time('[FACTORY DATA IMPORT SELECT SQL]')
+def get_data_by_range_time(db_instance, get_date_col, column_names, table_name, start_time, end_time, sql_limit):
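+    # build a dialect-aware SELECT over the given time range, capped at sql_limit rows
+    # (TOP for MSSQL, ROWNUM for Oracle, LIMIT otherwise), and stream the rows in batches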
+ if isinstance(db_instance, mysql.MySQL):
+ sel_cols = ','.join(column_names)
+ else:
+ table_name = add_double_quotes(table_name)
+ sel_cols = ','.join([add_double_quotes(col) for col in column_names])
+ get_date_col = add_double_quotes(get_date_col)
+
+ # sql
+ sql = f"{sel_cols} FROM {table_name} WHERE {get_date_col} > '{start_time}' AND {get_date_col} <= '{end_time}'"
+ sql = f'{sql} ORDER BY {get_date_col}'
+
+ if isinstance(db_instance, mssqlserver.MSSQLServer):
+ sql = f'SELECT TOP {sql_limit} {sql}'
+ elif isinstance(db_instance, oracle.Oracle):
+ sql = f'SELECT * FROM (SELECT {sql}) WHERE ROWNUM <= {sql_limit}'
+ else:
+ sql = f'SELECT {sql} LIMIT {sql_limit}'
+
+ logger.info(f'sql: {sql}')
+ data = db_instance.fetch_many(sql, FETCH_MANY_SIZE)
+ if not data:
+ return None
+
+ yield from data
+
+
+@log_execution_time()
+def get_factory_data(proc_cfg, column_names, auto_increment_col, start_time, end_time):
+ """generate select statement and get data from factory db
+
+ Arguments:
+        proc_cfg {CfgProcess} -- process configuration
+        column_names {list} -- columns to select
+        auto_increment_col {str} -- column used to filter and order the time range
+        start_time -- lower bound of the range (exclusive)
+        end_time -- upper bound of the range (inclusive)
+ """
+ # exe sql
+ with DbProxy(proc_cfg.data_source) as db_instance:
+ data = get_data_by_range_time(db_instance, auto_increment_col, column_names, proc_cfg.table_name, start_time,
+ end_time,
+ SQL_FACTORY_LIMIT)
+
+ if not data:
+ return None
+
+ for rows in data:
+ yield tuple(rows)
+
+
+@log_execution_time()
+def get_factory_max_date(proc_cfg):
+ """
+ get factory max date
+ """
+
+ with DbProxy(proc_cfg.data_source) as db_instance:
+ # gen sql
+ get_date_col = add_double_quotes(proc_cfg.get_auto_increment_col_else_get_date())
+ orig_tblname = proc_cfg.table_name.strip('\"')
+ if not isinstance(db_instance, mysql.MySQL):
+ table_name = add_double_quotes(orig_tblname)
+ else:
+ table_name = orig_tblname
+
+ sql = f'select max({get_date_col}) from {table_name}'
+ _, rows = db_instance.run_sql(sql, row_is_dict=False)
+
+ if not rows:
+ return None
+
+ out = rows[0][0]
+
+ is_tz_col = db_instance.is_timezone_hold_column(orig_tblname, get_date_col)
+ out = format_factory_date_to_meta_data(out, is_tz_col)
+
+ return out, is_tz_col
+
+
+SQL_PAST_DAYS_AGO = 1
+
+
+@log_execution_time()
+def get_tzoffset_of_random_record(data_source, table_name, get_date_col):
+ # exec sql
+ with DbProxy(data_source) as db_instance:
+ # get timezone offset
+ db_timezone = get_db_timezone(db_instance)
+
+ sql = gen_sql(db_instance, table_name, get_date_col)
+ _, rows = db_instance.run_sql(sql, row_is_dict=False)
+
+ date_val = None
+ tzoffset_str = None
+ if rows:
+ date_val, tzoffset_str = rows[0]
+
+ return date_val, tzoffset_str, db_timezone
+
+
+@scheduler_app_context
+def factory_past_data_transform_job(_job_id, _job_name, _db_id, _proc_id, _proc_name, *args, **kwargs):
+ """ scheduler job import factory data
+
+ Keyword Arguments:
+ _job_id {[type]} -- [description] (default: {None})
+ _job_name {[type]} -- [description] (default: {None})
+ """
+ gen = factory_past_data_transform(*args, **kwargs)
+ send_processing_info(gen, JobType.FACTORY_PAST_IMPORT, db_code=_db_id,
+ process_id=_proc_id, process_name=_proc_name,
+ after_success_func=add_gen_proc_link_job)
+
+
+@log_execution_time()
+def factory_past_data_transform(proc_id):
+ """transform data and then import from factory db to universal db
+
+ Arguments:
+ proc_id {[type]} -- [description]
+
+ Yields:
+ [type] -- [description]
+ """
+ # start job
+ yield 0
+
+ # get process id in edge db
+ proc_cfg: CfgProcess = CfgProcess.query.get(proc_id)
+ data_src: CfgDataSourceDB = CfgDataSourceDB.query.get(proc_cfg.data_source_id)
+
+ # check db connection
+ check_db_connection(data_src)
+
+ proc = Process.get_or_create_proc(proc_id=proc_id)
+
+ # columns info
+ proc_name = proc_cfg.name
+ column_names = [col.column_name for col in proc_cfg.columns]
+ auto_increment_col = proc_cfg.get_auto_increment_col_else_get_date()
+ auto_increment_idx = column_names.index(auto_increment_col)
+ dic_use_cols = {col.column_name: col.data_type for col in proc_cfg.columns}
+
+ # cycle class
+ cycle_cls = find_cycle_class(proc_id)
+
+ # check new adding column, save.
+ missing_sensors = get_new_adding_columns(proc, dic_use_cols)
+ save_sensors(missing_sensors)
+
+ # sensor classes
+ dic_sensor, dic_sensor_cls = gen_dic_sensor_n_cls(proc_id, dic_use_cols)
+
+ # substring sensors info
+ dic_substring_sensors = gen_substring_column_info(proc_id, dic_sensor)
+
+ # get date time column
+ get_date_col = proc_cfg.get_date_col()
+
+ # last import date
+ last_import = FactoryImport.get_last_import(proc.id, JobType.FACTORY_PAST_IMPORT.name, is_first_id=True)
+
+ if not last_import:
+ # check whether the first-time factory import was already done
+ last_import = FactoryImport.get_first_import(proc.id, JobType.FACTORY_IMPORT.name)
+
+ # the first-time import has not run yet, so there is nothing to backfill
+ if not last_import:
+ yield 100
+ return
+
+ filter_time = last_import.import_from
+
+ if filter_time < convert_time(add_years(years=-1)):
+ yield 100
+ return
+
+ # return if 2 million records have already been inserted for this process
+ if cycle_cls.get_count_by_proc_id(proc.id) > 2_000_000:
+ yield 100
+ return
+
+ # is timezone column
+ is_tz_col = False
+ if filter_time[-1] == 'Z':
+ is_tz_col = True
+
+ # calc end time
+ end_time = convert_time(filter_time, return_string=False)
+
+ # calc start time
+ start_time = add_days(end_time, days=-SQL_PAST_DAYS_AGO)
+
+ # convert to char format
+ start_time = format_factory_date_to_meta_data(start_time, is_tz_col)
+ end_time = format_factory_date_to_meta_data(end_time, is_tz_col)
+
+ # convert utc function
+ dic_tz_info = {col: handle_time_zone(proc_cfg, col)
+ for col, dtype in dic_use_cols.items() if DataType[dtype] is DataType.DATETIME}
+
+ # job info
+ job_info = JobInfo()
+ job_info.auto_increment_col_timezone = is_tz_col
+ job_info.target = proc_cfg.name
+
+ # get data from factory
+ data = get_factory_data(proc_cfg, column_names, auto_increment_col, start_time, end_time)
+
+ # no data, return
+ if not data:
+ gen_import_job_info(job_info, 0, start_time, end_time)
+ job_info.auto_increment_col_timezone = is_tz_col
+ job_info.percent = 100
+ yield job_info
+ return
+
+ # get current job id
+ t_job_management: JobManagement = JobManagement.get_last_job_of_process(proc_id, JobType.FACTORY_PAST_IMPORT.name)
+ job_id = str(t_job_management.id) if t_job_management else ''
+ data_source_name = proc_cfg.data_source.name
+ table_name = proc_cfg.table_name
+
+ # start import data
+ cols = next(data)
+ remain_rows = tuple()
+ inserted_row_count = 0
+ for rows in data:
+ is_import, rows, remain_rows = gen_import_data(rows, remain_rows, auto_increment_idx)
+ if not is_import:
+ continue
+
+ # dataframe
+ df = pd.DataFrame(rows, columns=cols)
+
+ # no records
+ if not len(df):
+ continue
+
+ # Convert UTC time
+ for col, dtype in dic_use_cols.items():
+ if DataType[dtype] is not DataType.DATETIME:
+ continue
+
+ null_is_error = False
+ if col == get_date_col:
+ null_is_error = True
+
+ validate_datetime(df, col, is_strip=False, null_is_error=null_is_error)
+ df[col] = convert_df_col_to_utc(df, col, *dic_tz_info[col])
+ df[col] = convert_df_datetime_to_str(df, col)
+
+ # convert types
+ df = df.convert_dtypes()
+
+ # original df
+ orig_df = df.copy()
+
+ # data pre-processing
+ df_error = data_pre_processing(df, orig_df, dic_use_cols, exclude_cols=[get_date_col])
+ df_error_cnt = len(df_error)
+ if df_error_cnt:
+ df_error_trace = gen_error_output_df(proc_id, proc_name, dic_sensor, get_df_first_n_last(df_error))
+ write_error_trace(df_error_trace, proc_cfg.name)
+ write_error_import(df_error, proc_cfg.name)
+
+ # remove duplicate records which already exist in the DB
+ df_duplicate = remove_duplicates(df, orig_df, proc_id, get_date_col)
+ df_duplicate_cnt = len(df_duplicate)
+ if df_duplicate_cnt:
+ write_duplicate_records_to_file_factory(df_duplicate, data_source_name, table_name, dic_use_cols,
+ proc_cfg.name, job_id)
+
+ # import data
+ save_res = import_data(df, proc_id, get_date_col, cycle_cls, dic_sensor, dic_sensor_cls, dic_substring_sensors)
+
+ # update job info
+ imported_end_time = rows[-1][auto_increment_idx]
+ gen_import_job_info(job_info, save_res, start_time, imported_end_time, err_cnt=df_error_cnt)
+
+ # total row of one job
+ total_row = job_info.row_count
+ inserted_row_count += total_row
+
+ job_info.calc_percent(inserted_row_count, MAX_RECORD)
+ yield job_info
+
+ # raise exception if FATAL error happened
+ if job_info.status is JobStatus.FATAL:
+ raise job_info.exception
+
+ # output log
+ log_str = 'FACTORY PAST DATA IMPORT SQL(days={}, records={}, range={}-{})'
+ logger.info(log_str.format(SQL_PAST_DAYS_AGO, total_row, start_time, end_time))
+
+ yield 100
+
+
+@log_execution_time()
+def handle_time_zone(proc_cfg, get_date_col):
+ # convert utc time func
+ get_date, tzoffset_str, db_timezone = get_tzoffset_of_random_record(proc_cfg.data_source, proc_cfg.table_name,
+ get_date_col)
+
+ if tzoffset_str:
+ # use os time zone
+ db_timezone = None
+ else:
+ detected_timezone = detect_timezone(get_date)
+ # if there is time offset in datetime value, do not force time.
+ if detected_timezone is None:
+ # check and update if use os time zone flag changed
+ # if tz offset in val date, do not need to force
+ check_update_time_by_changed_tz(proc_cfg)
+
+ if proc_cfg.data_source.db_detail.use_os_timezone:
+ # use os time zone
+ db_timezone = None
+
+ is_tz_inside, _, time_offset = get_time_info(get_date, db_timezone)
+
+ return is_tz_inside, time_offset
+
+
+@log_execution_time()
+def check_db_connection(data_src):
+ # check db connection
+ with DbProxy(data_src) as db_instance:
+ if not db_instance.is_connected:
+ raise Exception(MSG_DB_CON_FAILED)
+
+
+@log_execution_time()
+def gen_import_data(rows, remain_rows, auto_increment_idx):
+ is_allow_import = True
+ # last fetch
+ if len(rows) < FETCH_MANY_SIZE:
+ return is_allow_import, remain_rows + rows, []
+
+ # prepend the rows carried over from the previous batch to keep chronological order
+ rows = remain_rows + rows
+ last_row_idx = len(rows) - 1
+ first_row_idx = max(last_row_idx - 1000, 0)
+
+ for i in range(last_row_idx, first_row_idx, -1):
+ # difference time
+ if rows[i][auto_increment_idx] != rows[i - 1][auto_increment_idx]:
+ return is_allow_import, rows[:i], rows[i:]
+
+ # no timestamp boundary found in the scanned window; carry everything over to the next batch
+ is_allow_import = False
+ return is_allow_import, [], rows
+
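+# Split sketch (illustrative): for a full fetch batch (len(rows) >= FETCH_MANY_SIZE), the helper
+# above keeps the trailing rows whose timestamp may continue into the next fetch, so a timestamp
+# group is never imported half-way. E.g. a batch ordered by the date column ending in
+#   ... 10:00:01, 10:00:02, 10:00:02
+# imports everything up to 10:00:01 and carries the two 10:00:02 rows over as remain_rows.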
+
+def write_duplicate_records_to_file_factory(df_duplicate: DataFrame, data_source_name, table_name, dic_use_cols,
+ proc_name, job_id=None):
+ error_msg = 'Duplicate Record'
+ time_str = convert_time(datetime.now(), format_str=DATE_FORMAT_STR_ONLY_DIGIT)[4:-3]
+ ip_address = get_ip_address()
+
+ df_output = gen_duplicate_output_df(dic_use_cols, get_df_first_n_last(df_duplicate),
+ table_name=table_name, error_msgs=error_msg)
+
+ write_duplicate_import(df_output, [proc_name, data_source_name, 'Duplicate', job_id, time_str, ip_address])
diff --git a/histview2/api/setting_module/services/filter_settings.py b/histview2/api/setting_module/services/filter_settings.py
new file mode 100644
index 0000000..38a544f
--- /dev/null
+++ b/histview2/api/setting_module/services/filter_settings.py
@@ -0,0 +1,77 @@
+from histview2.common.constants import CfgFilterType, RelationShip
+from histview2.setting_module.models import CfgFilter, CfgFilterDetail, make_session, get_or_create, CfgProcess, \
+ insert_or_update_config, crud_config
+
+
+def get_filter_request_data(params):
+ process_id = params.get('processId')
+ filter_id = ''.join(params.get('filterId') or [])
+ filter_type = params.get('filterType')
+ filter_name = params.get('filterName')
+ column_id = params.get('columnName') or None
+ filter_parent_detail_ids = params.get('filterDetailParentIds') or []
+ filter_detail_ids = params.get('fitlerDetailIds') or []
+ filter_detail_conds = params.get('filterConditions') or []
+ filter_detail_names = params.get('filterDetailNames') or []
+ filter_detail_functions = params.get('filterFunctions') or []
+ filter_detail_start_froms = params.get('filterStartFroms') or []
+
+ if not filter_parent_detail_ids:
+ filter_parent_detail_ids = [None] * len(filter_detail_ids)
+ if not filter_detail_functions:
+ filter_detail_functions = [None] * len(filter_detail_ids)
+ if not filter_detail_start_froms:
+ filter_detail_start_froms = [None] * len(filter_detail_ids)
+
+ return [process_id, filter_id, filter_type, column_id,
+ filter_detail_ids, filter_detail_conds, filter_detail_names,
+ filter_parent_detail_ids, filter_detail_functions, filter_detail_start_froms, filter_name]
+
+
+def save_filter_config(params):
+ [process_id, filter_id, filter_type, column_id,
+ filter_detail_ids, filter_detail_conds, filter_detail_names, filter_parent_detail_ids,
+ filter_detail_functions, filter_detail_start_froms, filter_name] = get_filter_request_data(params)
+
+ with make_session() as meta_session:
+ cfg_filter = CfgFilter(**{
+ 'id': int(filter_id) if filter_id else None,
+ 'process_id': process_id,
+ 'name': filter_name,
+ 'column_id': column_id,
+ 'filter_type': filter_type,
+ })
+ cfg_filter = insert_or_update_config(meta_session, cfg_filter)
+ meta_session.commit()
+
+ filter_id = cfg_filter.id # to return to frontend (must)
+
+ # crud filter details
+ num_details = len(filter_detail_conds)
+ filter_details = []
+ for idx in range(num_details):
+ filter_detail = CfgFilterDetail(**{
+ 'id': int(filter_detail_ids[idx]) if filter_detail_ids[idx] else None,
+ 'filter_id': cfg_filter.id,
+ 'name': filter_detail_names[idx],
+ 'parent_detail_id': filter_parent_detail_ids[idx] or None,
+ 'filter_condition': filter_detail_conds[idx],
+ 'filter_function': filter_detail_functions[idx] or None,
+ 'filter_from_pos': filter_detail_start_froms[idx] or None,
+ })
+ filter_details.append(filter_detail)
+
+ crud_config(meta_session=meta_session,
+ data=filter_details,
+ model=CfgFilterDetail,
+ key_names=CfgFilterDetail.id.key,
+ parent_key_names=CfgFilterDetail.filter_id.key,
+ parent_obj=cfg_filter,
+ parent_relation_key=CfgFilter.filter_details.key,
+ parent_relation_type=RelationShip.MANY)
+ return filter_id
+
+
+def delete_cfg_filter_from_db(filter_id):
+ with make_session() as mss:
+ CfgFilter.delete_by_id(mss, filter_id)
diff --git a/histview2/api/setting_module/services/polling_frequency.py b/histview2/api/setting_module/services/polling_frequency.py
new file mode 100644
index 0000000..9903433
--- /dev/null
+++ b/histview2/api/setting_module/services/polling_frequency.py
@@ -0,0 +1,136 @@
+import time
+from datetime import datetime
+from typing import List
+
+from apscheduler.triggers.date import DateTrigger
+from apscheduler.triggers.interval import IntervalTrigger
+from loguru import logger
+from pytz import utc
+
+from histview2 import scheduler, dic_request_info
+from histview2.api.setting_module.services.csv_import import import_csv_job
+from histview2.api.setting_module.services.factory_import import import_factory_job, \
+ factory_past_data_transform_job
+from histview2.api.setting_module.services.process_delete import add_del_proc_job
+from histview2.common.common_utils import add_seconds
+from histview2.common.constants import CfgConstantType, DBType
+from histview2.common.logger import log_execution_time
+from histview2.common.scheduler import JobType, remove_jobs, scheduler_app_context, add_job_to_scheduler
+from histview2.setting_module.models import CfgConstant, CfgProcess, JobManagement
+
+
+@log_execution_time()
+def change_polling_all_interval_jobs(interval_sec, run_now=False):
+ """ add job for csv and factory import
+
+ Arguments:
+ interval_sec {[type]} -- [description]
+
+ Keyword Arguments:
+ target_job_names {[type]} -- [description] (default: {None})
+ """
+ # target jobs (do not remove factory past data import)
+ target_jobs = [JobType.CSV_IMPORT, JobType.FACTORY_IMPORT]
+
+ # remove jobs
+ remove_jobs(target_jobs)
+
+ # if the interval is zero and we are not asked to run now, quit
+ if interval_sec == 0 and not run_now:
+ return
+
+ # add new jobs with new interval
+ procs: List[CfgProcess] = CfgProcess.query.all()
+
+ for proc_cfg in procs:
+ add_import_job(proc_cfg, interval_sec=interval_sec, run_now=run_now)
+
+
+def add_import_job(proc_cfg: CfgProcess, interval_sec=None, run_now=None):
+ if interval_sec is None:
+ interval_sec = CfgConstant.get_value_by_type_first(CfgConstantType.POLLING_FREQUENCY.name, int)
+
+ if interval_sec:
+ trigger = IntervalTrigger(seconds=interval_sec, timezone=utc)
+ else:
+ trigger = DateTrigger(datetime.now().astimezone(utc), timezone=utc)
+
+ if proc_cfg.data_source.type.lower() == DBType.CSV.value.lower():
+ job_name = JobType.CSV_IMPORT.name
+ import_func = import_csv_job
+ else:
+ job_name = JobType.FACTORY_IMPORT.name
+ import_func = import_factory_job
+
+ # check for last job entry in t_job_management
+ prev_job = JobManagement.get_last_job_of_process(proc_cfg.id, job_name)
+
+ job_id = f'{job_name}_{proc_cfg.id}'
+ dic_import_param = dict(_job_id=job_id, _job_name=job_name,
+ _db_id=proc_cfg.data_source_id, _proc_id=proc_cfg.id, _proc_name=proc_cfg.name,
+ proc_id=proc_cfg.id)
+
+ add_job_to_scheduler(job_id, job_name, trigger, import_func, run_now, dic_import_param)
+
+ add_idle_mornitoring_job()
+
+ # double check
+ attempt = 0
+ while attempt < 3:
+ attempt += 1
+ scheduler_job = scheduler.get_job(job_id)
+ last_job = JobManagement.get_last_job_of_process(proc_cfg.id, job_name)
+ if is_job_added(scheduler_job, prev_job, last_job):
+ break
+ else:
+ add_job_to_scheduler(job_id, job_name, trigger, import_func, run_now, dic_import_param)
+ logger.info("ADD MISSING JOB: job_id=", job_id)
+ time.sleep(1)
+
+
+def is_job_added(scheduler_job, prev_job, last_job):
+ if not scheduler_job:
+ if (prev_job is None and last_job is None) or (prev_job is not None and last_job.id == prev_job.id):
+ return False
+ return True
+
+
+@log_execution_time()
+def add_idle_mornitoring_job():
+ scheduler.add_job(JobType.IDLE_MORNITORING.name, idle_monitoring,
+ name=JobType.IDLE_MORNITORING.name,
+ replace_existing=True,
+ trigger=IntervalTrigger(seconds=60, timezone=utc),
+ kwargs=dict(_job_id=JobType.IDLE_MORNITORING.name, _job_name=JobType.IDLE_MORNITORING.name))
+
+ return True
+
+
+@scheduler_app_context
+def idle_monitoring(_job_id=None, _job_name=None):
+ """
+ check whether the system is idle; if so, run housekeeping jobs
+
+ """
+ # check last request > now() - 5 minutes
+ last_request_time = dic_request_info.get('last_request_time', datetime.utcnow())
+ if last_request_time > add_seconds(seconds=-5 * 60):
+ return
+
+ # delete unused processes
+ add_del_proc_job()
+
+ processes = CfgProcess.get_all()
+ for proc_cfg in processes:
+ if proc_cfg.data_source.type.lower() == DBType.CSV.name.lower():
+ continue
+
+ job_id = f'{JobType.FACTORY_PAST_IMPORT.name}_{proc_cfg.id}'
+ logger.info('IDLE_MONITORING: job_id={}', job_id)
+ dic_import_param = dict(_job_id=job_id, _job_name=JobType.FACTORY_PAST_IMPORT.name,
+ _db_id=proc_cfg.data_source_id, _proc_id=proc_cfg.id, _proc_name=proc_cfg.name,
+ proc_id=proc_cfg.id)
+ scheduler.add_job(job_id, factory_past_data_transform_job,
+ trigger=DateTrigger(datetime.now().astimezone(utc), timezone=utc),
+ name=JobType.FACTORY_PAST_IMPORT.name, replace_existing=True,
+ kwargs=dic_import_param)
diff --git a/histview2/api/setting_module/services/process_delete.py b/histview2/api/setting_module/services/process_delete.py
new file mode 100644
index 0000000..bca982b
--- /dev/null
+++ b/histview2/api/setting_module/services/process_delete.py
@@ -0,0 +1,94 @@
+from datetime import datetime
+
+from apscheduler.triggers.date import DateTrigger
+from pytz import utc
+
+from histview2 import db, scheduler
+from histview2.common.logger import log_execution_time, log_execution
+from histview2.common.scheduler import scheduler_app_context, JobType, remove_jobs
+from histview2.setting_module.models import CfgDataSource, make_session
+from histview2.setting_module.models import CfgProcess
+from histview2.setting_module.services.background_process import send_processing_info
+from histview2.trace_data.models import Process
+
+
+@scheduler_app_context
+def delete_process_job(_job_id=None, _job_name=None, *args, **kwargs):
+ """ scheduler job to delete process from db
+
+ Keyword Arguments:
+ _job_id {[type]} -- [description] (default: {None})
+ _job_name {[type]} -- [description] (default: {None})
+ """
+ gen = delete_process(*args, **kwargs)
+ send_processing_info(gen, JobType.DEL_PROCESS, db_code=kwargs.get('db_id'), process_id=kwargs.get('proc_id'), is_check_disk=False)
+
+
+@log_execution_time()
+def delete_process():
+ """
+ delete processes
+ :return:
+ """
+ yield 0
+
+ missing_procs = get_unused_procs()
+
+ if missing_procs:
+ proc_id = missing_procs[0]
+ proc = Process.query.get(proc_id)
+ proc.delete_proc_detail()
+ db.session.delete(proc)
+ db.session.commit()
+
+ yield 100
+
+
+@log_execution()
+def add_del_proc_job():
+ missing_procs = get_unused_procs()
+
+ if not missing_procs:
+ return
+
+ scheduler.add_job(
+ JobType.DEL_PROCESS.name, delete_process_job,
+ trigger=DateTrigger(run_date=datetime.now().astimezone(utc), timezone=utc),
+ replace_existing=True,
+ kwargs=dict(_job_id=JobType.DEL_PROCESS.name, _job_name=JobType.DEL_PROCESS.name)
+ )
+
+
+@log_execution_time()
+def delete_proc_cfg_and_relate_jobs(proc_id):
+ # delete cfg process
+ deleted = CfgProcess.delete(proc_id=proc_id)
+
+ # remove job relate to that process
+ if deleted:
+ # target jobs
+ target_jobs = [JobType.CSV_IMPORT, JobType.FACTORY_IMPORT, JobType.FACTORY_PAST_IMPORT]
+ # remove importing job from job queue
+ remove_jobs(target_jobs, proc_id)
+
+
+@log_execution_time()
+def get_unused_procs():
+ return list(set([proc.id for proc in Process.get_all_ids()]) - set([proc.id for proc in CfgProcess.get_all_ids()]))
+
+
+def del_data_source(ds_id):
+ """
+ delete data source
+ :param ds_id:
+ :return:
+ """
+ with make_session() as meta_session:
+ ds = meta_session.query(CfgDataSource).get(ds_id)
+ if not ds:
+ return
+
+ # delete data
+ for proc in ds.processes or []:
+ delete_proc_cfg_and_relate_jobs(proc.id)
+ meta_session.delete(ds)
diff --git a/histview2/api/setting_module/services/save_load_user_setting.py b/histview2/api/setting_module/services/save_load_user_setting.py
new file mode 100644
index 0000000..523fd45
--- /dev/null
+++ b/histview2/api/setting_module/services/save_load_user_setting.py
@@ -0,0 +1,201 @@
+import re
+from collections import defaultdict
+from itertools import zip_longest
+
+
+class UserSettingDetail:
+ def __init__(self, dic_vals):
+ self.id = dic_vals.get('id')
+ self.name = dic_vals.get('name')
+ self.value = dic_vals.get('value')
+ self.type = dic_vals.get('type')
+ self.level = dic_vals.get('level')
+ self.genBtnId = dic_vals.get('genBtnId')
+ self.checked = dic_vals.get('checked')
+ self.isActiveTab = dic_vals.get('isActiveTab')
+ self.original_obj = dic_vals
+
+ def convert_to_obj(self):
+ dic_vals = {key: val for key, val in self.__dict__.items() if key != 'original_obj' and val is not None}
+ return dic_vals
+
+ def is_checkbox_or_radio(self):
+ return self.type in ('checkbox', 'radio')
+
+
+def transform_settings(mapping_groups):
+ dic_output = {}
+ for form_name, src_vals, des_vals in mapping_groups:
+ vals = transform_setting(src_vals, des_vals)
+ vals = [val.convert_to_obj() for val in vals]
+ dic_output[form_name] = vals
+
+ return dic_output
+
+
+def transform_setting(src_vals, des_vals):
+ dic_src_checkboxes, dic_src_others = group_by_name(src_vals)
+ dic_des_checkboxes, dic_des_others = group_by_name(des_vals)
+ checkbox_vals = mapping_checkbox_radio(dic_src_checkboxes, dic_des_checkboxes)
+ other_vals = mapping_others(dic_src_others, dic_des_others)
+
+ return other_vals + checkbox_vals
+
+
+def mapping_checkbox_radio(dic_src, dic_des):
+ output_vals = []
+ all_keys = list(set(list(dic_src) + list(dic_des)))
+ for name in all_keys:
+ src_vals = dic_src.get(name, [])
+ if not src_vals:
+ src_vals = dic_des.get(name, [])
+
+ output_vals.extend(src_vals)
+
+ return output_vals
+
+
+def get_pair_names(target_name, dic_vals):
+ if target_name in dic_vals:
+ return dic_vals[target_name]
+
+ target_name = remove_non_str(target_name)
+ target_name = target_name.lower()
+ for name, vals in dic_vals.items():
+ name = remove_non_str(name)
+ name = name.lower()
+ if vals and 'select' in vals[0].type:
+ if name == target_name:
+ return vals
+ else:
+ if name in target_name or target_name in name:
+ return vals
+
+ return []
+
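+# Matching sketch (illustrative): group keys are compared after stripping '-', '_', digits and
+# whitespace and lower-casing, so e.g. a source group keyed 'end_proc_' can pair with a destination
+# group keyed 'endProcVal' ('endproc' is a substring of 'endprocval'); groups of <select> inputs
+# must match exactly.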
+
+def mapping_others(dic_src, dic_des):
+ all_vals = []
+ all_keys = list(set(list(dic_src) + list(dic_des)))
+ for name in all_keys:
+ group_vals = []
+ src_vals = get_pair_names(name, dic_src)
+ des_vals = get_pair_names(name, dic_des)
+
+ if not src_vals:
+ continue
+
+ if not des_vals:
+ des_vals = src_vals
+
+ first_des_val = des_vals[0]
+ des_id_str, _ = split_str_and_last_number(first_des_val.id)
+ des_name_str, _ = split_str_and_last_number(first_des_val.name)
+ for src_val, des_val in zip_longest(src_vals, des_vals):
+ if src_val is None:
+ continue
+
+ src_val: UserSettingDetail
+ des_val: UserSettingDetail
+
+ _, src_name_num = split_str_and_last_number(src_val.name)
+ _, src_id_num = split_str_and_last_number(src_val.id)
+ new_obj = UserSettingDetail(first_des_val.__dict__)
+ new_obj.id = des_id_str + src_id_num
+ new_obj.name = des_name_str + src_name_num
+ new_obj.value = src_val.value
+ group_vals.append(new_obj)
+
+ all_vals += group_vals
+
+ return all_vals
+
+
+def group_by_name(vals):
+ dic_checkboxes = defaultdict(list)
+ dic_others = defaultdict(list)
+ for dic_vals in vals:
+ setting = UserSettingDetail(dic_vals)
+ if not setting.name:
+ continue
+
+ if setting.is_checkbox_or_radio():
+ if setting.name == 'cat_filter':
+ continue
+ dic_checkboxes[setting.name.lower()].append(setting)
+ else:
+ short_name, _ = split_str_and_last_number(setting.name)
+ short_name = short_name.lower()
+ dic_others[short_name].append(setting)
+
+ dic_checkboxes = {key: sorted(vals, key=lambda x: x.name) for key, vals in dic_checkboxes.items()}
+ dic_others = {key: sorted(vals, key=lambda x: x.name) for key, vals in dic_others.items()}
+
+ return dic_checkboxes, dic_others
+
+
+def map_form(dic_src_vals, dic_des_vals):
+ mapping_groups = []
+ if len(dic_src_vals) == len(dic_des_vals):
+ names = zip(list(dic_src_vals), list(dic_des_vals))
+ else:
+ src_active_form = get_active_tab(dic_src_vals)
+ names = zip([src_active_form], list(dic_des_vals))
+
+ for src_name, des_name in names:
+ mapping_groups.append((des_name, dic_src_vals[src_name], dic_des_vals[des_name]))
+
+ return mapping_groups
+
+
+def map_form_bk(dic_src_vals, dic_des_vals):
+ mapping_groups = []
+ if len(dic_src_vals) == len(dic_des_vals):
+ names = zip(list(dic_src_vals), list(dic_des_vals))
+ for src_name, des_name in names:
+ mapping_groups.append((des_name, dic_src_vals[src_name], dic_des_vals[des_name]))
+ return mapping_groups
+
+ for form_name, vals in dic_des_vals.items():
+ if form_name in dic_src_vals:
+ mapping_groups.append((form_name, dic_src_vals[form_name], vals))
+ else:
+ src_vals = [(len(set(form_name) & set(_form_name)), _vals) for _form_name, _vals in dic_src_vals.items()]
+ src_vals = sorted(src_vals, key=lambda x: x[0])[-1]
+
+ mapping_groups.append((form_name, src_vals[1], vals))
+ return mapping_groups
+
+
+def get_active_tab(dic_setting):
+ tabs = []
+ for form_name, vals in dic_setting.items():
+ for dic_item in vals:
+ is_active_tab = dic_item.get('isActiveTab', None)
+ if is_active_tab is not None:
+ tabs.append(is_active_tab)
+
+ if tabs:
+ break
+
+ zip_forms = zip(list(dic_setting), tabs)
+ for form_name, is_active in zip_forms:
+ if is_active:
+ return form_name
+
+ return list(dic_setting.keys())[0]
+
+
+def remove_non_str(val):
+ return re.sub(r"[-_\d\s]", '', val)
+
+
+def split_str_and_last_number(input_str):
+ if not input_str:
+ return [input_str, '']
+
+ res = re.match(r'^(.*[^0-9])(\d+)$', input_str)
+ if res is None:
+ return [input_str, '']
+
+ return res[1], res[2]
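+
+# Worked examples (sketch) for the helpers above:
+#   split_str_and_last_number('condProcId3')  -> ('condProcId', '3')
+#   split_str_and_last_number('endProcVal')   -> ['endProcVal', '']   # no trailing digits
+#   remove_non_str('end-proc_val 12')         -> 'endprocval'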
diff --git a/histview2/api/setting_module/services/show_latest_record.py b/histview2/api/setting_module/services/show_latest_record.py
new file mode 100644
index 0000000..9d919c5
--- /dev/null
+++ b/histview2/api/setting_module/services/show_latest_record.py
@@ -0,0 +1,366 @@
+import os
+from functools import lru_cache
+from itertools import islice
+
+from histview2.api.efa.services.etl import preview_data, detect_file_delimiter
+from histview2.api.setting_module.services.csv_import import convert_csv_timezone
+from histview2.api.setting_module.services.data_import import strip_special_symbol, validate_datetime
+from histview2.api.setting_module.services.factory_import import get_tzoffset_of_random_record
+from histview2.common.common_utils import guess_data_types, get_csv_delimiter, get_sorted_files
+from histview2.common.constants import DBType, DataType, RelationShip, WR_VALUES, WR_HEADER_NAMES, WR_TYPES
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.pydn.dblib import mssqlserver, oracle
+from histview2.common.pydn.dblib.db_proxy import DbProxy
+from histview2.common.services import csv_header_wrapr as chw
+from histview2.common.services.csv_content import read_data, gen_data_types, is_normal_csv
+from histview2.common.services.jp_to_romaji_utils import to_romaji, change_duplicated_columns
+from histview2.common.services.normalization import normalize_list, normalize_big_rows
+from histview2.common.timezone_utils import get_time_info
+from histview2.setting_module.models import CfgDataSource, CfgProcessColumn, CfgVisualization, \
+ make_session, CfgProcess, crud_config
+from histview2.setting_module.schemas import VisualizationSchema
+from histview2.trace_data.models import Sensor, find_sensor_class
+
+
+def get_latest_records(data_source_id, table_name, limit):
+ blank_output = dict(cols=[], rows=[])
+ if not data_source_id:
+ return blank_output
+
+ data_source = CfgDataSource.query.get(data_source_id)
+ if not data_source:
+ return blank_output
+
+ previewed_files = None
+ cols_with_types = []
+ if data_source.type.lower() == DBType.CSV.name.lower():
+ csv_detail = data_source.csv_detail
+ dic_preview = preview_csv_data(csv_detail.directory, csv_detail.etl_func, csv_detail.delimiter, limit,
+ return_df=True)
+ headers = dic_preview.get('header')
+ data_types = dic_preview.get('dataType')
+ if headers and data_types:
+ cols_with_types = gen_cols_with_types(headers, data_types)
+
+ # sort columns
+ sorted_columns = sorted(csv_detail.csv_columns, key=lambda c: c.order or c.id)
+ cols = [col.column_name for col in sorted_columns if col.column_name in headers]
+
+ # get rows
+ df_rows = dic_preview.get('content', None)
+ previewed_files = dic_preview.get('previewed_files')
+ else:
+ cols, df_rows = get_info_from_db(data_source, table_name)
+ data_types = [gen_data_types(df_rows[col]) for col in cols]
+ if cols and data_types:
+ cols_with_types = gen_cols_with_types(cols, data_types)
+ # format data
+ df_rows = convert_utc_df(df_rows, cols, data_types, data_source, table_name)
+
+ # change name if romaji cols is duplicated
+ cols_with_types, cols_duplicated = change_duplicated_columns(cols_with_types)
+ rows = transform_df_to_rows(cols, df_rows, limit)
+ return cols_with_types, rows, cols_duplicated, previewed_files
+
+
+def gen_data_types_from_factory_type(cols, cols_with_types):
+ dic_col_type = {col.get('name'): guess_data_types(col.get('type')) for col in cols_with_types}
+ return [dic_col_type.get(col) for col in cols]
+
+
+@lru_cache(maxsize=20)
+def get_info_from_db(data_source, table_name):
+ with DbProxy(data_source) as db_instance:
+ if not db_instance or not table_name:
+ return [], []
+
+ sql_limit = 2000
+ if isinstance(db_instance, mssqlserver.MSSQLServer):
+ cols, rows = db_instance.run_sql("select TOP {} * from \"{}\"".format(sql_limit, table_name), False)
+ elif isinstance(db_instance, oracle.Oracle):
+ cols, rows = db_instance.run_sql(
+ "select * from \"{}\" where rownum <= {}".format(table_name, sql_limit), False)
+ else:
+ cols, rows = db_instance.run_sql("select * from \"{}\" limit {}".format(table_name, sql_limit), False)
+
+ cols = normalize_list(cols)
+ df_rows = normalize_big_rows(rows, cols, strip_quote=False)
+ return cols, df_rows
+
+
+def get_filter_col_data(proc_config: dict):
+ filter_cfgs = proc_config.get('filters') or []
+ cfg_col_ids = [filter_cfg.get('column_id') for filter_cfg in filter_cfgs]
+ if not cfg_col_ids:
+ return {}
+ sensor_data = {}
+ for col_id in cfg_col_ids:
+ sensor_data[col_id] = get_distinct_sensor_values(col_id)
+ return sensor_data
+
+
+@memoize()
+def get_distinct_sensor_values(cfg_col_id):
+ cfg_col: CfgProcessColumn = CfgProcessColumn.query.get(cfg_col_id)
+ if not cfg_col:
+ return []
+ sensor = Sensor.get_sensor_by_col_name(cfg_col.process_id, cfg_col.column_name)
+ sensor_vals = []
+ if sensor:
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type))
+ sensor_vals = sensor_val_cls.get_distinct_values(cfg_col.column_name, limit=1000)
+ sensor_vals = [sensor_val.value for sensor_val in sensor_vals]
+ return sensor_vals
+
+
+@memoize()
+def get_last_distinct_sensor_values(cfg_col_id):
+ cfg_col: CfgProcessColumn = CfgProcessColumn.query.get(cfg_col_id)
+ if not cfg_col:
+ return []
+ sensor = Sensor.get_sensor_by_col_name(cfg_col.process_id, cfg_col.column_name)
+ unique_sensor_vals = set()
+ if sensor:
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type))
+ sensor_vals = sensor_val_cls.get_last_distinct_values(cfg_col.column_name, limit=10000)
+ unique_sensor_vals = set([sensor_val.value for sensor_val in sensor_vals][:1000])
+ unique_sensor_vals = sorted(unique_sensor_vals)
+ return list(unique_sensor_vals)
+
+
+def save_master_vis_config(proc_id, cfg_jsons):
+ vis_schema = VisualizationSchema()
+
+ with make_session() as meta_session:
+ proc: CfgProcess = meta_session.query(CfgProcess).get(proc_id or -1)
+ if proc:
+ cfg_vis_data = []
+ for cfg_json in cfg_jsons:
+ cfg_vis_data.append(vis_schema.load(cfg_json))
+ crud_config(meta_session=meta_session,
+ data=cfg_vis_data,
+ model=CfgVisualization,
+ key_names=CfgVisualization.id.key,
+ parent_key_names=CfgVisualization.process_id.key,
+ parent_obj=proc,
+ parent_relation_key=CfgProcess.visualizations.key,
+ parent_relation_type=RelationShip.MANY)
+
+
+@log_execution_time()
+def preview_csv_data(folder_url, etl_func, csv_delimiter, limit, return_df=False):
+ df_data_details = None
+ csv_delimiter = get_csv_delimiter(csv_delimiter)
+ sorted_files = get_sorted_files(folder_url)
+ sorted_files = sorted_files[0:5]
+
+ csv_file = ''
+ skip_head = 0
+ skip_tail = 0
+ header_names = []
+ data_types = []
+ data_details = []
+ if not sorted_files:
+ return {
+ 'file_name': csv_file,
+ 'header': header_names,
+ 'content': [] if return_df else data_details,
+ 'dataType': data_types,
+ 'skip_head': skip_head,
+ 'skip_tail': skip_tail,
+ 'previewed_files': sorted_files,
+ }
+
+ csv_file = sorted_files[0]
+
+ # call efa etl
+ has_data_file = None
+ if etl_func:
+ # try to get file which has data to detect data types + get col names
+ for file_path in sorted_files:
+ preview_file_path = preview_data(file_path)
+ if preview_file_path and not isinstance(preview_file_path, Exception):
+ has_data_file = True
+ csv_file = preview_file_path
+ csv_delimiter = detect_file_delimiter(csv_file, csv_delimiter)
+ break
+
+ if (etl_func and has_data_file) or is_normal_csv(csv_file, csv_delimiter):
+ for i in range(2):
+ data = None
+ try:
+ data = read_data(csv_file, delimiter=csv_delimiter, do_normalize=False)
+ header_names = next(data)
+
+ # strip special symbols
+ if i == 0:
+ data = strip_special_symbol(data)
+
+ # read up to 1000 rows (enough for data type detection)
+ data_details = list(islice(data, 1000))
+ finally:
+ if data:
+ data.close()
+
+ if data_details:
+ break
+
+ # normalization
+ header_names = normalize_list(header_names)
+ df_data_details = normalize_big_rows(data_details, header_names)
+ data_types = [gen_data_types(df_data_details[col]) for col in header_names]
+ else:
+ # try to get file which has data to detect data types + get col names
+ dic_file_info, csv_file = get_etl_good_file(sorted_files)
+ if dic_file_info and csv_file:
+ skip_head = chw.get_skip_head(dic_file_info)
+ skip_tail = chw.get_skip_tail(dic_file_info)
+ header_names = chw.get_columns_name(dic_file_info)
+ etl_headers = chw.get_etl_headers(dic_file_info)
+ data_types = chw.get_data_type(dic_file_info)
+ for i in range(2):
+ data = None
+ try:
+ data = read_data(csv_file, headers=header_names, skip_head=skip_head, delimiter=csv_delimiter,
+ do_normalize=False)
+ # skip the header row (column names were already obtained from the ETL file info)
+ next(data)
+
+ # strip special symbols
+ if i == 0:
+ data = strip_special_symbol(data)
+
+ # read limit + skip_tail rows, then trim the trailing skip_tail rows
+ data_details = list(islice(data, limit + skip_tail))
+ data_details = data_details[:len(data_details) - skip_tail]
+ finally:
+ if data:
+ data.close()
+
+ if data_details:
+ break
+
+ # Merge heads with Machine, Line, Process
+ if etl_headers[WR_VALUES]:
+ header_names += etl_headers[WR_HEADER_NAMES]
+ data_types += etl_headers[WR_TYPES]
+ data_details = chw.merge_etl_heads(etl_headers[WR_VALUES], data_details)
+
+ header_names = normalize_list(header_names)
+ df_data_details = normalize_big_rows(data_details, header_names)
+
+ if df_data_details is not None:
+ # convert utc
+ for col, dtype in zip(header_names, data_types):
+ if DataType(dtype) is not DataType.DATETIME:
+ continue
+ # Convert UTC time
+ validate_datetime(df_data_details, col, False, False)
+ convert_csv_timezone(df_data_details, col)
+ df_data_details.dropna(subset=[col], inplace=True)
+
+ df_data_details = df_data_details[0:5]
+ if not return_df:
+ df_data_details = df_data_details.to_records(index=False).tolist()
+ else:
+ if not return_df:
+ df_data_details = []
+
+ if csv_file:
+ csv_file = csv_file.replace('/', os.sep)
+
+ return {
+ 'file_name': csv_file,
+ 'header': header_names,
+ 'content': df_data_details,
+ 'dataType': data_types,
+ 'skip_head': skip_head,
+ 'skip_tail': skip_tail,
+ 'previewed_files': sorted_files,
+ }
+
+
+@log_execution_time()
+def get_etl_good_file(sorted_files):
+ csv_file = None
+ dic_file_info = None
+ for file_path in sorted_files:
+ check_result = chw.get_file_info_py(file_path)
+ if isinstance(check_result, Exception):
+ continue
+
+ dic_file_info, is_empty_file = check_result
+
+ if dic_file_info is None:
+ continue
+
+ if is_empty_file:
+ continue
+
+ csv_file = file_path
+ break
+
+ return dic_file_info, csv_file
+
+
+@log_execution_time()
+def gen_cols_with_types(cols, data_types):
+ cols_with_types = []
+ for col_name, data_type in zip(cols, data_types):
+ is_date = DataType(data_type) is DataType.DATETIME
+
+ # add to output
+ if col_name:
+ cols_with_types.append({
+ "name": col_name,
+ "type": DataType(data_type).name,
+ 'romaji': to_romaji(col_name),
+ 'is_date': is_date,
+ })
+
+ return cols_with_types
+
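+# Example element of the returned list (sketch; 'romaji' is whatever to_romaji() yields):
+#   {'name': 'get_date', 'type': 'DATETIME', 'romaji': to_romaji('get_date'), 'is_date': True}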
+
+@log_execution_time()
+def convert_utc_df(df_rows, cols, data_types, data_source, table_name):
+ for col_name, data_type in zip(cols, data_types):
+ is_date = DataType(data_type) is DataType.DATETIME
+ if not is_date:
+ continue
+
+ # convert utc
+ date_val, tzoffset_str, db_timezone = get_tzoffset_of_random_record(data_source, table_name, col_name)
+
+ # use os timezone
+ if data_source.db_detail.use_os_timezone:
+ db_timezone = None
+
+ is_tz_inside, _, time_offset = get_time_info(date_val, db_timezone)
+
+ # Convert UTC time
+ validate_datetime(df_rows, col_name, False, False)
+ convert_csv_timezone(df_rows, col_name)
+ df_rows.dropna(subset=[col_name], inplace=True)
+
+ return df_rows
+
+
+def transform_df_to_rows(cols, df_rows, limit):
+ return [dict(zip(cols, vals)) for vals in df_rows[0:limit][cols].to_records(index=False).tolist()]
+
+
+@log_execution_time()
+def gen_preview_data_check_dict(rows, previewed_files):
+ dic_preview_limit = {}
+ file_path = previewed_files[0] if previewed_files else ''
+ file_name = ''
+ folder_path = ''
+ if file_path:
+ file_name = os.path.basename(file_path)
+ folder_path = os.path.dirname(file_path)
+
+ dic_preview_limit['reach_fail_limit'] = bool(not rows and previewed_files)
+ dic_preview_limit['file_name'] = file_name
+ dic_preview_limit['folder'] = folder_path
+ return dic_preview_limit
diff --git a/histview2/api/table_viewer/controllers.py b/histview2/api/table_viewer/controllers.py
new file mode 100644
index 0000000..b165485
--- /dev/null
+++ b/histview2/api/table_viewer/controllers.py
@@ -0,0 +1,112 @@
+import json
+
+from flask import Blueprint, request
+
+from histview2.common.pydn.dblib import mssqlserver, oracle
+from histview2.common.pydn.dblib.db_proxy import DbProxy
+from histview2.common.services import http_content
+from histview2.common.services.jp_to_romaji_utils import to_romaji
+from histview2.common.services.sse import notify_progress
+from histview2.setting_module.models import CfgDataSource
+
+api_table_viewer_blueprint = Blueprint(
+ 'api_table_viewer',
+ __name__,
+ url_prefix='/histview2/api/table_viewer'
+)
+
+
+@api_table_viewer_blueprint.route('/column_names', methods=['GET'])
+def get_column_names():
+ """[summary]
+ show_column_names
+ Returns:
+ [type] -- [description]
+ """
+ database = request.args.get('database')
+ table = request.args.get('table')
+
+ blank_output = json.dumps({
+ 'cols': [],
+ 'rows': []
+ }, ensure_ascii=False, default=http_content.json_serial)
+
+ data_source = CfgDataSource.query.get(database)
+ if not data_source:
+ return blank_output
+
+ with DbProxy(data_source) as db_instance:
+ if not db_instance or not table:
+ return blank_output
+
+ cols = db_instance.list_table_columns(table)
+ for col in cols:
+ col['romaji'] = to_romaji(col['name'])
+
+ content = {
+ 'cols': cols,
+ }
+
+ return json.dumps(content, ensure_ascii=False, default=http_content.json_serial)
+
+
+@api_table_viewer_blueprint.route('/table_records', methods=['POST'])
+def get_table_records():
+ """[summary]
+ Show limited records
+ Returns:
+ [type] -- [description]
+ """
+
+ request_data = json.loads(request.data)
+ db_code = request_data.get("database_code")
+ table_name = request_data.get("table_name")
+ sort_column = request_data.get("sort_column")
+ sort_order = request_data.get("sort_order") or "DESC"
+ limit = request_data.get("limit") or 5
+
+ blank_output = json.dumps({
+ 'cols': [],
+ 'rows': []
+ }, ensure_ascii=False, default=http_content.json_serial)
+
+ if not db_code or not table_name or sort_order not in ("ASC", "DESC"):
+ return blank_output
+
+ data_source = CfgDataSource.query.get(db_code)
+ if not data_source:
+ return blank_output
+
+ with DbProxy(data_source) as db_instance:
+ if not db_instance or not table_name:
+ return blank_output
+
+ cols_with_types = db_instance.list_table_columns(table_name)
+ for col in cols_with_types:
+ col['romaji'] = to_romaji(col['name'])
+
+ cols, rows = query_data(db_instance, table_name, sort_column, sort_order, limit)
+
+ result = {
+ 'cols': cols_with_types,
+ 'rows': rows
+ }
+ return json.dumps(result, ensure_ascii=False, default=http_content.json_serial)
+
+
+@notify_progress(50)
+def query_data(db_instance, table_name, sort_column, sort_order, limit):
+ sort_statement = ''
+ if sort_column and sort_order:
+ sort_statement = "order by \"{}\" {} ".format(sort_column, sort_order)
+
+ if isinstance(db_instance, mssqlserver.MSSQLServer):
+ sql = "select TOP {} * from \"{}\" {} ".format(limit, table_name, sort_statement)
+ elif isinstance(db_instance, oracle.Oracle):
+ sql = "select * from \"{}\" where rownum <= {} {} ".format(table_name, limit, sort_statement)
+ else:
+ sql = "select * from \"{}\" {} limit {}".format(table_name, sort_statement, limit)
+
+ cols, rows = db_instance.run_sql(sql=sql)
+
+ return cols, rows
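+
+# Example of the generated SQL (sketch; identifiers and defaults are placeholders):
+#   MSSQL : select TOP 5 * from "t_machine" order by "get_date" DESC
+#   Oracle: select * from "t_machine" where rownum <= 5 order by "get_date" DESC
+#   other : select * from "t_machine" order by "get_date" DESC  limit 5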
diff --git a/histview2/api/trace_data/controllers.py b/histview2/api/trace_data/controllers.py
new file mode 100644
index 0000000..1648371
--- /dev/null
+++ b/histview2/api/trace_data/controllers.py
@@ -0,0 +1,154 @@
+import json
+import timeit
+
+import simplejson
+from flask import Blueprint, request, jsonify, Response
+
+from histview2.api.trace_data.services.csv_export import gen_csv_data
+from histview2.api.trace_data.services.time_series_chart import (update_outlier_flg, save_proc_sensor_order_to_db,
+ gen_graph_fpp)
+from histview2.common.services import http_content, csv_content
+from histview2.common.services.form_env import parse_request_params, parse_multi_filter_into_one
+from histview2.common.services.import_export_config_n_data import export_debug_info, set_export_dataset_id_to_dic_param, \
+ get_dic_form_from_debug_info, import_user_setting_db, \
+ import_config_db, get_zip_full_path
+from histview2.common.trace_data_log import save_input_data_to_file, EventType
+
+api_trace_data_blueprint = Blueprint(
+ 'api_trace_data',
+ __name__,
+ url_prefix='/histview2/api/fpp'
+)
+
+FPP_MAX_GRAPH = 20
+
+
+@api_trace_data_blueprint.route('/index', methods=['POST'])
+def trace_data():
+ """
+ Trace Data API
+ return dictionary
+ """
+
+ start = timeit.default_timer()
+ dic_form = request.form.to_dict(flat=False)
+
+ # save dic_form to pickle (for future debug)
+ save_input_data_to_file(dic_form, EventType.FPP)
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ # check if we run debug mode (import mode)
+ dic_param = get_dic_form_from_debug_info(dic_param)
+
+ dic_param = gen_graph_fpp(dic_param, FPP_MAX_GRAPH)
+ stop = timeit.default_timer()
+ dic_param['backend_time'] = stop - start
+
+ # export mode ( output for export mode )
+ set_export_dataset_id_to_dic_param(dic_param)
+
+ # generate HTML based on trace_data.html
+ out_dict = simplejson.dumps(dic_param, ensure_ascii=False, default=http_content.json_serial, ignore_nan=True)
+
+ return out_dict, 200
+
+
+@api_trace_data_blueprint.route('/csv_export', methods=['GET'])
+def csv_export():
+ """csv export
+
+ Returns:
+ [type] -- [description]
+ """
+ dic_form = parse_request_params(request)
+ dic_param = parse_multi_filter_into_one(dic_form)
+ csv_str = gen_csv_data(dic_param)
+ csv_filename = csv_content.gen_csv_fname()
+
+ response = Response(csv_str.encode("utf-8-sig"), mimetype="text/csv",
+ headers={
+ "Content-Disposition": "attachment;filename={}".format(csv_filename),
+ })
+ response.charset = "utf-8-sig"
+
+ return response
+
+
+@api_trace_data_blueprint.route('/tsv_export', methods=['GET'])
+def tsv_export():
+ """tsv export
+
+ Returns:
+ [type] -- [description]
+ """
+ dic_form = parse_request_params(request)
+ dic_param = parse_multi_filter_into_one(dic_form)
+ csv_str = gen_csv_data(dic_param, delimiter='\t')
+ csv_filename = csv_content.gen_csv_fname("tsv")
+
+ response = Response(csv_str.encode("utf-8-sig"), mimetype="text/tsv",
+ headers={
+ "Content-Disposition": "attachment;filename={}".format(csv_filename),
+ })
+ response.charset = "utf-8-sig"
+
+ return response
+
+
+@api_trace_data_blueprint.route('/zip_export', methods=['GET'])
+def zip_export():
+ """zip export
+
+ Returns:
+ [type] -- [description]
+ """
+ dic_form = parse_request_params(request)
+ dataset_id = int(dic_form['dataset_id'])
+ user_setting_id = int(dic_form['user_setting_id'])
+ response = export_debug_info(dataset_id, user_setting_id)
+
+ return response
+
+
+@api_trace_data_blueprint.route('/zip_import', methods=['GET'])
+def zip_import():
+ """zip import
+
+ Returns:
+ [type] -- [description]
+ """
+ dic_form = parse_request_params(request)
+ filename = dic_form['filename']
+ zip_file = get_zip_full_path(filename)
+ import_config_db(zip_file)
+ user_setting = import_user_setting_db(zip_file)
+ dic_user_setting = {'id': user_setting['id'], 'page': user_setting['page']}
+
+ return jsonify(dic_user_setting), 200
+
+
+@api_trace_data_blueprint.route('/update_outlier', methods=['POST'])
+def update_outlier():
+ """
+ Update outlier flags to DB.
+ :return:
+ """
+ request_data = json.loads(request.data)
+ proc_id = request_data.get("process_id")
+ cycle_ids = request_data.get("cycle_ids")
+ is_outlier = request_data.get("is_outlier")
+ update_outlier_flg(proc_id, cycle_ids, is_outlier)
+ return jsonify({}), 200
+
+
+@api_trace_data_blueprint.route('/save_order', methods=['POST'])
+def save_proc_sensor_order():
+ """
+ Save order of processes and sensors from GUI drag & drop
+ :return:
+ """
+ request_data = json.loads(request.data)
+ orders = request_data.get("orders") or {}
+ save_proc_sensor_order_to_db(orders)
+
+ return jsonify({}), 200
diff --git a/histview2/api/trace_data/services/__init__.py b/histview2/api/trace_data/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/api/trace_data/services/csv_export.py b/histview2/api/trace_data/services/csv_export.py
new file mode 100644
index 0000000..3059995
--- /dev/null
+++ b/histview2/api/trace_data/services/csv_export.py
@@ -0,0 +1,127 @@
+from typing import Dict
+
+import numpy as np
+import pandas as pd
+import pytz
+from dateutil import tz
+from pandas import DataFrame
+
+from histview2.api.trace_data.services.time_series_chart import get_data_from_db, get_procs_in_dic_param
+from histview2.common.common_utils import gen_sql_label, DATE_FORMAT_STR, DATE_FORMAT_STR_CSV
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.setting_module.models import CfgProcess
+from histview2.trace_data.schemas import DicParam
+
+
+@log_execution_time()
+def gen_csv_data(dic_param, delimiter=None): # get the most cover flows
+ """tracing data to show csv
+ 1 start point x n end point
+ filter by condition points that are between the start point and end point
+ """
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add category
+ graph_param.add_cate_procs_to_array_formval()
+
+ # get serials + date
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+
+ get_date = proc_cfg.get_date_col(column_name_only=False).id
+ proc.add_cols(get_date, append_first=True)
+
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids, append_first=True)
+
+ # get data from database
+ df, *_ = get_data_from_db(graph_param)
+ client_timezone = dic_param[COMMON].get(CLIENT_TIMEZONE)
+ client_timezone = pytz.timezone(client_timezone) if client_timezone else tz.tzlocal()
+ # client_timezone = tz.gettz(client_timezone or None) or tz.tzlocal()
+
+ if delimiter:
+ csv_data = to_csv(df, dic_proc_cfgs, graph_param, delimiter=delimiter, client_timezone=client_timezone)
+ else:
+ csv_data = to_csv(df, dic_proc_cfgs, graph_param, client_timezone=client_timezone)
+
+ return csv_data
+
+
+def gen_export_col_name(proc_name, col_name):
+ return f'{proc_name}|{col_name}'
+
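+# Example (illustrative names): gen_export_col_name('Press', 'pressure') -> 'Press|pressure'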
+
+@log_execution_time()
+def to_csv(df: DataFrame, dic_proc_cfgs: Dict[int, CfgProcess], graph_param: DicParam, delimiter=',',
+ client_timezone=None, output_path=None, output_col_ids=None, len_of_col_name=None):
+ # rename
+ new_headers = []
+ suffix = '...'
+ dic_rename = {}
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ for col_id, col_name, name in zip(proc.col_ids, proc.col_names, proc.col_show_names):
+ old_name = gen_sql_label(col_id, col_name)
+ if old_name not in df.columns:
+ continue
+
+ if output_col_ids and col_id not in output_col_ids:
+ continue
+
+ new_name = gen_export_col_name(proc_cfg.name, name)
+ if len_of_col_name and len(new_name) > len_of_col_name:
+ new_name = new_name[:len_of_col_name - len(suffix)] + suffix
+ idx = 1
+ while new_name in new_headers:
+ new_name = f'{new_name[:-3]}({idx})'
+ idx += 1
+ new_headers.append(new_name)
+
+ dic_rename[old_name] = new_name
+
+ # get only output columns
+ df_csv = df[dic_rename]
+ df_csv.rename(columns=dic_rename, inplace=True)
+ df_csv.replace({np.nan: None}, inplace=True)
+
+ # timezone
+ if client_timezone:
+ # get date list
+ get_dates = []
+ for proc_cfg in dic_proc_cfgs.values():
+ get_date_col = proc_cfg.get_date_col(column_name_only=False)
+ get_date_name_in_df = gen_export_col_name(proc_cfg.name, get_date_col.name)
+ get_dates.append(get_date_name_in_df)
+
+ for col in df_csv.columns:
+ if col not in get_dates:
+ continue
+ # df_csv[col] = df_csv[col].apply(lambda v: convert_dt_str_to_timezone(client_timezone, v))
+ df_csv[col] = pd.to_datetime(df_csv[col], format=DATE_FORMAT_STR, utc=True) \
+ .dt.tz_convert(client_timezone).dt.strftime(DATE_FORMAT_STR_CSV)
+
+ return df_csv.to_csv(output_path, sep=delimiter, index=False)
+
+
+def sql_label_short(headers, length=10):
+ new_headers = []
+ suffix = '...'
+ for header in headers:
+ new_header = header[:length - len(suffix)] + suffix if len(header) > length else header
+
+ idx = 1
+ while new_header in new_headers:
+ new_header = f'{new_header[:-3]}({idx})'
+ idx += 1
+
+ new_headers.append(new_header)
+ return new_headers
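+
+# Example (sketch): sql_label_short(['very_long_column_name', 'very_long_column_nameX'], length=10)
+#   -> ['very_lo...', 'very_lo(1)']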
diff --git a/histview2/api/trace_data/services/graph_search.py b/histview2/api/trace_data/services/graph_search.py
new file mode 100644
index 0000000..5d73668
--- /dev/null
+++ b/histview2/api/trace_data/services/graph_search.py
@@ -0,0 +1,57 @@
+
+class GraphUtil:
+
+ def __init__(self, V):
+ self.V = V
+ self.adj = [[] for i in range(len(V))]
+
+ def dfs_util(self, temp, v, visited):
+ visited[v] = True
+ temp.append(v)
+ idx = self.V.index(v)
+ for i in self.adj[idx]:
+ pr = self.V[i]
+ if not visited[pr]:
+ temp = self.dfs_util(temp, pr, visited)
+ return temp
+
+ def add_edge(self, v, w):
+ v_index = self.V.index(v)
+ w_index = self.V.index(w)
+ self.adj[v_index].append(w_index)
+ self.adj[w_index].append(v_index)
+
+ def connected_components(self):
+ visited = {}
+ cc = []
+ for v in self.V:
+ visited[v] = False
+ for v in self.V:
+ if not visited[v]:
+ temp = []
+ cc.append(self.dfs_util(temp, v, visited))
+ return cc
+
+ def find_linked_processes(self, target):
+ if target not in self.V:
+ return []
+
+ visited = {}
+ for v in self.V:
+ visited[v] = False
+ for v in self.V:
+ if not visited[v]:
+ temp = []
+ component = self.dfs_util(temp, v, visited)
+ if target in component:
+ return component
+ return []
+
+
+if __name__ == "__main__":
+ g = GraphUtil([10, 11, 12, 13, 14, 20, 2222]) # pass list of proc_ids
+ g.add_edge(11, 10)
+ g.add_edge(12, 13)
+ g.add_edge(13, 14)
+ components = g.find_linked_processes(10)
+ print(components)
diff --git a/histview2/api/trace_data/services/plot_view.py b/histview2/api/trace_data/services/plot_view.py
new file mode 100644
index 0000000..3c8eac5
--- /dev/null
+++ b/histview2/api/trace_data/services/plot_view.py
@@ -0,0 +1,461 @@
+import itertools
+from datetime import datetime, timedelta
+from typing import List
+
+import numpy as np
+import pandas as pd
+import pytz
+from dateutil import parser, tz
+
+from histview2.api.trace_data.services.graph_search import GraphUtil
+from histview2.api.trace_data.services.proc_link import order_before_mapping_data
+from histview2.api.trace_data.services.time_series_chart import get_data_from_db, gen_dic_data_from_df, \
+ get_chart_infos, gen_plotdata, create_rsuffix, get_procs_in_dic_param, create_graph_config, \
+ gen_blank_df_end_cols
+from histview2.common.common_utils import gen_sql_label, DATE_FORMAT_STR_CSV, DATE_FORMAT, TIME_FORMAT
+from histview2.common.constants import ARRAY_PLOTDATA, PRC_MAX, PRC_MIN, THRESH_HIGH, THRESH_LOW, ARRAY_FORMVAL, \
+ END_PROC, GET02_VALS_SELECT, ACT_FROM, ACT_TO, SUMMARIES
+from histview2.common.logger import log_execution_time
+from histview2.common.services.form_env import bind_dic_param_to_class, parse_multi_filter_into_one
+from histview2.common.services.statistics import calc_summaries
+from histview2.common.yaml_utils import YamlConfig
+from histview2.setting_module.models import CfgProcessColumn, CfgTrace, CfgProcess
+from histview2.trace_data.models import Cycle
+from histview2.trace_data.schemas import EndProc
+
+
+@log_execution_time()
+def gen_graph_plot_view(dic_form):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition points that are between the start point and end point
+ """
+ dic_param = parse_multi_filter_into_one(dic_form)
+ cycle_id = int(dic_form.get('cycle_id'))
+ point_time = dic_form.get('time')
+ target_id = int(dic_form.get('sensor_id'))
+ sensor = CfgProcessColumn.query.get(target_id)
+ target_proc_id = sensor.process_id
+
+ # bind graph_param
+ graph_param, dic_proc_cfgs = build_graph_param(dic_param)
+
+ # get data from database
+ df, _, _ = get_data_from_db(graph_param)
+
+ # create output data
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ orig_graph_param.add_cate_procs_to_array_formval()
+ dic_data = gen_dic_data_from_df(df, orig_graph_param)
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ times = df[Cycle.time.key].tolist() or []
+
+ # get chart infos
+ chart_infos, original_graph_configs = get_chart_infos(orig_graph_param, dic_data, times)
+
+ dic_param[ARRAY_FORMVAL], dic_param[ARRAY_PLOTDATA] \
+ = gen_plotdata(orig_graph_param, dic_data, chart_infos, original_graph_configs)
+
+ # calculate_summaries
+ calc_summaries(dic_param)
+
+ # extract_cycle
+ df = extract_cycle(df, cycle_id)
+ if df.empty:
+ df = gen_blank_df_end_cols(graph_param.array_formval)
+ df[df.columns] = df[df.columns].to_numpy()
+
+ # timezone
+ client_timezone = graph_param.common.client_timezone
+ client_timezone = pytz.timezone(client_timezone) if client_timezone else tz.tzlocal()
+
+ # List table
+ list_tbl_header, list_tbl_rows = gen_list_table(dic_proc_cfgs, graph_param, df, client_timezone)
+
+ # Stats table
+ stats_tbl_header, stats_tbl_data = gen_stats_table(
+ dic_proc_cfgs,
+ graph_param,
+ df,
+ dic_param,
+ original_graph_configs,
+ client_timezone,
+ point_time,
+ target_id,
+ target_proc_id,
+ )
+
+ # Full link table
+ dic_param = build_dic_param_plot_view(dic_form)
+ graph_param, dic_proc_cfgs = build_graph_param(dic_param, full_link=True)
+ df_full, _, _ = get_data_from_db(graph_param)
+ df_full: pd.DataFrame = extract_cycle(df_full, cycle_id)
+ if df_full.empty:
+ df_full = gen_blank_df_end_cols(graph_param.array_formval)
+ df_full[df.columns] = df[df.columns].to_numpy()
+
+ full_link_tbl_header, full_link_tbl_rows = gen_list_table(dic_proc_cfgs, graph_param, df_full, client_timezone)
+
+ return dic_param, {
+ 'stats_tbl_header': stats_tbl_header,
+ 'stats_tbl_data': stats_tbl_data,
+ 'list_tbl_header': list_tbl_header,
+ 'list_tbl_rows': list_tbl_rows,
+ 'full_link_tbl_header': full_link_tbl_header,
+ 'full_link_tbl_rows': full_link_tbl_rows,
+ }
+
+
+def extract_cycle(df: pd.DataFrame, cycle_id):
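+    # Pick the single row for the clicked cycle: match on the 'id' column when it was selected,
+    # otherwise fall back to the DataFrame index. NaN is blanked out for display.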
+ if 'id' in df.columns:
+ df = df[df.id == cycle_id].reset_index()
+ else:
+ df = df[df.index == cycle_id].reset_index()
+
+ return df.replace({np.nan: ''})
+
+
+def gen_stats_table(dic_proc_cfgs, graph_param, df, dic_param, chart_infos, client_timezone, start_time_val, target_id,
+ target_proc_id):
+ proc_ids = dic_proc_cfgs.keys()
+ proc_ids = order_proc_as_trace_config(proc_ids)
+
+ stats_tbl_data = []
+ max_num_serial = 1
+ for proc_id, proc_cfg in dic_proc_cfgs.items():
+ serial_col_cfgs = proc_cfg.get_serials(column_name_only=False)
+ if len(serial_col_cfgs) >= max_num_serial:
+ max_num_serial = len(serial_col_cfgs)
+
+ for proc_order, proc_id in enumerate(proc_ids):
+ proc_cfg = dic_proc_cfgs.get(proc_id)
+ end_proc: EndProc = graph_param.search_end_proc(proc_id)[1]
+ col_ids = end_proc.col_ids
+ col_names = end_proc.col_names
+ col_show_names = end_proc.col_show_names
+ serial_col_cfgs = proc_cfg.get_serials(column_name_only=False)
+ serial_ids = []
+ serial_vals = []
+ for serial in serial_col_cfgs:
+ serial_label = gen_sql_label(serial.id, serial.column_name)
+ serial_ids.append(serial.id)
+ serial_vals.append(df.loc[0][serial_label])
+
+ if len(serial_col_cfgs) < max_num_serial:
+ diff = max_num_serial - len(serial_col_cfgs)
+ for i in range(diff):
+ serial_ids.append('')
+ serial_vals.append('')
+
+ # Datetime
+ time_col_name = str(Cycle.time.key) + create_rsuffix(proc_id)
+ time_val = df.loc[0][time_col_name]
+ if not pd.isna(time_val) and time_val:
+ dt_obj = parser.parse(time_val)
+ dt_obj = dt_obj.astimezone(client_timezone)
+ time_val = datetime.strftime(dt_obj, DATE_FORMAT_STR_CSV)
+ else:
+ time_val = ''
+
+ for col_idx, col_id in enumerate(col_ids):
+ if col_id in serial_ids:
+ continue
+
+ row = []
+ if col_id == target_id:
+ priority = 1
+ elif proc_id == target_proc_id:
+ priority = 2
+ else:
+ priority = proc_order + 10
+ row.append(priority)
+
+ # Serial No
+ row.extend(serial_vals)
+
+ # Item
+ col_name = col_names[col_idx]
+ row.append(col_name)
+
+ # Name
+ col_show_name = col_show_names[col_idx]
+ row.append(col_show_name)
+
+ # Value
+ col_label = gen_sql_label(col_id, col_name)
+ col_val = df.loc[0][col_label]
+ row.append(col_val)
+
+ # Datetime
+ row.append(time_val)
+
+ # Process
+ row.append(proc_cfg.name)
+
+ # Threshold
+ latest_idx = None
+ col_idx = get_sensor_idx(dic_param, proc_id, col_id)
+ col_thresholds = YamlConfig.get_node(chart_infos, [proc_id, col_id]) or []
+ threshold = {}
+ if col_thresholds:
+ point_time = time_val or start_time_val
+ threshold, latest_idx = get_latest_threshold(col_thresholds, point_time, client_timezone)
+ th_type = threshold.get('type') or ''
+ th_name = threshold.get('name') or ''
+ if col_idx is not None:
+ th_type = th_type or 'Default'
+ th_name = th_name or 'Default'
+ lcl = threshold.get(THRESH_LOW) or ''
+ ucl = threshold.get(THRESH_HIGH) or ''
+ lpcl = threshold.get(PRC_MIN) or ''
+ upcl = threshold.get(PRC_MAX) or ''
+ row.extend([th_type, th_name, lcl, ucl, lpcl, upcl])
+
+ # Summaries
+ if col_idx is not None and latest_idx is not None:
+ plotdata = dic_param[ARRAY_PLOTDATA][col_idx]
+ summaries = plotdata[SUMMARIES] or []
+ summary = summaries[latest_idx]
+ row.extend(build_summary_cells(summary))
+ else:
+ row.extend(build_empty_summary_cells())
+
+ stats_tbl_data.append(row)
+
+ stats_tbl_data = sorted(stats_tbl_data, key=lambda x: int(x[0]))
+ stats_tbl_data = list(map(lambda x: x[1:], stats_tbl_data))
+
+ stats_tbl_header = build_stats_header(max_num_serial)
+
+ return stats_tbl_header, stats_tbl_data
+
+
+def get_latest_threshold(col_thresholds, point_time, client_timezone):
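+    # Among the threshold configs whose active window [ACT_FROM, ACT_TO] contains point_time,
+    # return the one with the latest ACT_TO together with its index; fall back to the first config
+    # when no window matches or no point_time is given.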
+ if not col_thresholds:
+ return create_graph_config()[0], None
+
+ if point_time:
+ point_time = parser.parse(point_time)
+ if point_time.tzinfo is None:
+ point_time = client_timezone.localize(point_time)
+ latest_act_to = parser.parse('1970-01-01T00:00:00.000Z')
+ latest_threshold = col_thresholds[0]
+ latest_idx = 0
+ for idx, th in enumerate(col_thresholds):
+ act_from = parser.parse(th.get(ACT_FROM) or '1970-01-01T00:00:00.000Z')
+ act_to = parser.parse(th.get(ACT_TO) or '9999-01-01T00:00:00.000Z')
+ if act_from <= point_time <= act_to:
+ if latest_act_to < act_to:
+ latest_threshold = th
+ latest_idx = idx
+ latest_act_to = act_to
+ return latest_threshold, latest_idx
+ else:
+ latest_idx = 0
+ return col_thresholds[latest_idx], latest_idx
+
+
+def build_stats_header(max_num_serial):
+ stats_tbl_header = ['Serial No {}'.format(idx + 1) for idx in range(max_num_serial)]
+ stats_tbl_header.extend(['Item', 'Name', 'Value', 'Datetime', 'Process name',
+ 'Type', 'Name', 'Lower threshold', 'Upper threshold',
+ 'Lower process threshold', 'Upper process threshold',
+ 'N', 'Average', '3σ', 'Cp', 'Cpk', 'σ', 'Max', 'Min',
+ 'Median', 'P95', 'P75 Q3', 'P25 Q1', 'P5', 'IQR'])
+ return stats_tbl_header
+
+
+def build_empty_summary_cells():
+ return [''] * 14
+
+
+def build_summary_cells(summary):
+ bstats = summary['basic_statistics'] or {}
+ non_pt = summary['non_parametric'] or {}
+ return list(map(
+ lambda x: '' if x is None else x,
+ [bstats.get('n_stats'), bstats.get('average'), bstats.get('sigma_3'), bstats.get('Cp'),
+ bstats.get('Cpk'), bstats.get('sigma'), bstats.get('Max'), bstats.get('Min'),
+ non_pt.get('median'), non_pt.get('p95'), non_pt.get('p75'), non_pt.get('p25'), non_pt.get('p5'),
+ non_pt.get('iqr')]
+ ))
+
+
+def convert_and_format(time_val, client_timezone, out_format=DATE_FORMAT_STR_CSV):
+ dt_obj = parser.parse(time_val)
+ dt_obj = dt_obj.astimezone(client_timezone)
+ return datetime.strftime(dt_obj, out_format)
+
+
+def gen_list_table(dic_proc_cfgs, graph_param, df, client_timezone):
+ proc_ids = dic_proc_cfgs.keys()
+ proc_ids = order_proc_as_trace_config(proc_ids)
+ list_tbl_data = []
+ list_tbl_header = []
+ for proc_id in proc_ids:
+ proc_cfg = dic_proc_cfgs.get(proc_id)
+        list_tbl_header.extend(['Item', 'Name', 'Value'])
+ end_proc: EndProc = graph_param.search_end_proc(proc_id)[1]
+ col_ids = end_proc.col_ids
+ get_date_col: CfgProcessColumn = proc_cfg.get_date_col(column_name_only=False)
+ dic_id_col = {col.id: col for col in proc_cfg.columns}
+ serial_col_cfgs = proc_cfg.get_serials(column_name_only=False)
+ serial_ids = []
+ serial_vals = []
+ proc_rows = []
+ for serial in serial_col_cfgs:
+ serial_label = gen_sql_label(serial.id, serial.column_name)
+ serial_ids.append(serial.id)
+ serial_val = df.loc[0][serial_label]
+ serial_vals.append(serial_val)
+ # Serial No
+ proc_rows.append([serial.column_name, serial.name, serial_val])
+
+ # Datetime
+ time_col_name = str(Cycle.time.key) + create_rsuffix(proc_id)
+ time_val = df.loc[0][time_col_name]
+ if not pd.isna(time_val) and time_val:
+ time_val = convert_and_format(time_val, client_timezone, DATE_FORMAT_STR_CSV)
+ else:
+ time_val = ''
+ proc_rows.append(['Datetime', '', time_val])
+
+ # Line No
+ proc_rows.append(['Line No', '', ''])
+
+ # Process
+ proc_rows.append(['Process', '', proc_cfg.name])
+
+ # Machine No
+ proc_rows.append(['Machine No', '', ''])
+
+ # Part No
+ proc_rows.append(['Part No', '', ''])
+
+ # Other columns
+ for col_id in col_ids:
+ cfg_col: CfgProcessColumn = dic_id_col.get(col_id)
+ if not cfg_col:
+ continue
+
+ if col_id in serial_ids:
+ continue
+
+ if col_id == get_date_col.id:
+ continue
+
+ row = []
+ # Item
+ row.append(cfg_col.column_name)
+
+ # Name
+ row.append(cfg_col.name)
+
+ # Value
+ col_label = gen_sql_label(cfg_col.id, cfg_col.column_name)
+            col_val = df.loc[0][col_label]
+            if cfg_col.is_get_date and not pd.isna(col_val) and col_val:
+                col_val = convert_and_format(col_val, client_timezone, DATE_FORMAT_STR_CSV)
+            row.append(col_val)
+
+ proc_rows.append(row)
+
+ # append to first table
+ list_tbl_data.append(proc_rows)
+
+ list_tbl_rows = []
+ for row in itertools.zip_longest(*list_tbl_data):
+ list_tbl_rows.append(list(itertools.chain.from_iterable([r if r else ['', '', ''] for r in row])))
+
+ return list_tbl_header, list_tbl_rows
+
+
+def get_sensor_idx(dic_param, proc_id, col_id):
+ for idx, form_val in enumerate(dic_param[ARRAY_FORMVAL]):
+ if form_val[END_PROC] == proc_id and form_val[GET02_VALS_SELECT] == col_id:
+ return idx
+ return None
+
+
+def build_dic_param_plot_view(dic_form):
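+    # Build a +/-5 minute window around the clicked time and write it back into the form
+    # before parsing it into dic_param (used for the full link table query).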
+ clicked_time = dic_form.get('time')
+
+ clicked_time = parser.parse(clicked_time)
+ clicked_time = clicked_time - timedelta(minutes=5)
+ start_date = clicked_time.strftime(DATE_FORMAT)
+ start_time = clicked_time.strftime(TIME_FORMAT)
+ clicked_time = clicked_time + timedelta(minutes=10)
+ end_date = clicked_time.strftime(DATE_FORMAT)
+ end_time = clicked_time.strftime(TIME_FORMAT)
+ dic_form['START_DATE'] = start_date
+ dic_form['END_DATE'] = end_date
+ dic_form['START_TIME'] = start_time
+ dic_form['END_TIME'] = end_time
+
+ dic_param = parse_multi_filter_into_one(dic_form)
+
+ return dic_param
+
+
+def order_proc_as_trace_config(proc_ids):
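+    # Order the given process ids according to the configured trace edges (upstream to downstream),
+    # keeping only ids present in proc_ids and removing duplicates; fall back to the original order
+    # when nothing matches.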
+ edges = CfgTrace.get_all()
+ ordered_edges: List[CfgTrace] = order_before_mapping_data(edges)
+ ordered_proc_ids = [(edge.self_process_id, edge.target_process_id) for edge in ordered_edges]
+ ordered_proc_ids = list(itertools.chain.from_iterable(ordered_proc_ids))
+ reversed_proc_ids = list(reversed(ordered_proc_ids))
+ ordered_proc_ids = []
+ for proc_id in reversed_proc_ids:
+ if proc_id in proc_ids and proc_id not in ordered_proc_ids:
+ ordered_proc_ids.append(proc_id)
+ return list(reversed(ordered_proc_ids)) or proc_ids
+
+
+def get_linked_procs(proc_id):
+ processes = CfgProcess.get_all()
+ nodes = [proc.id for proc in processes]
+
+ edges: List[CfgTrace] = CfgTrace.get_all()
+
+ graph_util = GraphUtil(nodes)
+ for edge in edges:
+ graph_util.add_edge(edge.self_process_id, edge.target_process_id)
+ graph_util.add_edge(edge.target_process_id, edge.self_process_id)
+
+ linked_procs = graph_util.find_linked_processes(proc_id)
+
+ return linked_procs
+
+
+def build_graph_param(dic_param, full_link=False):
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ # add relevant procs
+ if full_link:
+ relevant_procs = get_linked_procs(graph_param.get_start_proc())
+ for proc_id in relevant_procs:
+ graph_param.add_proc_to_array_formval(proc_id, [])
+
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # add category
+ graph_param.add_cate_procs_to_array_formval()
+
+ # get serials
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ columns = proc_cfg.get_serials(column_name_only=False)
+ if full_link:
+ columns = proc_cfg.columns
+ col_ids = [col.id for col in columns]
+ proc.add_cols(col_ids)
+
+ return graph_param, dic_proc_cfgs
diff --git a/histview2/api/trace_data/services/proc_link.py b/histview2/api/trace_data/services/proc_link.py
new file mode 100644
index 0000000..a4ffd81
--- /dev/null
+++ b/histview2/api/trace_data/services/proc_link.py
@@ -0,0 +1,457 @@
+from collections import namedtuple, deque
+from datetime import datetime, timedelta
+from typing import List, Dict
+
+from apscheduler.triggers import date
+from pytz import utc
+from sqlalchemy import insert
+from sqlalchemy.sql.expression import literal
+
+from histview2 import scheduler
+# from histview2 import web_socketio
+from histview2.api.setting_module.services.data_import import get_from_to_substring_col
+from histview2.common.common_utils import chunks_dic
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import set_all_cache_expired
+from histview2.common.pydn.dblib.db_proxy import DbProxy, gen_data_source_of_universal_db
+from histview2.common.scheduler import scheduler_app_context, JobType, IMPORT_DATA_JOBS, RESCHEDULE_SECONDS
+from histview2.common.services.sse import background_announcer, AnnounceEvent
+from histview2.setting_module.models import JobManagement, CfgTrace, ProcLink, CfgProcess
+from histview2.setting_module.services.background_process import send_processing_info
+from histview2.trace_data.models import *
+
+# socketio = web_socketio[SOCKETIO]
+
+# 2 proc join key string
+JOIN_KEY = 'key'
+SET_RELATION_INSERT = 'set_relation_insert'
+DIC_CYCLE_UPDATE = 'dic_cycle_update'
+SET_EXIST_RELATION = 'set_exist_relation'
+RECORD_PER_COMMIT = 1_000_000
+
+
+@scheduler_app_context
+def gen_global_id_job(_job_id=None, _job_name=None, is_new_data_check=True, is_publish=True):
+    """Run the generate-global-id (proc link) job.
+
+    Keyword Arguments:
+        _job_id -- scheduler job id (default: None)
+        _job_name -- scheduler job name (default: None)
+        is_new_data_check -- skip generation when no new data has been imported since the last run
+        is_publish -- announce PROC_LINK completion to clients when done
+ """
+
+ # check if generate global is needed
+ if is_new_data_check:
+ prev_gen_job = JobManagement.get_last_job_id_by_jobtype(JobType.GEN_GLOBAL.name)
+ prev_gen_job_id = 0
+ if prev_gen_job:
+ prev_gen_job_id = prev_gen_job.id
+
+ jobs = JobManagement.check_new_jobs(prev_gen_job_id, IMPORT_DATA_JOBS)
+ if not jobs:
+            print('QUIT GENERATE GLOBAL ID: NO NEW DATA SINCE THE LAST GENERATION')
+ return
+
+ # generate global ids
+ gen = gen_global_id()
+ else:
+ # generate global ids
+ gen = gen_global_id(reset_existed_global_id=True)
+
+ send_processing_info(gen, JobType.GEN_GLOBAL)
+
+ # publish to clients that proc link job was done !
+ if is_publish:
+ background_announcer.announce(True, AnnounceEvent.PROC_LINK.name)
+ print('PROC_LINK_DONE_PUBLISH: DONE')
+ # clear cache
+ set_all_cache_expired()
+
+
+@log_execution_time('[GENERATE GLOBAL ID]')
+def gen_global_id(reset_existed_global_id=False):
+ """
+ generate global id for universal db (root function)
+ """
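+    # Note: this is a generator; the yielded integers are progress percentages that the caller
+    # (send_processing_info) is expected to report for the GEN_GLOBAL job.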
+ yield 0
+
+ if reset_existed_global_id:
+ clear_data_before_gen_proc_link()
+
+ yield 10
+
+    # get all trace edges and their start procs (forward trace)
+ edges = CfgTrace.get_all()
+
+ start_procs = get_start_procs(edges)
+
+ # check universal zero
+ if not start_procs:
+ return
+
+ # create sub string sensors
+ gen_substring_sensors(edges)
+
+ # set global id for start procs
+ for proc_id in start_procs:
+ cycle_cls = find_cycle_class(proc_id)
+ cycle_cls.gen_auto_global_id(proc_id)
+
+ db.session.commit()
+ percent = 20
+
+    # trace each edge, collecting global id updates and new relations
+ dic_output = {SET_RELATION_INSERT: set(), DIC_CYCLE_UPDATE: {}, SET_EXIST_RELATION: set(GlobalRelation.get_all())}
+
+ # matched count on proc
+ # dic_cycle_ids = defaultdict(set)
+ dic_edge_cnt = {}
+ edges = order_before_mapping_data(edges)
+ for edge in edges:
+ # matching data
+ start_cycle_ids, end_cycle_ids = mapping_data(edge, dic_output)
+
+ # count matching data for per process
+ dic_edge_cnt[(edge.self_process_id, edge.target_process_id)] = len(start_cycle_ids)
+ # dic_cycle_ids[edge.self_process_id].update(start_cycle_ids)
+ # dic_cycle_ids[edge.target_process_id].update(end_cycle_ids)
+
+ # save db
+ cycle_cls = find_cycle_class(edge.target_process_id)
+ for chunk in chunks_dic(dic_output[DIC_CYCLE_UPDATE], RECORD_PER_COMMIT):
+ cycles = [dict(id=cycle_id, global_id=global_id) for cycle_id, global_id in chunk.items()]
+ db.session.bulk_update_mappings(cycle_cls, cycles)
+ db.session.commit()
+
+ # reset dict after saved
+ dic_output[DIC_CYCLE_UPDATE] = {}
+ percent += 1
+ yield percent
+
+ insert_targets = list(dic_output[SET_RELATION_INSERT])
+ if len(insert_targets):
+ global_relate_cols = (GlobalRelation.global_id.key, GlobalRelation.relate_id.key, GlobalRelation.created_at.key)
+ with DbProxy(gen_data_source_of_universal_db(), True) as db_instance:
+ for chunk in chunks(insert_targets, RECORD_PER_COMMIT):
+ created_at = get_current_timestamp()
+ insert_relations = []
+
+ for rel in chunk:
+ insert_relations.append((rel[0], rel[1], created_at))
+ insert_relations.append((rel[1], rel[0], created_at))
+
+ # insert to db
+ db_instance.bulk_insert(GlobalRelation.__table__.name, global_relate_cols, insert_relations)
+
+ # commit changes to db
+ db_instance.connection.commit()
+
+ # percent
+ percent += 1
+ yield percent
+
+ yield 99, {}, dic_edge_cnt
+ yield 100
+
+
+@log_execution_time()
+def order_before_mapping_data(edges: List[CfgTrace]):
+    """Order edges so that an edge whose start (self) process is the target of another edge
+    is processed after that edge; raise if the edges form a cycle.
+    """
+ ordered_edges = []
+
+ max_loop = len(edges) * 10
+ edges = deque(edges)
+ cnt = 0
+ while edges:
+ if cnt > max_loop:
+            raise Exception('The trace edges form a cycle. Please revise the tracing configuration to break the cycle.')
+
+ # get first element
+ edge = edges.popleft()
+
+        # check whether the current edge's start proc appears as the end proc of another edge
+        # if so, those edges must be processed first, so move the current edge to the end of the queue
+ if any((edge.self_process_id == other_edge.target_process_id for other_edge in edges)):
+ # move to the end of queue
+ edges.append(edge)
+ cnt += 1
+ else:
+ ordered_edges.append(edge)
+ cnt = 0
+
+ return ordered_edges
+
+
+@log_execution_time()
+def mapping_data(edge: CfgTrace, dic_output: Dict):
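+    # Match start and end cycles of this edge by their trace-key tuples.
+    # When the matched end cycle already has a global id, record a cross relation between the two
+    # global ids (deduplicated against existing and pending relations); otherwise propagate the
+    # start cycle's global id to the end cycle via DIC_CYCLE_UPDATE.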
+ dic_start_data = build_proc_data(edge, True)
+ dic_end_data = build_proc_data(edge)
+
+ # update global id and relation
+ start_cycle_ids = []
+ end_cycle_ids = []
+ for keys, start_row in dic_start_data.items():
+ end_row = dic_end_data.get(keys)
+ if end_row is None:
+ continue
+
+ # if end proc global id is NULL
+ if end_row.global_id:
+ # cross relate
+ # if not same number( already added), and not exist in database
+ if end_row.global_id == start_row.global_id:
+ continue
+
+ key = (start_row.global_id, end_row.global_id)
+ reverse_key = (end_row.global_id, start_row.global_id)
+
+ if key in dic_output[SET_EXIST_RELATION]:
+ continue
+
+ if key in dic_output[SET_RELATION_INSERT]:
+ continue
+
+ if reverse_key in dic_output[SET_RELATION_INSERT]:
+ continue
+
+ # add to insert list
+ dic_output[SET_RELATION_INSERT].add((start_row.global_id, end_row.global_id))
+ else:
+ # add to update list
+ dic_output[DIC_CYCLE_UPDATE][end_row.id] = start_row.global_id
+
+ # count
+ start_cycle_ids.append(start_row.id)
+ end_cycle_ids.append(end_row.id)
+
+ return start_cycle_ids, end_cycle_ids
+
+
+def gen_trace_key_info(edge: CfgTrace, is_start_proc):
+ # trace key info
+ TraceKeyInfo = namedtuple('TraceKeyInfo',
+ 'proc_id, column_id, column_name, col_name_with_substr, from_char, to_char')
+
+ if is_start_proc:
+ proc_id = edge.self_process_id
+ keys = [(key.self_column_id, key.self_column_substr_from, key.self_column_substr_to) for key in edge.trace_keys]
+ else:
+ proc_id = edge.target_process_id
+ keys = [(key.target_column_id, key.target_column_substr_from, key.target_column_substr_to) for key in
+ edge.trace_keys]
+
+ trace_key_infos = []
+ for key in keys:
+ col_id, from_char, to_char = key
+ column = CfgProcessColumn.query.get(col_id)
+ if from_char or to_char:
+ substr_col_name = SUB_STRING_COL_NAME.format(column.column_name, from_char, to_char)
+ else:
+ substr_col_name = column.column_name
+
+ trace_key_info = TraceKeyInfo(proc_id, column.id, column.column_name, substr_col_name, from_char, to_char)
+ trace_key_infos.append(trace_key_info)
+
+ return trace_key_infos
+
+
+def build_proc_data(edge: CfgTrace, is_start_proc=False):
+ """
+    Query the cycles of the edge's start or end process together with its trace-key sensor values,
+    and return them as a dict keyed by the trace-key tuple (duplicates removed for fast matching).
+ """
+
+ # get proc_id , keys
+ trace_key_infos = gen_trace_key_info(edge, is_start_proc)
+ proc_id = trace_key_infos[0].proc_id
+
+ cycle_cls = find_cycle_class(proc_id)
+ data = db.session.query(cycle_cls.id, cycle_cls.global_id).filter(cycle_cls.process_id == proc_id)
+ data = data.order_by(cycle_cls.time, cycle_cls.id)
+
+ # only get global_id that not null
+ if is_start_proc:
+ data = data.filter(cycle_cls.global_id > 0)
+
+ # get sensor information of keys from database (run separate to reuse cache, keys are only 1 or 2 columns)
+ sensors = Sensor.query.filter(Sensor.process_id == proc_id).filter(
+ Sensor.column_name.in_([trace_key.col_name_with_substr for trace_key in trace_key_infos])).all()
+ for sensor in sensors:
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+ data = data.join(sensor_val_cls, sensor_val_cls.cycle_id == cycle_cls.id)
+ data = data.filter(sensor_val_cls.sensor_id == sensor.id)
+ data = data.add_columns(sensor_val_cls.value.label(gen_sql_label(sensor.column_name)))
+
+ data = data.all()
+
+ # make dictionary (remove duplicate and faster for tracing)
+ data = {tuple([getattr(row, gen_sql_label(key.col_name_with_substr)) for key in trace_key_infos]): row
+ for row in data}
+ return data
+
+
+@log_execution_time()
+def gen_substring_sensor(proc_id, orig_col_name, from_char, to_char):
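+    # Create a derived sensor whose values are the substring [from_char..to_char] (1-based, inclusive)
+    # of the original sensor's values, so substring trace keys can be matched like normal sensors.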
+ # new column name for sub string
+ substr_col_name = SUB_STRING_COL_NAME.format(orig_col_name, from_char, to_char)
+
+ # check duplicate sub string sensors
+ if Sensor.get_sensor_by_col_name(proc_id, substr_col_name):
+ return None
+
+ orig_sensor = Sensor.get_sensor_by_col_name(proc_id, orig_col_name)
+ if not orig_sensor:
+ return None
+
+ sensor = Sensor(process_id=proc_id, column_name=substr_col_name, type=orig_sensor.type)
+ db.session.add(sensor)
+ db.session.commit()
+
+ sensor_id = sensor.id
+ sensor_type = sensor.type
+ orig_sensor_val_cls = find_sensor_class(orig_sensor.id, DataType(orig_sensor.type))
+
+ # get all value of original sensor
+ data = db.session.query(orig_sensor_val_cls.cycle_id, literal(sensor_id),
+ func.substr(orig_sensor_val_cls.value, from_char, to_char - from_char + 1))
+
+ data = data.filter(orig_sensor_val_cls.sensor_id == orig_sensor.id)
+
+ # insert into sensor val
+ sensor_val_cls = find_sensor_class(sensor_id, DataType(sensor_type))
+ sensor_insert = insert(sensor_val_cls).from_select(
+ (sensor_val_cls.cycle_id, sensor_val_cls.sensor_id, sensor_val_cls.value), data)
+
+ # execute
+ db.session.execute(sensor_insert)
+ db.session.commit()
+
+ return substr_col_name
+
+
+def add_gen_proc_link_job(publish=False):
+ """call gen proc link id job
+
+ Args:
+ :param publish:
+ """
+ job_id = JobType.GEN_GLOBAL.name
+ run_time = datetime.now().astimezone(utc) + timedelta(seconds=RESCHEDULE_SECONDS)
+ date_trigger = date.DateTrigger(run_date=run_time, timezone=utc)
+ scheduler.add_job(job_id, gen_global_id_job, trigger=date_trigger, replace_existing=True,
+ kwargs=dict(_job_id=job_id, _job_name=job_id, is_new_data_check=True, is_publish=publish))
+
+
+#######################################################
+
+def get_start_procs(edges):
+ self_trace_ids = set()
+ target_trace_ids = set()
+
+ for edge in edges:
+ self_trace_ids.add(edge.self_process_id)
+ target_trace_ids.add(edge.target_process_id)
+
+    # start procs = processes that appear only as a source (self) and never as a target
+ return self_trace_ids - target_trace_ids
+
+
+def get_end_procs(edges):
+ self_trace_ids = set()
+ target_trace_ids = set()
+
+ for edge in edges:
+ self_trace_ids.add(edge.self_process_id)
+ target_trace_ids.add(edge.target_process_id)
+
+    # end procs = processes that appear only as a target and never as a source
+ return target_trace_ids - self_trace_ids
+
+
+def gen_substring_sensors(edges: List[CfgTrace]):
+ for edge in edges:
+ for trace_key in edge.trace_keys:
+ if trace_key.self_column_substr_from:
+ orig_col = CfgProcessColumn.query.get(trace_key.self_column_id)
+ gen_substring_sensor(edge.self_process_id, orig_col.column_name,
+ trace_key.self_column_substr_from,
+ trace_key.self_column_substr_to)
+ if trace_key.target_column_substr_from:
+ orig_col = CfgProcessColumn.query.get(trace_key.target_column_id)
+ gen_substring_sensor(edge.target_process_id, orig_col.column_name,
+ trace_key.target_column_substr_from,
+ trace_key.target_column_substr_to)
+
+
+# @log_execution_time()
+# def show_proc_link_info():
+# """
+# show matched global id count
+# :return:
+# """
+# # get infos
+# data = ProcLink.calc_proc_link()
+#
+# # matched count on edge
+# dic_edge_cnt = {}
+# dic_proc_cnt = {}
+#
+# # count matched per edge
+# for row in data:
+# if row.target_process_id:
+# dic_edge_cnt[f'{row.process_id}-{row.target_process_id}'] = row.matched_count
+# else:
+# dic_proc_cnt[row.process_id] = row.matched_count
+#
+# return dic_proc_cnt, dic_edge_cnt
+
+@log_execution_time()
+def show_proc_link_info():
+ """
+ show matched global id count
+ :return:
+ """
+ dic_proc_cnt = {}
+ dic_edge_cnt = {}
+
+ # all procs
+ all_procs = CfgProcess.get_all()
+
+ for proc in all_procs:
+ cycle_cls = find_cycle_class(proc.id)
+ dic_proc_cnt[proc.id] = (cycle_cls.count_not_none_global_ids(proc.id), cycle_cls.count_all(proc.id))
+
+ # get infos
+ data = ProcLink.calc_proc_link()
+
+ # count matched per edge
+ for row in data:
+ if row.target_process_id:
+ dic_edge_cnt[f'{row.process_id}-{row.target_process_id}'] = row.matched_count
+
+ return dic_proc_cnt, dic_edge_cnt
+
+
+@log_execution_time()
+def clear_data_before_gen_proc_link():
+ # clear relation global id
+ GlobalRelation.delete_all()
+ for cycle_class in CYCLE_CLASSES:
+ cycle_class.clear_global_id()
+
+ # clear log in global_detail
+ ProcLink.delete_all()
+ db.session.commit()
+
+ # clear substring data
+ sensors = Sensor.query.all()
+ for sensor in sensors:
+ substr_check_res = get_from_to_substring_col(sensor)
+ if not substr_check_res:
+ continue
+
+ substr_cls, from_char, to_char = substr_check_res
+ substr_cls.delete_by_sensor_id(sensor.id)
+ db.session.delete(sensor)
+ db.session.commit()
diff --git a/histview2/api/trace_data/services/proc_link_simulation.py b/histview2/api/trace_data/services/proc_link_simulation.py
new file mode 100644
index 0000000..acbf58b
--- /dev/null
+++ b/histview2/api/trace_data/services/proc_link_simulation.py
@@ -0,0 +1,225 @@
+from collections import deque
+from typing import List, Union, Dict
+
+from sqlalchemy import and_
+
+from histview2 import db
+from histview2.api.trace_data.services.proc_link import gen_trace_key_info
+from histview2.common.common_utils import gen_sql_label
+from histview2.common.constants import DataType
+from histview2.common.logger import log_execution_time
+from histview2.setting_module.models import CfgTrace, CfgProcess
+from histview2.trace_data.models import find_cycle_class, Sensor, find_sensor_class
+
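+# default number of sample cycles fetched per process when simulating proc link matching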
+PREDICT_SAMPLE = 10_000
+
+
+@log_execution_time('[SIMULATE GLOBAL ID]')
+def sim_gen_global_id(edges: List[CfgTrace]):
+    """
+    Simulate proc link matching for the given trace edges on sampled data and return
+    the matched counts per process and per edge (no database writes).
+    """
+ edges = sim_order_before_mapping_data(edges)
+
+ # matched count on proc
+ dic_cycle_ids = {}
+
+ # matched count on edge
+ dic_edge_cnt = {}
+
+ # proc : rows in database
+ dic_proc_data = {}
+
+ # filtered flags
+ filtered_procs = []
+
+ # backward start leaf procs
+ for edge in edges:
+ # matching keys
+ start_keys = gen_trace_key_info(edge, False)
+ end_keys = gen_trace_key_info(edge, True)
+
+ start_proc_id = edge.target_process_id
+ start_proc_data = gen_start_proc_data(start_proc_id, dic_proc_data, filtered_procs, dic_cycle_ids)
+ if not start_proc_data:
+ continue
+
+ dic_start_data = gen_dic_proc_data(start_proc_data, start_keys)
+
+ end_proc_id = edge.self_process_id
+ end_proc_data = gen_end_proc_data(start_proc_id, start_keys, end_proc_id, end_keys, dic_proc_data)
+ dic_end_data = gen_dic_proc_data(end_proc_data, end_keys)
+
+ # init count data
+ dic_cycle_ids.setdefault(start_proc_id, set())
+ dic_cycle_ids.setdefault(end_proc_id, set())
+
+ # mapping
+ cnt = 0
+ for keys, end_row in dic_end_data.items():
+ start_row = dic_start_data.get(keys)
+ if not start_row:
+ continue
+
+ dic_cycle_ids[start_proc_id].add(start_row.id)
+ dic_cycle_ids[end_proc_id].add(end_row.id)
+ cnt += 1
+
+ # count matched per edge
+ dic_edge_cnt[f'{end_proc_id}-{start_proc_id}'] = cnt
+
+ dic_proc_cnt = {proc_id: [len(cycles), len(dic_proc_data[proc_id])] for proc_id, cycles in dic_cycle_ids.items()}
+ return dic_proc_cnt, dic_edge_cnt
+
+
+def get_sample_data(proc_id, cols_filters: Union[Dict, List], from_time=None, limit=PREDICT_SAMPLE):
+ """
+    Fetch a sample of cycles for proc_id together with the sensor values of the given key columns.
+    cols_filters may be a list of column names or a dict of {column_name: required value};
+    from_time restricts the query to newer rows.
+ """
+ dic_filter = {}
+ if isinstance(cols_filters, dict):
+ dic_filter = cols_filters
+
+ column_names = list(cols_filters)
+
+ cycle_cls = find_cycle_class(proc_id)
+ data = db.session.query(cycle_cls.id, cycle_cls.global_id, cycle_cls.time).filter(cycle_cls.process_id == proc_id)
+ data = data.order_by(cycle_cls.time, cycle_cls.id)
+
+ offset = PREDICT_SAMPLE
+ if from_time:
+ data = data.filter(cycle_cls.time >= from_time)
+ offset = 0
+
+ # get sensor information of keys from database (run separate to reuse cache, keys are only 1 or 2 columns)
+ sensors = Sensor.query.filter(Sensor.process_id == proc_id).filter(Sensor.column_name.in_(column_names)).all()
+
+ for sensor in sensors:
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+
+ filter_val = dic_filter.get(sensor.column_name)
+ if filter_val is None:
+ data = data.join(sensor_val_cls, sensor_val_cls.cycle_id == cycle_cls.id)
+ else:
+ data = data.join(sensor_val_cls,
+ and_(sensor_val_cls.cycle_id == cycle_cls.id, sensor_val_cls.value == filter_val))
+
+ data = data.filter(sensor_val_cls.sensor_id == sensor.id)
+ data = data.add_columns(sensor_val_cls.value.label(gen_sql_label(sensor.column_name)))
+
+ data = data.limit(limit + offset)
+
+ data = data.all()
+
+    # do not use a SQL OFFSET: the result may contain fewer rows than expected,
+    # so drop (up to) the older half of the fetched rows instead
+ if not from_time:
+ data = data[min(offset * 2, len(data)) // 2:]
+
+ return data
+
+
+@log_execution_time()
+def sim_order_before_mapping_data(edges: List[CfgTrace]):
+    """Order edges for backward tracing: an edge whose target process is the start (self) process
+    of another edge is processed after that edge; raise if the edges form a cycle.
+    """
+ ordered_edges = []
+
+ max_loop = len(edges) * 10
+ edges = deque(edges)
+ cnt = 0
+ while edges:
+ if cnt > max_loop:
+            raise Exception('The trace edges form a cycle. Please revise the tracing configuration to break the cycle.')
+
+ # get first element
+ edge = edges.popleft()
+
+        # check whether the current edge's start proc appears as the end proc of another edge
+        # if so, those edges must be processed first, so move the current edge to the end of the queue
+        # (this traces backward, so target acts as the start proc and self as the end proc)
+ if any((edge.target_process_id == other_edge.self_process_id for other_edge in edges)):
+ # move to the end of queue
+ edges.append(edge)
+ cnt += 1
+ else:
+ ordered_edges.append(edge)
+ cnt = 0
+
+ return ordered_edges
+
+
+def gen_start_proc_data(proc_id, dic_proc_data, filtered_procs, dic_cycle_ids):
+ # get sample data
+ proc_data = dic_proc_data.get(proc_id)
+ if proc_data:
+ # already a node of previous edge
+ if proc_id not in filtered_procs:
+ filter_cycle_ids = dic_cycle_ids.get(proc_id)
+
+ # only use data that matched before edge ( as a end proc of previous edge )
+ if filter_cycle_ids:
+ proc_data = [row for row in proc_data if row.id in filter_cycle_ids]
+ elif filter_cycle_ids is not None:
+ proc_data = []
+
+ # save after filtered
+ filtered_procs.append(proc_id)
+ dic_proc_data[proc_id] = proc_data
+ else:
+ # end leaf proc case
+ cfg_proc = CfgProcess.query.get(proc_id)
+ serials = cfg_proc.get_serials()
+ proc_data = get_sample_data(proc_id, serials)
+ dic_proc_data[proc_id] = proc_data
+
+ return proc_data
+
+
+def gen_end_proc_data(start_proc_id, start_keys, end_proc_id, end_keys, dic_proc_data):
+ # get sample data
+ end_proc_data = dic_proc_data.get(end_proc_id)
+
+ # reuse already exist data
+ if end_proc_data:
+ return end_proc_data
+
+ # get end proc time by start proc condition
+ start_proc_data = dic_proc_data[start_proc_id]
+ from_time = find_from_time(start_proc_data, start_keys, end_proc_id, end_keys)
+
+ # get data from db
+ cfg_proc = CfgProcess.query.get(end_proc_id)
+ serials = cfg_proc.get_serials()
+ end_proc_data = get_sample_data(end_proc_id, serials, from_time=from_time)
+ dic_proc_data[end_proc_id] = end_proc_data
+
+ return end_proc_data
+
+
+def find_from_time(start_proc_data, start_keys, end_proc_id, end_keys):
+ row = start_proc_data[0]
+ dic_keys = {end_key.column_name: getattr(row, gen_sql_label(start_key.column_name))
+ for start_key, end_key in zip(start_keys, end_keys)}
+
+ end_proc_data = get_sample_data(end_proc_id, dic_keys, limit=1)
+ if end_proc_data:
+ return end_proc_data[0].time
+
+ return row.time
+
+
+def gen_dic_proc_data(data, trace_key_infos):
+ dic_filter = {}
+ for row in data:
+ keys = []
+ for key in trace_key_infos:
+ val = str(getattr(row, gen_sql_label(key.column_name)))
+ if key.from_char:
+ val = val[key.from_char - 1:key.to_char]
+
+ keys.append(val)
+
+ dic_filter[tuple(keys)] = row
+
+ return dic_filter
diff --git a/histview2/api/trace_data/services/regex_infinity.py b/histview2/api/trace_data/services/regex_infinity.py
new file mode 100644
index 0000000..b0e476c
--- /dev/null
+++ b/histview2/api/trace_data/services/regex_infinity.py
@@ -0,0 +1,202 @@
+import re
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+from pandas import DataFrame
+
+from histview2.common.logger import log_execution_time
+from histview2.trace_data.models import Cycle
+
+PATTERN_POS_1 = re.compile(r'^(9{4,}(\.0+)?|9{1,3}\.9{3,}0*)$')
+# PATTERN_NEG_1 = re.compile(r'^-(9{4,}(\.0+)?|9{1,3}\.9{3,}0*)$')
+
+# support to identify -9999.9 as -inf
+PATTERN_NEG_1 = re.compile(r'^-(9{4,}(\.)?|9{3,}\.9+|9{1,3}\.?9{3,})0*$')
+
+PATTERN_POS_2 = re.compile(r'^((\d)\2{3,}(\.0+)?|(\d)\4{0,2}\.\4{3,}0*)$')
+PATTERN_NEG_2 = re.compile(r'^-((\d)\2{3,}(\.0+)?|(\d)\4{0,2}\.\4{3,}0*)$')
+
+PATTERN_3 = re.compile(r'^(-+|0+(\d)\2{3,}(\.0+)?|(.)\3{4,}0*)$')
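+
+# Rough intent of the patterns above: they flag sentinel values that the validators below
+# replace with +/-inf or NA, e.g.
+#   PATTERN_POS_1 / PATTERN_NEG_1: all-9 values such as '9999', '99999.0', '9.999', '-9999', '-9999.9'
+#   PATTERN_POS_2 / PATTERN_NEG_2: a single digit repeated, such as '7777', '8.888', '-5555'
+#   PATTERN_3: dash runs ('-----'), zero-padded repeats ('0007777'), or any character repeated 5+ times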
+
+# regex filter exclude columns
+EXCLUDE_COLS = [Cycle.id.key, Cycle.global_id.key, Cycle.time.key, Cycle.is_outlier.key]
+
+
+@log_execution_time()
+def filter_method(df: DataFrame, col_name, idxs, cond_gen_func, return_vals):
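+    # For the rows selected by idxs, compare the string form of the column against the generated
+    # regex conditions and replace matching values with the corresponding return_vals (np.select).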
+ if len(idxs) == 0:
+ return df
+
+ target_data = df.loc[idxs, col_name].astype(str)
+ if len(target_data) == 0:
+ return df
+
+ conditions = cond_gen_func(target_data)
+ df.loc[target_data.index, col_name] = np.select(conditions, return_vals, df.loc[target_data.index, col_name])
+ return df
+
+
+@log_execution_time()
+def validate_numeric_minus(df: DataFrame, col_name, return_vals):
+ num = 0
+ if df[col_name].count() == 0:
+ return df
+
+ min_val = df[col_name].min()
+ if min_val >= num:
+ return df
+
+ # return_vals = [pd.NA, pd.NA]
+ # idxs = df.eval(f'{col_name} < {num}')
+ idxs = pd.eval(f'df["{col_name}"] < {num}')
+ df = filter_method(df, col_name, idxs, gen_neg_conditions, return_vals)
+
+ return df
+
+
+@log_execution_time()
+def validate_numeric_plus(df: DataFrame, col_name, return_vals):
+ num = 0
+ if df[col_name].count() == 0:
+ return df
+
+ max_val = df[col_name].max()
+ if max_val < num:
+ return df
+
+ # return_vals = [pd.NA, pd.NA, pd.NA]
+ idxs = pd.eval(f'df["{col_name}"] >= {num}')
+ df = filter_method(df, col_name, idxs, gen_pos_conditions, return_vals)
+
+ return df
+
+
+@log_execution_time()
+def validate_string(df: DataFrame, col_name):
+ if df[col_name].count() == 0:
+ return df
+
+ target_data = df[col_name].astype(str)
+ if len(target_data) == 0:
+ return df
+
+ conditions = gen_all_conditions(target_data)
+ return_vals = ['inf', '-inf', 'inf', '-inf', pd.NA]
+ df.loc[target_data.index, col_name] = np.select(conditions, return_vals, df.loc[target_data.index, col_name])
+
+ return df
+
+
+def gen_pos_conditions(df_str: DataFrame):
+ return [df_str.str.contains(PATTERN_POS_1), df_str.str.contains(PATTERN_POS_2), df_str.str.contains(PATTERN_3)]
+
+
+def gen_neg_conditions(df_str: DataFrame):
+ return [df_str.str.contains(PATTERN_NEG_1), df_str.str.contains(PATTERN_NEG_2)]
+
+
+def gen_all_conditions(df_str: DataFrame):
+ return [df_str.str.contains(PATTERN_POS_1),
+ df_str.str.contains(PATTERN_NEG_1),
+ df_str.str.contains(PATTERN_POS_2),
+ df_str.str.contains(PATTERN_NEG_2),
+ df_str.str.contains(PATTERN_3)]
+
+
+@log_execution_time()
+def get_changed_value_after_validate(df_before: DataFrame, df_after: DataFrame):
+ checked_cols = []
+ dic_abnormal = defaultdict(list)
+
+ for col in df_before.columns:
+ if not check_validate_target_column(col):
+ continue
+
+ checked_cols.append(col)
+ original_val = f'__{col}__'
+ s_before = df_before[col]
+ s_after = df_after[col].drop_duplicates()
+ idxs = s_before[~s_before.isin(s_after)].index
+
+ if not len(idxs):
+ continue
+
+ df = pd.DataFrame()
+ df[col] = df_after[col][idxs]
+ df[original_val] = df_before[col][idxs]
+ df.drop_duplicates(inplace=True)
+ series = df.groupby(col)[original_val].apply(list)
+
+ for idx, vals in series.items():
+ dic_abnormal[idx].extend(vals)
+
+ dic_abnormal = {key: list(set(vals)) for key, vals in dic_abnormal.items()}
+
+ return checked_cols, dic_abnormal
+
+
+@log_execution_time()
+def validate_data_with_regex(df):
+ # convert data types
+ df = df.convert_dtypes()
+
+ # integer cols
+ int_cols = df.select_dtypes(include='integer').columns.tolist()
+ return_vals = [pd.NA, pd.NA]
+ for col in int_cols:
+ if not check_validate_target_column(col):
+ continue
+
+ df = validate_numeric_minus(df, col, return_vals)
+ df = validate_numeric_plus(df, col, return_vals + [pd.NA])
+
+ # float
+ float_cols = df.select_dtypes(include='float').columns.tolist()
+ return_neg_vals = [float('-inf'), float('-inf')]
+ return_pos_vals = [float('inf'), float('inf'), np.NAN]
+ for col in float_cols:
+ if not check_validate_target_column(col):
+ continue
+
+ df = validate_numeric_minus(df, col, return_neg_vals)
+ df = validate_numeric_plus(df, col, return_pos_vals)
+
+ # non-numeric cols
+ for col in df.columns:
+ if not check_validate_target_column(col):
+ continue
+
+ if col in int_cols or col in float_cols:
+ continue
+ df = validate_string(df, col)
+
+ return df
+
+
+@log_execution_time()
+def validate_data_with_simple_searching(df, checked_cols, dic_abnormal):
+ # convert data types
+ df = df.convert_dtypes()
+
+ for col in checked_cols:
+ conditions = []
+ results = []
+ for result, vals in dic_abnormal.items():
+ conditions.append(df[col].isin(vals))
+ results.append(result)
+
+ if conditions:
+ df[col] = np.select(conditions, results, df[col])
+
+ return df
+
+
+def check_validate_target_column(col: str):
+ if col in EXCLUDE_COLS:
+ return False
+
+ if col.startswith(Cycle.time.key):
+ return False
+
+ return True
diff --git a/histview2/api/trace_data/services/time_series_chart.py b/histview2/api/trace_data/services/time_series_chart.py
new file mode 100644
index 0000000..9a20841
--- /dev/null
+++ b/histview2/api/trace_data/services/time_series_chart.py
@@ -0,0 +1,2885 @@
+import json
+import re
+import traceback
+from collections import defaultdict, Counter
+from copy import deepcopy
+from itertools import groupby
+from math import ceil
+from typing import List, Dict
+
+import numpy as np
+import pandas as pd
+from loguru import logger
+from numpy import quantile
+from pandas import DataFrame, Series
+from sqlalchemy import and_, or_
+
+from histview2 import db
+from histview2.api.trace_data.services.regex_infinity import validate_data_with_regex, get_changed_value_after_validate, \
+ validate_data_with_simple_searching, check_validate_target_column
+from histview2.common.common_utils import as_list, get_debug_data
+from histview2.common.common_utils import start_of_minute, end_of_minute, convert_time, add_days, gen_sql_label, \
+ gen_sql_like_value, gen_python_regex, chunks, gen_abbr_name
+from histview2.common.constants import *
+from histview2.common.logger import log_execution_time
+from histview2.common.memoize import memoize
+from histview2.common.services.ana_inf_data import calculate_kde_trace_data
+from histview2.common.services.form_env import bind_dic_param_to_class
+from histview2.common.services.request_time_out_handler import request_timeout_handling
+from histview2.common.services.sse import notify_progress
+from histview2.common.services.statistics import calc_summaries, get_mode
+from histview2.common.sigificant_digit import signify_digit
+from histview2.common.trace_data_log import trace_log, TraceErrKey, EventAction, Target, EventType, save_df_to_file
+from histview2.setting_module.models import CfgConstant, CfgProcess, CfgProcessColumn, CfgFilter, CfgFilterDetail, \
+ CfgVisualization
+from histview2.trace_data.models import find_cycle_class, GlobalRelation, Sensor, Cycle, find_sensor_class
+from histview2.trace_data.schemas import DicParam, EndProc, ConditionProc, CategoryProc
+
+
+@log_execution_time('[TRACE DATA]')
+@request_timeout_handling()
+@trace_log((TraceErrKey.TYPE, TraceErrKey.ACTION, TraceErrKey.TARGET),
+ (EventType.FPP, EventAction.PLOT, Target.GRAPH), send_ga=True)
+@memoize(is_save_file=True)
+def gen_graph_fpp(dic_param, max_graph=None):
+ dic_param, cat_exp, cat_procs, dic_cat_filters, use_expired_cache, temp_serial_column, temp_serial_order, \
+ temp_serial_process, temp_x_option, *_ = customize_dic_param_for_reuse_cache(dic_param)
+
+ dic_param, df, orig_graph_param, graph_param, graph_param_with_cate = gen_df(dic_param,
+ _use_expired_cache=use_expired_cache)
+
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ # use for enable and disable index columns
+ all_procs = []
+ all_cols = []
+ for proc in graph_param.array_formval:
+ all_procs.append(proc.proc_id)
+ all_cols.extend(proc.col_ids)
+
+ dic_param[COMMON][DF_ALL_PROCS] = all_procs
+ dic_param[COMMON][DF_ALL_COLUMNS] = all_cols
+
+ if cat_exp:
+ for i, val in enumerate(cat_exp):
+ dic_param[COMMON][f'{CAT_EXP_BOX}{i + 1}'] = val
+ if cat_procs:
+ dic_param[COMMON][CATE_PROCS] = cat_procs
+
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+
+ # order index with other param
+ if temp_x_option:
+ df = check_and_order_data(df, dic_proc_cfgs, temp_x_option, temp_serial_process, temp_serial_column,
+ temp_serial_order)
+ dic_param[COMMON][X_OPTION] = temp_x_option
+ dic_param[COMMON][SERIAL_PROCESS] = temp_serial_process
+ dic_param[COMMON][SERIAL_COLUMNS] = temp_serial_column
+ dic_param[COMMON][SERIAL_ORDER] = temp_serial_order
+
+ # distinct category for filter setting form
+ cate_col_ids = []
+ for proc in graph_param.common.cate_procs or []:
+ cate_col_ids += proc.col_ids
+
+ dic_unique_cate = gen_unique_data(df, dic_proc_cfgs, cate_col_ids)
+ cat_exp_list = gen_unique_data(df, dic_proc_cfgs, graph_param.common.cat_exp)
+ cat_exp_list = list(cat_exp_list.values())
+
+ # filter list
+ df = filter_df(df, dic_cat_filters)
+
+ # reset index (keep sorted position)
+ df.reset_index(inplace=True, drop=True)
+
+ str_cols = dic_param.get(STRING_COL_IDS)
+ dic_str_cols = get_str_cols_in_end_procs(dic_proc_cfgs, orig_graph_param)
+ dic_ranks = gen_before_rank_dict(df, dic_str_cols)
+ dic_data, is_graph_limited = gen_dic_data(df, orig_graph_param, graph_param_with_cate, max_graph)
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited
+
+ is_thin_data = False
+ # 4000 chunks x 3 values(min,median,max)
+ dic_thin_param = None
+ if len(df) > THIN_DATA_COUNT:
+ is_thin_data = True
+ dic_thin_param = deepcopy(dic_param)
+
+ dic_param = gen_dic_param(df, dic_param, dic_data, dic_proc_cfgs)
+ gen_dic_serial_data_from_df(df, dic_proc_cfgs, dic_param)
+
+ # calculate_summaries
+ calc_summaries(dic_param)
+
+ # calc common scale y min max
+ min_max_list, all_graph_min, all_graph_max = calc_raw_common_scale_y(dic_param[ARRAY_PLOTDATA], str_cols)
+
+ # get min max order columns
+ output_orders = []
+ x_option = graph_param.common.x_option
+ if x_option == 'INDEX' and graph_param.common.serial_columns:
+ group_col = '__group_col__'
+ dic_cfg_cols = {cfg_col.id: cfg_col for cfg_col in
+ CfgProcessColumn.get_by_ids(graph_param.common.serial_columns)}
+ dic_order_cols = {}
+ for order_col_id in graph_param.common.serial_columns:
+ cfg_col = dic_cfg_cols.get(order_col_id)
+ if not cfg_col:
+ continue
+
+ sql_label = gen_sql_label(RANK_COL, cfg_col.id, cfg_col.column_name)
+ if sql_label not in df.columns:
+ sql_label = gen_sql_label(cfg_col.id, cfg_col.column_name)
+ if sql_label not in df.columns:
+ continue
+
+ dic_order_cols[sql_label] = cfg_col
+
+        df_order = df[list(dic_order_cols)].copy()
+ if is_thin_data:
+ count_per_group = ceil(len(df_order) / THIN_DATA_CHUNK)
+ df_order[group_col] = df_order.index // count_per_group
+ df_order = df_order.dropna().groupby(group_col).agg(['min', 'max'])
+ for sql_label, col in dic_order_cols.items():
+ output_orders.append(dict(name=col.name, min=df_order[(sql_label, 'min')].tolist(),
+ max=df_order[(sql_label, 'max')].tolist()))
+ else:
+ for sql_label, col in dic_order_cols.items():
+ output_orders.append(dict(name=col.name, value=df_order[sql_label].tolist()))
+
+ full_arrays = None
+ if is_thin_data:
+ full_arrays = make_str_full_array_y(dic_param)
+ list_summaries = get_summary_infos(dic_param)
+ dic_cat_exp_labels = None
+ if graph_param.common.cat_exp:
+ df, dic_cat_exp_labels = gen_thin_df_cat_exp(dic_param)
+ else:
+ add_serials_to_thin_df(dic_param, df)
+
+ copy_dic_param_to_thin_dic_param(dic_param, dic_thin_param)
+ dic_param = gen_thin_dic_param(df, dic_thin_param, dic_proc_cfgs, dic_cat_exp_labels, dic_ranks)
+ dic_param['is_thin_data'] = is_thin_data
+
+ for i, plot in enumerate(dic_param[ARRAY_PLOTDATA]):
+ plot[SUMMARIES] = list_summaries[i]
+ else:
+ dic_param = gen_category_info(dic_param, dic_ranks)
+ set_str_rank_to_dic_param(dic_param, dic_ranks, full_arrays)
+ set_str_category_data(dic_param, dic_ranks)
+
+ calc_scale_info(dic_param[ARRAY_PLOTDATA], min_max_list, all_graph_min, all_graph_max, str_cols)
+
+ # kde
+ gen_kde_data_trace_data(dic_param, full_arrays)
+
+ # add unique category values
+ for dic_cate in dic_param.get(CATEGORY_DATA) or []:
+ col_id = dic_cate['column_id']
+ dic_cate[UNIQUE_CATEGORIES] = dic_unique_cate[col_id][UNIQUE_CATEGORIES] if dic_unique_cate.get(col_id) else []
+ if len(set(dic_cate.get('data', []))) > 200:
+ dic_cate[IS_OVER_UNIQUE_LIMIT] = True
+ else:
+ dic_cate[IS_OVER_UNIQUE_LIMIT] = False
+
+ dic_param[CAT_EXP_BOX] = cat_exp_list
+ dic_param[INDEX_ORDER_COLS] = output_orders
+ dic_param['proc_name'] = {k: proc.name for (k, proc) in dic_proc_cfgs.items()}
+
+ # remove unnecessary data
+ # if graph_param.common.x_option == 'INDEX':
+ # del dic_param[TIMES]
+
+ return dic_param
+
+
+def customize_dic_param_for_reuse_cache(dic_param):
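+    # Split the transient, on-demand options (category filters, temporary cat-exp / serial ordering,
+    # matrix column, color order) out of dic_param so the remaining parameters can hit the memoized
+    # gen_df cache (use_expired_cache marks that a cached result may be reused); the extracted
+    # options are returned separately and re-applied after the cached query.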
+ use_expired_cache = False
+ for name in (DIC_CAT_FILTERS, TEMP_CAT_EXP, TEMP_CAT_PROCS, TEMP_X_OPTION, TEMP_SERIAL_PROCESS, TEMP_SERIAL_COLUMN,
+ TEMP_SERIAL_ORDER, MATRIX_COL, COLOR_ORDER):
+ if name in dic_param[COMMON]:
+ use_expired_cache = True
+ break
+ dic_cat_filters = json.loads(dic_param[COMMON].get(DIC_CAT_FILTERS, {})) if isinstance(
+ dic_param[COMMON].get(DIC_CAT_FILTERS, {}), str) else dic_param[COMMON].get(DIC_CAT_FILTERS, {})
+    cat_exp = [int(col_id) for col_id in dic_param[COMMON].get(TEMP_CAT_EXP, []) if col_id]
+ cat_procs = dic_param[COMMON].get(TEMP_CAT_PROCS, [])
+ for name in (DIC_CAT_FILTERS, TEMP_CAT_EXP, TEMP_CAT_PROCS):
+ if name in dic_param[COMMON]:
+ dic_param[COMMON].pop(name)
+ dic_param, temp_x_option, temp_serial_process, temp_serial_column, temp_serial_order = \
+ prepare_temp_x_option(dic_param)
+
+ matrix_col = dic_param[COMMON].get(MATRIX_COL)
+ if matrix_col and isinstance(matrix_col, (list, tuple)):
+ matrix_col = matrix_col[0]
+
+ if matrix_col:
+ matrix_col = int(matrix_col)
+
+ # set default for color order ( default : data value )
+ color_order = dic_param[COMMON].get(COLOR_ORDER)
+ if color_order:
+ color_order = ColorOrder(int(color_order))
+ else:
+ color_order = ColorOrder.DATA
+
+ return dic_param, cat_exp, cat_procs, dic_cat_filters, use_expired_cache, temp_serial_column, temp_serial_order, \
+ temp_serial_process, temp_x_option, matrix_col, color_order
+
+
+@notify_progress(60)
+def gen_graph(dic_param, max_graph=None):
+ dic_param, df, orig_graph_param, graph_param, graph_param_with_cate = gen_df(dic_param)
+ dic_data, is_graph_limited = gen_dic_data(df, orig_graph_param, graph_param_with_cate, max_graph)
+ dic_param = gen_dic_param(df, dic_param, dic_data)
+ dic_param[IS_GRAPH_LIMITED] = is_graph_limited
+
+ return dic_param
+
+
+@log_execution_time()
+def gen_dic_data(df, orig_graph_param, graph_param_with_cate, max_graph=None):
+ # create output data
+ cat_exp_cols = orig_graph_param.common.cat_exp
+ is_graph_limited = False
+ if cat_exp_cols:
+ dic_cfg_cat_exps = {cfg_col.id: cfg_col for cfg_col in CfgProcessColumn.get_by_ids(cat_exp_cols)}
+ dic_data, is_graph_limited = gen_dic_data_cat_exp_from_df(df, orig_graph_param, dic_cfg_cat_exps, max_graph)
+ dic_cates = defaultdict(dict)
+ for proc in orig_graph_param.common.cate_procs:
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ sql_label = gen_sql_label(col_id, col_name)
+ dic_cates[proc.proc_id][col_id] = df[sql_label].tolist() if sql_label in df.columns else []
+
+ dic_data[CATEGORY_DATA] = dic_cates
+ else:
+ dic_data = gen_dic_data_from_df(df, graph_param_with_cate, cat_exp_mode=True)
+
+ return dic_data, is_graph_limited
+
+
+def prepare_temp_x_option(dic_param):
+ params = [TEMP_X_OPTION, TEMP_SERIAL_PROCESS, TEMP_SERIAL_COLUMN, TEMP_SERIAL_ORDER]
+ temp_x_option = dic_param[COMMON].get(TEMP_X_OPTION, '')
+ temp_serial_process = as_list(dic_param[COMMON].get(TEMP_SERIAL_PROCESS))
+ temp_serial_column = as_list(dic_param[COMMON].get(TEMP_SERIAL_COLUMN))
+ temp_serial_order = as_list(dic_param[COMMON].get(TEMP_SERIAL_ORDER))
+
+ for param in params:
+ if param in dic_param[COMMON]:
+ dic_param[COMMON].pop(param)
+
+ return dic_param, temp_x_option, temp_serial_process, temp_serial_column, temp_serial_order
+
+
+@log_execution_time()
+@memoize(is_save_file=True)
+def gen_df(dic_param, _use_expired_cache=False):
+ """tracing data to show graph
+ 1 start point x n end point
+ filter by condition point
+ """
+ # bind dic_param
+ orig_graph_param = bind_dic_param_to_class(dic_param)
+ cat_exp_col = orig_graph_param.common.cat_exp
+
+ graph_param_with_cate = bind_dic_param_to_class(dic_param)
+ graph_param_with_cate.add_cate_procs_to_array_formval()
+
+ graph_param = bind_dic_param_to_class(dic_param)
+
+ # add start proc
+ graph_param.add_start_proc_to_array_formval()
+
+ # add condition procs
+ graph_param.add_cond_procs_to_array_formval()
+
+ # add category
+ if cat_exp_col:
+ graph_param.add_cat_exp_to_array_formval()
+
+ graph_param.add_cate_procs_to_array_formval()
+
+ # get serials
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_ids = [serial.id for serial in proc_cfg.get_serials(column_name_only=False)]
+ proc.add_cols(serial_ids)
+
+ # get order columns
+ if graph_param.common.x_option == 'INDEX':
+ for proc_id, col_id in zip(graph_param.common.serial_processes, graph_param.common.serial_columns):
+ if proc_id and col_id:
+ proc_id = int(proc_id)
+ col_id = int(col_id)
+ graph_param.add_proc_to_array_formval(proc_id, col_id)
+
+ # get data from database
+ df, actual_record_number, is_res_limited = get_data_from_db(graph_param)
+
+ # string columns
+ df, str_cols = rank_str_cols(df, dic_proc_cfgs, orig_graph_param)
+ dic_param[STRING_COL_IDS] = str_cols
+
+ # check filter match or not ( for GUI show )
+ matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids = main_check_filter_detail_match_graph_data(
+ graph_param, df)
+
+ # matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+ dic_param[MATCHED_FILTER_IDS] = matched_filter_ids
+ dic_param[UNMATCHED_FILTER_IDS] = unmatched_filter_ids
+ dic_param[NOT_EXACT_MATCH_FILTER_IDS] = not_exact_match_filter_ids
+
+ # apply coef for text
+ df = apply_coef_text(df, graph_param_with_cate, dic_proc_cfgs)
+
+ # order data by order columns
+ x_option = graph_param.common.x_option or 'TIME'
+ serial_processes = graph_param.common.serial_processes or []
+ serial_cols = graph_param.common.serial_columns or []
+ serial_orders = graph_param.common.serial_orders or []
+ df = check_and_order_data(df, dic_proc_cfgs, x_option, serial_processes, serial_cols, serial_orders)
+
+ # flag to show that trace result was limited
+ dic_param[DATA_SIZE] = df.memory_usage(deep=True).sum()
+ dic_param[IS_RES_LIMITED] = is_res_limited
+ dic_param[ACTUAL_RECORD_NUMBER] = actual_record_number
+
+ return dic_param, df, orig_graph_param, graph_param, graph_param_with_cate
+
+
+@log_execution_time()
+def gen_dic_param(df, dic_param, dic_data, dic_proc_cfgs=None, dic_cates=None, dic_org_cates=None,
+ is_get_chart_infos=True):
+ graph_param = bind_dic_param_to_class(dic_param)
+ if not dic_proc_cfgs:
+ dic_proc_cfgs = get_procs_in_dic_param(graph_param)
+
+ times = df[Cycle.time.key].tolist() or []
+ if times and str(times[0])[-1].upper() != 'Z':
+ times = [convert_time(tm) for tm in times if tm]
+
+ # get chart infos
+ chart_infos = None
+ original_graph_configs = None
+ if is_get_chart_infos:
+ chart_infos, original_graph_configs = get_chart_infos(graph_param, dic_data, times)
+
+ dic_param[ARRAY_FORMVAL], dic_param[ARRAY_PLOTDATA] = gen_plotdata_fpp(graph_param, dic_data, chart_infos,
+ original_graph_configs)
+ dic_param[CATEGORY_DATA] = gen_category_data(dic_proc_cfgs, graph_param, dic_cates or dic_data, dic_org_cates)
+ dic_param[TIMES] = times
+
+ if Cycle.id.key in df.columns:
+ dic_param[CYCLE_IDS] = df.id.tolist()
+
+ return dic_param
+
+
+@log_execution_time()
+def rank_str_cols(df: DataFrame, dic_proc_cfgs, graph_param: DicParam):
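+    # For each text (string) end column, keep the raw values in a RANK_COL-prefixed column and
+    # replace the original column with 1-based categorical codes so it can be plotted numerically.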
+ dic_str_cols = get_str_cols_in_end_procs(dic_proc_cfgs, graph_param)
+ str_cols = []
+ for sql_label, (before_rank_label, _, col_id, _) in dic_str_cols.items():
+ if sql_label not in df.columns:
+ continue
+
+ df[before_rank_label] = df[sql_label]
+ df[sql_label] = np.where(df[sql_label].isnull(), df[sql_label], df[sql_label].astype('category').cat.codes + 1)
+
+ df[sql_label] = df[sql_label].convert_dtypes()
+ str_cols.append(col_id)
+
+ return df, str_cols
+
+
+@log_execution_time()
+def get_str_cols_in_end_procs(dic_proc_cfgs, graph_param: DicParam):
+ dic_output = {}
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ dic_cols = {col.id: col for col in proc_cfg.get_cols_by_data_type(DataType.TEXT, False)}
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ cfg_col = dic_cols.get(col_id)
+ if cfg_col is None:
+ continue
+
+ rank_col_name = gen_sql_label(col_id, col_name)
+ before_rank_col_name = gen_sql_label(RANK_COL, rank_col_name)
+ dic_output[rank_col_name] = (before_rank_col_name, proc.proc_id, col_id, col_name)
+
+ return dic_output
+
+
+@log_execution_time()
+def gen_before_rank_dict(df: DataFrame, dic_str_cols):
+ dic_output = {}
+ for sql_label, (before_rank_label, _, col_id, _) in dic_str_cols.items():
+ if before_rank_label in df.columns:
+ df_rank = df[[sql_label, before_rank_label]].drop_duplicates().dropna()
+ dic_output[col_id] = dict(zip(df_rank[sql_label], df_rank[before_rank_label]))
+
+ return dic_output
+
+
+@log_execution_time()
+def set_str_rank_to_dic_param(dic_param, dic_ranks, dic_full_array_y=None):
+ for i, plot in enumerate(dic_param[ARRAY_PLOTDATA]):
+ col_id = plot.get(END_COL_ID)
+ dic_col_ranks = dic_ranks.get(col_id)
+ if not dic_col_ranks:
+ continue
+
+ # enc col (show graph)
+ # plot[ARRAY_Y] = reduce_stepped_chart_data(plot.get(ARRAY_Y))
+ plot[ARRAY_Y_MIN] = None
+ plot[ARRAY_Y_MAX] = None
+
+ category_distributed = {}
+ full_dat = dic_full_array_y[i] if dic_full_array_y else plot.get(ARRAY_Y)
+ none_idxs = plot.get(NONE_IDXS)
+ total_counts = 0
+
+ dic_cat_counter = Counter(full_dat)
+ ranks = []
+ before_ranks = []
+ for rank_val, cat_count in dic_cat_counter.items():
+ if rank_val is None:
+ continue
+
+ cat_name = dic_col_ranks.get(rank_val)
+ if cat_name is None:
+ continue
+
+ ranks.append(rank_val)
+ before_ranks.append(cat_name)
+
+ short_name = gen_abbr_name(cat_name)
+ category_distributed[cat_name] = {
+ 'counts': cat_count,
+ 'short_name': short_name,
+ 'counts_org': cat_count,
+ 'pctg': 0,
+ }
+ total_counts += cat_count
+
+ plot[RANK_COL] = [ranks, before_ranks]
+
+ for k, cat in category_distributed.items():
+ cat_dist = signify_digit(cat['counts'] * 100 / total_counts) if total_counts else 0
+ category_distributed[k]['pctg'] = cat_dist
+ category_distributed[k]['counts'] = '{} ({}%)'.format(cat['counts'], cat_dist)
+
+ # show end col summary info
+ series = pd.Series(full_dat)
+ if none_idxs is None:
+ pass
+ else:
+ if none_idxs:
+ series = series[(series.notnull()) | (series.index.isin(none_idxs))]
+ else:
+ series.dropna(inplace=True)
+
+ n_total = len(series)
+ non_na_count = len(series.dropna())
+ na_count = n_total - non_na_count
+
+ step_chart_summary = {
+ N_TOTAL: n_total,
+ N: non_na_count,
+ N_PCTG: signify_digit(100 * non_na_count / n_total) if n_total else 0,
+ N_NA: na_count,
+ N_NA_PCTG: signify_digit(100 * na_count / n_total) if n_total else 0
+ }
+ plot[CAT_DISTRIBUTE] = category_distributed
+ plot[CAT_SUMMARY] = step_chart_summary
+
+
+@log_execution_time()
+def set_str_category_data(dic_param, dic_ranks):
+ for dic_cate in dic_param[CATEGORY_DATA]:
+ col_id = dic_cate.get('column_id')
+ if col_id not in dic_ranks:
+ continue
+
+ dic_cate['data'] = pd.Series(dic_cate.get('data')).map(dic_ranks[col_id]).tolist()
+
+
+@log_execution_time()
+def gen_thin_dic_param(df, dic_param, dic_proc_cfgs, dic_cat_exp_labels=None, dic_ranks=None):
+ # bind dic_param
+ graph_param = bind_dic_param_to_class(dic_param)
+ dic_datetime_serial_cols = get_serials_and_date_col(graph_param, dic_proc_cfgs)
+ dic_str_cols = get_str_cols_in_end_procs(dic_proc_cfgs, graph_param)
+ df_thin, dic_cates, dic_org_cates, group_counts = reduce_data(df, graph_param, dic_str_cols)
+
+ # create output data
+ df_cat_exp = gen_df_thin_values(df, graph_param, df_thin, dic_str_cols)
+ dic_data = gen_dic_data_from_df(df_cat_exp, graph_param, cat_exp_mode=True, dic_cat_exp_labels=dic_cat_exp_labels,
+ calculate_cycle_time=False)
+ dic_param = gen_dic_param(df_cat_exp, dic_param, dic_data, dic_proc_cfgs, dic_cates, dic_org_cates,
+ is_get_chart_infos=False)
+ gen_dic_serial_data_from_df_thin(df_cat_exp, dic_param, dic_datetime_serial_cols, dic_ranks)
+
+ # get start proc time
+ start_tm = start_of_minute(graph_param.common.start_date, graph_param.common.start_time)
+ end_tm = end_of_minute(graph_param.common.end_date, graph_param.common.end_time)
+ threshold_filter_detail_ids = graph_param.common.threshold_boxes
+
+ # gen min max for thin data
+ for plot in dic_param[ARRAY_PLOTDATA]:
+ sql_label = gen_sql_label(plot[END_COL_ID], plot[END_COL_NAME], plot.get(CAT_EXP_BOX))
+ time_label = gen_sql_label(TIMES, sql_label)
+ min_label = gen_sql_label(ARRAY_Y_MIN, sql_label)
+ max_label = gen_sql_label(ARRAY_Y_MAX, sql_label)
+ cycle_label = gen_sql_label(CYCLE_IDS, sql_label)
+
+ if time_label in df_cat_exp.columns:
+ plot[ARRAY_X] = df_cat_exp[time_label].replace({np.nan: None}).tolist()
+ # get chart infos
+ plot[CHART_INFOS_ORG], plot[CHART_INFOS] = get_chart_info_detail(plot[ARRAY_X], plot[END_COL_ID],
+ threshold_filter_detail_ids,
+ plot[END_PROC_ID],
+ graph_param.common.start_proc,
+ start_tm, end_tm,
+ dic_param[TIMES])
+
+ if min_label in df_cat_exp.columns:
+ plot[ARRAY_Y_MIN] = df_cat_exp[min_label].tolist()
+
+ if max_label in df_cat_exp.columns:
+ plot[ARRAY_Y_MAX] = df_cat_exp[max_label].tolist()
+
+ if cycle_label in df_cat_exp.columns:
+ plot[CYCLE_IDS] = df_cat_exp[cycle_label].tolist()
+
+ if plot[END_COL_ID] in dic_ranks:
+ # category variable
+ p_array_y = pd.Series(plot[ARRAY_Y]).dropna().tolist()
+ cat_size = 0
+ if len(p_array_y):
+ cat_size = np.unique(p_array_y).size
+ plot[CAT_TOTAL] = cat_size
+ plot[IS_CAT_LIMITED] = cat_size >= MAX_CATEGORY_SHOW
+
+ # None values are not shown in thin mode
+ plot[NONE_IDXS] = []
+
+ # group count
+ dic_param[THIN_DATA_GROUP_COUNT] = group_counts
+
+ return dic_param
+
+
+def make_str_full_array_y(dic_param):
+ return [plot[ARRAY_Y] for plot in dic_param[ARRAY_PLOTDATA]]
+
+
+def get_summary_infos(dic_param):
+ return [plot[SUMMARIES] for plot in dic_param[ARRAY_PLOTDATA]]
+
+
+@log_execution_time()
+def check_and_order_data(df, dic_proc_cfgs, x_option='TIME', serial_processes=[], serial_cols=[], serial_orders=[]):
+ if x_option.upper() == 'TIME':
+ df = df.sort_values(Cycle.time.key, ascending=True)
+ return df
+
+ cols = []
+ orders = []
+ for proc_id in set(serial_processes):
+ if not proc_id:
+ continue
+
+ proc_cfg: CfgProcess = dic_proc_cfgs.get(int(proc_id))
+ if not proc_cfg:
+ continue
+ order_cols: List[CfgProcessColumn] = proc_cfg.get_order_cols(column_name_only=False)
+
+ if not order_cols:
+ continue
+
+ dic_order_cols = {col.id: gen_sql_label(col.id, col.column_name) for col in order_cols}
+ for col_id, order in zip(serial_cols, serial_orders):
+ if not col_id:
+ continue
+ col_label = dic_order_cols.get(int(col_id))
+ if col_label and col_label in df.columns and col_label not in cols:
+ cols.append(dic_order_cols.get(int(col_id)))
+ orders.append(bool(int(order)))
+
+ if cols:
+ df = df.sort_values(cols, ascending=orders)
+
+ return df
+
+
+def gen_blank_df_end_col(proc: EndProc, columns):
+ dic_cols = {}
+ for cfg_col in columns:
+ if cfg_col.column_name not in proc.col_names:
+ continue
+
+ name = gen_sql_label(cfg_col.id, cfg_col.column_name)
+ dic_cols[name] = []
+
+ dic_cols.update({Cycle.id.key: [], Cycle.global_id.key: [], Cycle.time.key: []})
+ return pd.DataFrame(dic_cols)
+
+
+def gen_blank_df_end_cols(procs: List[EndProc]):
+ params = dict()
+ for proc in procs:
+ params.update({gen_sql_label(col_id, proc.col_names[idx]): [] for idx, col_id in enumerate(proc.col_ids)})
+ params.update({'{}{}'.format(Cycle.time.key, create_rsuffix(proc.proc_id)): []})
+ params.update({Cycle.id.key: [], Cycle.global_id.key: [], Cycle.time.key: []})
+
+ df = pd.DataFrame(params)
+ df = df.append(pd.Series(), ignore_index=True)
+ return df.replace({np.nan: ''})
+
+
+def gen_df_end(proc: EndProc, start_relate_ids=None, start_tm=None, end_tm=None):
+ proc_id = proc.proc_id
+
+ # get serials
+ cfg_cols = CfgProcessColumn.get_all_columns(proc_id)
+ serials = [col for col in cfg_cols if col.is_serial_no]
+ serials = [gen_sql_label(serial.id, serial.column_name) for serial in serials]
+
+ # get sensor values
+ df_end = get_sensor_values(proc, start_relate_ids=start_relate_ids, start_tm=start_tm, end_tm=end_tm)
+ if df_end.empty:
+ df_end = gen_blank_df_end_col(proc, cfg_cols)
+
+ # keep only rows that are linked to a global id
+ if df_end.columns.size:
+ df_end = df_end[df_end.eval('global_id.notnull()')]
+
+ # drop duplicate
+ if df_end.columns.size and serials:
+ cols = [col for col in serials if col in df_end.columns]
+ if cols:
+ df_end = df_end.drop_duplicates(subset=cols, keep='last')
+
+ # set index
+ if df_end.columns.size:
+ df_end.set_index(Cycle.global_id.key, inplace=True)
+
+ return df_end
+
+
+def gen_df_end_same_with_start(proc: EndProc, start_proc_id, start_tm, end_tm, drop_duplicate=True,
+ with_limit=None):
+ # proc_id = proc.proc_id
+
+ # get serials
+ serials = CfgProcessColumn.get_serials(start_proc_id)
+ serials = [gen_sql_label(serial.id, serial.column_name) for serial in serials]
+
+ # get sensor values
+ df_end = get_sensor_values(proc, start_tm=start_tm, end_tm=end_tm, use_global_id=False, with_limit=with_limit)
+ if df_end.empty:
+ return pd.DataFrame()
+
+ df_end.set_index(Cycle.id.key, inplace=True)
+
+ # if there is only 1 proc, show all data without dropping duplicates
+ if drop_duplicate and len(serials): # TODO ask PO
+ cols = [col for col in serials if col in df_end.columns]
+ if cols:
+ df_end.drop_duplicates(subset=cols, keep='last', inplace=True)
+
+ return df_end
+
+
+def filter_proc_same_with_start(proc: ConditionProc, start_tm, end_tm, with_limit=None):
+ if not proc.dic_col_id_filters:
+ return None
+
+ cond_records = get_cond_data(proc, start_tm=start_tm, end_tm=end_tm, use_global_id=False, with_limit=with_limit)
+ # important: None means no filter, [] means no data
+ if cond_records is None:
+ return None
+
+ return [cycle.id for cycle in cond_records]
+
+
+def filter_proc(proc: ConditionProc, start_relate_ids=None, start_tm=None, end_tm=None):
+ if not proc.dic_col_id_filters:
+ return None
+
+ cond_records = get_cond_data(proc, start_relate_ids=start_relate_ids, start_tm=start_tm, end_tm=end_tm)
+ # important: None means no filter, [] means no data
+ if cond_records is None:
+ return None
+
+ return [cycle.global_id for cycle in cond_records]
+
+
+def create_rsuffix(proc_id):
+ return '_{}'.format(proc_id)
+
+
+@log_execution_time()
+@notify_progress(30)
+@memoize(is_save_file=True)
+def graph_one_proc(proc_id, start_tm, end_tm, cond_procs, end_procs, sql_limit, same_proc_only=False,
+ with_time_order=True):
+ """ get data from database
+
+ Arguments:
+ trace {[type]} -- [description]
+ dic_param {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+
+ # start proc
+ data = get_start_proc_data(proc_id, start_tm, end_tm, with_limit=sql_limit, with_time_order=with_time_order)
+ # no data
+ if not data:
+ return gen_blank_df()
+
+ df_start = pd.DataFrame(data)
+ df_start.set_index(Cycle.id.key, inplace=True)
+
+ # condition
+ for proc in cond_procs:
+ if same_proc_only and proc.proc_id != proc_id:
+ continue
+
+ ids = filter_proc_same_with_start(proc, start_tm, end_tm, with_limit=sql_limit)
+ if ids is None:
+ continue
+
+ df_start = df_start[df_start.index.isin(ids)]
+
+ # end proc
+ for proc in end_procs:
+ if same_proc_only and proc.proc_id != proc_id:
+ continue
+ df_end = gen_df_end_same_with_start(proc, proc_id, start_tm, end_tm, drop_duplicate=False)
+ df_start = df_start.join(df_end, rsuffix=create_rsuffix(proc.proc_id)).reset_index()
+
+ return df_start
+
+
+@log_execution_time()
+@notify_progress(30)
+@memoize(is_save_file=True)
+def graph_many_proc(start_proc_id, start_tm, end_tm, cond_procs: List[ConditionProc], end_procs: List[EndProc],
+ sql_limit, with_time_order=True):
+ """ get data from database
+
+ Arguments:
+ trace {[type]} -- [description]
+ dic_param {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ # without relate
+ data = get_start_proc_data(start_proc_id, start_tm, end_tm, with_limit=sql_limit, with_time_order=with_time_order)
+ # no data
+ if not data:
+ return gen_blank_df(), False
+
+ df_start = pd.DataFrame(data)
+
+ # with relate
+ data_with_relate_id = get_start_proc_data_with_relate_id(start_proc_id, start_tm, end_tm, with_limit=sql_limit)
+ if data_with_relate_id:
+ df_start_with_relate_id = pd.DataFrame(data_with_relate_id)
+ df_start = df_start.append(df_start_with_relate_id, ignore_index=True)
+
+ # downcast data type
+ # data_types = {Cycle.global_id.key: np.int64, Cycle.is_outlier.key: 'category'}
+ # for col in data_types:
+ # df_start[col].replace({np.nan: None}, inplace=True)
+ # df_start = df_start.astype(data_types)
+
+ start_relate_ids = list(df_start[df_start.eval('global_id.notnull()')][Cycle.global_id.key])
+
+ is_res_limited = True
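+ # if the id list is small enough, split it into chunks of 900 for IN-clause queries (presumably to stay under the SQL parameter limit); otherwise fall back to time-range filtering and keep the "limited" flag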
+ if len(start_relate_ids) < 5000:
+ start_relate_ids = [start_relate_ids[x:x + 900] for x in range(0, len(start_relate_ids), 900)]
+ is_res_limited = False
+ else:
+ start_relate_ids = None
+
+ # set index
+ df_start.set_index(Cycle.id.key, drop=False, inplace=True)
+
+ # condition that same with start
+ cycle_ids = None
+ is_filter = False
+ for proc in cond_procs:
+ if not proc.proc_id == start_proc_id:
+ continue
+
+ ids = filter_proc_same_with_start(proc, start_tm, end_tm, with_limit=sql_limit)
+ if ids is None:
+ continue
+
+ if cycle_ids is None:
+ cycle_ids = set(ids)
+ else:
+ cycle_ids.intersection_update(ids)
+
+ is_filter = True
+
+ if is_filter:
+ df_start = df_start[df_start.index.isin(cycle_ids)]
+ if not df_start.columns.size:
+ return gen_blank_df(), False
+
+ # end proc that same with start
+ for proc in end_procs:
+ if not proc.proc_id == start_proc_id:
+ continue
+
+ # get sensor value data
+ df_end = gen_df_end_same_with_start(proc, proc.proc_id, start_tm, end_tm, with_limit=sql_limit)
+ df_start = df_start.join(df_end, how='inner', rsuffix=create_rsuffix(proc.proc_id))
+
+ if not df_start.columns.size:
+ return gen_blank_df(), False
+
+ # get min max time {proc_id:[min,max]}
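+ # widen the window by 14 days on both sides, presumably so that linked records of other processes slightly outside the queried range are still joined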
+ e_start_tm = convert_time(start_tm, return_string=False)
+ e_start_tm = add_days(e_start_tm, -14)
+ e_start_tm = convert_time(e_start_tm)
+ e_end_tm = convert_time(end_tm, return_string=False)
+ e_end_tm = add_days(e_end_tm, 14)
+ e_end_tm = convert_time(e_end_tm)
+
+ global_ids = None
+ is_filter = False
+ for proc in cond_procs:
+ if proc.proc_id == start_proc_id:
+ continue
+
+ ids = filter_proc(proc, start_relate_ids, e_start_tm, e_end_tm)
+ if ids is None:
+ continue
+
+ if global_ids is None:
+ global_ids = set(ids)
+ else:
+ global_ids.intersection_update(ids)
+
+ is_filter = True
+
+ if is_filter:
+ if data_with_relate_id:
+ idxs = df_start[df_start[Cycle.global_id.key].isin(global_ids)].index
+ idxs = set(idxs)
+ df_start = df_start.loc[idxs]
+ else:
+ df_start = df_start[df_start[Cycle.global_id.key].isin(global_ids)]
+
+ # set new Index
+ df_start.set_index(Cycle.global_id.key, inplace=True)
+
+ # end proc
+ for proc in end_procs:
+ if proc.proc_id == start_proc_id:
+ continue
+
+ df_end = gen_df_end(proc, start_relate_ids, e_start_tm, e_end_tm)
+ df_start = df_start.join(df_end, rsuffix=create_rsuffix(proc.proc_id))
+
+ # group by cycle id to drop duplicate ( 1:n with global relation)
+ df_start.set_index(Cycle.id.key, inplace=True)
+ if data_with_relate_id:
+ df_start = df_start.groupby(df_start.index).first().reset_index()
+
+ # sort by time
+ if with_time_order:
+ df_start.sort_values(Cycle.time.key, inplace=True)
+
+ return df_start, is_res_limited
+
+
+@notify_progress(40)
+@log_execution_time()
+@trace_log((TraceErrKey.ACTION, TraceErrKey.TARGET), (EventAction.READ, Target.DATABASE))
+def get_data_from_db(graph_param: DicParam, with_time_order=True, is_save_df_to_file=True):
+ # DEBUG Function
+ if get_debug_data(DebugKey.IS_DEBUG_MODE.name):
+ df = get_debug_data(DebugKey.GET_DATA_FROM_DB.name)
+ return df, df.index.size, None
+
+ # with limit
+ sql_limit = SQL_LIMIT
+
+ # start proc
+ start_tm = start_of_minute(graph_param.common.start_date, graph_param.common.start_time)
+ end_tm = end_of_minute(graph_param.common.end_date, graph_param.common.end_time)
+
+ is_res_limited = False
+ proc_ids = get_proc_ids_in_dic_param(graph_param)
+ if len(proc_ids) == 1:
+ df = graph_one_proc(graph_param.common.start_proc, start_tm, end_tm, graph_param.common.cond_procs,
+ graph_param.array_formval, sql_limit,
+ with_time_order=with_time_order)
+ else:
+ df, is_res_limited = graph_many_proc(graph_param.common.start_proc, start_tm, end_tm,
+ graph_param.common.cond_procs, graph_param.array_formval, sql_limit,
+ with_time_order=with_time_order)
+
+ # reset index
+ df.reset_index(inplace=True)
+
+ # save log
+ if is_save_df_to_file:
+ save_df_to_file(df)
+
+ # with limit
+ actual_record_number = df.index.size
+
+ # graph_param.common.is_validate_data = True
+ if graph_param.common.is_validate_data:
+ df = validate_data(df)
+
+ return df, actual_record_number, is_res_limited
+
+
+@log_execution_time()
+def validate_data(df: DataFrame):
+ if len(df) > THIN_DATA_COUNT:
+ df_before = get_sample_df(df)
+ df_before = df_before.convert_dtypes()
+ df_after = validate_data_with_regex(df_before)
+ checked_cols, dic_abnormal = get_changed_value_after_validate(df_before, df_after)
+ df = validate_data_with_simple_searching(df, checked_cols, dic_abnormal)
+ else:
+ df = validate_data_with_regex(df)
+
+ return df
+
+
+@log_execution_time()
+def get_sample_df(df):
+ sample_df = df.head(THIN_DATA_COUNT)
+ number_cols = df.select_dtypes(include=['integer', 'float']).columns.tolist()
+ for col in number_cols:
+ if not check_validate_target_column(col):
+ continue
+ try:
+ min_idx = df[col].idxmin()
+ max_idx = df[col].idxmax()
+ sample_df = sample_df.append(df.loc[min_idx], ignore_index=True)
+ sample_df = sample_df.append(df.loc[max_idx], ignore_index=True)
+ except Exception:
+ pass
+
+ return sample_df
+
+
+@log_execution_time()
+def gen_df_thin_values(df: DataFrame, graph_param: DicParam, df_thin, dic_str_cols):
+ thin_idxs_len = len(df_thin)
+ thin_boxes = [None] * thin_idxs_len
+ df_cat_exp = pd.DataFrame()
+ df_cat_exp[Cycle.time.key] = thin_boxes
+
+ # df_cat_exp[Cycle.time.key] = df_thin[Cycle.time.key]
+ if CAT_EXP_BOX in df_thin.columns:
+ df_cat_exp[CAT_EXP_BOX] = df_thin[CAT_EXP_BOX]
+
+ series = pd.Series(thin_boxes, index=df_thin.index)
+ for proc in graph_param.array_formval:
+ orig_sql_label_serial = gen_sql_label(SERIAL_DATA, proc.proc_id)
+ time_col_alias = '{}_{}'.format(Cycle.time.key, proc.proc_id)
+
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ col_id_name = gen_sql_label(col_id, col_name)
+ cols_in_df = [col for col in df_thin.columns if col.startswith(col_id_name)]
+ target_col_info = dic_str_cols.get(col_id_name)
+ for sql_label in cols_in_df:
+ sql_label_min = gen_sql_label(ARRAY_Y_MIN, sql_label)
+ sql_label_max = gen_sql_label(ARRAY_Y_MAX, sql_label)
+ sql_label_cycle = gen_sql_label(CYCLE_IDS, sql_label)
+ sql_label_serial = gen_sql_label(SERIAL_DATA, sql_label)
+ sql_label_time = gen_sql_label(TIMES, sql_label)
+ idxs = df_thin[sql_label].notnull()
+
+ if not len(idxs) or not len(df_thin[idxs]):
+ df_cat_exp[sql_label] = thin_boxes
+ df_cat_exp[sql_label_min] = thin_boxes
+ df_cat_exp[sql_label_max] = thin_boxes
+ continue
+
+ # before rank
+ if target_col_info:
+ rows = df_thin[sql_label]
+ df_cat_exp[sql_label] = rows
+ df_cat_exp[sql_label_min] = thin_boxes
+ df_cat_exp[sql_label_max] = thin_boxes
+ continue
+
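+ # each thinned cell stores the row positions of (min, median, max); plot values are taken at the median position, min/max are kept as separate columns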
+ min_idxs, med_idxs, max_idxs = list(zip(*df_thin.loc[idxs, sql_label]))
+ min_idxs, med_idxs, max_idxs = list(min_idxs), list(med_idxs), list(max_idxs)
+ series[:] = None
+ series[idxs] = df.loc[med_idxs, sql_label].values
+ df_cat_exp[sql_label] = series
+
+ # time start proc
+ if Cycle.time.key in df.columns:
+ series[:] = None
+ series[idxs] = df.loc[med_idxs, Cycle.time.key].values
+ df_cat_exp[Cycle.time.key] = np.where(series.isnull(), df_cat_exp[Cycle.time.key], series)
+
+ # time end proc
+ if time_col_alias in df.columns:
+ series[:] = None
+ series[idxs] = df.loc[med_idxs, time_col_alias].values
+ df_cat_exp[sql_label_time] = series
+
+ # cycle ids
+ if Cycle.id.key in df.columns:
+ series[:] = None
+ series[idxs] = df.loc[med_idxs, Cycle.id.key].values
+ df_cat_exp[sql_label_cycle] = series
+
+ # serial ids
+ if orig_sql_label_serial in df.columns:
+ series[:] = None
+ series[idxs] = df.loc[med_idxs, orig_sql_label_serial].values
+ df_cat_exp[sql_label_serial] = series
+
+ # add min value to median position
+ series[:] = None
+ series[idxs] = df.loc[min_idxs, sql_label].values
+ df_cat_exp[sql_label_min] = series
+
+ # add max value to median position
+ series[:] = None
+ series[idxs] = df.loc[max_idxs, sql_label].values
+ df_cat_exp[sql_label_max] = series
+
+ return df_cat_exp
+
+
+@log_execution_time()
+def gen_dic_data_from_df(df: DataFrame, graph_param: DicParam, cat_exp_mode=None, dic_cat_exp_labels=None,
+ calculate_cycle_time=True):
+ """
+ :param df:
+ :param graph_param:
+ :param cat_exp_mode:
+ :param dic_cat_exp_labels:
+ :param calculate_cycle_time:
+ :return:
+ """
+ dic_data = defaultdict(dict)
+ blank_vals = [None] * df.index.size
+ for proc in graph_param.array_formval:
+ # TODO: CfgProcessColumn is queried on every iteration; consider moving the lookup outside the loop
+ dic_datetime_cols = {cfg_col.id: cfg_col for cfg_col in
+ CfgProcessColumn.get_by_data_type(proc.proc_id, DataType.DATETIME)}
+ dic_data_cat_exp = defaultdict(list)
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ col_id_name = gen_sql_label(col_id, col_name)
+ sql_labels = [col for col in df.columns if col.startswith(col_id_name)]
+ series_lst = []
+ for sql_label in sql_labels:
+ if sql_label in df.columns:
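+ # DATETIME columns are converted to cycle time: sort by timestamp, take consecutive differences in seconds, then restore the original row order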
+ if calculate_cycle_time and col_id in dic_datetime_cols:
+ series = pd.to_datetime(df[sql_label])
+ series.sort_values(inplace=True)
+ series = series.diff().dt.total_seconds()
+ series.sort_index(inplace=True)
+ df[sql_label] = series
+ else:
+ series = df[sql_label]
+
+ series = series.replace({np.nan: None}).tolist()
+ else:
+ series = blank_vals
+
+ series_lst.append(series)
+ if dic_cat_exp_labels:
+ sql_label_vals = dic_cat_exp_labels.get(sql_label)
+ if sql_label_vals:
+ dic_data_cat_exp[col_id].append(sql_label_vals[2])
+
+ if series_lst:
+ dic_data[proc.proc_id][col_id] = series_lst if cat_exp_mode else series_lst[0]
+ else:
+ dic_data[proc.proc_id][col_id] = []
+
+ if len(dic_data_cat_exp):
+ dic_data[proc.proc_id][CAT_EXP_BOX] = dic_data_cat_exp
+
+ time_col_alias = '{}_{}'.format(Cycle.time.key, proc.proc_id)
+ if time_col_alias in df:
+ dic_data[proc.proc_id][Cycle.time.key] = df[time_col_alias].replace({np.nan: None}).tolist()
+ else:
+ dic_data[proc.proc_id][Cycle.time.key] = []
+
+ # if CAT_EXP_BOX in df.columns:
+ # dic_data[CAT_EXP_BOX] = df[CAT_EXP_BOX].tolist()
+
+ return dic_data
+
+
+@log_execution_time()
+def gen_dic_data_cat_exp_from_df(df: DataFrame, graph_param: DicParam, dic_cfg_cat_exps, max_graph=None):
+ is_graph_limited = False
+ dic_data = defaultdict(dict)
+ if not len(df):
+ return dic_data
+
+ cat_exp_cols = gen_cat_exp_names(graph_param.common.cat_exp)
+ for cat_exp_col, cat_exp_label in zip(graph_param.common.cat_exp, cat_exp_cols):
+ if cat_exp_label not in df.columns:
+ cfg_cat_exp = dic_cfg_cat_exps[cat_exp_col]
+ sql_label = gen_sql_label(cfg_cat_exp.id, cfg_cat_exp.column_name)
+ df[cat_exp_label] = df[sql_label]
+
+ df_group = df.groupby(cat_exp_cols, dropna=False)
+ dic_df_group = {key: df_sub for key, df_sub in df_group}
+
+ blank_vals = [None] * len(df)
+ series = pd.Series(blank_vals, index=df.index)
+ graph_count = 0
+ for proc in graph_param.array_formval:
+ if max_graph and graph_count >= max_graph:
+ is_graph_limited = True
+ break
+
+ dic_datetime_cols = {cfg_col.id: cfg_col for cfg_col in
+ CfgProcessColumn.get_by_data_type(proc.proc_id, DataType.DATETIME)}
+ dic_none_idxs = defaultdict(list)
+ dic_cat_exp_names = defaultdict(list)
+ time_col_alias = '{}_{}'.format(Cycle.time.key, proc.proc_id)
+ if time_col_alias in df:
+ dic_data[proc.proc_id][Cycle.time.key] = df[time_col_alias].replace({np.nan: None}).tolist()
+ else:
+ dic_data[proc.proc_id][Cycle.time.key] = []
+
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ if max_graph and graph_count >= max_graph:
+ is_graph_limited = True
+ break
+
+ sql_label = gen_sql_label(col_id, col_name)
+ if sql_label not in df.columns:
+ dic_data[proc.proc_id][col_id] = blank_vals
+ dic_none_idxs[col_id].append(list(range(len(df))))
+ dic_cat_exp_names[col_id].append(NA_STR)
+ continue
+
+ plots = []
+ # for cat_exp_val, idxs in df_group.groups.items():
+ for cat_exp_val, df_sub in dic_df_group.items():
+ if graph_count >= 20:
+ break
+
+ idxs = df_sub.index
+ if not len(idxs):
+ continue
+
+ series[:] = None
+ temp_series: Series = df_sub[sql_label]
+ if col_id in dic_datetime_cols:
+ temp_series = pd.to_datetime(temp_series)
+ temp_series.sort_values(inplace=True)
+ temp_series = temp_series.diff().dt.total_seconds()
+ temp_series.sort_index(inplace=True)
+
+ nan_idxs = temp_series.isnull()
+ nan_series = temp_series[nan_idxs]
+ if len(temp_series) == len(nan_series):
+ continue
+
+ series[idxs] = temp_series.tolist()
+ if len(nan_series):
+ series[nan_series.index] = None
+
+ plots.append(series.tolist())
+ dic_none_idxs[col_id].append(nan_series.index.tolist())
+ dic_cat_exp_names[col_id].append(NA_STR if cat_exp_val is None or pd.isna(cat_exp_val) else cat_exp_val)
+
+ graph_count += 1
+
+ if plots:
+ dic_data[proc.proc_id][col_id] = plots
+ dic_data[proc.proc_id][CAT_EXP_BOX] = dic_cat_exp_names
+ dic_data[proc.proc_id][NONE_IDXS] = dic_none_idxs
+
+ return dic_data, is_graph_limited
+
+
+@log_execution_time()
+def gen_dic_serial_data_from_df(df: DataFrame, dic_proc_cfgs, dic_param):
+ dic_param[SERIAL_DATA] = dict()
+ dic_param[COMMON_INFO] = dict()
+ for proc_id, proc_cfg in dic_proc_cfgs.items():
+ serial_cols = proc_cfg.get_serials(column_name_only=False)
+ datetime_col = proc_cfg.get_date_col(column_name_only=False)
+ if datetime_col:
+ datetime_col = datetime_col.name
+ sql_labels = [gen_sql_label(serial_col.id, serial_col.column_name) for serial_col in serial_cols]
+ before_rank_sql_labels = [gen_sql_label(RANK_COL, sql_label) for sql_label in sql_labels]
+ serial_cols = [serial_col.name for serial_col in serial_cols]
+ dic_param[COMMON_INFO][proc_id] = {
+ DATETIME_COL: datetime_col or '',
+ SERIAL_COLUMNS: serial_cols,
+ }
+ cols = []
+ for sql_label, before_rank_label in zip(sql_labels, before_rank_sql_labels):
+ if before_rank_label in df.columns:
+ cols.append(before_rank_label)
+ else:
+ cols.append(sql_label)
+
+ is_not_exist = set(cols) - set(list(df.columns))
+ if not is_not_exist and cols:
+ dic_param[SERIAL_DATA][proc_id] = df[cols].replace({np.nan: ''}).to_records(index=False).tolist()
+ else:
+ dic_param[SERIAL_DATA][proc_id] = []
+
+
+@log_execution_time()
+def gen_dic_serial_data_from_df_thin(df: DataFrame, dic_param, dic_datetime_serial_cols, dic_ranks):
+ dic_param[COMMON_INFO] = {}
+
+ for plot in dic_param[ARRAY_PLOTDATA]:
+ col_id = plot[END_COL_ID]
+ if col_id in dic_ranks:
+ continue
+
+ proc_id = plot[END_PROC_ID]
+ col_name = plot[END_COL_NAME]
+ cat_exp = plot.get(CAT_EXP_BOX)
+ datetime_col, serial_cols = dic_datetime_serial_cols.get(proc_id, (None, None))
+ if datetime_col:
+ dic_param[COMMON_INFO][proc_id] = {
+ DATETIME_COL: datetime_col.name,
+ SERIAL_COLUMNS: [serial_col.name for serial_col in serial_cols],
+ }
+
+ sql_label = gen_sql_label(col_id, col_name, cat_exp)
+ sql_label = gen_sql_label(SERIAL_DATA, sql_label)
+ if sql_label in df.columns:
+ plot[SERIAL_DATA] = df[sql_label].tolist()
+ else:
+ plot[SERIAL_DATA] = []
+
+
+@log_execution_time()
+def get_start_proc_data_with_relate_id(proc_id, start_tm, end_tm, with_limit=None):
+ """
+ inner join with relate table
+ :param proc_id:
+ :param start_tm:
+ :param end_tm:
+ :param with_limit:
+ :return:
+ """
+ # start proc subquery
+ cycle_cls = find_cycle_class(proc_id)
+ data = db.session.query(cycle_cls.id, GlobalRelation.relate_id.label(Cycle.global_id.key), cycle_cls.time,
+ cycle_cls.is_outlier)
+ data = data.filter(cycle_cls.process_id == proc_id)
+ data = data.filter(cycle_cls.time >= start_tm)
+ data = data.filter(cycle_cls.time < end_tm)
+
+ # join global relation
+ data = data.join(GlobalRelation, GlobalRelation.global_id == cycle_cls.global_id)
+
+ if with_limit:
+ data = data.limit(with_limit)
+
+ data = data.all()
+
+ return data
+
+
+@log_execution_time()
+def get_start_proc_data(proc_id, start_tm, end_tm, with_limit=None, with_time_order=None):
+ """
+ get start proc data only (without relations)
+ :param proc_id:
+ :param start_tm:
+ :param end_tm:
+ :param with_limit:
+ :param with_time_order:
+ :return:
+ """
+ cycle_cls = find_cycle_class(proc_id)
+ cycle = db.session.query(cycle_cls.id, cycle_cls.global_id, cycle_cls.time, cycle_cls.is_outlier)
+ cycle = cycle.filter(cycle_cls.process_id == proc_id)
+ cycle = cycle.filter(cycle_cls.time >= start_tm)
+ cycle = cycle.filter(cycle_cls.time < end_tm)
+
+ if with_time_order:
+ cycle = cycle.order_by(cycle_cls.time)
+
+ if with_limit:
+ cycle = cycle.limit(with_limit)
+
+ cycle = cycle.all()
+
+ return cycle
+
+
+def get_sensor_values_chunk(data_query, chunk_sensor, dic_sensors, cycle_cls, start_relate_ids=None, start_tm=None,
+ end_tm=None, with_limit=None):
+ for col_id, col_name in chunk_sensor:
+ if col_name not in dic_sensors:
+ continue
+ sensor = dic_sensors[col_name]
+ sensor_val_cls = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+ sensor_val = sensor_val_cls.coef(col_id)
+
+ data_query = data_query.outerjoin(
+ sensor_val_cls,
+ and_(sensor_val_cls.cycle_id == cycle_cls.id, sensor_val_cls.sensor_id == sensor.id)
+ )
+
+ data_query = data_query.add_columns(sensor_val)
+
+ # chunk
+ if start_relate_ids:
+ records = []
+ for ids in start_relate_ids:
+ temp = data_query.filter(cycle_cls.global_id.in_(ids))
+ records += temp.all()
+ id_key = Cycle.global_id.key
+ else:
+ data_query = data_query.filter(cycle_cls.time >= start_tm)
+ data_query = data_query.filter(cycle_cls.time < end_tm)
+ if with_limit:
+ data_query = data_query.limit(with_limit)
+
+ records = data_query.all()
+ id_key = Cycle.id.key
+
+ if records:
+ return pd.DataFrame(records)
+ else:
+ # no records: return an empty frame that still has the id, time and sensor columns
+ params = {id_key: [], Cycle.time.key: []}
+ params.update({gen_sql_label(col_id, col_name): [] for col_id, col_name in chunk_sensor})
+ df_chunk = pd.DataFrame(params)
+ return df_chunk
+
+
+@log_execution_time()
+def get_sensor_values(proc: EndProc, start_relate_ids=None, start_tm=None, end_tm=None, use_global_id=True,
+ with_limit=None):
+ """gen inner join sql for all column in 1 proc
+
+ Arguments:
+ proc_id {[string]} -- [process id]
+ cols {[list]} -- [column name list]
+ """
+ dic_sensors = gen_dic_sensors(proc.proc_id, proc.col_names)
+
+ cycle_cls = find_cycle_class(proc.proc_id)
+ if use_global_id:
+ data = db.session.query(cycle_cls.global_id, cycle_cls.time)
+ else:
+ data = db.session.query(cycle_cls.id, cycle_cls.time)
+
+ data = data.filter(cycle_cls.process_id == proc.proc_id)
+ dataframes = []
+ all_sensors = list(zip(proc.col_ids, proc.col_names))
+ for idx, chunk_sensor in enumerate(chunks(all_sensors, 50)):
+ df_chunk = get_sensor_values_chunk(data, chunk_sensor, dic_sensors, cycle_cls, start_relate_ids, start_tm,
+ end_tm, with_limit=with_limit)
+ if idx != 0 and Cycle.time.key in df_chunk.columns:
+ df_chunk = df_chunk.drop(Cycle.time.key, axis=1)
+ dataframes.append(df_chunk)
+
+ df = pd.DataFrame()
+ if dataframes:
+ df = pd.concat([dfc.set_index(dfc.columns[0]) for dfc in dataframes], ignore_index=False, axis=1).reset_index()
+ return df
+
+
+@log_execution_time()
+def get_cond_data(proc: ConditionProc, start_relate_ids=None, start_tm=None, end_tm=None, use_global_id=True,
+ with_limit=None):
+ """generate subquery for every condition procs
+ """
+ # get sensor info ex: sensor id , data type (int,real,text)
+ filter_query = Sensor.query.filter(Sensor.process_id == proc.proc_id)
+
+ # filter
+ cycle_cls = find_cycle_class(proc.proc_id)
+ if use_global_id:
+ data = db.session.query(cycle_cls.global_id)
+ else:
+ data = db.session.query(cycle_cls.id)
+
+ data = data.filter(cycle_cls.process_id == proc.proc_id)
+
+ # for filter_sensor in filter_sensors:
+ for col_name, filter_details in proc.dic_col_name_filters.items():
+ sensor = filter_query.filter(Sensor.column_name == col_name).first()
+ sensor_val = find_sensor_class(sensor.id, DataType(sensor.type), auto_alias=True)
+
+ ands = []
+ for filter_detail in filter_details:
+ comp_ins = []
+ comp_likes = []
+ comp_regexps = []
+ cfg_filter_detail: CfgFilterDetail
+ for cfg_filter_detail in filter_detail.cfg_filter_details:
+ val = cfg_filter_detail.filter_condition
+ if cfg_filter_detail.filter_function == FilterFunc.REGEX.name:
+ comp_regexps.append(val)
+ elif not cfg_filter_detail.filter_function \
+ or cfg_filter_detail.filter_function == FilterFunc.MATCHES.name:
+ comp_ins.append(val)
+ else:
+ comp_likes.extend(gen_sql_like_value(val, FilterFunc[cfg_filter_detail.filter_function],
+ position=cfg_filter_detail.filter_from_pos))
+
+ ands.append(
+ or_(
+ sensor_val.value.in_(comp_ins),
+ *[sensor_val.value.op(SQL_REGEXP_FUNC)(val) for val in comp_regexps if val is not None],
+ *[sensor_val.value.like(val) for val in comp_likes if val is not None],
+ )
+ )
+
+ data = data.join(
+ sensor_val, and_(
+ sensor_val.cycle_id == cycle_cls.id,
+ sensor_val.sensor_id == sensor.id,
+ *ands,
+ )
+ )
+
+ # chunk
+ if start_relate_ids:
+ records = []
+ for ids in start_relate_ids:
+ temp = data.filter(cycle_cls.global_id.in_(ids))
+ records += temp.all()
+ else:
+ data = data.filter(cycle_cls.time >= start_tm)
+ data = data.filter(cycle_cls.time < end_tm)
+ if with_limit:
+ data = data.limit(with_limit)
+
+ records = data.all()
+
+ return records
+
+
+def create_graph_config(cfgs: List[CfgVisualization] = []):
+ if not cfgs:
+ return [{
+ THRESH_HIGH: None,
+ THRESH_LOW: None,
+ Y_MAX: None,
+ Y_MIN: None,
+ PRC_MAX: None,
+ PRC_MIN: None,
+ ACT_FROM: None,
+ ACT_TO: None,
+ 'type': None,
+ 'name': None,
+ }]
+
+ list_cfgs = []
+ for cfg in cfgs:
+ list_cfgs.append({
+ THRESH_HIGH: cfg.ucl,
+ THRESH_LOW: cfg.lcl,
+ Y_MAX: cfg.ymax,
+ Y_MIN: cfg.ymin,
+ PRC_MAX: cfg.upcl,
+ PRC_MIN: cfg.lpcl,
+ ACT_FROM: cfg.act_from,
+ ACT_TO: cfg.act_to,
+ 'type': cfg.filter_column.name if cfg.filter_column else None,
+ 'name': cfg.filter_detail.name if cfg.filter_detail else None,
+ 'eng_name': cfg.filter_column.english_name if cfg.filter_column else None,
+ })
+ return list_cfgs
+
+
+def get_default_graph_config(col_id, start_tm, end_tm):
+ # get sensor default cfg chart info
+ sensor_default_cfg: List[CfgVisualization] = CfgVisualization.get_sensor_default_chart_info(col_id, start_tm,
+ end_tm) or []
+ return create_graph_config(sensor_default_cfg)
+
+
+def get_col_graph_configs(col_id, filter_detail_ids, start_tm, end_tm):
+ if not filter_detail_ids:
+ return get_default_graph_config(col_id, start_tm, end_tm)
+
+ graph_configs = CfgVisualization.get_by_control_n_filter_detail_ids(col_id, filter_detail_ids, start_tm, end_tm)
+ if graph_configs:
+ return create_graph_config(graph_configs)
+
+ return get_default_graph_config(col_id, start_tm, end_tm)
+
+
+def convert_chart_info_time_range(chart_config, start_proc_times, end_proc_times, query_start_tm, query_end_tm):
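+ # map the chart config's act_from/act_to (defined on the end process time axis) onto the start process time axis, widening to the query range at the edges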
+ last_idx = len(end_proc_times) - 1
+ act_from = chart_config.get(ACT_FROM)
+ act_to = chart_config.get(ACT_TO)
+ if act_from:
+ act_from = convert_time(act_from)
+ if act_to:
+ act_to = convert_time(act_to)
+ converted_act_from = None
+ converted_act_to = None
+ if act_from and act_to:
+ found_act_from = False
+ found_act_to = False
+ for idx, end_proc_time in enumerate(end_proc_times):
+ back_idx = last_idx - idx
+ if not found_act_from:
+ if act_from <= end_proc_time <= act_to:
+ found_act_from = True
+ # if idx == 0: # if it's first point -> converted act_from = -inf
+ # converted_act_from = None # -inf
+ # else:
+ # converted_act_from = start_proc_times[idx]
+ converted_act_from = start_proc_times[idx]
+ if idx == 0:
+ converted_act_from = query_start_tm
+ if not found_act_to:
+ back_time = end_proc_times[back_idx]
+ if act_from <= back_time <= act_to:
+ found_act_to = True
+ # if back_idx == last_idx:
+ # converted_act_to = None # if it's last point -> converted act_to = +inf
+ # else:
+ # converted_act_to = start_proc_times[back_idx]
+ converted_act_to = start_proc_times[back_idx]
+ if back_idx == last_idx:
+ converted_act_to = query_end_tm
+ if found_act_from and found_act_to:
+ break
+ else:
+ if act_from:
+ for idx, end_proc_time in enumerate(end_proc_times):
+ if act_from <= end_proc_time:
+ converted_act_from = start_proc_times[idx]
+ if idx == 0:
+ converted_act_from = query_start_tm
+ break
+ if act_to:
+ for idx in range(len(end_proc_times)):
+ back_idx = last_idx - idx
+ if end_proc_times[back_idx] <= act_to:
+ converted_act_to = start_proc_times[back_idx]
+ if back_idx == last_idx:
+ converted_act_to = query_end_tm
+ break
+
+ return converted_act_from, converted_act_to
+
+
+@log_execution_time()
+def get_chart_infos_by_stp_var(graph_param: DicParam):
+ graph_configs = {}
+ var_col_id = graph_param.get_cate_var_col_id()
+ start_tm = start_of_minute(graph_param.common.start_date, graph_param.common.start_time)
+ end_tm = end_of_minute(graph_param.common.end_date, graph_param.common.end_time)
+ start_tm = convert_time(start_tm)
+ end_tm = convert_time(end_tm)
+ get_end_cols = graph_param.get_end_cols(graph_param.get_start_proc())
+
+ # query by var_col_id
+ for end_col in get_end_cols:
+ graph_configs[end_col] = {}
+ chart_infos: List[CfgVisualization] \
+ = CfgVisualization.get_all_by_control_n_filter_col_id(end_col, var_col_id, start_tm, end_tm)
+ for chart_info in chart_infos:
+ filter_detail_id = chart_info.filter_detail_id
+ if not graph_configs[end_col].get(filter_detail_id):
+ graph_configs[end_col][filter_detail_id] = []
+ graph_configs[end_col][filter_detail_id].append(chart_info)
+
+ return graph_configs
+
+
+@log_execution_time()
+def build_regex_index(var_col_id):
+ cfg_filter: CfgFilter = CfgFilter.get_filter_by_col_id(var_col_id)
+ cfg_filter_details = []
+ if cfg_filter:
+ cfg_filter_details = cfg_filter.filter_details or []
+
+ return {
+ cfg.id: gen_python_regex(cfg.filter_condition, FilterFunc[cfg.filter_function], cfg.filter_from_pos)
+ for cfg in cfg_filter_details
+ }
+
+
+@log_execution_time()
+def map_stp_val_2_cfg_details(stp_value, map_filter_detail_2_regex={}):
+ mapped_cfg_detail_ids = []
+ for cfg_id, regex in map_filter_detail_2_regex.items():
+ if regex and re.match(regex, str(stp_value)):
+ mapped_cfg_detail_ids.append(cfg_id)
+ return mapped_cfg_detail_ids
+
+
+@log_execution_time()
+def get_chart_infos_by_stp_value(stp_value, end_col, dic_filter_detail_2_regex, chart_infos_by_stp_var):
+ mapped_cfg_detail_ids = map_stp_val_2_cfg_details(stp_value, dic_filter_detail_2_regex) or []
+ chart_infos_for_stp_value = []
+ sensor_chart_infos = chart_infos_by_stp_var.get(end_col) or {}
+ for cfg_detail_id in mapped_cfg_detail_ids:
+ chart_infos_for_stp_value.extend(sensor_chart_infos.get(cfg_detail_id) or [])
+
+ # None means default chart info of category var
+ # In cfg_visualization table, filter_detail_id = null means default of control column/filter column
+ if not chart_infos_for_stp_value:
+ chart_infos_for_stp_value.extend(sensor_chart_infos.get(None) or [])
+ return create_graph_config(chart_infos_for_stp_value)
+
+
+@log_execution_time()
+def get_chart_infos(graph_param: DicParam, dic_data=None, start_proc_times=None, no_convert=False):
+ graph_configs = {}
+ original_graph_configs = {}
+ start_proc = graph_param.common.start_proc
+ threshold_filter_detail_ids = graph_param.common.threshold_boxes
+ for proc in graph_param.array_formval:
+ graph_configs[proc.proc_id] = {}
+ original_graph_configs[proc.proc_id] = {}
+
+ start_tm = start_of_minute(graph_param.common.start_date, graph_param.common.start_time)
+ end_tm = end_of_minute(graph_param.common.end_date, graph_param.common.end_time)
+ end_proc = proc.proc_id
+ end_proc_times = dic_data[proc.proc_id].get(Cycle.time.key) if dic_data else []
+ for col_id in proc.col_ids:
+ orig_graph_cfg, graph_cfg = get_chart_info_detail(end_proc_times, col_id, threshold_filter_detail_ids,
+ end_proc, start_proc, start_tm, end_tm, start_proc_times,
+ no_convert=no_convert)
+ original_graph_configs[proc.proc_id][col_id] = orig_graph_cfg
+ graph_configs[proc.proc_id][col_id] = graph_cfg
+
+ return graph_configs, original_graph_configs
+
+
+@log_execution_time()
+def get_chart_info_detail(end_proc_times, end_col, threshold_filter_detail_ids, end_proc=None, start_proc=None,
+ start_tm=None, end_tm=None, start_proc_times=None, no_convert=False):
+ start_tm = convert_time(start_tm)
+ end_tm = convert_time(end_tm)
+ query_start_tm = start_tm
+ query_end_tm = end_tm
+ if end_proc_times:
+ end_proc_times = pd.Series(end_proc_times, dtype='string')
+ end_proc_times = end_proc_times[end_proc_times.notna()]
+ if len(end_proc_times):
+ start_tm = end_proc_times.min()
+ end_tm = end_proc_times.max()
+
+ end_proc_times = end_proc_times.to_list()
+
+ # get chart thresholds for each sensor
+ col_graph_configs = get_col_graph_configs(end_col, threshold_filter_detail_ids, start_tm, end_tm)
+ orig_graph_cfgs = deepcopy(col_graph_configs)
+
+ if end_proc_times and start_proc and end_proc and start_proc != end_proc and not no_convert and start_proc_times:
+ # convert thresholds
+ for chart_config in col_graph_configs:
+ act_from, act_to = convert_chart_info_time_range(chart_config, start_proc_times, end_proc_times,
+ query_start_tm, query_end_tm)
+ chart_config[ACT_FROM] = act_from
+ chart_config[ACT_TO] = act_to
+
+ return col_graph_configs, orig_graph_cfgs
+
+
+@log_execution_time()
+def gen_dic_sensors(proc_id, cols=None):
+ """gen dictionary of sensors
+ {column_name: T_sensor instance}
+
+ Arguments:
+ proc_id {string} -- process id
+ """
+
+ sensors = Sensor.query.filter(Sensor.process_id == proc_id)
+ if cols:
+ sensors = sensors.filter(Sensor.column_name.in_(cols))
+
+ return {sensor.column_name: sensor for sensor in sensors}
+
+
+@log_execution_time()
+def order_end_proc_sensor(orig_graph_param: DicParam, reorder):
+ dic_orders = {}
+ for proc in orig_graph_param.array_formval:
+ proc_id = proc.proc_id
+ orders = CfgConstant.get_value_by_type_name(type=CfgConstantType.TS_CARD_ORDER.name, name=proc_id) or '{}'
+ orders = json.loads(orders)
+ if orders:
+ dic_orders[proc_id] = orders
+
+ lst_proc_end_col = []
+ for proc in orig_graph_param.array_formval:
+ proc_id = proc.proc_id
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ proc_order = dic_orders.get(proc_id) or {}
+ order = proc_order.get(str(col_id)) or 999
+ lst_proc_end_col.append((proc_id, col_id, col_name, order))
+
+ if not reorder:
+ return lst_proc_end_col
+
+ return sorted(lst_proc_end_col, key=lambda x: x[-1])
+
+
+@log_execution_time()
+def gen_plotdata(orig_graph_param: DicParam, dic_data, chart_infos=None, original_graph_configs=None, reorder=True):
+ # re-order proc-sensors to show to UI
+ lst_proc_end_col = order_end_proc_sensor(orig_graph_param, reorder)
+
+ plotdatas = []
+ array_formval = []
+ for proc_id, col_id, col_name, _ in lst_proc_end_col:
+ array_y = dic_data.get(proc_id, {}).get(col_id, [])
+ array_x = dic_data.get(proc_id, {}).get(Cycle.time.key, [])
+ plotdata = {ARRAY_Y: array_y, ARRAY_X: array_x, END_PROC_ID: proc_id, END_COL_ID: col_id,
+ END_COL_NAME: col_name}
+
+ plotdatas.append(plotdata)
+
+ array_formval.append({
+ END_PROC: proc_id,
+ GET02_VALS_SELECT: col_id
+ })
+
+ # add chart info
+ if chart_infos:
+ set_chart_infos_to_plotdata(col_id, chart_infos, original_graph_configs, plotdata)
+
+ return array_formval, plotdatas
+
+
+@log_execution_time()
+def gen_plotdata_fpp(orig_graph_param: DicParam, dic_data, chart_infos=None, original_graph_configs=None,
+ dic_cycle_ids=None, reorder=True):
+ # re-order proc-sensors to show to UI
+ lst_proc_end_col = order_end_proc_sensor(orig_graph_param, reorder)
+
+ plotdatas = []
+ array_formval = []
+ dic_proc_name = gen_dict_procs([proc_id for proc_id, *_ in lst_proc_end_col])
+ for proc_id, col_id, col_name, _ in lst_proc_end_col:
+ if proc_id not in dic_data or col_id not in dic_data.get(proc_id):
+ continue
+
+ y_list = dic_data.get(proc_id, {}).get(col_id) or [[]]
+ array_x = dic_data.get(proc_id, {}).get(Cycle.time.key, [])
+ ranks = dic_data[proc_id].get(RANK_COL, {}).get(col_id)
+ if not isinstance(y_list, (list, tuple)):
+ y_list = [y_list]
+
+ cate_names = dic_data.get(proc_id, {}).get(CAT_EXP_BOX, {}).get(col_id)
+ none_idxs = dic_data.get(proc_id, {}).get(NONE_IDXS, {}).get(col_id)
+ for idx, array_y in enumerate(y_list):
+ if orig_graph_param.common.cat_exp and not array_y:
+ continue
+
+ plotdata = {ARRAY_Y: array_y, ARRAY_X: array_x, END_PROC_ID: proc_id,
+ END_PROC_NAME: dic_proc_name[proc_id].name,
+ END_COL_ID: col_id,
+ END_COL_NAME: col_name}
+
+ if cate_names:
+ plotdata.update({CAT_EXP_BOX: cate_names[idx]})
+
+ if none_idxs:
+ plotdata.update({NONE_IDXS: none_idxs[idx]})
+
+ if dic_cycle_ids:
+ plotdata.update({CYCLE_IDS: dic_cycle_ids.get(proc_id, {}).get(col_id, [])})
+
+ if ranks:
+ plotdata.update({RANK_COL: ranks[idx]})
+
+ plotdatas.append(plotdata)
+
+ array_formval.append({
+ END_PROC: proc_id,
+ GET02_VALS_SELECT: col_id
+ })
+
+ # add chart info
+ if chart_infos:
+ set_chart_infos_to_plotdata(col_id, chart_infos, original_graph_configs, plotdata)
+
+ return array_formval, plotdatas
+
+
+def set_chart_infos_to_plotdata(col_id, chart_infos, original_graph_configs, plotdata):
+ """
+ set chart config
+ :param col_id:
+ :param chart_infos:
+ :param original_graph_configs:
+ :param plotdata:
+ :return:
+ """
+ if chart_infos is None:
+ chart_infos = {}
+
+ if original_graph_configs is None:
+ original_graph_configs = {}
+
+ chart_info = []
+ original_graph_config = []
+ for proc_id, dic_col in chart_infos.items():
+ if col_id in dic_col:
+ chart_info = dic_col[col_id]
+ original_graph_config = original_graph_configs[proc_id][col_id]
+ break
+
+ plotdata[CHART_INFOS] = chart_info
+ plotdata[CHART_INFOS_ORG] = original_graph_config
+
+
+@log_execution_time()
+def gen_category_data(dic_proc_cfgs: Dict[int, CfgProcess], graph_param: DicParam, dic_data,
+ dic_org_cates=None):
+ plotdatas = []
+ cate_procs: List[CategoryProc] = graph_param.common.cate_procs
+ if graph_param.common.cat_exp:
+ dic_cates = dic_data.get(CATEGORY_DATA) or dic_data
+ else:
+ dic_cates = dic_data
+
+ for proc in cate_procs:
+ proc_id = proc.proc_id
+ dic_proc = dic_cates.get(proc_id)
+ if dic_proc is None:
+ continue
+
+ proc_cfg = dic_proc_cfgs[proc_id]
+
+ for col_id, column_name, col_show_name in zip(proc.col_ids, proc.col_names, proc.col_show_names):
+ data = dic_proc.get(col_id)
+ if not data:
+ continue
+
+ if isinstance(data[0], (list, tuple)):
+ array_y = data[0]
+ else:
+ array_y = data
+
+ cate_summary = None
+ if dic_org_cates:
+ cate_summary = dic_org_cates[proc_id].get(col_id) if dic_org_cates.get(proc_id) else None
+
+ plotdata = dict(proc_name=proc_id, proc_master_name=proc_cfg.name, column_name=column_name,
+ column_master_name=col_show_name, data=array_y, summary=cate_summary, column_id=col_id)
+ plotdatas.append(plotdata)
+
+ return plotdatas
+
+
+@log_execution_time()
+def clear_all_keyword(dic_param):
+ """ clear [All] keyword in selectbox
+
+ Arguments:
+ dic_param {json} -- [params from client]
+ """
+ dic_common = dic_param[COMMON]
+ cate_procs = dic_common.get(CATE_PROCS, [])
+ dic_formval = dic_param[ARRAY_FORMVAL]
+ for idx in range(len(dic_formval)):
+ select_vals = dic_formval[idx][GET02_VALS_SELECT]
+ if isinstance(select_vals, (list, tuple)):
+ dic_formval[idx][GET02_VALS_SELECT] = [val for val in select_vals if val not in [SELECT_ALL, NO_FILTER]]
+ else:
+ dic_formval[idx][GET02_VALS_SELECT] = [select_vals]
+
+ for idx in range(len(cate_procs)):
+ select_vals = cate_procs[idx][GET02_CATE_SELECT]
+ if isinstance(select_vals, (list, tuple)):
+ cate_procs[idx][GET02_CATE_SELECT] = [val for val in select_vals if val not in [SELECT_ALL, NO_FILTER]]
+ else:
+ cate_procs[idx][GET02_CATE_SELECT] = [select_vals]
+
+ # The NO_FILTER keyword is needed to decide whether to filter, so it cannot be removed here.
+ for cond in dic_common[COND_PROCS]:
+ for key, value in cond.items():
+ if isinstance(value, (list, tuple)):
+ vals = value
+ else:
+ vals = [value]
+
+ if NO_FILTER in vals:
+ continue
+
+ cond[key] = [val for val in vals if not val == SELECT_ALL]
+
+
+@log_execution_time()
+def update_outlier_flg(proc_id, cycle_ids, is_outlier):
+ """update outlier to t_cycle table
+
+ Arguments:
+ cycle_ids {[type]} -- [description]
+ is_outlier {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+
+ # get global_ids linked to target cycles
+ cycle_cls = find_cycle_class(proc_id)
+ cycle_recs = cycle_cls.get_cycles_by_ids(cycle_ids)
+ if not cycle_recs:
+ return True
+
+ global_ids = []
+ for rec in cycle_recs:
+ if rec.global_id:
+ global_ids.append(rec.global_id)
+ else:
+ rec.is_outlier = is_outlier
+
+ target_global_ids = GlobalRelation.get_all_relations_by_globals(global_ids, set_done_globals=set())
+
+ # update outlier for linked global ids
+ # TODO: fix front end
+ cycle_cls.update_outlier_by_global_ids(list(target_global_ids), is_outlier)
+
+ db.session.commit()
+ return True
+
+
+@log_execution_time()
+def get_serials(trace, proc_name):
+ return [s.split()[0] for s in trace.hist2_yaml.get_serial_col(proc_name) if s]
+
+
+@log_execution_time()
+def get_date_col(trace, proc_name):
+ date_col = trace.hist2_yaml.get_date_col(proc_name)
+ date_col = date_col.split()[0]
+ return date_col
+
+
+def gen_new_dic_param(dic_param, dic_non_sensor, start_proc_first=False):
+ pass
+
+
+def get_non_sensor_cols(dic_proc_cfgs: Dict[int, CfgProcess], graph_param: DicParam):
+ """get non sensor headers
+
+ Arguments:
+ trace {[type]} -- [description]
+ dic_param {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ dic_header = {}
+
+ for proc in graph_param.array_formval:
+ proc_id = proc.proc_id
+ proc_cfg = dic_proc_cfgs[proc_id]
+ serials = proc_cfg.get_serials()
+ date_col = proc_cfg.get_date_col()
+ cols = serials + [date_col]
+ dic_header[proc_id] = cols
+
+ # start proc
+ proc_id = graph_param.common.start_proc
+ if not dic_header.get(proc_id):
+ proc_cfg = dic_proc_cfgs[proc_id]
+ serials = proc_cfg.get_serials()
+ date_col = proc_cfg.get_date_col()
+ cols = serials + [date_col]
+ dic_header[proc_id] = cols
+
+ return dic_header
+
+
+def get_cate_var(graph_param: DicParam):
+ cate_procs = graph_param.common.cate_procs
+ if cate_procs:
+ return {ele[CATE_PROC]: ele[GET02_CATE_SELECT] for ele in cate_procs if
+ ele.get(CATE_PROC) and ele.get(GET02_CATE_SELECT)}
+
+ return None
+
+
+def gen_relate_ids(row):
+ """
+ gen start proc relate ids
+ """
+
+ relate_ids = []
+ if row.global_id:
+ relate_ids.append(row.global_id)
+ if row.relate_id:
+ relate_ids.append(row.relate_id)
+
+ return relate_ids
+
+
+@log_execution_time()
+@request_timeout_handling()
+def make_irregular_data_none(dic_param):
+ use_list = [YType.NORMAL.value, YType.OUTLIER.value, YType.NEG_OUTLIER.value]
+ none_list = [float('inf'), float('-inf')]
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+ for num, plotdata in enumerate(array_plotdata):
+ array_y = plotdata.get(ARRAY_Y) or []
+ array_y_type = plotdata.get(ARRAY_Y_TYPE) or []
+
+ if array_y_type: # use y_type to check for irregular data
+ # TODO : save data as {(from,to): value} is better
+ # array_plotdata[num][ARRAY_Y] = [None if array_y_type[idx] not in {YType.NORMAL.value, YType.OUTLIER.value,
+ # YType.NEG_OUTLIER.value} else e for idx, e in enumerate(array_y)]
+ df = pd.DataFrame({ARRAY_Y: array_y, ARRAY_Y_TYPE: array_y_type})
+ df[ARRAY_Y] = np.where(df[ARRAY_Y_TYPE].isin(use_list), df[ARRAY_Y], None)
+ else: # or use value to check for irregular data directly
+ # array_plotdata[num][ARRAY_Y] = [None if e == float('inf') or e == float('-inf') else e for e in array_y]
+ df = pd.DataFrame({ARRAY_Y: array_y})
+ df[ARRAY_Y] = np.where(df[ARRAY_Y].isin(none_list), None, df[ARRAY_Y])
+
+ array_plotdata[num][ARRAY_Y] = df[ARRAY_Y].to_list()
+
+ return dic_param
+
+
+def get_min_max_of_all_chart_infos(chart_infos):
+ vals = [chart.get(Y_MIN) for chart in chart_infos if chart.get(Y_MIN) is not None]
+ vals += [chart.get(Y_MAX) for chart in chart_infos if chart.get(Y_MAX) is not None]
+ y_min = None
+ y_max = None
+ if vals:
+ y_min = min(vals)
+ y_max = max(vals)
+
+ return y_min, y_max
+
+
+def get_threshold_min_max_chartinfo(chart_infos):
+ vals = [chart.get(THRESH_LOW) for chart in chart_infos if chart.get(THRESH_LOW) is not None]
+ vals += [chart.get(THRESH_HIGH) for chart in chart_infos if chart.get(THRESH_HIGH) is not None]
+
+ y_min = None
+ y_max = None
+ if vals:
+ y_min = min(vals)
+ y_max = max(vals)
+
+ return y_min, y_max
+
+
+def calc_upper_lower_range(array_y: Series):
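+ # IQR-based outlier range: [Q1 - 2.5*IQR, Q3 + 2.5*IQR], with fallbacks when the IQR is zero or the range collapses to a point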
+ arr = array_y[array_y.notnull()]
+ arr = arr[~arr.isin([float('inf'), float('-inf')])]
+ # arr = [e for e in arr if e not in {None, float('inf'), float('-inf')} and not pd.isna(e)]
+ if not len(arr):
+ return None, None
+
+ q1, q3 = quantile(arr, [0.25, 0.75], interpolation='midpoint')
+ iqr = q3 - q1
+ if iqr:
+ lower_range = q1 - 2.5 * iqr
+ upper_range = q3 + 2.5 * iqr
+ else:
+ lower_range = 0.9 * min(arr)
+ upper_range = 1.1 * max(arr)
+ if lower_range == upper_range:
+ lower_range -= 1
+ upper_range += 1
+
+ return float(lower_range), float(upper_range)
+
+
+def save_proc_sensor_order_to_db(orders):
+ try:
+ for proc_code, new_orders in orders.items():
+ CfgConstant.create_or_merge_by_type(const_type=CfgConstantType.TS_CARD_ORDER.name,
+ const_name=proc_code,
+ const_value=new_orders)
+ except Exception as ex:
+ traceback.print_exc()
+ logger.error(ex)
+
+
+def get_proc_ids_in_dic_param(graph_param: DicParam):
+ """
+ get all process ids referenced by the graph parameter (start, condition, category and end processes)
+ :param graph_param:
+ :return:
+ """
+ procs = set()
+ procs.add(graph_param.common.start_proc)
+ for proc in graph_param.common.cond_procs:
+ procs.add(proc.proc_id)
+
+ for proc in graph_param.common.cate_procs:
+ procs.add(proc.proc_id)
+
+ for proc in graph_param.array_formval:
+ procs.add(proc.proc_id)
+
+ return list(procs)
+
+
+def get_procs_in_dic_param(graph_param: DicParam):
+ """
+ get CfgProcess objects for all processes referenced by the graph parameter
+ :param graph_param:
+ :return:
+ """
+ proc_ids = get_proc_ids_in_dic_param(graph_param)
+ dic_procs = gen_dict_procs(proc_ids)
+ return dic_procs
+
+
+def gen_dict_procs(proc_ids):
+ return {proc.id: proc for proc in CfgProcess.get_procs(proc_ids)}
+
+
+def get_end_procs_in_dic_param(graph_param: DicParam):
+ """
+ get CfgProcess objects for the end processes only
+ :param graph_param:
+ :return:
+ """
+ procs = set()
+ for proc in graph_param.array_formval:
+ procs.add(proc.proc_id)
+
+ return {proc.id: proc for proc in CfgProcess.get_procs(procs)}
+
+
+def gen_blank_df():
+ data = {Cycle.time.key: [], Cycle.is_outlier.key: []}
+ return pd.DataFrame(data)
+
+
+def fx(v): return pd.NA
+
+
+@log_execution_time()
+def apply_coef_text(df: DataFrame, graph_param: DicParam, dic_proc_cfgs: dict):
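+ # for TEXT end columns configured with a REGEX coefficient, this is intended to blank out (set to NA via fx) values that do not start with the configured pattern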
+ for proc_id, proc_cfg in dic_proc_cfgs.items():
+ if graph_param.is_end_proc(proc_id):
+ end_col_ids = graph_param.get_end_cols(proc_id) or []
+ end_cols: List[CfgProcessColumn] = proc_cfg.get_cols(end_col_ids) or []
+ for end_col in end_cols:
+ if DataType[end_col.data_type] is DataType.TEXT \
+ and end_col.coef is not None and end_col.operator == Operator.REGEX.value:
+ col_label = gen_sql_label(end_col.id, end_col.column_name)
+ if col_label in df.columns:
+ df[col_label] = df[col_label].astype('object').str \
+ .replace('^(?!{})'.format(end_col.coef), fx, regex=True)
+ return df
+
+
+def get_filter_detail_ids(proc_ids, column_ids):
+ """
+ get filter detail ids to check whether the configured filters match the dataset of the graph
+ :param proc_ids:
+ :param column_ids:
+ :return:
+ """
+ not_exact_matches = []
+ dic_col_filter_details = defaultdict(list)
+ cfg_filters = CfgFilter.get_by_proc_n_col_ids(proc_ids, column_ids)
+ for cfg_filter in cfg_filters:
+ cfg_filter: CfgFilter
+ cfg_column: CfgProcessColumn = cfg_filter.column
+ df_col_name = gen_sql_label(cfg_column.id, cfg_column.column_name)
+ for cfg_detail in cfg_filter.filter_details:
+ if cfg_detail.filter_function == FilterFunc.MATCHES.name:
+ dic_col_filter_details[df_col_name].append((cfg_detail.id, cfg_detail.filter_condition))
+ else:
+ not_exact_matches.append(cfg_detail.id)
+
+ return dic_col_filter_details, not_exact_matches
+
+
+@log_execution_time()
+def gen_dic_uniq_value_from_df(df, col_names):
+ dic_col_values = {}
+ for col in col_names:
+ if col in df.columns:
+ vals = set(df[col])
+ vals = [str(val) for val in vals]
+ dic_col_values[col] = set(vals)
+
+ return dic_col_values
+
+
+def check_filter_detail_match_graph_data(dic_col_filter_details, dic_col_values):
+ matched_filter_ids = []
+ unmatched_filter_ids = []
+ for col_name, filter_details in dic_col_filter_details.items():
+ vals = dic_col_values.get(col_name, [])
+ for filter_detail_id, filter_condition in filter_details:
+ if filter_condition in vals:
+ matched_filter_ids.append(filter_detail_id)
+ else:
+ unmatched_filter_ids.append(filter_detail_id)
+
+ return matched_filter_ids, unmatched_filter_ids
+
+
+@log_execution_time()
+def main_check_filter_detail_match_graph_data(graph_param: DicParam, df: DataFrame):
+ cond_proc_ids = [cond.proc_id for cond in graph_param.common.cond_procs]
+ cond_col_ids = graph_param.get_all_end_col_ids()
+ dic_col_filter_details, not_exact_match_filter_ids = get_filter_detail_ids(cond_proc_ids, cond_col_ids)
+ dic_col_values = gen_dic_uniq_value_from_df(df, dic_col_filter_details)
+ matched_filter_ids, unmatched_filter_ids = check_filter_detail_match_graph_data(dic_col_filter_details,
+ dic_col_values)
+
+ return matched_filter_ids, unmatched_filter_ids, not_exact_match_filter_ids
+
+
+def reduce_data(df_orig: DataFrame, graph_param, dic_str_cols):
+ """
+ make data for thin mode
+ :param df_orig:
+ :param graph_param:
+ :param dic_str_cols:
+ :return:
+ """
+
+ # end cols
+ dic_end_col_names = {}
+ rank_cols = []
+ for proc in graph_param.array_formval:
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ sql_label = gen_sql_label(col_id, col_name)
+ cols_in_df = [col for col in df_orig.columns if col.startswith(sql_label)]
+ target_col_info = dic_str_cols.get(sql_label)
+ if target_col_info:
+ rank_cols += cols_in_df
+ else:
+ for col_in_df in cols_in_df:
+ dic_end_col_names[col_in_df] = (proc.proc_id, col_id, col_name)
+
+ # category
+ dic_cate_names = {}
+ cat_exp_col = graph_param.common.cat_exp
+ for proc in graph_param.common.cate_procs:
+ for col_id, col_name in zip(proc.col_ids, proc.col_names):
+ if cat_exp_col:
+ sql_label = gen_sql_label(CATEGORY_DATA, col_id, col_name)
+ else:
+ sql_label = gen_sql_label(col_id, col_name)
+
+ if sql_label in df_orig.columns:
+ dic_cate_names[sql_label] = (proc.proc_id, col_id, col_name)
+
+ all_cols = list(
+ set([Cycle.time.key] + list(dic_end_col_names) + list(dic_cate_names) + rank_cols))
+ group_col = '__group_col__'
+ index_col = '__index_col__'
+ all_cols = [col for col in all_cols if col in df_orig.columns]
+ df = df_orig[all_cols]
+ x_option = graph_param.common.x_option or 'TIME'
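+ # bucket rows into at most THIN_DATA_CHUNK groups, either by elapsed time (TIME option) or by row index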
+ if x_option.upper() == 'TIME':
+ df[group_col] = pd.to_datetime(df[Cycle.time.key]).values.astype(float)
+ min_epoc_time = df[group_col].min()
+ max_epoc_time = df[group_col].max()
+ count_per_group = calc_data_per_group(min_epoc_time, max_epoc_time)
+ df[group_col] = (df[group_col] - min_epoc_time) // count_per_group
+ df[group_col] = df[group_col].astype(int)
+ else:
+ count_per_group = ceil(len(df) / THIN_DATA_CHUNK)
+ df[group_col] = df.index // count_per_group
+
+ # count element in one group
+ group_counts = df[group_col].value_counts().tolist()
+
+ df[index_col] = df.index
+ df.set_index(group_col, inplace=True)
+
+ # get category mode(most common)
+ df_blank = pd.DataFrame(index=range(THIN_DATA_CHUNK))
+ dfs = [df_blank]
+ str_cols = list(set(list(dic_cate_names) + rank_cols))
+ df_cates = None
+ if str_cols:
+ df_temp = df[str_cols]
+ df_temp = df_temp.groupby(group_col).agg(get_mode)
+
+ for col in str_cols:
+ if col not in df_temp.columns:
+ df_temp[col] = None
+
+ df_cates = pd.concat([df_blank, df_temp], axis=1)
+ if rank_cols:
+ dfs.append(df_cates[rank_cols])
+
+ cols = []
+ for sql_label, (proc_id, *_) in dic_end_col_names.items():
+ df_temp = df[[index_col, sql_label]].dropna()
+ df_temp = df_temp.sort_values([group_col, sql_label], ascending=[True, True])
+ df_temp.drop(sql_label, axis=1, inplace=True)
+ df_temp = df_temp.groupby(group_col).agg(get_min_median_max_pos)
+ df_temp = df_temp.rename(columns={index_col: sql_label})
+ if len(df_temp) == 0:
+ blank_vals = [None] * THIN_DATA_CHUNK
+ df_temp[sql_label] = blank_vals
+
+ dfs.append(df_temp)
+ cols.append(sql_label)
+
+ df_box = pd.concat(dfs, axis=1)
+
+ # add time
+ # start_tm = start_of_minute(graph_param.common.start_date, graph_param.common.start_time)
+ # end_tm = end_of_minute(graph_param.common.end_date, graph_param.common.end_time)
+ # times = pd.date_range(start=start_tm, end=end_tm, periods=THIN_DATA_CHUNK)
+ # df_box[Cycle.time.key] = times
+ # df_box[Cycle.time.key] = df_box[Cycle.time.key].astype('datetime64[s]')
+
+ # remove blanks
+ df_box.dropna(how="all", subset=cols + rank_cols, inplace=True)
+
+ # remove blank category
+ dic_cates = defaultdict(dict)
+ dic_org_cates = defaultdict(dict)
+ for sql_label, (proc_id, col_id, _) in dic_cate_names.items():
+ if df_cates is not None and sql_label in df_cates:
+ dic_cates[proc_id][col_id] = df_cates.loc[df_box.index, sql_label].tolist()
+ dic_org_cates[proc_id][col_id] = get_available_ratio(df[sql_label])
+
+ return df_box, dic_cates, dic_org_cates, group_counts
+
+
+def get_min_median_max_pos(df):
+ # last = len(df) - 1
+ last = df.size - 1
+ mid = last // 2
+ return df.iloc[[0, mid, last]].to_list()
+
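+# get_min_median_max_pos picks the first, middle and last entries of a group that is
+# already sorted by value, e.g. for pd.Series([10, 11, 12, 13, 14]) it returns [10, 12, 14].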
+
+def calc_data_per_group(min_val, max_val, box=THIN_DATA_CHUNK):
+ dif_val = max_val - min_val + 1
+ ele_per_box = dif_val / box
+ return ele_per_box
+
+
+def reduce_stepped_chart_data(array_y):
+ rows = [None] * len(array_y)
+
+ idx = 0
+ for key, vals in groupby(array_y):
+ rows[idx] = key
+ idx += len(list(vals))
+
+ return rows
+
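+# reduce_stepped_chart_data keeps only the first value of each run of equal values,
+# e.g. reduce_stepped_chart_data([1, 1, 2, 2, 2, 3]) -> [1, None, 2, None, None, 3].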
+
+@log_execution_time()
+def calc_raw_common_scale_y(plots, string_col_ids=None):
+ """
+ calculate y min max in common scale
+ :param plots:
+ :param string_col_ids:
+ :return:
+ """
+ y_commons = []
+ min_max_list = []
+ for plot in plots:
+ s = pd.Series(plot[ARRAY_Y])
+
+ s = s[s.notnull()]
+ if not len(s):
+ min_max_list.append((None, None))
+ continue
+
+ s = s.convert_dtypes()
+ s_without_inf = s[np.isfinite(s)]
+ # s_without_inf = remove_inf(s)
+
+ min_val = s_without_inf.min()
+ max_val = s_without_inf.max()
+ if pd.isna(min_val):
+ min_val = None
+ if pd.isna(max_val):
+ max_val = None
+
+ min_max_list.append((min_val, max_val))
+
+ if string_col_ids and plot[END_COL_ID] in string_col_ids:
+ continue
+
+ if min_val:
+ y_commons.append(min_val)
+
+ if max_val:
+ y_commons.append(max_val)
+
+ all_graph_min = None
+ all_graph_max = None
+ if y_commons:
+ all_graph_min = min(y_commons)
+ all_graph_max = max(y_commons)
+
+ return min_max_list, all_graph_min, all_graph_max
+
+
+def calc_stp_raw_common_scale_y(dic_param):
+ """
+ calculate y min max in common scale
+ :param dic_param:
+ :return:
+ """
+ y_commons = []
+ min_max_list = []
+ for k, plots in dic_param[ARRAY_PLOTDATA].items():
+ for plot in plots:
+ s = pd.Series(plot[ARRAY_Y])
+
+ s = s[s.notnull()]
+ if not len(s):
+ min_max_list.append((None, None))
+ continue
+
+ s_without_inf = s[np.isfinite(s)]
+ min_val = s_without_inf.min()
+ max_val = s_without_inf.max()
+ min_max_list.append((min_val, max_val))
+
+ # if plot[END_COL_ID] in dic_param[STRING_COL_IDS]:
+ # continue
+
+ if min_val:
+ y_commons.append(min_val)
+
+ if max_val:
+ y_commons.append(max_val)
+
+ all_graph_min = None
+ all_graph_max = None
+ if y_commons:
+ all_graph_min = min(y_commons)
+ all_graph_max = max(y_commons)
+
+ return min_max_list, all_graph_min, all_graph_max
+
+
+def detect_abnormal_data(series_x, series_y, none_idxs=None):
+ nones = none_idxs
+ if none_idxs is None:
+ nones = series_y[series_y.isnull()].index.tolist()
+
+ return {UNLINKED_IDXS: series_x[series_x.isnull()].index.tolist(),
+ NONE_IDXS: nones,
+ INF_IDXS: series_y[series_y == float('inf')].index.tolist(),
+ NEG_INF_IDXS: series_y[series_y == float('-inf')].index.tolist()}
+
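+# detect_abnormal_data example: with series_x = pd.Series([1.0, None]) and
+# series_y = pd.Series([float('inf'), 2.0]) it returns
+# {'unlinked_idxs': [1], 'none_idxs': [], 'inf_idxs': [0], 'neg_inf_idxs': []}
+# (keys shown here by their constant values).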
+
+def calc_auto_scale_y(plotdata, series_y):
+ notna_series_y = series_y[series_y.notna()]
+ if not len(notna_series_y):
+ return {Y_MIN: 0, Y_MAX: 1, LOWER_OUTLIER_IDXS: [], UPPER_OUTLIER_IDXS: []}
+
+ summaries = plotdata.get(SUMMARIES) or []
+ lower_range = None
+ upper_range = None
+ for summary in summaries:
+ dic_non_param = summary.get('non_parametric')
+ if dic_non_param:
+ lower_range = dic_non_param.get('lower_range_org')
+ upper_range = dic_non_param.get('upper_range_org')
+
+ if lower_range is None:
+ p25, p75 = np.percentile(notna_series_y, [25, 75])
+ iqr = p75 - p25
+ lower_range = p25 - 2.5 * iqr
+ upper_range = p75 + 2.5 * iqr
+
+ lower_outlier_idxs = series_y[series_y < lower_range].index.tolist() if lower_range is not None else []
+ upper_outlier_idxs = series_y[series_y > upper_range].index.tolist() if upper_range is not None else []
+
+ if lower_range and series_y.min() >= 0:
+ lower_range = max(0, lower_range)
+
+ if upper_range and series_y.max() <= 0:
+ upper_range = min(0, upper_range)
+
+ lower_range, upper_range = extend_min_max(lower_range, upper_range)
+
+ return {Y_MIN: lower_range, Y_MAX: upper_range,
+ LOWER_OUTLIER_IDXS: lower_outlier_idxs,
+ UPPER_OUTLIER_IDXS: upper_outlier_idxs}
+
+
+def calc_setting_scale_y(plotdata, series_y):
+ # calculate upper/lower limit
+ chart_infos = plotdata.get(CHART_INFOS)
+ dic_scale_auto = plotdata.get(SCALE_AUTO, {})
+ if not chart_infos:
+ if dic_scale_auto:
+ return dic_scale_auto
+
+ return {Y_MIN: series_y.min(), Y_MAX: series_y.max()}
+
+ ymin, ymax = get_min_max_of_all_chart_infos(chart_infos)
+ if ymin is None and ymax is None:
+ if dic_scale_auto:
+ return dic_scale_auto
+
+ return {Y_MIN: series_y.min(), Y_MAX: series_y.max()}
+
+ if ymin is None:
+ ymin = dic_scale_auto.get(Y_MIN)
+ lower_outlier_idxs = dic_scale_auto.get(LOWER_OUTLIER_IDXS)
+ else:
+ lower_outlier_idxs = series_y[series_y < ymin].index.tolist()
+
+ if ymax is None:
+ ymax = dic_scale_auto.get(Y_MAX)
+ upper_outlier_idxs = dic_scale_auto.get(UPPER_OUTLIER_IDXS)
+ else:
+ upper_outlier_idxs = series_y[series_y > ymax].index.tolist()
+
+ ymin, ymax = extend_min_max(ymin, ymax)
+
+ return {Y_MIN: ymin, Y_MAX: ymax, LOWER_OUTLIER_IDXS: lower_outlier_idxs, UPPER_OUTLIER_IDXS: upper_outlier_idxs}
+
+
+def calc_threshold_scale_y(plotdata, series_y):
+ # calculate upper/lower limit
+ chart_infos = plotdata.get(CHART_INFOS)
+ dic_scale_auto = plotdata.get(SCALE_AUTO, {})
+ if not chart_infos:
+ return dic_scale_auto
+
+ thresh_low, thresh_high = get_threshold_min_max_chartinfo(chart_infos)
+ if thresh_low is None and thresh_high is None:
+ return dic_scale_auto
+
+ if thresh_low is None:
+ thresh_low = dic_scale_auto.get(THRESH_LOW)
+ lower_outlier_idxs = dic_scale_auto.get(LOWER_OUTLIER_IDXS)
+ else:
+ lower_outlier_idxs = series_y[series_y < thresh_low].index.tolist()
+
+ if thresh_high is None:
+ thresh_high = dic_scale_auto.get(THRESH_HIGH)
+ upper_outlier_idxs = dic_scale_auto.get(UPPER_OUTLIER_IDXS)
+ else:
+ upper_outlier_idxs = series_y[series_y > thresh_high].index.tolist()
+
+ thresh_low, thresh_high = extend_min_max(thresh_low, thresh_high)
+
+ return {Y_MIN: thresh_low, Y_MAX: thresh_high,
+ LOWER_OUTLIER_IDXS: lower_outlier_idxs,
+ UPPER_OUTLIER_IDXS: upper_outlier_idxs}
+
+
+@log_execution_time()
+def calc_scale_info(array_plotdata, min_max_list, all_graph_min, all_graph_max, string_col_ids=None, has_val_idxs=None):
+ dic_datetime_cols = {}
+ for idx, plotdata in enumerate(array_plotdata):
+ # datetime column
+ proc_id = plotdata.get(END_PROC_ID)
+ col_id = plotdata.get(END_COL_ID)
+ if proc_id and proc_id not in dic_datetime_cols:
+ dic_datetime_cols[proc_id] = {cfg_col.id: cfg_col for cfg_col in
+ CfgProcessColumn.get_by_data_type(proc_id, DataType.DATETIME)}
+
+ is_datetime_col = col_id in dic_datetime_cols.get(proc_id, {})
+
+ y_min = min_max_list[idx][0]
+ y_min = all_graph_min if y_min is None else y_min
+ y_max = min_max_list[idx][1]
+ y_max = all_graph_max if y_max is None else y_max
+
+ y_min, y_max = extend_min_max(y_min, y_max)
+ all_graph_min, all_graph_max = extend_min_max(all_graph_min, all_graph_max)
+
+ array_y = plotdata.get(ARRAY_Y)
+ array_x = plotdata.get(ARRAY_X)
+ if (not len(array_y)) or (not len(array_x)) or (string_col_ids and plotdata[END_COL_ID] in string_col_ids):
+ dic_base_scale = {Y_MIN: y_min, Y_MAX: y_max, LOWER_OUTLIER_IDXS: [], UPPER_OUTLIER_IDXS: []}
+ plotdata[SCALE_AUTO] = dic_base_scale
+ plotdata[SCALE_SETTING] = dic_base_scale
+ plotdata[SCALE_THRESHOLD] = dic_base_scale
+ plotdata[SCALE_COMMON] = dic_base_scale
+ plotdata[SCALE_FULL] = dic_base_scale
+ continue
+
+ series_x = pd.Series(array_x)
+ series_y = pd.Series(array_y)
+
+ # don't do with all blank idxs
+ if has_val_idxs is not None:
+ series_x = series_x.loc[has_val_idxs]
+ series_y = series_y.loc[has_val_idxs]
+
+ none_idxs = plotdata.get(NONE_IDXS)
+ dic_abnormal_data = detect_abnormal_data(series_x, series_y, none_idxs)
+ plotdata.update(dic_abnormal_data)
+ for _idxs in dic_abnormal_data.values():
+ if _idxs:
+ # array_y[_idxs] = None
+ for _idx in _idxs:
+ array_y[_idx] = None
+
+ series_y = pd.Series(array_y)
+ if has_val_idxs is not None:
+ series_y = series_y.loc[has_val_idxs]
+
+ plotdata[SCALE_AUTO] = calc_auto_scale_y(plotdata, series_y)
+ if is_datetime_col:
+ plotdata[SCALE_AUTO][Y_MIN] = y_min
+
+ plotdata[SCALE_SETTING] = calc_setting_scale_y(plotdata, series_y)
+ plotdata[SCALE_THRESHOLD] = calc_threshold_scale_y(plotdata, series_y)
+ plotdata[SCALE_COMMON] = {Y_MIN: all_graph_min, Y_MAX: all_graph_max, LOWER_OUTLIER_IDXS: [],
+ UPPER_OUTLIER_IDXS: []}
+ plotdata[SCALE_FULL] = {Y_MIN: y_min, Y_MAX: y_max, LOWER_OUTLIER_IDXS: [], UPPER_OUTLIER_IDXS: []}
+ if is_datetime_col:
+ plotdata[SCALE_FULL][Y_MIN] = 0
+
+ return True
+
+
+@log_execution_time()
+@request_timeout_handling()
+def gen_kde_data_trace_data(dic_param, full_arrays=None):
+ array_plotdata = dic_param.get(ARRAY_PLOTDATA)
+ for num, plotdata in enumerate(array_plotdata):
+ full_array_y = full_arrays[num] if full_arrays else None
+ kde_list = calculate_kde_trace_data(plotdata, full_array_y=full_array_y)
+ plotdata[SCALE_SETTING][KDE_DATA], plotdata[SCALE_COMMON][KDE_DATA], plotdata[SCALE_THRESHOLD][KDE_DATA], \
+ plotdata[SCALE_AUTO][KDE_DATA], plotdata[SCALE_FULL][KDE_DATA] = kde_list
+
+ return dic_param
+
+
+def extend_min_max(y_min, y_max):
+ if y_max is None:
+ y_max = y_min * 1.2 if y_min is not None else 1
+
+ if y_min is None:
+ y_min = y_max * 0.8
+
+ if y_min == y_max:
+ y_min *= 0.8
+ y_max *= 1.2
+
+ if y_min == 0 and y_max == 0:
+ y_min = -1
+ y_max = 1
+
+ return y_min, y_max
+
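+# extend_min_max pads degenerate ranges so charts always get a visible y span,
+# e.g. extend_min_max(5, 5) -> (4.0, 6.0), extend_min_max(0, 0) -> (-1, 1),
+# extend_min_max(None, None) -> (0.8, 1).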
+
+def copy_dic_param_to_thin_dic_param(dic_param, dic_thin_param):
+ ignore_keys = [COMMON, ARRAY_FORMVAL, ARRAY_PLOTDATA, CYCLE_IDS, SERIAL_DATA]
+ for key, val in dic_param.items():
+ if key in ignore_keys:
+ continue
+
+ dic_thin_param[key] = dic_param[key]
+
+ return True
+
+
+def gen_thin_df_cat_exp(dic_param):
+ df = pd.DataFrame()
+ dic_end_cols = {}
+
+ # df['index'] = list(range(len(dic_param[TIMES])))
+ df[Cycle.id.key] = dic_param[CYCLE_IDS]
+ df[Cycle.time.key] = dic_param[TIMES]
+
+ for plot in dic_param[ARRAY_PLOTDATA] or []:
+ time_sql_label = f'time_{plot[END_PROC_ID]}'
+ if time_sql_label not in df.columns:
+ df[time_sql_label] = plot[ARRAY_X]
+
+ sql_label = gen_sql_label(plot[END_COL_ID], plot[END_COL_NAME], plot.get(CAT_EXP_BOX))
+ dic_end_cols[sql_label] = (plot[END_COL_ID], plot[END_COL_NAME], plot.get(CAT_EXP_BOX))
+ df[sql_label] = plot[ARRAY_Y]
+
+ # serials
+ add_serials_to_thin_df(dic_param, df)
+
+ # categories
+ add_categories_to_thin_df(dic_param, df)
+
+ return df, dic_end_cols
+
+
+def get_available_ratio(series: Series):
+ n_total = series.size
+ # na_counts = series.isnull().sum().sum()
+ non_na_counts = len(series.dropna())
+ na_counts = n_total - non_na_counts
+ na_percentage = signify_digit(100 * na_counts / n_total) if n_total else 0
+ non_na_percentage = signify_digit(100 - na_percentage)
+ return dict(nTotal=n_total, nonNACounts=non_na_counts, nonNAPercentage=non_na_percentage)
+
+
+@log_execution_time()
+def add_serials_to_thin_df(dic_param, df):
+ for plot in dic_param[ARRAY_PLOTDATA] or []:
+ proc_id = plot[END_PROC_ID]
+ sql_label = gen_sql_label(SERIAL_DATA, proc_id)
+ if sql_label in df.columns:
+ continue
+
+ serials = dic_param.get(SERIAL_DATA, {}).get(proc_id)
+ if serials is not None and len(serials):
+ df[sql_label] = serials
+
+
+@log_execution_time()
+def add_categories_to_thin_df(dic_param, df):
+ for dic_cate in dic_param.get(CATEGORY_DATA) or []:
+ col_id = dic_cate.get('column_id')
+ col_name = dic_cate.get('column_name')
+ data = dic_cate.get('data')
+ sql_label = gen_sql_label(CATEGORY_DATA, col_id, col_name)
+ if sql_label in df.columns:
+ continue
+
+ df[sql_label] = data
+
+
+@log_execution_time()
+def get_serials_and_date_col(graph_param: DicParam, dic_proc_cfgs):
+ dic_output = {}
+ for proc in graph_param.array_formval:
+ proc_cfg = dic_proc_cfgs[proc.proc_id]
+ serial_cols = proc_cfg.get_serials(column_name_only=False)
+ datetime_col = proc_cfg.get_date_col(column_name_only=False)
+ dic_output[proc.proc_id] = (datetime_col, serial_cols)
+
+ return dic_output
+
+
+@log_execution_time()
+def gen_cat_exp_names(cat_exps):
+ if cat_exps:
+ return [gen_sql_label(CAT_EXP_BOX, level) for level, _ in enumerate(cat_exps, 1)]
+
+ return None
+
+
+@log_execution_time()
+def gen_unique_data(df, dic_proc_cfgs, col_ids):
+ if not col_ids:
+ return {}
+
+ dic_unique_cate = {}
+ dic_cols = {col.id: col for col in CfgProcessColumn.get_by_ids(col_ids)}
+ for col_id in col_ids:
+ cfg_col = dic_cols.get(col_id)
+ col_name = cfg_col.column_name
+ master_name = cfg_col.name
+ proc_id = cfg_col.process_id
+
+ sql_label = gen_sql_label(RANK_COL, col_id, col_name)
+ if sql_label not in df.columns:
+ sql_label = gen_sql_label(col_id, col_name)
+
+ unique_data = []
+ if sql_label in df.columns:
+ # unique_data = df[sql_label].drop_duplicates().dropna().tolist()
+ s = df[sql_label].value_counts()
+ unique_data = s.index.tolist()
+
+ cfg_proc_name = dic_proc_cfgs[proc_id].name
+ unique_data = {'proc_name': proc_id, 'proc_master_name': cfg_proc_name, 'column_name': col_name,
+ 'column_master_name': master_name, 'column_id': col_id,
+ UNIQUE_CATEGORIES: unique_data}
+
+ dic_unique_cate[col_id] = unique_data
+
+ return dic_unique_cate
+
+
+@log_execution_time()
+def filter_df(df, dic_filter):
+ if not dic_filter:
+ return df
+
+ dic_names = {col.id: col for col in CfgProcessColumn.get_by_ids(dic_filter)}
+ for col_id, vals in dic_filter.items():
+ if not vals:
+ continue
+
+ if not isinstance(vals, (list, tuple)):
+ vals = [vals]
+
+ if NO_FILTER in vals:
+ continue
+
+ vals = [val for val in vals if val not in [SELECT_ALL, NO_FILTER]]
+ if not vals:
+ continue
+
+ cfg_col = dic_names.get(col_id, None)
+ if cfg_col is None:
+ continue
+
+ sql_label = gen_sql_label(RANK_COL, col_id, cfg_col.column_name)
+ if sql_label not in df.columns:
+ sql_label = gen_sql_label(col_id, cfg_col.column_name)
+
+ dtype_name = cfg_col.data_type
+ if dtype_name == DataType.INTEGER.name:
+ vals = [int(val) for val in vals]
+ elif dtype_name == DataType.REAL.name:
+ vals = [float(val) for val in vals]
+ elif dtype_name == DataType.TEXT.name:
+ vals = [str(val) for val in vals]
+ df[sql_label] = df[sql_label].astype(str)
+
+ df = df[df[sql_label].isin(vals)]
+
+ return df
+
+
+def gen_category_info(dic_param, dic_ranks):
+ for plot in dic_param[ARRAY_PLOTDATA]:
+ if plot[END_COL_ID] in dic_ranks:
+ # category variable
+ p_array_y = pd.Series(plot[ARRAY_Y]).dropna().tolist()
+ cat_size = 0
+ if len(p_array_y):
+ cat_size = np.unique(p_array_y).size
+ plot[CAT_TOTAL] = cat_size
+ plot[IS_CAT_LIMITED] = cat_size >= MAX_CATEGORY_SHOW
+ return dic_param
+
+
+@memoize()
+def get_cfg_proc_col_info(col_ids):
+ dic_cols = {cfg_col.id: cfg_col for cfg_col in CfgProcessColumn.get_by_ids(col_ids)}
+ proc_ids = list(set(cfg_col.process_id for cfg_col in dic_cols.values()))
+ dic_procs = {cfg_proc.id: cfg_proc for cfg_proc in CfgProcess.get_procs(proc_ids)}
+
+ return dic_procs, dic_cols
diff --git a/histview2/categorical_plot/__init__.py b/histview2/categorical_plot/__init__.py
new file mode 100644
index 0000000..8a22657
--- /dev/null
+++ b/histview2/categorical_plot/__init__.py
@@ -0,0 +1,4 @@
+
+def create_module(app, **kwargs):
+ from histview2.categorical_plot.controllers import categorical_plot_blueprint
+ app.register_blueprint(categorical_plot_blueprint)
diff --git a/histview2/categorical_plot/controllers.py b/histview2/categorical_plot/controllers.py
new file mode 100644
index 0000000..f4244ac
--- /dev/null
+++ b/histview2/categorical_plot/controllers.py
@@ -0,0 +1,27 @@
+import os
+
+from flask import Blueprint, render_template
+
+from histview2 import dic_yaml_config_file
+from histview2.common.constants import *
+from histview2.common.services.form_env import get_common_config_data
+
+categorical_plot_blueprint = Blueprint(
+ 'categorical_plot',
+ __name__,
+ template_folder=os.path.join('..', 'templates', 'categorical_plot'),
+ static_folder=os.path.join('..', 'static', 'categorical_plot'),
+ url_prefix='/histview2'
+)
+
+# Local parameter settings
+local_params = {
+ "config_yaml_fname_proc": dic_yaml_config_file[YAML_CONFIG_PROC],
+ "config_yaml_fname_histview2": dic_yaml_config_file[YAML_CONFIG_HISTVIEW2],
+ "config_yaml_fname_db": dic_yaml_config_file[YAML_CONFIG_DB]}
+
+
+@categorical_plot_blueprint.route('/stp')
+def categorical_plot():
+ output_dict = get_common_config_data()
+ return render_template("categorical_plot.html", **output_dict)
diff --git a/histview2/categorical_plot/services/__init__.py b/histview2/categorical_plot/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/categorical_plot/services/utils.py b/histview2/categorical_plot/services/utils.py
new file mode 100644
index 0000000..422901d
--- /dev/null
+++ b/histview2/categorical_plot/services/utils.py
@@ -0,0 +1,30 @@
+from histview2.common.constants import *
+from histview2.common.yaml_utils import YamlConfig, DBConfigYaml, ProcConfigYaml
+
+
+def get_valid_procs(procs):
+ """
+ Get valid processes to show in the start-point (起点) select box
+ Arguments:
+ procs {dict}
+
+ Returns:
+ dict -- valid processes for the start point (起点)
+ """
+ proc_list = {}
+ filter_info = procs['filter_info']
+ proc_master = procs['proc_master']
+
+ for key, value in filter_info.items():
+ if len(filter_info[key]) > 0:
+ filter_time = False
+ for item in filter_info[key]:
+ if item.get('item_info', {}) \
+ and item['item_info'].get('type') \
+ and item['item_info']['type'] == 'datehour-range':
+ filter_time = True
+ if filter_time:
+ proc_list.update({key: proc_master[key]})
+
+ return proc_list
+
diff --git a/histview2/co_occurrence/__init__.py b/histview2/co_occurrence/__init__.py
new file mode 100644
index 0000000..473b2c4
--- /dev/null
+++ b/histview2/co_occurrence/__init__.py
@@ -0,0 +1,4 @@
+
+def create_module(app, **kwargs):
+ from .controllers import co_occurrence_blueprint
+ app.register_blueprint(co_occurrence_blueprint)
diff --git a/histview2/co_occurrence/controllers.py b/histview2/co_occurrence/controllers.py
new file mode 100644
index 0000000..e585994
--- /dev/null
+++ b/histview2/co_occurrence/controllers.py
@@ -0,0 +1,19 @@
+import os
+
+from flask import Blueprint, render_template
+
+from histview2.common.services.form_env import get_common_config_data
+
+co_occurrence_blueprint = Blueprint(
+ 'co_occurrence',
+ __name__,
+ template_folder=os.path.join('..', 'templates', 'co_occurrence'),
+ static_folder=os.path.join('..', 'static', 'co_occurrence'),
+ url_prefix='/histview2'
+)
+
+
+@co_occurrence_blueprint.route('/cog')
+def index():
+ output_dict = get_common_config_data(get_visualization_config=False)
+ return render_template("co_occurrence_csv.html", **output_dict)
diff --git a/histview2/co_occurrence/services/__init__.py b/histview2/co_occurrence/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/co_occurrence/services/utils.py b/histview2/co_occurrence/services/utils.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/common/__init__.py b/histview2/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/histview2/common/backup_db.py b/histview2/common/backup_db.py
new file mode 100644
index 0000000..c620b0c
--- /dev/null
+++ b/histview2/common/backup_db.py
@@ -0,0 +1,67 @@
+import os
+from datetime import datetime
+
+from apscheduler.triggers.cron import CronTrigger
+
+from histview2 import log_execution, dic_config, SQLITE_CONFIG_DIR, APP_DB_FILE, UNIVERSAL_DB_FILE, make_dir
+from histview2.common.common_utils import copy_file
+from histview2.common.logger import log_execution_time
+from histview2.common.scheduler import scheduler_app_context, JobType, add_job_to_scheduler
+from histview2.setting_module.services.background_process import send_processing_info
+from histview2.common.constants import AUTO_BACKUP
+from histview2.common.yaml_utils import BasicConfigYaml
+
+BACKUP_TRANS_DATA_INTERVAL_DAY = 7
+CONFIG_BK_PATH = os.path.join(dic_config[SQLITE_CONFIG_DIR], 'backup')
+
+
+@log_execution()
+def backup_config_db():
+ db_file_path = dic_config[APP_DB_FILE]
+ make_dir(CONFIG_BK_PATH)
+ copy_file(db_file_path, CONFIG_BK_PATH)
+
+
+@log_execution()
+def backup_universal_db():
+ db_file_path = dic_config[UNIVERSAL_DB_FILE]
+ today = datetime.now()
+ created_date = datetime.fromtimestamp(os.path.getctime(db_file_path))
+ if (today - created_date).days < BACKUP_TRANS_DATA_INTERVAL_DAY:
+ return
+
+ make_dir(CONFIG_BK_PATH)
+ copy_file(dic_config[UNIVERSAL_DB_FILE], CONFIG_BK_PATH)
+
+
+@log_execution()
+def backup_dbs():
+ basic_config_yaml = BasicConfigYaml()
+ auto_backup = BasicConfigYaml.get_node(basic_config_yaml.dic_config, ['info', AUTO_BACKUP], False)
+ yield 0
+ backup_config_db()
+ if auto_backup:
+ yield 50
+ backup_universal_db()
+ yield 100
+
+
+@scheduler_app_context
+def backup_dbs_job(_job_id, _job_name, *args, **kwargs):
+ """ backup config database
+
+ Keyword Arguments:
+ _job_id {[type]} -- [description] (default: {None})
+ _job_name {[type]} -- [description] (default: {None})
+ """
+ gen = backup_dbs(*args, **kwargs)
+ send_processing_info(gen, JobType.BACKUP_DATABASE)
+
+
+@log_execution_time()
+def add_backup_dbs_job(is_run_now=None):
+ job_name = JobType.BACKUP_DATABASE.name
+ trigger = CronTrigger(hour=3, minute=0, second=0)
+ kwargs = dict(_job_id=job_name, _job_name=job_name)
+ add_job_to_scheduler(job_id=job_name, job_name=job_name, trigger=trigger, import_func=backup_dbs_job,
+ run_now=is_run_now, dic_import_param=kwargs)
diff --git a/histview2/common/check_available_port.py b/histview2/common/check_available_port.py
new file mode 100644
index 0000000..14e0bc1
--- /dev/null
+++ b/histview2/common/check_available_port.py
@@ -0,0 +1,29 @@
+import socket as s
+import sys
+
+from loguru import logger
+
+from histview2.common.common_utils import parse_int_value
+from histview2.common.logger import log_execution
+
+
+@log_execution()
+def check_available_port(port):
+ port = parse_int_value(port)
+ sock = s.socket(s.AF_INET, s.SOCK_STREAM)
+ sock.settimeout(1)
+ try:
+ result = sock.connect(('127.0.0.1', port))
+ if not result:
+ logger.info("Port %d is not available right now, please check and run again." % (port))
+ input("Please type any key to close application\n")
+ if input:
+ sys.exit()
+ except (s.timeout, s.gaierror) as ex:
+ logger.error("Checking port availability timeout!", ex)
+ # logger.exception(ex)
+ except Exception as ex:
+ logger.error("Checking port availability error!", ex)
+ # logger.exception(ex)
+ finally:
+ sock.close()
diff --git a/histview2/common/clean_old_data.py b/histview2/common/clean_old_data.py
new file mode 100644
index 0000000..a687d22
--- /dev/null
+++ b/histview2/common/clean_old_data.py
@@ -0,0 +1,185 @@
+import datetime as dt
+import os
+import shutil
+import sys
+
+from apscheduler.triggers import interval
+from pytz import utc
+
+from histview2 import check_exist
+from histview2.common.logger import log_execution_time, log_execution
+from histview2.common.scheduler import scheduler_app_context, JobType, scheduler
+from histview2.script.hide_exe_root_folder import heartbeat_bundle_folder
+from histview2.setting_module.services.background_process import send_processing_info
+
+
+@scheduler_app_context
+def clean_old_data_job(_job_id=None, _job_name=None, *args, **kwargs):
+ """ scheduler job to delete process from db
+
+ Keyword Arguments:
+ _job_id {[type]} -- [description] (default: {None})
+ _job_name {[type]} -- [description] (default: {None})
+ """
+ gen = clean_old_files(*args, **kwargs)
+ send_processing_info(gen, JobType.CLEAN_DATA, is_check_disk=False)
+
+
+@log_execution_time()
+def clean_old_files(folder=None, num_day_ago=30):
+ """ Delete old files in a folder
+ Arguments:
+ prefix {[type]} -- [file prefix]
+ postfix {[type]} -- [file postfix]
+
+ Keyword Arguments:
+ db_id {[type]} -- [description] (default: {None})
+ Yields:
+ [type] -- [description]
+ """
+ percent = 0
+ yield percent
+
+ cleanup_unused_exe_folder()
+
+ if not folder:
+ yield 100
+ return
+
+ files = get_files_of_last_n_days(folder, num_day_ago=num_day_ago, subdirectory=True)
+ percent_step = round(100 / (len(files) + 1))
+ for file in files:
+ try:
+ os.remove(file)
+ except Exception:
+ pass
+ percent = percent + percent_step
+ yield percent
+
+ yield 100
+
+
+@log_execution_time()
+def get_files_of_last_n_days(directory, num_day_ago=30, subdirectory=False, extension=None):
+ """get file in folder
+
+ Arguments:
+ directory {[type]} -- [description]
+ num_days {int} -- [description] (default: {1})
+ subdirectory {int} -- [description]
+ extension {string} -- [use created_time or modified_time] (default: {False})
+
+ Keyword Arguments:
+
+ Returns:
+ output_files [type] -- [files to be cleaned]
+ """
+ now = dt.datetime.utcnow()
+ n_days_ago = now - dt.timedelta(days=num_day_ago)
+
+ output_files = []
+ if not directory:
+ return output_files
+
+ root_folder = True
+ for root, dirs, files in os.walk(directory):
+ # limit depth of recursion
+ if subdirectory is False and root_folder is False:
+ break
+
+ # list files
+ for file in files:
+ if extension and not file.endswith(extension):
+ continue
+
+ abs_file_name = os.path.join(root, file)
+ st = os.stat(abs_file_name)
+
+ time_of_file = dt.datetime.fromtimestamp(st.st_mtime)
+ if time_of_file <= n_days_ago:
+ output_files.append(abs_file_name)
+
+ root_folder = False
+
+ return output_files
+
+
+@log_execution_time()
+def get_folders_of_last_n_days(directory, num_day_ago=30, startswith=None):
+ """get file in folder
+
+ Arguments:
+ directory {[type]} -- [description]
+ num_days {int} -- [description] (default: {1})
+ startswith {string} -- [use created_time or modified_time] (default: {False})
+
+ Keyword Arguments:
+
+ Returns:
+ output_files [type] -- [files to be cleaned]
+ """
+ now = dt.datetime.utcnow()
+ n_days_ago = now - dt.timedelta(days=num_day_ago)
+
+ output_folders = []
+ if not directory:
+ return output_folders
+
+ root_folder = True
+ for root, folders, files in os.walk(directory):
+ # list dirs
+ for folder in folders:
+ if startswith and not folder.startswith(startswith):
+ continue
+
+ abs_folder_name = os.path.join(root, folder)
+ st = os.stat(abs_folder_name)
+
+ time_of_file = dt.datetime.fromtimestamp(st.st_mtime)
+ if time_of_file <= n_days_ago:
+ output_folders.append(abs_folder_name)
+
+ break
+
+ return output_folders
+
+
+@log_execution()
+def cleanup_unused_exe_folder():
+ if not getattr(sys, 'frozen', False):
+ return
+
+ current_app_folder, current_file, file_ext = heartbeat_bundle_folder()
+ root_folder = os.path.dirname(current_app_folder)
+ folders = get_folders_of_last_n_days(root_folder, num_day_ago=2, startswith='_MEI')
+ files = get_files_of_last_n_days(root_folder, num_day_ago=2, extension=file_ext)
+ for folder_path in folders:
+ file_path = folder_path + file_ext
+ if file_path in files or not check_exist(file_path):
+ try:
+ os.remove(file_path)
+ shutil.rmtree(folder_path)
+ except Exception:
+ pass
+
+
+@log_execution()
+def run_clean_data_job(folder='.', num_day_ago=30, job_repeat_sec=86400, job_id=-1):
+ """ Trigger cleaning data job
+ :return:
+ """
+ clean_job_id = f'{JobType.CLEAN_DATA.name}'
+ interval_trigger = interval.IntervalTrigger(seconds=job_repeat_sec, timezone=utc)
+
+ scheduler.add_job(
+ clean_job_id, clean_old_data_job,
+ trigger=interval_trigger,
+ replace_existing=True,
+ next_run_time=dt.datetime.now().astimezone(utc),
+ kwargs=dict(
+ _job_id=clean_job_id,
+ _job_name=JobType.CLEAN_DATA.name,
+ folder=folder,
+ num_day_ago=num_day_ago,
+ )
+ )
diff --git a/histview2/common/common_utils.py b/histview2/common/common_utils.py
new file mode 100644
index 0000000..27a6fa7
--- /dev/null
+++ b/histview2/common/common_utils.py
@@ -0,0 +1,1055 @@
+import copy
+import csv
+import fnmatch
+import os
+import pickle
+import re
+import shutil
+import socket
+import sys
+from collections import OrderedDict
+from datetime import datetime, timedelta
+from itertools import islice, chain, permutations
+
+import chardet
+import numpy as np
+import pandas as pd
+import pyper
+# from charset_normalizer import detect
+from dateutil import parser
+from dateutil.relativedelta import relativedelta
+from flask import g
+from pandas import DataFrame
+
+from histview2.common.constants import AbsPath, DataType, YAML_AUTO_INCREMENT_COL, CsvDelimiter, R_PORTABLE, \
+ SQL_COL_PREFIX, FilterFunc, ENCODING_ASCII, ENCODING_UTF_8, FlaskGKey, LANGUAGES
+from histview2.common.logger import logger, log_execution_time
+from histview2.common.services.normalization import unicode_normalize_nfkc
+
+INCLUDES = ['*.csv', '*.tsv']
+DATE_FORMAT_STR = '%Y-%m-%dT%H:%M:%S.%fZ'
+DATE_FORMAT_QUERY = '%Y-%m-%dT%H:%M:%S.%f'
+DATE_FORMAT_STR_CSV = '%Y-%m-%d %H:%M:%S.%f'
+DATE_FORMAT_STR_CSV_FOLDER = '%Y%m%d'
+DATE_FORMAT_STR_FACTORY_DB = '%Y-%m-%d %H:%M:%S.%f'
+DATE_FORMAT_STR_ONLY_DIGIT = '%Y%m%d%H%M%S.%f'
+DATE_FORMAT = '%Y-%m-%d'
+TIME_FORMAT = '%H:%M'
+RL_DATETIME_FORMAT = '%Y-%m-%dT%H:%M'
+
+
+def get_current_timestamp(format_str=DATE_FORMAT_STR):
+ return datetime.utcnow().strftime(format_str)
+
+
+def parse_int_value(value):
+ """
+ Parse integral value from text or numeric data
+ :param value:
+ :return: parsed integral value.
+ """
+ if type(value) is str:
+ value = unicode_normalize_nfkc(value)
+ if value.isdigit():
+ return int(value)
+ elif type(value) is int:
+ return value
+
+ return None
+
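+# parse_int_value examples: parse_int_value('12') -> 12, parse_int_value(7) -> 7,
+# parse_int_value('1.5') -> None, parse_int_value(None) -> None.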
+
+def dict_deep_merge(source, destination):
+ """
+ Deep merge two dictionary to one.
+
+ >>> a = { 'first' : { 'all_rows' : { 'pass' : 'dog', 'number' : '1' } } }
+ >>> b = { 'first' : { 'all_rows' : { 'fail' : 'cat', 'number' : '5' } } }
+ >>> dict_deep_merge(b, a) == { 'first' : { 'all_rows' : { 'pass' : 'dog', 'fail' : 'cat', 'number' : '5' } } }
+ """
+ if source:
+ for key, value in source.items():
+ if isinstance(value, dict) and destination.get(key):
+ # get node or create one
+ node = destination.setdefault(key, {})
+ dict_deep_merge(value, node)
+ else:
+ destination[key] = copy.deepcopy(value)
+
+ return destination
+
+
+def convert_json_to_ordered_dict(json):
+ """
+ Deeply convert a normal dict to OrderedDict.
+ :param json: input json
+ :return: ordered json
+ """
+ if isinstance(json, dict):
+ ordered_json = OrderedDict(json)
+ try:
+ for key, value in ordered_json.items():
+ ordered_json[key] = convert_json_to_ordered_dict(value)
+ except AttributeError:
+ pass
+ return ordered_json
+
+ return json
+
+
+def get_columns_selected(histview_cfg, proc_cfg):
+ date_col = ''
+ # use separate list objects; a chained assignment would make them all share one list
+ result = []
+ serial_cols, alias_names, column_names, master_names, operators, coefs, data_types = [], [], [], [], [], [], []
+ check_columns = proc_cfg.get('checked-columns', {})
+
+ # Get date-column
+ if histview_cfg.get('date-column'):
+ date_col = re.sub(r'(["*\/\'\s]+)', '', re.split(' as ', histview_cfg['date-column'])[0])
+ # date_col = re.sub(r'(["*\/\'\s]+)', '', re.split(r'[-+*/]\d+', hvc['date-column'])[0])
+
+ # Get serial column
+ if histview_cfg.get('serial-column'):
+ serial_cols = list(map(lambda x: re.sub(r'(["*\/\'\s]+)', '', re.split(' as ', x)[0]),
+ histview_cfg['serial-column']))
+
+ # Get serial column
+ auto_increment_col = histview_cfg.get(YAML_AUTO_INCREMENT_COL)
+
+ # Get params from checked-columns
+ if check_columns:
+ alias_names = check_columns.get('alias-names', []) or []
+ column_names = check_columns.get('column-names', []) or []
+ master_names = check_columns.get('master-names', []) or []
+ operators = check_columns.get('operators', []) or []
+ coefs = check_columns.get('coefs', []) or []
+ data_types = check_columns.get('data-types', []) or []
+
+ # Merge params to result dict
+ for key, value in enumerate(column_names):
+ column_name = re.sub(r'(["*\/\'\s]+)', '', value)
+ alias = alias_names[key]
+ master_name = master_names[key]
+ operator = operators[key]
+ coef = coefs[key]
+ data_type = data_types[key]
+
+ is_datetime = value == date_col
+ is_serial = value in serial_cols
+ is_auto_increment = value == auto_increment_col
+
+ result.append({
+ 'master_name': master_name,
+ 'column_name': column_name,
+ 'alias': alias,
+ 'operator': operator,
+ 'coef': coef,
+ 'is_datetime': is_datetime,
+ 'is_serial': is_serial,
+ 'is_auto_increment': is_auto_increment,
+ 'data_type': data_type
+ })
+ return result
+
+
+def _excludes(root, folders):
+ fd = folders[:]
+ ex = []
+ for folder in fd:
+ try:
+ if datetime.strptime(folder, '%Y%m%d'):
+ ex.append(folder)
+ except Exception:
+ pass
+ ex.sort()
+
+ # keep the newest date folder; exclude the rest
+ if ex:
+ fd.remove(ex[-1])
+ return map(lambda d: os.path.join(root, d), fd)
+
+
+def _filter(paths, excludes):
+ matches = []
+ for path in paths:
+ append = None
+
+ for include in INCLUDES:
+ if os.path.isdir(path):
+ append = True
+ break
+
+ if fnmatch.fnmatch(path, include):
+ append = True
+ break
+
+ for exclude in excludes:
+ if os.path.isdir(path) and path == exclude:
+ append = False
+ break
+
+ if fnmatch.fnmatch(path, exclude):
+ append = False
+ break
+
+ if append:
+ matches.append(path)
+
+ return matches
+
+
+def get_latest_file(root_name):
+ try:
+ latest_files = get_files(root_name, depth_from=1, depth_to=100, extension=['csv', 'tsv'])
+ latest_files.sort()
+ return latest_files[-1].replace(os.sep, '/')
+ except Exception:
+ return ''
+
+
+def get_sorted_files(root_name):
+ try:
+ latest_files = get_files(root_name, depth_from=1, depth_to=100, extension=['csv', 'tsv'])
+ latest_files = [file_path.replace(os.sep, '/') for file_path in latest_files]
+ latest_files.sort(reverse=True)
+ return latest_files
+ except Exception:
+ return []
+
+
+def start_of_minute(start_date, start_tm, delimeter='T'):
+ if start_date is None or start_tm is None:
+ return None
+ if start_tm and len(start_tm) == 5:
+ start_tm = start_tm + ':00'
+
+ return '{}{}{}'.format(start_date.replace('/', '-'), delimeter, start_tm)
+
+
+def end_of_minute(start_date, start_tm, delimeter='T'):
+ if start_date is None or start_tm is None:
+ return None
+ if start_tm and len(start_tm) == 5:
+ start_tm = start_tm + ':00'
+
+ start_tm = start_tm[:8] + '.999999'
+
+ return '{}{}{}'.format(start_date.replace('/', '-'), delimeter, start_tm)
+
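+# Examples: start_of_minute('2021/06/01', '07:00') -> '2021-06-01T07:00:00'
+# end_of_minute('2021/06/01', '07:00') -> '2021-06-01T07:00:00.999999'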
+
+def clear_special_char(target):
+ if not target:
+ return target
+
+ if isinstance(target, (list, tuple)):
+ return [_clear_special_char(s) for s in target]
+ elif isinstance(target, str):
+ return _clear_special_char(target)
+
+
+def _clear_special_char(target_str):
+ if not target_str:
+ return target_str
+
+ output = target_str
+ for s in ('\"', '\'', '*'):
+ output = output.replace(s, '')
+
+ return output
+
+
+def universal_db_exists():
+ universal_db = os.path.join(os.getcwd(), 'instance', 'universal.sqlite3')
+ # if getattr(sys, 'frozen', False): # Use for EXE file only
+ instance_folder = os.path.join(os.getcwd(), 'instance')
+ if not os.path.exists(instance_folder):
+ os.makedirs(instance_folder)
+ return os.path.exists(universal_db)
+
+
+# convert time before save to database YYYY-mm-DDTHH:MM:SS.NNNNNNZ
+def convert_time(time=None, format_str=DATE_FORMAT_STR, return_string=True, only_milisecond=False):
+ if not time:
+ time = datetime.utcnow()
+ elif isinstance(time, str):
+ time = parser.parse(time)
+
+ if return_string:
+ time = time.strftime(format_str)
+ if only_milisecond:
+ time = time[:-3]
+ return time
+
+
+def fast_convert_time(time, format_str=DATE_FORMAT_STR):
+ return parser.parse(time).strftime(format_str)
+
+
+def add_miliseconds(time=None, milis=0):
+ """add miliseconds
+
+ Keyword Arguments:
+ time {[type]} -- [description] (default: {datetime.now()})
+ days {int} -- [description] (default: {0})
+
+ Returns:
+ [type] -- [description]
+ """
+ if not time:
+ time = datetime.utcnow()
+
+ return time + timedelta(milliseconds=milis)
+
+
+def add_seconds(time=None, seconds=0):
+ """add seconds
+
+ Keyword Arguments:
+ time {[type]} -- [description] (default: {datetime.now()})
+ days {int} -- [description] (default: {0})
+
+ Returns:
+ [type] -- [description]
+ """
+ if not time:
+ time = datetime.utcnow()
+
+ return time + timedelta(seconds=seconds)
+
+
+def add_days(time=None, days=0):
+ """add days
+
+ Keyword Arguments:
+ time {[type]} -- [description] (default: {datetime.utcnow()})
+ days {int} -- [description] (default: {0})
+
+ Returns:
+ [type] -- [description]
+ """
+ if not time:
+ time = datetime.utcnow()
+
+ return time + timedelta(days)
+
+
+def add_years(time=None, years=0):
+ """add years
+
+ Keyword Arguments:
+ time {[type]} -- [description] (default: {datetime.utcnow()})
+ years {int} -- [description] (default: {0})
+
+ Returns:
+ [type] -- [description]
+ """
+ if not time:
+ time = datetime.utcnow()
+
+ return time + relativedelta(years=years)
+
+
+def get_files(directory, depth_from=1, depth_to=2, extension=[''], file_name_only=False):
+ """get files in folder
+
+ Arguments:
+ directory {[type]} -- [description]
+
+ Keyword Arguments:
+ depth_limit {int} -- [description] (default: 2)
+ extension {list} -- [description] (default: [''])
+
+ Returns:
+ [type] -- [description]
+ """
+ output_files = []
+ if not directory:
+ return output_files
+
+ if not check_exist(directory):
+ raise Exception('Folder not found!')
+
+ root_depth = directory.count(os.path.sep)
+ for root, dirs, files in os.walk(directory):
+ # limit depth of recursion
+ current_depth = root.count(os.path.sep) + 1
+ # assume that directory depth is 1, sub folders are 2, 3, ...
+ # default is to just read children sub folder, depth from 1 to 2
+ if (current_depth < root_depth + depth_from) or (current_depth > root_depth + depth_to):
+ continue
+
+ # list files with extension
+ for file in files:
+ # keep only files that match one of the given extensions
+ if any([file.endswith(ext) for ext in extension]):
+ if file_name_only:
+ output_files.append(file)
+ else:
+ output_files.append(os.path.join(root, file))
+
+ return output_files
+
+
+def add_double_quotes(instr: str):
+ """add double quotes to a string (column name)
+
+ Arguments:
+ instr {str} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ if not instr:
+ return instr
+
+ instr = instr.strip('\"')
+
+ return f'"{instr}"'
+
+
+def guess_data_types(instr: str):
+ """ guess data type of all kind of databases to 4 type (INTEGER,REAL,DATETIME,TEXT)
+
+ Arguments:
+ instr {str} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ dates = ['date', 'time']
+ ints = ['int', 'bit', r'num.*\([^,]+$', r'num.*\(.*,\ *0']
+ reals = ['num', 'real', 'float', 'double', 'long', 'dec', 'money']
+
+ instr = instr.lower()
+ for data_type in dates:
+ if re.search(data_type, instr):
+ return DataType.DATETIME
+
+ for data_type in ints:
+ if re.search(data_type, instr):
+ return DataType.INTEGER
+
+ for data_type in reals:
+ if re.search(data_type, instr):
+ return DataType.REAL
+ return DataType.TEXT
+
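+# guess_data_types examples: 'timestamp' -> DataType.DATETIME, 'NUMBER(10)' -> DataType.INTEGER,
+# 'float8' -> DataType.REAL, 'varchar' -> DataType.TEXT.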
+
+def resource_path(*relative_path, level=None):
+ """ make absolute path
+
+ Keyword Arguments:
+ level {AbsPath} -- [None: auto, AbsPath.SHOW: user can see folder, AbsPath.HIDE: user can not see folder (MEIPASS)] (default: {None})
+
+ Returns:
+ [type] -- [description]
+ """
+
+ show_path = os.getcwd()
+ hide_path = getattr(sys, '_MEIPASS', show_path)
+
+ if level is AbsPath.SHOW:
+ basedir = show_path
+ elif level is AbsPath.HIDE:
+ basedir = hide_path
+ else:
+ if getattr(sys, 'frozen', False):
+ basedir = hide_path
+ else:
+ basedir = show_path
+
+ return os.path.join(basedir, *relative_path)
+
+
+class RUtils:
+ def __init__(self, package, *args, **kwargs):
+ # r instance
+ r_portable_env = os.environ.get('R-PORTABLE')
+ if r_portable_env:
+ self.r_exe = os.path.join(r_portable_env, 'bin', 'R.exe')
+ self.r_library = os.path.join(r_portable_env, 'library')
+ else:
+ self.r_exe = resource_path(R_PORTABLE, 'bin', 'R.exe', level=AbsPath.SHOW)
+ self.r_library = resource_path(R_PORTABLE, 'library', level=AbsPath.SHOW)
+
+ # specify R-Portable execution
+ self.r = pyper.R(RCMD=self.r_exe, *args, **kwargs)
+ logger.info(self.r('Sys.getenv()'))
+
+ # specify R-Portable library
+ self.r(f'.libPaths(c(""))')
+ self.r(f'.libPaths(c("{self.r_library}"))')
+ logger.info(self.r('.libPaths()'))
+
+ # R package folder
+ self.source = resource_path('histview2', 'script', 'r_scripts', package)
+ self.r(f'source("{self.source}")')
+
+ def __call__(self, func, *args, _number_of_recheck_r_output=1000, **kwargs) -> object:
+ """ call funtion with parameters
+
+ Arguments:
+ func {[string]} -- R function name
+
+ Keyword Arguments:
+ _number_of_recheck_r_output {int} -- [R function may take time to return output, Python must check many
+ times to get the final output] (default: {1000})
+
+ Returns:
+ [type] -- [R output]
+ """
+ args_prefix = 'args__'
+ kwargs_prefix = 'kwargs__'
+ output_var = 'output__'
+
+ r_args = []
+ for i, val in enumerate(args):
+ para = f'{args_prefix}{i}'
+ self.r.assign(para, val)
+ r_args.append(para)
+
+ r_kwargs = []
+ for i, (key, val) in enumerate(kwargs.items()):
+ para = f'{kwargs_prefix}{i}'
+ self.r.assign(para, val)
+ r_kwargs.append(f'{key}={para}')
+
+ final_args = ','.join(chain(r_args, r_kwargs))
+
+ self.r(f'{output_var} = {func}({final_args})')
+
+ # wait for R return an output
+ output = None
+ while (not output) and _number_of_recheck_r_output:
+ output = self.r.get(output_var)
+ _number_of_recheck_r_output -= 1
+
+ print(_number_of_recheck_r_output, output)
+ return output
+
+
+def get_file_size(f_name):
+ """get file size
+
+ Arguments:
+ f_name {[type]} -- [description]
+
+ Returns:
+ [type] -- [description]
+ """
+ return os.path.getsize(f_name)
+
+
+def write_csv_file(data, file_path, headers, delimiter='\t'):
+ """save csv, tsv file
+
+ Arguments:
+ data {[type]} -- [description]
+ file_path {[type]} -- [description]
+ headers {[type]} -- [description]
+
+ Keyword Arguments:
+ delimiter {str} -- [description] (default: {'\t'})
+ """
+ make_dir_from_file_path(file_path)
+
+ with open(file_path, 'w', newline='') as f:
+ writer = csv.writer(f, delimiter=delimiter)
+ for row in chain([headers], data):
+ writer.writerow(row)
+
+
+def create_file_path(prefix, suffix='.tsv', dt=None):
+ f_name = f'{prefix}_{convert_time(dt, format_str=DATE_FORMAT_STR_ONLY_DIGIT)}{suffix}'
+ file_path = resource_path(get_data_path(abs=False), f_name, level=AbsPath.SHOW)
+
+ if not os.path.exists(os.path.dirname(file_path)):
+ os.makedirs(os.path.dirname(file_path))
+
+ return file_path
+
+
+def copy_file(source, target):
+ """copy file
+
+ Arguments:
+ source {[type]} -- [description]
+ target {[type]} -- [description]
+ """
+ if not check_exist(source):
+ return False
+
+ shutil.copy2(source, target)
+ return True
+
+
+def path_split_all(path):
+ """split all part of a path
+
+ Arguments:
+ path {[string]} -- [full path]
+ """
+ allparts = []
+ while True:
+ parts = os.path.split(path)
+ if parts[0] == path: # sentinel for absolute paths
+ allparts.insert(0, parts[0])
+ break
+ elif parts[1] == path: # sentinel for relative paths
+ allparts.insert(0, parts[1])
+ break
+ else:
+ path = parts[0]
+ allparts.insert(0, parts[1])
+
+ return allparts
+
+
+def get_data_path(abs=True):
+ """get data folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'data'
+ return resource_path(folder_name, level=AbsPath.SHOW) if abs else folder_name
+
+
+# TODO : delete
+def get_import_error_path(abs=True):
+ """get import folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'error'
+ return resource_path(folder_name, level=AbsPath.SHOW) if abs else folder_name
+
+
+def get_error_trace_path(abs=True):
+ """get import folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = ['error', 'trace']
+ return resource_path(*folder_name, level=AbsPath.SHOW) if abs else folder_name
+
+
+def get_error_import_path(abs=True):
+ """get import folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = ['error', 'import']
+ return resource_path(*folder_name, level=AbsPath.SHOW) if abs else folder_name
+
+
+def get_about_md_file():
+ """
+ get about markdown file path
+ """
+ folder_name = 'about'
+ file_name = 'Endroll.md'
+ return resource_path(folder_name, file_name, level=AbsPath.SHOW)
+
+
+def get_terms_of_use_md_file(current_locale):
+ """
+ get about markdown file path
+ """
+ folder_name = 'about'
+ if current_locale.language == 'ja':
+ file_name = 'terms_of_use_jp.md'
+ else:
+ file_name = 'terms_of_use_en.md'
+ return resource_path(folder_name, file_name, level=AbsPath.SHOW)
+
+
+def get_wrapr_path():
+ """get wrap r folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_names = ['histview2', 'script', 'r_scripts', 'wrapr']
+ return resource_path(*folder_names, level=AbsPath.HIDE)
+
+
+def get_temp_path():
+ """get temporaty folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'temp'
+ data_folder = get_data_path()
+ return resource_path(data_folder, folder_name, level=AbsPath.SHOW)
+
+
+def get_cache_path():
+ """get cache folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'cache'
+ data_folder = get_data_path()
+ return resource_path(data_folder, folder_name, level=AbsPath.SHOW)
+
+
+def get_export_path():
+ """get cache folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'export'
+ data_folder = get_data_path()
+ return resource_path(data_folder, folder_name, level=AbsPath.SHOW)
+
+
+def get_view_path():
+ """get view/image folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'view'
+ data_folder = get_data_path()
+ return resource_path(data_folder, folder_name, level=AbsPath.SHOW)
+
+
+def get_etl_path(*sub_paths):
+ """get etl output folder path
+
+ Returns:
+ [type] -- [description]
+ """
+ folder_name = 'etl'
+ data_folder = get_data_path()
+
+ return resource_path(data_folder, folder_name, *sub_paths, level=AbsPath.SHOW)
+
+
+def df_chunks(df, size):
+ """Yield n-sized chunks from dataframe."""
+ if df.columns.size == 0:
+ return df
+
+ for i in range(0, df.shape[0], size):
+ yield df.iloc[i:i + size]
+
+
+def chunks(lst, size):
+ """Yield n-sized chunks from lst."""
+ for i in range(0, len(lst), size):
+ yield lst[i:i + size]
+
+
+def chunks_dic(data, size):
+ it = iter(data)
+ for i in range(0, len(data), size):
+ yield {k: data[k] for k in islice(it, size)}
+
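+# Examples: list(chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]
+# list(chunks_dic({'a': 1, 'b': 2, 'c': 3}, 2)) -> [{'a': 1, 'b': 2}, {'c': 3}]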
+
+def get_base_dir(path, is_file=True):
+ dir_name = os.path.dirname(path) if is_file else path
+ return os.path.basename(dir_name)
+
+
+def make_dir(dir_path):
+ if not os.path.exists(dir_path):
+ os.makedirs(dir_path)
+
+ return True
+
+
+def get_basename(path):
+ return os.path.basename(path)
+
+
+def get_datetime_without_timezone(time):
+ """ remove timezone string from time
+
+ Args:
+ time ([type]): [description]
+ """
+ regex_str = r"(\d{4}[-\/]\d{2}[-\/]\d{2}\s\d{2}:\d{2}:\d{2}(.\d{1,6})?)"
+ res = re.search(regex_str, time)
+ if res:
+ return convert_time(res.group())
+
+ return None
+
+
+def strip_quote_csv(instr):
+ return str(instr).strip("'").strip()
+
+
+def strip_all_quote(instr):
+ return str(instr).strip("'").strip('"')
+
+
+def strip_space(instr):
+ return str(instr).strip()
+
+
+def get_csv_delimiter(csv_delimiter):
+ """
+ return tab , comma depend on input data
+ :param csv_delimiter:
+ :return:
+ """
+ if csv_delimiter is None:
+ return CsvDelimiter.CSV.value
+
+ if isinstance(csv_delimiter, CsvDelimiter):
+ return csv_delimiter.value
+
+ return CsvDelimiter[csv_delimiter].value
+
+
+def sql_regexp(expr, item):
+ reg = re.compile(expr, re.I)
+ return reg.search(str(item)) is not None
+
+
+def set_sqlite_params(conn):
+ cursor = conn.cursor()
+ cursor.execute('PRAGMA journal_mode=WAL')
+ cursor.execute('PRAGMA synchronous=NORMAL')
+ cursor.execute('PRAGMA cache_size=10000')
+ cursor.execute('PRAGMA mmap_size=30000000000')
+ cursor.execute('PRAGMA temp_store=MEMORY')
+ cursor.close()
+
+
+def gen_sql_label(*args):
+ return SQL_COL_PREFIX + SQL_COL_PREFIX.join([str(name).strip(SQL_COL_PREFIX) for name in args if name is not None])
+
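+# gen_sql_label builds the dataframe/SQL column labels used throughout the services,
+# e.g. gen_sql_label(12, 'temperature') -> '__12__temperature' (None parts are skipped).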
+
+def gen_sql_like_value(val, func: FilterFunc, position=None):
+ if func is FilterFunc.STARTSWITH:
+ return [val + '%']
+
+ if func is FilterFunc.ENDSWITH:
+ return ['%' + val]
+
+ if func is FilterFunc.CONTAINS:
+ return ['%' + val + '%']
+
+ if func is FilterFunc.SUBSTRING:
+ if position is None:
+ position = 1
+ return ['_' * max(0, (position - 1)) + val + '%']
+
+ if func is FilterFunc.AND_SEARCH:
+ conds = set(val.split())
+ cond_patterns = list(permutations(conds)) # temp solution, conditions are not so many
+ return ['%' + '%'.join(cond_pattern) + '%' for cond_pattern in cond_patterns]
+
+ if func is FilterFunc.OR_SEARCH:
+ return ['%' + cond + '%' for cond in val.split()]
+
+ return []
+
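+# gen_sql_like_value examples: gen_sql_like_value('AB', FilterFunc.STARTSWITH) -> ['AB%'],
+# gen_sql_like_value('AB', FilterFunc.SUBSTRING, position=3) -> ['__AB%'],
+# gen_sql_like_value('x y', FilterFunc.AND_SEARCH) -> ['%x%y%', '%y%x%'] (permutation order may vary).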
+
+def gen_python_regex(val, func: FilterFunc, position=None):
+ if func is FilterFunc.MATCHES:
+ return '^' + val + '$'
+
+ if func is FilterFunc.STARTSWITH:
+ return '^' + val
+
+ if func is FilterFunc.ENDSWITH:
+ return val + '$'
+
+ if func is FilterFunc.CONTAINS:
+ return '.*' + val + '.*'
+
+ if func is FilterFunc.SUBSTRING:
+ if position is None:
+ position = 1
+ return '^' + '.' * max(0, (position - 1)) + val
+ return val
+
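+# gen_python_regex examples: gen_python_regex('AB', FilterFunc.MATCHES) -> '^AB$',
+# gen_python_regex('AB', FilterFunc.SUBSTRING, position=3) -> '^..AB'; unsupported
+# filter functions fall back to returning the value unchanged.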
+
+def make_dir_from_file_path(file_path):
+ dirname = os.path.dirname(file_path)
+ # make dir
+ if not os.path.exists(dirname):
+ os.makedirs(dirname)
+
+ return dirname
+
+
+def delete_file(file_path):
+ if os.path.exists(file_path):
+ os.remove(file_path)
+
+
+def rename_file(src, des):
+ if os.path.exists(src):
+ os.rename(src, des)
+
+
+def check_exist(file_path):
+ return os.path.exists(file_path)
+
+
+def any_not_none_in_dict(dict_input):
+ """
+ check any not None in a list of dictionary
+ :param dict_input: [{'a': None, 'b': None}, {'a': 1, 'b': 2}]
+ :return: boolean
+ """
+ return any(any(val is not None for val in v.values()) for v in dict_input)
+
+
+def calc_overflow_boundary(arr, remove_non_real=False):
+ if len(arr):
+ q1 = np.quantile(arr, 0.25, interpolation='midpoint')
+ q3 = np.quantile(arr, 0.75, interpolation='midpoint')
+ iqr = q3 - q1
+ if iqr:
+ lower_boundary = q1 - 4.5 * iqr
+ upper_boundary = q3 + 4.5 * iqr
+ return lower_boundary, upper_boundary
+ return None, None
+
+
+def reformat_dt_str(start_time, dt_format=DATE_FORMAT_QUERY):
+ if not start_time:
+ return start_time
+ dt = parser.parse(start_time)
+ return dt.strftime(dt_format)
+
+
+def as_list(param):
+ if type(param) in [tuple, list, set]:
+ return list(param)
+ else:
+ return [param]
+
+
+def is_empty(v):
+ if not v and v != 0:
+ return True
+ return False
+
+
+def detect_file_encoding(file):
+ encoding = chardet.detect(file).get('encoding')
+ if encoding == ENCODING_ASCII:
+ encoding = ENCODING_UTF_8
+
+ return encoding
+
+
+def detect_encoding(f_name, read_line=10000):
+ with open(f_name, 'rb') as f:
+ if read_line:
+ data = f.read(read_line)
+ else:
+ data = f.read()
+
+ encoding = chardet.detect(data).get('encoding')
+ if encoding == ENCODING_ASCII:
+ encoding = ENCODING_UTF_8
+
+ return encoding
+
+
+def is_eof(f):
+ cur = f.tell() # save current position
+ f.seek(0, os.SEEK_END)
+ end = f.tell() # find the size of file
+ f.seek(cur, os.SEEK_SET)
+ return cur == end
+
+
+def replace_str_in_file(file_name, search_str, replace_to_str):
+ # get encoding
+ encoding = detect_encoding(file_name)
+ with open(file_name, encoding=encoding) as f:
+ replaced_text = f.read().replace(search_str, replace_to_str)
+
+ with open(file_name, "w", encoding=encoding) as f_out:
+ f_out.write(replaced_text)
+
+
+def get_file_modify_time(file_path):
+ file_time = datetime.utcfromtimestamp(os.path.getmtime(file_path))
+ return convert_time(file_time)
+
+
+def split_path_to_list(file_path):
+ folders = os.path.normpath(file_path).split(os.path.sep)
+ return folders
+
+
+def get_ip_address():
+ hostname = socket.gethostname()
+ ip_addr = socket.gethostbyname(hostname)
+
+ return ip_addr
+
+
+def gen_abbr_name(name, len_of_col_name=10):
+ suffix = '...'
+ short_name = name
+ if len(short_name) > len_of_col_name:
+ short_name = name[:len_of_col_name - len(suffix)] + suffix
+
+ return short_name
+
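+# gen_abbr_name example: gen_abbr_name('temperature_sensor', 10) -> 'tempera...'
+# (names longer than len_of_col_name are truncated and suffixed with '...').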
+
+# def remove_inf(series):
+# return series[~series.isin([float('inf'), float('-inf')])]
+
+def read_pickle_file(file):
+ with open(file, 'rb') as f:
+ pickle_data = pickle.load(f)
+ return pickle_data
+
+
+def write_to_pickle(data, file):
+ with open(file, 'wb') as f:
+ pickle.dump(data, f)
+ return file
+
+
+def get_debug_g_dict():
+ return g.setdefault(FlaskGKey.DEBUG_SHOW_GRAPH, {})
+
+
+def set_debug_data(func_name, data):
+ if not func_name:
+ return
+
+ g_debug = get_debug_g_dict()
+ g_debug[func_name] = data
+
+ return True
+
+
+def get_debug_data(key):
+ g_debug = get_debug_g_dict()
+ data = g_debug.get(key, None)
+ return data
+
+
+@log_execution_time()
+def zero_variance(df: DataFrame):
+ for col in df.columns:
+ variance = df[col].var()
+ if pd.isna(variance) or variance == 0:
+ return True
+ return False
+
+
+@log_execution_time()
+def find_babel_locale(lang):
+ if not lang:
+ return lang
+
+ lang = str(lang).lower()
+ lang = lang.replace('-', '_')
+ for _lang in LANGUAGES:
+ if lang == _lang.lower():
+ return _lang
+
+ return lang
diff --git a/histview2/common/constants.py b/histview2/common/constants.py
new file mode 100644
index 0000000..72403f9
--- /dev/null
+++ b/histview2/common/constants.py
@@ -0,0 +1,685 @@
+from enum import Enum, auto
+
+MATCHED_FILTER_IDS = 'matched_filter_ids'
+UNMATCHED_FILTER_IDS = 'unmatched_filter_ids'
+NOT_EXACT_MATCH_FILTER_IDS = 'not_exact_match_filter_ids'
+STRING_COL_IDS = 'string_col_ids'
+
+SQL_COL_PREFIX = '__'
+SQL_LIMIT = 5_000_000
+ACTUAL_RECORD_NUMBER = 'actual_record_number'
+ACTUAL_RECORD_NUMBER_TRAIN = 'actual_record_number_train'
+ACTUAL_RECORD_NUMBER_TEST = 'actual_record_number_test'
+REMOVED_OUTLIER_NAN_TRAIN = 'removed_outlier_nan_train'
+REMOVED_OUTLIER_NAN_TEST = 'removed_outlier_nan_test'
+
+YAML_CONFIG_BASIC = 'basic'
+YAML_CONFIG_DB = 'db'
+YAML_CONFIG_PROC = 'proc'
+YAML_CONFIG_HISTVIEW2 = 'histview2'
+YAML_CONFIG_VERSION = 'version'
+YAML_TILE_INTERFACE_DN7 = 'ti_dn7'
+YAML_TILE_INTERFACE_AP = 'ti_analysis_platform'
+TILE_RESOURCE_URL = '/histview2/tile_interface/resources/'
+DB_BACKUP_SUFFIX = '_old'
+DB_BACKUP_FOLDER = 'backup'
+IN_MODIFIED_DAYS = 30
+NORMAL_MODE_MAX_RECORD = 10000
+
+DEFAULT_WARNING_DISK_USAGE = 80
+DEFAULT_ERROR_DISK_USAGE = 90
+
+
+class FilterFunc(Enum):
+ MATCHES = auto()
+ ENDSWITH = auto()
+ STARTSWITH = auto()
+ CONTAINS = auto()
+ REGEX = auto()
+ SUBSTRING = auto()
+ OR_SEARCH = auto()
+ AND_SEARCH = auto()
+
+
+class CsvDelimiter(Enum):
+ CSV = ','
+ TSV = '\t'
+ DOT = '.'
+ SMC = ';'
+ Auto = None
+
+
+class DBType(Enum):
+ POSTGRESQL = 'postgresql'
+ MSSQLSERVER = 'mssqlserver'
+ SQLITE = 'sqlite'
+ ORACLE = 'oracle'
+ MYSQL = 'mysql'
+ CSV = 'csv'
+
+
+class ErrorMsg(Enum):
+ W_PCA_INTEGER = auto()
+ E_PCA_NON_NUMERIC = auto()
+
+ E_ALL_NA = auto()
+ E_ZERO_VARIANCE = auto()
+
+
+# YAML Keywords
+YAML_INFO = 'info'
+YAML_R_PATH = 'r-path'
+YAML_PROC = 'proc'
+YAML_SQL = 'sql'
+YAML_FROM = 'from'
+YAML_SELECT_OTHER_VALUES = 'select-other-values'
+YAML_MASTER_NAME = 'master-name'
+YAML_WHERE_OTHER_VALUES = 'where-other-values'
+YAML_FILTER_TIME = 'filter-time'
+YAML_FILTER_LINE_MACHINE_ID = 'filter-line-machine-id'
+YAML_MACHINE_ID = 'machine-id'
+YAML_DATE_COL = 'date-column'
+YAML_AUTO_INCREMENT_COL = 'auto_increment_column'
+YAML_SERIAL_COL = 'serial-column'
+YAML_SELECT_PREFIX = 'select-prefix'
+YAML_CHECKED_COLS = 'checked-columns'
+YAML_COL_NAMES = 'column-names'
+YAML_DATA_TYPES = 'data-types'
+YAML_ALIASES = 'alias-names'
+YAML_MASTER_NAMES = 'master-names'
+YAML_OPERATORS = 'operators'
+YAML_COEFS = 'coefs'
+YAML_TYPE = 'type'
+YAML_COL_NAME = 'column_name'
+YAML_ORIG_COL_NAME = 'column_name'
+YAML_VALUE_LIST = 'value_list'
+YAML_VALUE_MASTER = 'value_masters'
+YAML_SQL_STATEMENTS = 'sql_statements'
+YAML_TRACE = 'trace'
+YAML_TRACE_BACK = 'back'
+YAML_TRACE_FORWARD = 'forward'
+YAML_CHART_INFO = 'chart-info'
+YAML_DEFAULT = 'default'
+YAML_THRESH_H = 'thresh_high'
+YAML_THRESH_L = 'thresh_low'
+YAML_Y_MAX = 'y_max'
+YAML_Y_MIN = 'y_min'
+YAML_TRACE_SELF_COLS = 'self-alias-columns'
+YAML_TRACE_TARGET_COLS = 'target-orig-columns'
+YAML_TRACE_MATCH_SELF = 'self-substr'
+YAML_TRACE_MATCH_TARGET = 'target-substr'
+YAML_DB = 'db'
+YAML_UNIVERSAL_DB = 'universal_db'
+YAML_ETL_FUNC = 'etl_func'
+YAML_PROC_ID = 'proc_id'
+YAML_PASSWORD = 'password'
+YAML_HASHED = 'hashed'
+YAML_DELIMITER = 'delimiter'
+
+# JSON Keywords
+GET02_VALS_SELECT = 'GET02_VALS_SELECT'
+ARRAY_FORMVAL = 'ARRAY_FORMVAL'
+ARRAY_PLOTDATA = 'array_plotdata'
+SERIAL_DATA = 'serial_data'
+SERIAL_COLUMNS = 'serial_columns'
+COMMON_INFO = 'common_info'
+DATETIME_COL = 'datetime_col'
+CYCLE_IDS = 'cycle_ids'
+ARRAY_Y = 'array_y'
+ARRAY_Z = 'array_z'
+ORIG_ARRAY_Z = 'orig_array_z'
+ARRAY_Y_MIN = 'array_y_min'
+ARRAY_Y_MAX = 'array_y_max'
+ARRAY_Y_TYPE = 'array_y_type'
+IQR = 'iqr'
+ARRAY_X = 'array_x'
+Y_MAX = 'y-max'
+Y_MIN = 'y-min'
+Y_MAX_ORG = 'y_max_org'
+Y_MIN_ORG = 'y_min_org'
+TIME_RANGE = 'time_range'
+TOTAL = 'total'
+
+UNLINKED_IDXS = 'unlinked_idxs'
+NONE_IDXS = 'none_idxs'
+INF_IDXS = 'inf_idxs'
+NEG_INF_IDXS = 'neg_inf_idxs'
+UPPER_OUTLIER_IDXS = 'upper_outlier_idxs'
+LOWER_OUTLIER_IDXS = 'lower_outlier_idxs'
+
+SCALE_SETTING = 'scale_setting'
+SCALE_THRESHOLD = 'scale_threshold'
+SCALE_AUTO = 'scale_auto'
+SCALE_COMMON = 'scale_common'
+SCALE_FULL = 'scale_full'
+KDE_DATA = 'kde_data'
+SCALE_Y = 'scale_y'
+SCALE_X = 'scale_x'
+SCALE_COLOR = 'scale_color'
+
+CHART_INFOS = 'chart_infos'
+CHART_INFOS_ORG = 'chart_infos_org'
+COMMON = 'COMMON'
+SELECT_ALL = 'All'
+NO_FILTER = 'NO_FILTER'
+START_PROC = 'start_proc'
+START_DATE = 'START_DATE'
+START_TM = 'START_TIME'
+START_DT = 'start_dt'
+COND_PROCS = 'cond_procs'
+COND_PROC = 'cond_proc'
+END_PROC = 'end_proc'
+END_DATE = 'END_DATE'
+END_TM = 'END_TIME'
+END_DT = 'end_dt'
+IS_REMOVE_OUTLIER = 'isRemoveOutlier'
+TBLS = 'TBLS'
+FILTER_PARTNO = 'filter-partno'
+FILTER_MACHINE = 'machine_id'
+CATE_PROC = 'end_proc_cate'
+GET02_CATE_SELECT = 'GET02_CATE_SELECT'
+CATEGORY_DATA = 'category_data'
+CATE_PROCS = 'cate_procs'
+TIMES = 'times'
+TIME_NUMBERINGS = 'time_numberings'
+ELAPSED_TIME = 'elapsed_time'
+COLORS = 'colors'
+H_LABEL = 'h_label'
+V_LABEL = 'v_label'
+TIME_MIN = 'time_min'
+TIME_MAX = 'time_max'
+X_THRESHOLD = 'x_threshold'
+Y_THRESHOLD = 'y_threshold'
+X_SERIAL = 'x_serial'
+Y_SERIAL = 'y_serial'
+SORT_KEY = 'sort_key'
+
+IS_RES_LIMITED = 'is_res_limited'
+IS_RES_LIMITED_TRAIN = 'is_res_limited_train'
+IS_RES_LIMITED_TEST = 'is_res_limited_test'
+WITH_IMPORT_OPTIONS = 'with_import'
+GET_PARAM = 'get_param'
+PROCS = 'procs'
+CLIENT_TIMEZONE = 'client_timezone'
+DATA_SIZE = 'data_size'
+X_OPTION = 'xOption'
+SERIAL_PROCESS = 'serialProcess'
+SERIAL_COLUMN = 'serialColumn'
+SERIAL_ORDER = 'serialOrder'
+TEMP_X_OPTION = 'TermXOption'
+TEMP_SERIAL_PROCESS = 'TermSerialProcess'
+TEMP_SERIAL_COLUMN = 'TermSerialColumn'
+TEMP_SERIAL_ORDER = 'TermSerialOrder'
+THRESHOLD_BOX = 'thresholdBox'
+SCATTER_CONTOUR = 'scatter_contour'
+DF_ALL_PROCS = 'dfProcs'
+DF_ALL_COLUMNS = 'dfColumns'
+CHART_TYPE = 'chartType'
+
+# CATEGORICAL PLOT
+CATE_VARIABLE = 'categoryVariable'
+CATE_VALUE_MULTI = 'categoryValueMulti'
+PART_NO = 'PART_NO'
+MACHINE_ID = 'MACHINE_ID'
+COMPARE_TYPE = 'compareType'
+CATEGORICAL = 'var'
+TERM = 'term'
+RL_CATEGORY = 'category'
+RL_CYCLIC_TERM = 'cyclicTerm'
+RL_DIRECT_TERM = 'directTerm'
+TIME_CONDS = 'time_conds'
+CATE_CONDS = 'cate_conds'
+LINE_NO = 'LINE_NO'
+YAML_LINE_LIST = 'line-list'
+FILTER_OTHER = 'filter-other'
+THRESH_HIGH = 'thresh-high'
+THRESH_LOW = 'thresh-low'
+PRC_MAX = 'prc-max'
+PRC_MIN = 'prc-min'
+ACT_FROM = 'act-from'
+ACT_TO = 'act-to'
+CYCLIC_DIV_NUM = 'cyclicTermDivNum'
+CYCLIC_INTERVAL = 'cyclicTermInterval'
+CYCLIC_WINDOW_LEN = 'cyclicTermWindowLength'
+CYCLIC_TERMS = 'cyclic_terms'
+END_PROC_ID = 'end_proc_id'
+END_PROC_NAME = 'end_proc_name'
+END_COL_ID = 'end_col_id'
+END_COL_NAME = 'end_col_name'
+RANK_COL = 'before_rank_values'
+SUMMARIES = 'summaries'
+CAT_DISTRIBUTE = 'category_distributed'
+CAT_SUMMARY = 'cat_summary'
+N = 'n'
+N_PCTG = 'n_pctg'
+N_NA = 'n_na'
+N_NA_PCTG = 'n_na_pctg'
+N_TOTAL = 'n_total'
+UNIQUE_CATEGORIES = 'unique_categories'
+UNIQUE_DIV = 'unique_div'
+UNIQUE_COLOR = 'unique_color'
+IS_OVER_UNIQUE_LIMIT = 'isOverUniqueLimit'
+DIC_CAT_FILTERS = 'dic_cat_filters'
+TEMP_CAT_EXP = 'temp_cat_exp'
+TEMP_CAT_PROCS = 'temp_cat_procs'
+DIV_BY_DATA_NUM = 'dataNumber'
+DIV_BY_CAT = 'div'
+COLOR_VAR = 'colorVar'
+IS_DATA_LIMITED = 'isDataLimited'
+
+# Cat Expansion
+CAT_EXP_BOX = 'catExpBox'
+
+# Order columns
+INDEX_ORDER_COLS = 'indexOrderColumns'
+THIN_DATA_GROUP_COUNT = 'thinDataGroupCounts'
+
+# validate data flag
+IS_VALIDATE_DATA = 'isValidateData'
+# Substring column name in universal db
+SUB_STRING_COL_NAME = '{}_From_{}_To_{}'
+SUB_STRING_REGEX = r'^.+_From_(\d+)_To_(\d+)$'
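The template and the regex are designed to round-trip, so a generated substring column name can later be parsed back into its positions; for example:

```python
import re
from histview2.common.constants import SUB_STRING_COL_NAME, SUB_STRING_REGEX

col = SUB_STRING_COL_NAME.format('Serial', 3, 6)   # 'Serial_From_3_To_6'
match = re.match(SUB_STRING_REGEX, col)
match.groups()                                     # ('3', '6')
```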
+
+# heatmap
+HM_STEP = 'step'
+HM_MODE = 'mode'
+HM_FUNCTION_REAL = 'function_real'
+HM_FUNCTION_CATE = 'function_cate'
+HM_TRIM = 'remove_outlier'
+CELL_SUFFIX = '_cell'
+AGG_COL = 'agg_col'
+TIME_COL = 'time'
+
+
+class HMFunction(Enum):
+ max = auto()
+ min = auto()
+ mean = auto()
+ std = auto()
+ range = auto()
+ median = auto()
+ count = auto()
+ count_per_hour = auto()
+ count_per_min = auto()
+ first = auto()
+ time_per_count = auto()
+ iqr = auto()
+
+
+class RelationShip(Enum):
+ ONE = auto()
+ MANY = auto()
+
+
+class AbsPath(Enum):
+ SHOW = auto()
+ HIDE = auto()
+
+
+class DataType(Enum):
+ NULL = 0
+ INTEGER = 1
+ REAL = 2
+ TEXT = 3
+ DATETIME = 4
+
+
+class DataTypeEncode(Enum):
+ NULL = ''
+ INTEGER = 'Int'
+ REAL = 'Real'
+ TEXT = 'Str'
+ DATETIME = 'CT'
+
+
+class JobStatus(Enum):
+ def __str__(self):
+ return str(self.name)
+
+ PENDING = 0
+ PROCESSING = 1
+ DONE = 2
+ KILLED = 3
+ FAILED = 4
+ FATAL = 5  # error on DB insert/commit, file lock, etc. (these files need to be re-run in the next job)
+
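Because `__str__` is overridden to return the member name, a status serialises as plain text, and the numeric values give a natural severity order; a brief sketch:

```python
from histview2.common.constants import JobStatus

str(JobStatus.PROCESSING)                      # 'PROCESSING'
JobStatus.FAILED.value > JobStatus.DONE.value  # True: failure states sort after DONE
```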
+
+class Outliers(Enum):
+ NOT_OUTLIER = 0
+ IS_OUTLIER = 1
+
+
+class FlaskGKey(Enum):
+ TRACE_ERR = auto()
+ YAML_CONFIG = auto()
+ APP_DB_SESSION = auto()
+ DEBUG_SHOW_GRAPH = auto()
+ MEMOIZE = auto()
+
+
+class DebugKey(Enum):
+ IS_DEBUG_MODE = auto()
+ GET_DATA_FROM_DB = auto()
+
+
+class MemoizeKey(Enum):
+ STOP_USING_CACHE = auto()
+
+
+# error message for dangling jobs
+FORCED_TO_BE_FAILED = 'DANGLING JOB. FORCED_TO_BE_FAILED'
+DEFAULT_POLLING_FREQ = 180  # by default, data import runs every 3 minutes (180 s)
+
+class CfgConstantType(Enum):
+ def __str__(self):
+ return str(self.name)
+
+ # CHECKED_COLUMN = 0 # TODO define value
+ # GUI_TYPE = 1
+ # FILTER_REGEX = 2
+ # PARTNO_LIKE = 3
+ POLLING_FREQUENCY = auto()
+ ETL_JSON = auto()
+ UI_ORDER = auto()
+ USE_OS_TIMEZONE = auto()
+ TS_CARD_ORDER = auto()
+ EFA_HEADER_EXISTS = auto()
+ DISK_USAGE_CONFIG = auto()
+
+
+# UI order types
+UI_ORDER_DB = 'tblDbConfig'
+UI_ORDER_PROC = 'tblProcConfig'
+
+# SQL
+SQL_PERCENT = '%'
+SQL_REGEX_PREFIX = 'RAINBOW7_REGEX:'
+SQL_REGEXP_FUNC = 'REGEXP'
+
+# DATA TRACE LOG CONST
+EXECTIME = 'ExecTime'
+INPUTDATA = 'InputData'
+# Measurement Protocol Server
+MPS = 'www.google-analytics.com'
+R_PORTABLE = 'R-Portable'
+R_LIB_VERSION = 'R_LIB_VERSION'
+
+# Message
+MSG_DB_CON_FAILED = 'Database connection failed! Please check your database connection information'
+
+# encoding
+ENCODING_SHIFT_JIS = 'cp932'
+ENCODING_UTF_8 = 'utf-8'
+ENCODING_UTF_8_BOM = 'utf-8-sig'
+ENCODING_ASCII = 'ascii'
+
+# Web socket
+SOCKETIO = 'socketio'
+PROC_LINK_DONE_PUBSUB = '/proc_link_done_pubsub'
+PROC_LINK_DONE_SUBSCRIBE = 'proc_link_subscribe'
+PROC_LINK_DONE_PUBLISH = 'proc_link_publish'
+SHUTDOWN_APP_DONE_PUBSUB = '/shutdown_app_done_pubsub'
+SHUTDOWN_APP_DONE_PUBLISH = 'shutdown_app_publish'
+BACKGROUND_JOB_PUBSUB = '/job'
+# JOB_STATUS_PUBLISH = 'job_status_publish'
+# JOB_INFO_PUBLISH = 'res_background_job'
+
+# Dictionary Key
+HAS_RECORD = 'has_record'
+
+# WRAPR keys
+WR_CTGY = 'ctgy'
+WR_HEAD = 'head'
+WR_RPLC = 'rplc'
+WR_VALUES = 'values'
+WR_HEADER_NAMES = 'header_name'
+WR_TYPES = 'types'
+# RIDGELINE
+RL_GROUPS = 'groups'
+RL_EMD = 'emd'
+RL_DATA = 'data'
+RL_RIDGELINES = 'ridgelines'
+RL_ARRAY_X = 'array_x'
+RL_CATE_NAME = 'cate_name'
+RL_PERIOD = 'TargetPeriod.from|TargetPeriod.to'
+RL_SENSOR_NAME = 'sensor_name'
+RL_PROC_NAME = 'proc_name'
+RL_KDE = 'kde_data'
+RL_DEN_VAL = 'kde'
+RL_ORG_DEN = 'origin_kde'
+RL_TRANS_VAL = 'transform_val'
+RL_TRANS_DEN = 'trans_kde'
+RL_XAXIS = 'rlp_xaxis'
+RL_YAXIS = 'rlp_yaxis'
+RL_HIST_LABELS = 'hist_labels'
+RL_HIST_COUNTS = 'hist_counts'
+RL_DATA_COUNTS = 'data_counts'
+RL_CATES = 'categories'
+
+# SkD
+SKD_TARGET_PROC_CLR = '#65c5f1'
+
+# tile interface
+TILE_INTERFACE = 'tile_interface'
+SECTIONS = 'sections'
+DN7_TILE = 'dn7'
+AP_TILE = 'analysis_platform'
+
+
+# actions
+class Action(Enum):
+ def __str__(self):
+ return str(self.name)
+
+ SHUTDOWN_APP = auto()
+
+
+class YType(Enum):
+ NORMAL = 0
+ INF = 1
+ NEG_INF = -1
+ NONE = 2
+ OUTLIER = 3
+ NEG_OUTLIER = -3
+ UNLINKED = -4
+
+
+class CfgFilterType(Enum):
+ def __str__(self):
+ return str(self.name)
+
+ LINE = auto()
+ MACHINE_ID = auto()
+ PART_NO = auto()
+ OTHER = auto()
+
+
+class ProcessCfgConst(Enum):
+ PROC_ID = 'id'
+ PROC_COLUMNS = 'columns'
+
+
+class EFAColumn(Enum):
+ def __str__(self):
+ return str(self.name)
+
+ Line = auto()
+ Process = auto()
+ Machine = auto()
+
+
+EFA_HEADER_FLAG = '1'
+
+
+class Operator(Enum):
+ def __str__(self):
+ return str(self.name)
+
+ PLUS = '+'
+ MINUS = '-'
+ PRODUCT = '*'
+ DEVIDE = '/'
+ REGEX = 'regex'
+
+
+class AggregateBy(Enum):
+ DAY = 'Day'
+ HOUR = 'Hour'
+
+
+# App Config keys
+SQLITE_CONFIG_DIR = 'SQLITE_CONFIG_DIR'
+PARTITION_NUMBER = 'PARTITION_NUMBER'
+UNIVERSAL_DB_FILE = 'UNIVERSAL_DB_FILE'
+APP_DB_FILE = 'APP_DB_FILE'
+TESTING = 'TESTING'
+
+DATA_TYPE_ERROR_MSG = 'Data Type Error'
+DATA_TYPE_DUPLICATE_MSG = 'Duplicate Record'
+
+AUTO_BACKUP = 'auto-backup-universal'
+
+
+class appENV(Enum):
+ PRODUCTION = 'prod'
+ DEVELOPMENT = 'dev'
+
+
+THIN_DATA_CHUNK = 4000
+THIN_DATA_COUNT = THIN_DATA_CHUNK * 3
+
+# variables correlation
+CORRS = 'corrs'
+CORR = 'corr'
+PCORR = 'pcorr'
+NTOTALS = 'ntotals'
+DUPLICATE_COUNT_COLUMN = '__DUPLICATE_COUNT__'  # a column that stores the duplicate count of the current data row
+
+# Heatmap
+MAX_TICKS = 8
+AGG_FUNC = 'agg_function'
+CATE_VAL = 'cate_value'
+END_COL = 'end_col'
+X_TICKTEXT = 'x_ticktext'
+X_TICKVAL = 'x_tickvals'
+Y_TICKTEXT = 'y_ticktext'
+Y_TICKVAL = 'y_tickvals'
+ACT_CELLS = 'actual_num_cell'
+
+OBJ_VAR = 'objectiveVar'
+
+CAT_TOTAL = 'cat_total'
+IS_CAT_LIMITED = 'is_cat_limited'
+MAX_CATEGORY_SHOW = 30
+
+# PCA
+SHORT_NAMES = 'short_names'
+DATAPOINT_INFO = 'data_point_info'
+PLOTLY_JSON = 'plotly_jsons'
+DIC_SENSOR_HEADER = 'dic_sensor_headers'
+
+
+# chart type
+class ChartType(Enum):
+ HEATMAP = 'heatmap'
+ SCATTER = 'scatter'
+ VIOLIN = 'violin'
+
+
+# Scp sub request params
+MATRIX_COL = 'colNumber'
+COLOR_ORDER = 'scpColorOrder'
+
+
+# COLOR ORDER
+class ColorOrder(Enum):
+ DATA = 1
+ TIME = 2
+ ELAPSED_TIME = 3
+
+
+# import export debug info
+DIC_FORM_NAME = 'dic_form'
+DF_NAME = 'df'
+CONFIG_DB_NAME = 'config_db'
+USER_SETTING_NAME = 'user_setting'
+USER_SETTING_VERSION = 0
+EN_DASH = '–'
+
+
+# Disk usage warning level
+class DiskUsageStatus(Enum):
+ Normal = 0
+ Warning = 1
+ Full = 2
+
+
+# debug mode
+IS_EXPORT_MODE = 'isExportMode'
+IS_IMPORT_MODE = 'isImportMode'
+
+# NA
+NA_STR = 'NA'
+
+# Recent
+VAR_TRACE_TIME = 'varTraceTime'
+TERM_TRACE_TIME = 'termTraceTime'
+CYCLIC_TRACE_TIME = 'cyclicTraceTime'
+TRACE_TIME = 'traceTime'
+
+# Limited graph flag
+IS_GRAPH_LIMITED = 'isGraphLimited'
+
+# language
+LANGUAGES = [
+ 'ja',
+ 'en',
+ 'it',
+ 'es',
+ 'vi',
+ 'pt',
+ 'hi',
+ 'th',
+ 'zh_CN',
+ 'zh_TW',
+ 'ar',
+ 'bg',
+ 'ca',
+ 'cs',
+ 'cy',
+ 'de',
+ 'el',
+ 'fa',
+ 'fi',
+ 'fr',
+ 'gd',
+ 'he',
+ 'hr',
+ 'hu',
+ 'id',
+ 'is',
+ 'km',
+ 'ko',
+ 'lb',
+ 'mi',
+ 'mk',
+ 'mn',
+ 'ms',
+ 'my',
+ 'ne',
+ 'nl',
+ 'no',
+ 'pa',
+ 'pl',
+ 'ro',
+ 'ru',
+ 'sd',
+ 'si',
+ 'sk',
+ 'sq',
+ 'sv',
+ 'te',
+ 'tl',
+ 'tr'
+]
diff --git a/histview2/common/cryptography_utils.py b/histview2/common/cryptography_utils.py
new file mode 100644
index 0000000..7453325
--- /dev/null
+++ b/histview2/common/cryptography_utils.py
@@ -0,0 +1,113 @@
+from cryptography.fernet import Fernet
+from histview2.common.constants import DBType, ENCODING_UTF_8
+import copy
+from histview2 import dic_config
+
+
+def generate_key():
+ """
+ Generate a Fernet key used for encrypting and decrypting.
+ :return: key (bytes)
+ """
+ return Fernet.generate_key()
+
+
+def encode_db_secret_key():
+ DB_SECRET_KEY = dic_config['DB_SECRET_KEY']
+ return Fernet(str.encode(DB_SECRET_KEY))
+
+
+def encrypt(plain_text):
+ """
+ Encrypt a text with the application's Fernet key (DB_SECRET_KEY).
+ :param plain_text: str or bytes
+ :return: cipher_text: bytes
+ """
+ if plain_text is None:
+ return None
+
+ cipher_suite = encode_db_secret_key()
+ if isinstance(plain_text, bytes):
+ plain_text_bytes = plain_text
+ else:
+ plain_text_bytes = str.encode(plain_text)
+ cipher_text = cipher_suite.encrypt(plain_text_bytes)
+
+ return cipher_text
+
+
+def decrypt(cipher_text):
+ """
+ Decrypt a cipher text with the application's Fernet key (DB_SECRET_KEY).
+ :param cipher_text: str
+ :return: plain_text: bytes
+ """
+ cipher_suite = encode_db_secret_key()
+ cipher_text_bytes = str.encode(cipher_text)
+ plain_text = cipher_suite.decrypt(cipher_text_bytes)
+
+ return plain_text
+
+
+def decrypt_pwd(cipher_text):
+ """
+ Decrypt a password and return it as a UTF-8 string.
+ :param cipher_text: str or bytes
+ :return: plain_text: str
+ """
+ if cipher_text is None:
+ return None
+
+ cipher_suite = encode_db_secret_key()
+ if isinstance(cipher_text, bytes):
+ cipher_text_bytes = cipher_text
+ else:
+ cipher_text_bytes = str.encode(cipher_text)
+
+ plain_text = cipher_suite.decrypt(cipher_text_bytes)
+
+ return plain_text.decode(ENCODING_UTF_8)
+
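A round-trip sketch of the helpers above. It assumes `dic_config` behaves like a plain dict and that `DB_SECRET_KEY` has not already been set by the application:

```python
from histview2 import dic_config
from histview2.common.cryptography_utils import generate_key, encrypt, decrypt_pwd

# The key is stored as a string and re-encoded inside encode_db_secret_key().
dic_config['DB_SECRET_KEY'] = generate_key().decode('utf-8')

token = encrypt('my-db-password')   # bytes (Fernet token)
decrypt_pwd(token)                  # 'my-db-password'
```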
+
+def encrypt_db_password(dict_db_config):
+ """
+
+ Encrypt plain-text passwords inside a db config dict.
+ :param dict_db_config: db config dict: {db: {db-name: {key: value}}}
+ :return: copy of dict_db_config with passwords encrypted and 'hashed' set to True
+ dict_db_config_hashed = copy.deepcopy(dict_db_config)
+
+ if dict_db_config_hashed.get('db'):
+ for key in dict_db_config_hashed["db"]:
+ db_config = dict_db_config_hashed["db"][key]
+ if db_config and not db_config.get("hashed") \
+ and db_config.get("type") != DBType.SQLITE.value:
+ plain_password = db_config.get("password")
+ if plain_password:
+ hashed_password = encrypt(plain_password)
+ db_config["password"] = str(hashed_password, encoding="utf-8")
+ db_config["hashed"] = True
+
+ return dict_db_config_hashed
+
+
+def decrypt_db_password(dict_db_config):
+ """
+ Decrypt encrypted passwords inside a db config dict.
+ :param dict_db_config: db config dict: {db: {db-name: {key: value}}}
+ :return: copy of dict_db_config with passwords decrypted and 'hashed' set to False
+ """
+ dict_db_config_unhashed = copy.deepcopy(dict_db_config)
+
+ if dict_db_config_unhashed and dict_db_config_unhashed.get('db'):
+ for key in dict_db_config_unhashed["db"]:
+ db_config = dict_db_config_unhashed["db"][key]
+ if db_config and db_config.get("hashed"):
+ hashed_password = db_config.get("password")
+ plain_password = b""
+ if hashed_password:
+ plain_password = decrypt(hashed_password)
+ db_config["password"] = str(plain_password, encoding="utf-8")
+ db_config["hashed"] = False
+
+ return dict_db_config_unhashed
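An end-to-end sketch with a minimal config dict in the `{db: {db-name: {...}}}` shape described in the docstrings (the db name and password are made up; `DB_SECRET_KEY` must already be configured as shown earlier):

```python
from histview2.common.cryptography_utils import encrypt_db_password, decrypt_db_password

cfg = {'db': {'factory_db': {'type': 'postgresql',
                             'password': 'secret',
                             'hashed': False}}}

encrypted = encrypt_db_password(cfg)
encrypted['db']['factory_db']['hashed']                          # True
decrypt_db_password(encrypted)['db']['factory_db']['password']   # 'secret'
```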
diff --git a/histview2/common/disk_usage.py b/histview2/common/disk_usage.py
new file mode 100644
index 0000000..bb13be2
--- /dev/null
+++ b/histview2/common/disk_usage.py
@@ -0,0 +1,154 @@
+import shutil
+import json
+
+from histview2.common.memoize import memoize
+from histview2.common.common_utils import get_ip_address
+from histview2.setting_module.models import CfgConstant
+from histview2.common.constants import DiskUsageStatus
+from histview2.common.logger import log_execution_time
+
+
+class DiskUsageInterface:
+
+ @classmethod
+ def get_disk_usage(cls, path=None):
+ raise NotImplementedError()
+
+
+class MainDiskUsage(DiskUsageInterface):
+ """
+ Checks disk usage of the disk/partition that the main application is installed on.
+
+ """
+
+ @classmethod
+ def get_disk_usage(cls, path=None):
+ return shutil.disk_usage(path=path)
+
+
+@memoize(duration=5*60)
+def get_disk_usage_percent(path=None):
+ """
+ Gets disk usage information.
+
+ :param path: disk location
+ If this argument is not passed, './' is used as the default location
+ :return: a tuple of (disk status, used percent, dict of status -> limit percent)
+ """
+ if not path:
+ path = './' # as default dir
+
+ dict_status_measures = {CfgConstant.get_warning_disk_usage(): DiskUsageStatus.Warning,
+ CfgConstant.get_error_disk_usage(): DiskUsageStatus.Full}
+ dict_limit_capacity = {y: x for x, y in dict_status_measures.items() if x} # switch key value
+
+ status = DiskUsageStatus.Normal
+ used_percent = 0
+ rules = [MainDiskUsage]
+
+ for checker in rules:
+ usage = checker.get_disk_usage(path)
+ used_percent = round(usage.used / usage.total * 100)
+ for measure in sorted(dict_status_measures.keys()):
+ if measure:
+ if used_percent >= measure:
+ status = dict_status_measures.pop(measure)
+ if not dict_status_measures:
+ break
+
+ return status, used_percent, dict_limit_capacity
+
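In effect the loop classifies `used_percent` against the configured warning/error limits. A hypothetical standalone helper using the defaults from constants.py makes the rule explicit (`classify_usage` is illustrative, not part of this module):

```python
from histview2.common.constants import (DEFAULT_ERROR_DISK_USAGE,
                                        DEFAULT_WARNING_DISK_USAGE,
                                        DiskUsageStatus)

def classify_usage(used_percent,
                   warning=DEFAULT_WARNING_DISK_USAGE,
                   error=DEFAULT_ERROR_DISK_USAGE):
    """Same threshold rule as get_disk_usage_percent, without the config lookup."""
    if used_percent >= error:
        return DiskUsageStatus.Full
    if used_percent >= warning:
        return DiskUsageStatus.Warning
    return DiskUsageStatus.Normal

classify_usage(75)   # DiskUsageStatus.Normal
classify_usage(85)   # DiskUsageStatus.Warning
classify_usage(95)   # DiskUsageStatus.Full
```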
+
+def get_disk_capacity():
+ """
+ Get disk capacity information of the server this application runs on (reported as 'EdgeServer').
+
+ :return: DiskCapacityException object that includes the disk status, used percent and a message if any.
+ """
+ disk_status, used_percent, dict_limit_capacity = get_disk_usage_percent()
+ print(f'Disk usage: {used_percent}% - {disk_status.name}')
+
+ message = ''
+ if disk_status == DiskUsageStatus.Full:
+ message = 'Data import has stopped because the hard disk capacity of `__SERVER_INFO__` has reached ' \
+ f'{dict_limit_capacity.get(DiskUsageStatus.Full)}%. ' \
+ 'Data import will restart when unnecessary data is deleted and the free space increases.'
+ elif disk_status == DiskUsageStatus.Warning:
+ message = 'Please delete unnecessary data because the capacity of the hard disk of `__SERVER_INFO__` has ' \
+ f'reached {dict_limit_capacity.get(DiskUsageStatus.Warning)}%.'
+
+ server_info = get_ip_address()
+ message = message.replace('__SERVER_INFO__', server_info)
+ return DiskCapacityException(disk_status, used_percent, server_info,
+ 'EdgeServer',
+ dict_limit_capacity.get(DiskUsageStatus.Warning),
+ dict_limit_capacity.get(DiskUsageStatus.Full), message)
+
+
+def get_disk_capacity_once(_job_id=None):
+ """
+ Get disk capacity information and always return a DiskCapacityException object.
+
+ Attention: DO NOT USE ANYWHERE ELSE; this is only used in the send_processing_info method to check
+ disk capacity for each job.
+
+ :param _job_id: ensures the check runs only once per _job_id
+ :return: DiskCapacityException object
+ """
+ return get_disk_capacity()
+
+
+class DiskCapacityException(Exception):
+ """Exception raised for disk usage exceed the allowed limit.
+
+ Attributes:
+ disk_status -- status of disk usage
+ used_percent -- amount of used storage
+ server_info -- String of server information
+ server_type -- Type of server
+ warning_limit_percent -- limit level
+ error_limit_percent -- limit level
+ message -- explanation of the error
+ """
+
+ def __init__(self, disk_status, used_percent, server_info, server_type, warning_limit_percent, error_limit_percent,
+ message):
+ self.disk_status: DiskUsageStatus = disk_status
+ self.used_percent = used_percent
+ self.server_info = server_info
+ self.server_type = server_type
+ self.warning_limit_percent = warning_limit_percent
+ self.error_limit_percent = error_limit_percent
+ self.message = message
+ super().__init__(self.message)
+
+ def to_dict(self):
+ return {
+ 'disk_status': self.disk_status.name,
+ 'used_percent': self.used_percent,
+ 'server_info': self.server_info,
+ 'server_type': self.server_type,
+ 'warning_limit_percent': self.warning_limit_percent,
+ 'error_limit_percent': self.error_limit_percent,
+ 'message': self.message
+ }
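Constructing the exception directly (all field values below are illustrative) shows how `to_dict()` flattens it for a JSON response:

```python
from histview2.common.constants import DiskUsageStatus
from histview2.common.disk_usage import DiskCapacityException

exc = DiskCapacityException(DiskUsageStatus.Warning, 85, '192.168.0.10',
                            'EdgeServer', 80, 90,
                            'Please delete unnecessary data ...')
exc.to_dict()['disk_status']   # 'Warning'
```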
+
+
+
+@log_execution_time()
+def get_disk_capacity_to_load_UI():
+ disk_capacity = {
+ 'EdgeServer': None,
+ }
+
+ # check Edge Server
+ edge_disk_capacity = get_disk_capacity()
+ disk_capacity['EdgeServer'] = edge_disk_capacity.to_dict()
+
+ return disk_capacity
+
+
+def add_disk_capacity_into_response(response, disk_capacity):
+ render_data = response.get_data()
+ script = f''
+ render_data = render_data.replace(bytes('