Added support for SQL diagnostic functionality (#297)

* Added support for SQL diagnostic functionality. * Added support for SQL diagnostic functionality. * fix * fix * add doc * add doc * fix * update requirements * Added support for SQL diagnostic functionality * Added support for SQL diagnostic functionality * Added support for SQL diagnostic functionality * Added support for SQL diagnostic functionality * Added support for SQL diagnostic functionality * Added support for SQL diagnostic functionality * Optimize gather tasks file * Added support for SQL diagnostic functionality * Added support for SQL diagnostic functionality
oceanbase · Jul 10, 2024 · 035ca56 · 035ca56
1 parent c686c0b
commit 035ca56
Show file tree

Hide file tree

Showing 66 changed files with 3,872 additions and 76 deletions.
diff --git a/.github/workflows/test_sql_rule.yml b/.github/workflows/test_sql_rule.yml
@@ -0,0 +1,30 @@
+name: Test Full Scan Rule
+
+# Run the SQL analyzer unit tests on every push and every pull request.
+on:
+  push:
+    # "**" matches every branch name, including ones containing "/"
+    # (a single "*" stops at "/", so e.g. "feature/x" would be skipped).
+    branches: "**"
+  pull_request:
+    branches: "**"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0 # Fetch all history for proper version detection
+
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v3
+      with:
+        # Quoted so YAML always treats the version as a string, never a float.
+        python-version: "3.8"
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements3.txt
+
+    - name: Run tests
+      run: python -m unittest discover -s test/analyzer/sql -p 'test_*.py'
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 .idea/
+.vscode/
 venv/
 *.pyc
 *site-packages/

diff --git a/clean_all_result.sh b/clean_all_result.sh
@@ -1,4 +1,4 @@
-rm -rf ./gather_pack_*
-rm -rf ./analyze_pack_*
-rm -rf ./analyze_flt_result*
-rm -rf ./check_report
+rm -rf ./obdiag_gather_pack_*
+rm -rf ./obdiag_analyze_pack_*
+rm -rf ./obdiag_analyze_flt_result*
+rm -rf ./obdiag_check_report
diff --git a/common/ob_connector.py b/common/ob_connector.py
@@ -46,6 +46,16 @@ def init(self):
         except Exception as e:
             self.stdio.verbose(e)
 
+    def __enter__(self):
+        """Make sure the database connection is usable on entering a ``with`` block.
+
+        :return: this connector instance, so it can be bound with ``as``.
+        """
+        # Same connect-or-revive pattern the execute_* methods use: open a
+        # connection only when none exists, otherwise ping the existing one.
+        if self.conn is None:
+            self._connect_db()
+        else:
+            self.conn.ping(reconnect=True)
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        """Close the database connection when leaving the ``with`` block.
+
+        Returns None, so an exception raised inside the block is not suppressed.
+        """
+        # The connection is stored on ``self.conn`` (see ``_connect_db``).
+        if self.conn is not None:
+            self.conn.close()
+            # Reset so later calls reconnect instead of reusing a closed handle.
+            self.conn = None
+
     def _connect_db(self):
         try:
             self.conn = mysql.connect(
@@ -82,17 +92,28 @@ def execute_sql(self, sql):
         cursor.close()
         return ret
 
-    def execute_sql_return_columns_and_data(self, sql):
+    def execute_sql_return_columns_and_data(self, sql, params=None):
+        """
+        Executes an SQL query and returns column names and data.
+
+        :param sql: The SQL statement to execute, using %s as a placeholder for parameters.
+        :param params: A tuple or list of parameters to substitute into the SQL statement.
+            When omitted or empty, the statement is executed without parameters.
+        :return: A tuple containing a list of column names and the rows returned by ``fetchall()``.
+        """
         if self.conn is None:
             self._connect_db()
         else:
             self.conn.ping(reconnect=True)
-        cursor = self.conn.cursor()
-        cursor.execute(sql)
-        column_names = [col[0] for col in cursor.description]
-        ret = cursor.fetchall()
-        cursor.close()
-        return column_names, ret
+
+        with self.conn.cursor() as cursor:
+            if params:
+                cursor.execute(sql, params)
+            else:
+                cursor.execute(sql)
+
+            # The first field of each description entry is the column name.
+            column_names = [col[0] for col in cursor.description]
+            data = cursor.fetchall()
+        return column_names, data
 
     def execute_sql_return_cursor_dictionary(self, sql):
         if self.conn is None:

diff --git a/common/tool.py b/common/tool.py
@@ -49,7 +49,7 @@
 from datetime import timedelta
 from random import choice
 from io import BytesIO
-from copy import copy
+import copy
 from colorama import Fore, Style
 from ruamel.yaml import YAML
 from err import EC_SQL_EXECUTE_FAILED
@@ -1208,6 +1208,24 @@ def compare_versions_lower(v1, v2, stdio=None):
             return i < j
         return len(v1.split(".")) < len(v2.split("."))
 
+    @staticmethod
+    def mask_passwords(data):
+        """Return a deep copy of ``data`` with password values masked.
+
+        Nested dicts and lists are walked recursively. The value of any dict
+        key whose name contains ``password`` (case-insensitive) is replaced
+        by asterisks; the original ``data`` is never modified.
+
+        :param data: arbitrary data, typically nested dicts/lists of config values.
+        :return: a deep copy of ``data`` with password values replaced.
+        """
+
+        def _mask(value):
+            # Empty/None values still get one '*' so the key is visibly masked.
+            if not value:
+                return '*'
+            try:
+                return '*' * len(value)
+            except TypeError:
+                # Non-sized values (e.g. a numeric password): mask by text length.
+                return '*' * len(str(value))
+
+        def _mask_in_place(node):
+            if isinstance(node, dict):
+                for key, value in node.items():
+                    # Non-string keys cannot be password fields.
+                    if isinstance(key, str) and 'password' in key.lower():
+                        node[key] = _mask(value)
+                    elif isinstance(value, (dict, list)):
+                        _mask_in_place(value)
+            elif isinstance(node, list):
+                for item in node:
+                    if isinstance(item, (dict, list)):
+                        _mask_in_place(item)
+
+        # Copy once up front, then mask the copy in place.
+        masked_data = copy.deepcopy(data)
+        _mask_in_place(masked_data)
+        return masked_data
+
 
 class Cursor(SafeStdio):
 
@@ -1396,3 +1414,59 @@ def get_nodes_list(context, nodes, stdio=None):
                 return None
             return new_nodes
         return None
+
+
+class SQLUtil(object):
+    """Text-level helpers that normalise SQL before it is handed to a parser.
+
+    Every helper lower-cases its input and applies one of the pre-compiled
+    patterns below; none of them keeps state on the instance.
+    """
+
+    # /* ... trace_id ... rpc_id ... */ comments.
+    re_trace = re.compile(r'''\/\*.*trace_id((?!\/\*).)*rpc_id.*\*\/''', re.VERBOSE)
+    # Any /* ... */ comment (hints included) that does not contain another "/*".
+    re_annotation = re.compile(r'''\/\*((?!\/\*).)*\*\/''', re.VERBOSE)
+    # Compound units come first: regex alternation takes the first branch that
+    # matches, so "day" listed before "day_hour" would leave "_hour" behind.
+    re_interval = re.compile(
+        r'''interval\s?(\?|\-?\d+)\s?(second_microsecond|minute_microsecond|minute_second|hour_microsecond|hour_second|hour_minute|day_microsecond|day_second|day_minute|day_hour|year_month|microsecond|second|minute|hour|day|week|month|quarter|year)''',
+        re.VERBOSE,
+    )
+    re_force_index = re.compile(r'''force[\s]index[\s][(]\w+[)]''', re.VERBOSE)
+    re_cast_1 = re.compile(r'''cast\(.*?\(.*?\)\)''', re.VERBOSE)
+    re_cast_2 = re.compile(r'''cast\(.*?\)''', re.VERBOSE)
+    re_now = re.compile(r'''now\(\)''', re.VERBOSE)
+
+    def remove_sql_text_affects_parser(self, sql):
+        """Lower-case ``sql`` and strip comments/hints, ``force index``
+        clauses, ``now()`` in inserts and one trailing semicolon.
+
+        :param sql: SQL text.
+        :return: the normalised SQL text.
+        """
+        sql = sql.lower().strip()
+        sql = self.remove_hint_and_annotate(sql)
+        sql = self.remove_force_index(sql)
+        sql = self.remove_now_in_insert(sql)
+        sql = self.remove_semicolon(sql)
+        return sql
+
+    def remove_hint_and_annotate(self, sql):
+        """Return lower-cased ``sql`` with ``/* ... */`` comments removed."""
+        sql = sql.lower()
+        sql = self.re_annotation.sub('', sql)
+        sql = self.re_trace.sub('', sql)
+        return sql
+
+    def replace_interval_day(self, sql):
+        """Return lower-cased ``sql`` with ``interval <n|?> <unit>`` replaced by ``?``."""
+        sql = sql.lower()
+        return self.re_interval.sub('?', sql)
+
+    def remove_force_index(self, sql):
+        """Return lower-cased ``sql`` with ``force index (<name>)`` removed."""
+        sql = sql.lower()
+        return self.re_force_index.sub('', sql)
+
+    def remove_cast(self, sql):
+        """Return lower-cased ``sql`` with ``cast(...)`` expressions replaced by ``?``."""
+        sql = sql.lower()
+        # Handle casts wrapping one nested call first, then the simple form.
+        sql = self.re_cast_1.sub('?', sql)
+        sql = self.re_cast_2.sub('?', sql)
+        return sql
+
+    def remove_now_in_insert(self, sql):
+        """Return lower-cased ``sql``; in INSERT statements ``now()`` becomes ``?``."""
+        sql = sql.lower().lstrip()
+        if sql.startswith('insert'):
+            sql = self.re_now.sub('?', sql)
+        return sql
+
+    def remove_semicolon(self, sql):
+        """Strip surrounding whitespace and one trailing ``;`` from ``sql``.
+
+        Empty or whitespace-only input yields an empty string.
+        """
+        sql = sql.strip()
+        # endswith() is safe on an empty string, unlike indexing sql[-1].
+        return sql[:-1] if sql.endswith(';') else sql
+
+    def get_db_id(self, database_alias, user_id):
+        """Return ``<database_alias>-<user_id>``; non-string parts are formatted as text."""
+        return '{}-{}'.format(database_alias, user_id)
diff --git a/core.py b/core.py
@@ -32,6 +32,8 @@
 from err import CheckStatus, SUG_SSH_FAILED
 from handler.analyzer.analyze_flt_trace import AnalyzeFltTraceHandler
 from handler.analyzer.analyze_log import AnalyzeLogHandler
+from handler.analyzer.analyze_sql import AnalyzeSQLHandler
+from handler.analyzer.analyze_sql_review import AnalyzeSQLReviewHandler
 from handler.analyzer.analyze_parameter import AnalyzeParameterHandler
 from handler.analyzer.analyze_variable import AnalyzeVariableHandler
 from handler.checker.check_handler import CheckHandler
@@ -281,6 +283,13 @@ def analyze_fuction(self, function_type, opt):
                 self.set_context(function_type, 'analyze', config)
                 handler = AnalyzeFltTraceHandler(self.context)
                 handler.handle()
+            elif function_type == 'analyze_sql':
+                self.set_context(function_type, 'analyze', config)
+                handler = AnalyzeSQLHandler(self.context)
+                handler.handle()
+            elif function_type == 'analyze_sql_review':
+                self.set_context(function_type, 'analyze', config)
+                handler = AnalyzeSQLReviewHandler(self.context)
+                # Run the handler, as the sibling analyze branches do;
+                # without this call the command builds the handler and exits.
+                handler.handle()
             elif function_type == 'analyze_parameter_non_default':
                 self.set_context(function_type, 'analyze', config)
                 handler = AnalyzeParameterHandler(self.context, 'non_default')

diff --git a/diag_cmd.py b/diag_cmd.py
@@ -713,6 +713,57 @@ def _do_command(self, obdiag):
         return obdiag.analyze_fuction('analyze_parameter_non_default', self.opts)
 
 
+class ObdiagAnalyzeSQLCommand(ObdiagOriginCommand):
+    """``sql`` sub-command: registers its options and dispatches to ``analyze_sql``."""
+
+    def __init__(self):
+        super(ObdiagAnalyzeSQLCommand, self).__init__('sql', 'Analyze oceanbase sql from sql_audit ')
+        add_option = self.parser.add_option
+        # Tenant connection settings.
+        add_option('--tenant_name', type='string', help="tenant name")
+        add_option('--host', type='string', help="tenant connection host")
+        add_option('--port', type='string', help="tenant connection port")
+        add_option('--password', type='string', help="tenant connection user password", default='')
+        add_option('--user', type='string', help="tenant connection user name")
+        # Time window to analyze.
+        add_option('--from', type='string', help="specify the start of the time range. format: 'yyyy-mm-dd hh:mm:ss'")
+        add_option('--to', type='string', help="specify the end of the time range. format: 'yyyy-mm-dd hh:mm:ss'")
+        add_option('--since', type='string', help="Specify time range that from 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes. before to now. format: <n> <m|h|d>. example: 1h.", default='30m')
+        # Filtering and reporting.
+        add_option('--level', type='string', help="The alarm level, optional parameters [critical, warn, notice, ok]", default='notice')
+        add_option('--output', type='string', help="The format of the output results, choices=[json, html]", default='html')
+        # NOTE(review): --limit and --elapsed_time are type='string' but default
+        # to ints, so a value given on the command line arrives as str while the
+        # default stays int; confirm the handler accepts both.
+        add_option('--limit', type='string', help="The limit on the number of data rows returned by sql_audit for the tenant.", default=2000)
+        add_option('--store_dir', type='string', help='the dir to store result, current dir by default.', default='./obdiag_analyze/')
+        add_option('--elapsed_time', type='string', help='The minimum threshold for filtering execution time, measured in microseconds.', default=100000)
+        add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
+
+    def init(self, cmd, args):
+        """Run the base initialisation, set the usage line and return ``self``."""
+        super(ObdiagAnalyzeSQLCommand, self).init(cmd, args)
+        usage = '%s [options]' % self.prev_cmd
+        self.parser.set_usage(usage)
+        return self
+
+    def _do_command(self, obdiag):
+        """Dispatch to ``obdiag.analyze_fuction`` with the ``analyze_sql`` function type."""
+        return obdiag.analyze_fuction('analyze_sql', self.opts)
+
+
+class ObdiagAnalyzeSQLReviewCommand(ObdiagOriginCommand):
+    """``sql_review`` sub-command: registers its options and dispatches to ``analyze_sql_review``."""
+
+    def __init__(self):
+        super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from sql_audit ')
+        add_option = self.parser.add_option
+        # Tenant connection settings.
+        add_option('--host', type='string', help="tenant connection host")
+        add_option('--port', type='string', help="tenant connection port")
+        add_option('--password', type='string', help="tenant connection user password", default='')
+        add_option('--user', type='string', help="tenant connection user name")
+        # action="append": the flag may be repeated, collecting values in a list.
+        add_option('--files', type='string', action="append", help="specify files")
+        # Filtering and reporting.
+        add_option('--level', type='string', help="The alarm level, optional parameters [critical, warn, notice, ok]", default='notice')
+        add_option('--output', type='string', help="The format of the output results, choices=[json, html]", default='html')
+        add_option('--store_dir', type='string', help='the dir to store result, current dir by default.', default='./obdiag_analyze/')
+        add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
+
+    def init(self, cmd, args):
+        """Run the base initialisation, set the usage line and return ``self``."""
+        super(ObdiagAnalyzeSQLReviewCommand, self).init(cmd, args)
+        usage = '%s [options]' % self.prev_cmd
+        self.parser.set_usage(usage)
+        return self
+
+    def _do_command(self, obdiag):
+        """Dispatch to ``obdiag.analyze_fuction`` with the ``analyze_sql_review`` function type."""
+        return obdiag.analyze_fuction('analyze_sql_review', self.opts)
+
+
 class ObdiagAnalyzeParameterCommand(MajorCommand):
     def __init__(self):
         super(ObdiagAnalyzeParameterCommand, self).__init__('parameter', 'Analyze oceanbase parameters info')
@@ -863,6 +914,8 @@ def __init__(self):
         super(ObdiagAnalyzeCommand, self).__init__('analyze', 'Analyze oceanbase diagnostic info')
         self.register_command(ObdiagAnalyzeLogCommand())
         self.register_command(ObdiagAnalyzeFltTraceCommand())
+        self.register_command(ObdiagAnalyzeSQLCommand())
+        self.register_command(ObdiagAnalyzeSQLReviewCommand())
         self.register_command(ObdiagAnalyzeParameterCommand())
         self.register_command(ObdiagAnalyzeVariableCommand())
 

diff --git a/docs/analyze_sql.md b/docs/analyze_sql.md
@@ -0,0 +1,31 @@
+## analyze sql
+
+```bash
+$ obdiag analyze sql [options]
+
+Options:
+  --tenant_name=TENANT_NAME
+                        tenant name
+  --host=HOST           tenant connection host
+  --port=PORT           tenant connection port
+  --password=PASSWORD   tenant connection user password
+  --user=USER           tenant connection user name
+  --from=FROM           specify the start of the time range. format: 'yyyy-mm-
+                        dd hh:mm:ss'
+  --to=TO               specify the end of the time range. format: 'yyyy-mm-dd
+                        hh:mm:ss'
+  --since=SINCE         Specify time range that from 'n' [d]ays, 'n' [h]ours
+                        or 'n' [m]inutes. before to now. format: <n> <m|h|d>.
+                        example: 1h.
+  --level=LEVEL         The alarm level, optional parameters [critical, warn,
+                        notice, ok]
+  --output=OUTPUT       The format of the output results, choices=[json, html]
+  --limit=LIMIT         The limit on the number of data rows returned by
+                        sql_audit for the tenant.
+  --store_dir=STORE_DIR
+                        the dir to store result, current dir by default.
+  --elapsed_time=ELAPSED_TIME
+                        The minimum threshold for filtering execution time,
+                        measured in microseconds.
+  -c C                  obdiag custom config
+  -h, --help            Show help and exit.
+  -v, --verbose         Activate verbose output.
+```
diff --git a/docs/analyze_sql_review.md b/docs/analyze_sql_review.md
@@ -0,0 +1,20 @@
+## analyze sql_review
+
+```bash
+$ obdiag analyze sql_review [options]
+
+Options:
+  --host=HOST           tenant connection host
+  --port=PORT           tenant connection port
+  --password=PASSWORD   tenant connection user password
+  --user=USER           tenant connection user name
+  --files=FILES         specify files
+  --level=LEVEL         The alarm level, optional parameters [critical, warn,
+                        notice, ok]
+  --output=OUTPUT       The format of the output results, choices=[json, html]
+  --store_dir=STORE_DIR
+                        the dir to store result, current dir by default.
+  -c C                  obdiag custom config
+  -h, --help            Show help and exit.
+  -v, --verbose         Activate verbose output.
+```