diff --git a/.github/workflows/test_sql_rule.yml b/.github/workflows/test_sql_rule.yml
new file mode 100644
index 00000000..e02a7193
--- /dev/null
+++ b/.github/workflows/test_sql_rule.yml
@@ -0,0 +1,30 @@
+name: Test Full Scan Rule
+
+on:
+  push:
+    branches: "*"
+  pull_request:
+    branches: "*"
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0 # Fetch all history for proper version detection
+
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v3
+      with:
+        python-version: 3.8
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements3.txt
+
+    - name: Run tests
+      run: python -m unittest discover -s test/analyzer/sql -p 'test_*.py'
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3ad42e75..990bea80 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 .idea/
+.vscode/
 venv/
 *.pyc
 *site-packages/
diff --git a/clean_all_result.sh b/clean_all_result.sh
index cfc510d9..bf708de6 100755
--- a/clean_all_result.sh
+++ b/clean_all_result.sh
@@ -1,4 +1,4 @@
-rm -rf ./gather_pack_*
-rm -rf ./analyze_pack_*
-rm -rf ./analyze_flt_result*
-rm -rf ./check_report
\ No newline at end of file
+rm -rf ./obdiag_gather_pack_*
+rm -rf ./obdiag_analyze_pack_*
+rm -rf ./obdiag_analyze_flt_result*
+rm -rf ./obdiag_check_report
\ No newline at end of file
diff --git a/common/ob_connector.py b/common/ob_connector.py
index 1ba0259c..961044e6 100644
--- a/common/ob_connector.py
+++ b/common/ob_connector.py
@@ -46,6 +46,16 @@ def init(self):
         except Exception as e:
             self.stdio.verbose(e)
 
+    def __enter__(self):
+        """Ensures the database connection is open upon entering the 'with' block."""
+        self._connect_db()
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        """Automatically closes the database connection when exiting the 'with' block."""
+        if self.conn:
+            self.conn.close()
+
     def _connect_db(self):
         try:
             self.conn = mysql.connect(
@@ -82,17 +92,28 @@ def execute_sql(self, sql):
         cursor.close()
         return ret
 
-    def execute_sql_return_columns_and_data(self, sql):
+    def execute_sql_return_columns_and_data(self, sql, params=None):
+        """
+        Executes an SQL query and returns column names and data.
+
+        :param sql: The SQL statement to execute, using %s as a placeholder for parameters.
+        :param params: A tuple or list of parameters to substitute into the SQL statement.
+        :return: A tuple containing a list of column names and a list of rows (each a tuple).
+        """
         if self.conn is None:
             self._connect_db()
         else:
            self.conn.ping(reconnect=True)
-        cursor = self.conn.cursor()
-        cursor.execute(sql)
-        column_names = [col[0] for col in cursor.description]
-        ret = cursor.fetchall()
-        cursor.close()
-        return column_names, ret
+
+        with self.conn.cursor() as cursor:
+            if params:
+                cursor.execute(sql, params)
+            else:
+                cursor.execute(sql)
+
+            column_names = [col[0] for col in cursor.description]
+            data = cursor.fetchall()
+            return column_names, data
 
     def execute_sql_return_cursor_dictionary(self, sql):
         if self.conn is None:
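The context-manager hooks pair naturally with the new `params` argument. A minimal usage sketch (the connection values and the `stdio` handle are placeholders taken from the surrounding obdiag context, and the queried table is only an example):

```python
from common.ob_connector import OBConnector

# stdio is assumed to come from the obdiag context; connection values are placeholders.
with OBConnector(ip="127.0.0.1", port=2881, username="root@sys", password="", stdio=stdio, timeout=100) as conn:
    # %s placeholders are filled in by the driver, avoiding manual string interpolation.
    columns, rows = conn.execute_sql_return_columns_and_data(
        "SELECT tenant_id, tenant_name FROM oceanbase.__all_tenant WHERE tenant_id = %s",
        (1,),
    )
    for row in rows:
        print(dict(zip(columns, row)))
```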
+ """ if self.conn is None: self._connect_db() else: self.conn.ping(reconnect=True) - cursor = self.conn.cursor() - cursor.execute(sql) - column_names = [col[0] for col in cursor.description] - ret = cursor.fetchall() - cursor.close() - return column_names, ret + + with self.conn.cursor() as cursor: + if params: + cursor.execute(sql, params) + else: + cursor.execute(sql) + + column_names = [col[0] for col in cursor.description] + data = cursor.fetchall() + return column_names, data def execute_sql_return_cursor_dictionary(self, sql): if self.conn is None: diff --git a/common/tool.py b/common/tool.py index 96629e84..0e9921a2 100644 --- a/common/tool.py +++ b/common/tool.py @@ -49,7 +49,7 @@ from datetime import timedelta from random import choice from io import BytesIO -from copy import copy +import copy from colorama import Fore, Style from ruamel.yaml import YAML from err import EC_SQL_EXECUTE_FAILED @@ -1208,6 +1208,24 @@ def compare_versions_lower(v1, v2, stdio=None): return i < j return len(v1.split(".")) < len(v2.split(".")) + @staticmethod + def mask_passwords(data): + # Make a deep copy of the data to avoid modifying the original + masked_data = copy.deepcopy(data) + + if isinstance(masked_data, dict): + for key, value in masked_data.items(): + if 'password' in key.lower(): + masked_data[key] = '*' * (len(value) if value else 1) + elif isinstance(value, (dict, list)): + masked_data[key] = StringUtils.mask_passwords(value) + elif isinstance(masked_data, list): + for index, item in enumerate(masked_data): + if isinstance(item, (dict, list)): + masked_data[index] = StringUtils.mask_passwords(item) + + return masked_data + class Cursor(SafeStdio): @@ -1396,3 +1414,59 @@ def get_nodes_list(context, nodes, stdio=None): return None return new_nodes return None + + +class SQLUtil(object): + re_trace = re.compile(r'''\/\*.*trace_id((?!\/\*).)*rpc_id.*\*\/''', re.VERBOSE) + re_annotation = re.compile(r'''\/\*((?!\/\*).)*\*\/''', re.VERBOSE) + re_interval = re.compile( + r'''interval\s?(\?|\-?\d+)\s?(day|hour|minute|second|microsecond|week|month|quarter|year|second_microsecond|minute_microsecond|minute_second|hour_microsecond|hour_second|hour_minute|day_microsecond|day_second|day_minute|day_hour|year_month)''', + re.VERBOSE, + ) + re_force_index = re.compile(r'''force[\s]index[\s][(]\w+[)]''', re.VERBOSE) + re_cast_1 = re.compile(r'''cast\(.*?\(.*?\)\)''', re.VERBOSE) + re_cast_2 = re.compile(r'''cast\(.*?\)''', re.VERBOSE) + re_now = re.compile(r'''now\(\)''', re.VERBOSE) + + def remove_sql_text_affects_parser(self, sql): + sql = sql.lower().strip() + sql = self.remove_hint_and_annotate(sql) + sql = self.remove_force_index(sql) + sql = self.remove_now_in_insert(sql) + sql = self.remove_semicolon(sql) + return sql + + def remove_hint_and_annotate(self, sql): + sql = sql.lower() + sql = re.sub(self.re_annotation, '', sql) + sql = re.sub(self.re_trace, '', sql) + return sql + + def replace_interval_day(self, sql): + sql = sql.lower() + sql = re.sub(self.re_interval, '?', sql) + return sql + + def remove_force_index(self, sql): + sql = sql.lower() + sql = re.sub(self.re_force_index, '', sql) + return sql + + def remove_cast(self, sql): + sql = sql.lower() + sql = re.sub(self.re_cast_1, '?', sql) + sql = re.sub(self.re_cast_2, '?', sql) + return sql + + def remove_now_in_insert(self, sql): + sql = sql.lower().lstrip() + if sql.startswith('insert'): + sql = re.sub(self.re_now, '?', sql) + return sql + + def remove_semicolon(self, sql): + sql = sql.strip() + return sql[:-1] if sql[-1] == ';' else 
diff --git a/core.py b/core.py
index 47eece9d..27297831 100644
--- a/core.py
+++ b/core.py
@@ -32,6 +32,8 @@
 from err import CheckStatus, SUG_SSH_FAILED
 from handler.analyzer.analyze_flt_trace import AnalyzeFltTraceHandler
 from handler.analyzer.analyze_log import AnalyzeLogHandler
+from handler.analyzer.analyze_sql import AnalyzeSQLHandler
+from handler.analyzer.analyze_sql_review import AnalyzeSQLReviewHandler
 from handler.analyzer.analyze_parameter import AnalyzeParameterHandler
 from handler.analyzer.analyze_variable import AnalyzeVariableHandler
 from handler.checker.check_handler import CheckHandler
@@ -281,6 +283,14 @@ def analyze_fuction(self, function_type, opt):
             self.set_context(function_type, 'analyze', config)
             handler = AnalyzeFltTraceHandler(self.context)
             handler.handle()
+        elif function_type == 'analyze_sql':
+            self.set_context(function_type, 'analyze', config)
+            handler = AnalyzeSQLHandler(self.context)
+            handler.handle()
+        elif function_type == 'analyze_sql_review':
+            self.set_context(function_type, 'analyze', config)
+            handler = AnalyzeSQLReviewHandler(self.context)
+            handler.handle()
         elif function_type == 'analyze_parameter_non_default':
             self.set_context(function_type, 'analyze', config)
             handler = AnalyzeParameterHandler(self.context, 'non_default')
diff --git a/diag_cmd.py b/diag_cmd.py
index 27b5e167..49b31fb1 100644
--- a/diag_cmd.py
+++ b/diag_cmd.py
@@ -713,6 +713,57 @@ def _do_command(self, obdiag):
         return obdiag.analyze_fuction('analyze_parameter_non_default', self.opts)
 
 
+class ObdiagAnalyzeSQLCommand(ObdiagOriginCommand):
+
+    def __init__(self):
+        super(ObdiagAnalyzeSQLCommand, self).__init__('sql', 'Analyze oceanbase sql from sql_audit')
+        self.parser.add_option('--tenant_name', type='string', help="tenant name")
+        self.parser.add_option('--host', type='string', help="tenant connection host")
+        self.parser.add_option('--port', type='string', help="tenant connection port")
+        self.parser.add_option('--password', type='string', help="tenant connection user password", default='')
+        self.parser.add_option('--user', type='string', help="tenant connection user name")
+        self.parser.add_option('--from', type='string', help="specify the start of the time range. format: 'yyyy-mm-dd hh:mm:ss'")
+        self.parser.add_option('--to', type='string', help="specify the end of the time range. format: 'yyyy-mm-dd hh:mm:ss'")
+        self.parser.add_option('--since', type='string', help="specify a time range of the last 'n' [d]ays, 'n' [h]ours or 'n' [m]inutes before now. format: <n> <m|h|d>. example: 1h.", default='30m')
+        self.parser.add_option('--level', type='string', help="The alarm level, optional parameters [critical, warn, notice, ok]", default='notice')
+        self.parser.add_option('--output', type='string', help="The format of the output results, choices=[json, html]", default='html')
+        self.parser.add_option('--limit', type='string', help="The limit on the number of data rows returned by sql_audit for the tenant.", default=2000)
+        self.parser.add_option('--store_dir', type='string', help='the dir to store result, current dir by default.', default='./obdiag_analyze/')
+        self.parser.add_option('--elapsed_time', type='string', help='The minimum threshold for filtering execution time, measured in microseconds.', default=100000)
+        self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
+
+    def init(self, cmd, args):
+        super(ObdiagAnalyzeSQLCommand, self).init(cmd, args)
+        self.parser.set_usage('%s [options]' % self.prev_cmd)
+        return self
+
+    def _do_command(self, obdiag):
+        return obdiag.analyze_fuction('analyze_sql', self.opts)
+
+
+class ObdiagAnalyzeSQLReviewCommand(ObdiagOriginCommand):
+
+    def __init__(self):
+        super(ObdiagAnalyzeSQLReviewCommand, self).__init__('sql_review', 'Analyze oceanbase sql from files')
+        self.parser.add_option('--host', type='string', help="tenant connection host")
+        self.parser.add_option('--port', type='string', help="tenant connection port")
+        self.parser.add_option('--password', type='string', help="tenant connection user password", default='')
+        self.parser.add_option('--user', type='string', help="tenant connection user name")
+        self.parser.add_option('--files', type='string', action="append", help="specify files")
+        self.parser.add_option('--level', type='string', help="The alarm level, optional parameters [critical, warn, notice, ok]", default='notice')
+        self.parser.add_option('--output', type='string', help="The format of the output results, choices=[json, html]", default='html')
+        self.parser.add_option('--store_dir', type='string', help='the dir to store result, current dir by default.', default='./obdiag_analyze/')
+        self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))
+
+    def init(self, cmd, args):
+        super(ObdiagAnalyzeSQLReviewCommand, self).init(cmd, args)
+        self.parser.set_usage('%s [options]' % self.prev_cmd)
+        return self
+
+    def _do_command(self, obdiag):
+        return obdiag.analyze_fuction('analyze_sql_review', self.opts)
+
+
 class ObdiagAnalyzeParameterCommand(MajorCommand):
     def __init__(self):
         super(ObdiagAnalyzeParameterCommand, self).__init__('parameter', 'Analyze oceanbase parameters info')
@@ -863,6 +914,8 @@ def __init__(self):
         super(ObdiagAnalyzeCommand, self).__init__('analyze', 'Analyze oceanbase diagnostic info')
         self.register_command(ObdiagAnalyzeLogCommand())
         self.register_command(ObdiagAnalyzeFltTraceCommand())
+        self.register_command(ObdiagAnalyzeSQLCommand())
+        self.register_command(ObdiagAnalyzeSQLReviewCommand())
         self.register_command(ObdiagAnalyzeParameterCommand())
         self.register_command(ObdiagAnalyzeVariableCommand())
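Two representative invocations of the new subcommands, matching the docs that follow (assuming a cluster already configured in `~/.obdiag/config.yml`; host and credentials are placeholders):

```bash
# Audit-driven analysis: last hour, statements slower than 100 ms, HTML report
obdiag analyze sql --host 127.0.0.1 --port 2881 --user root@test_tenant --since 1h --elapsed_time 100000 --output html

# Static review of local SQL files against the built-in rule set
obdiag analyze sql_review --host 127.0.0.1 --port 2881 --user root@test_tenant --files ./sql/
```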
diff --git a/docs/analyze_sql.md b/docs/analyze_sql.md
new file mode 100644
index 00000000..e4d5a101
--- /dev/null
+++ b/docs/analyze_sql.md
@@ -0,0 +1,31 @@
+## analyze sql
+
+```bash
+$ obdiag analyze sql [options]
+
+Options:
+  --host=HOST           tenant connection host
+  --port=PORT           tenant connection port
+  --password=PASSWORD   tenant connection user password
+  --user=USER           tenant connection user name
+  --from=FROM           specify the start of the time range. format: 'yyyy-mm-
+                        dd hh:mm:ss'
+  --to=TO               specify the end of the time range. format: 'yyyy-mm-dd
+                        hh:mm:ss'
+  --since=SINCE         specify a time range of the last 'n' [d]ays, 'n'
+                        [h]ours or 'n' [m]inutes before now. format: <n>
+                        <m|h|d>. example: 1h.
+  --level=LEVEL         The alarm level, optional parameters [critical, warn,
+                        notice, ok]
+  --output=OUTPUT       The format of the output results, choices=[json, html]
+  --limit=LIMIT         The limit on the number of data rows returned by
+                        sql_audit for the tenant.
+  --store_dir=STORE_DIR
+                        the dir to store result, current dir by default.
+  --elapsed_time=ELAPSED_TIME
+                        The minimum threshold for filtering execution time,
+                        measured in microseconds.
+  -c C                  obdiag custom config
+  -h, --help            Show help and exit.
+  -v, --verbose         Activate verbose output.
+```
diff --git a/docs/analyze_sql_review.md b/docs/analyze_sql_review.md
new file mode 100644
index 00000000..4b560960
--- /dev/null
+++ b/docs/analyze_sql_review.md
@@ -0,0 +1,20 @@
+## analyze sql_review
+
+```bash
+$ obdiag analyze sql_review [options]
+
+Options:
+  --host=HOST           tenant connection host
+  --port=PORT           tenant connection port
+  --password=PASSWORD   tenant connection user password
+  --user=USER           tenant connection user name
+  --files=FILES         specify files
+  --level=LEVEL         The alarm level, optional parameters [critical, warn,
+                        notice, ok]
+  --output=OUTPUT       The format of the output results, choices=[json, html]
+  --store_dir=STORE_DIR
+                        the dir to store result, current dir by default.
+  -c C                  obdiag custom config
+  -h, --help            Show help and exit.
+  -v, --verbose         Activate verbose output.
+```
diff --git a/handler/analyzer/analyze_sql.py b/handler/analyzer/analyze_sql.py
new file mode 100644
index 00000000..d54168e3
--- /dev/null
+++ b/handler/analyzer/analyze_sql.py
@@ -0,0 +1,423 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+ +""" +@time: 2024/5/20 +@file: analyze_sql.py +@desc: +""" +import datetime +import time +import os +from tabulate import tabulate +from common.constant import const +from common.tool import StringUtils, Util +from common.tool import TimeUtils +from common.ob_connector import OBConnector +from handler.meta.sql_meta import GlobalSqlMeta +from handler.meta.html_meta import GlobalHtmlMeta +from common.tool import FileUtil +from handler.analyzer.sql.rule_manager import SQLReviewRuleManager +from handler.analyzer.sql.meta.sys_tenant_meta import SysTenantMeta +from handler.gather.gather_scenes import GatherSceneHandler +from common.command import get_observer_version + + +class AnalyzeSQLHandler(object): + def __init__(self, context): + super(AnalyzeSQLHandler, self).__init__() + self.context = context + self.stdio = context.stdio + self.from_time_str = None + self.to_time_str = None + self.from_timestamp = None + self.to_timestamp = None + self.config_path = const.DEFAULT_CONFIG_PATH + self.db_connector_provided = False + self.tenant_name = 'sys' + self.db_user = None + self.local_stored_parrent_path = os.path.abspath('./obdiag_analyze/') + self.sql_audit_limit = 2000 + self.elapsed_time = 100000 + self.output_type = 'html' + self.level = 'notice' + self.ob_version = '4.0.0.0' + self.sql_audit_keys = [ + 'svrIp', + 'svrPort', + 'requestId', + 'clientIp', + 'tenantName', + 'tenantId', + 'dbName', + 'dbId', + 'querySql', + 'planId', + 'sqlId', + 'traceId', + 'requestTime', + 'returnRows', + 'affectedRows', + 'partitionCount', + 'retCode', + 'event0WaitTimeUs', + 'event1WaitTimeUs', + 'event2WaitTimeUs', + 'event3WaitTimeUs', + 'totalWaitTimeMicro', + 'totalWaits', + 'rpcCount', + 'planType', + 'isInnerSql', + 'isExecutorRpc', + 'isHitPlan', + 'elapsedTime', + 'cpuTime', + 'netTime', + 'netWaitTime', + 'queueTime', + 'decodeTime', + 'getPlanTime', + 'executeTime', + 'applicationWaitTime', + 'concurrencyWaitTime', + 'userIoWaitTime', + 'scheduleTime', + 'rowCacheHit', + 'bloomFilterCacheHit', + 'blockCacheHit', + 'blockIndexCacheHit', + 'diskReads', + 'retryCount', + 'tableScan', + 'consistencyLevel', + 'memstoreReadRowCount', + 'ssstoreReadRowCount', + 'planCachePlanExplain', + ] + + def init_inner_config(self): + self.stdio.print('init inner config start') + self.inner_config = self.context.inner_config + self.stdio.verbose('inner config: {0}'.format(self.inner_config)) + basic_config = self.inner_config['obdiag']['basic'] + self.config_path = basic_config['config_path'] + self.stdio.print('init inner config complete') + return True + + def init_config(self): + self.stdio.print('init cluster config start') + ob_cluster = self.context.cluster_config + self.stdio.verbose('cluster config: {0}'.format(StringUtils.mask_passwords(ob_cluster))) + self.ob_cluster = ob_cluster + self.sys_connector = OBConnector(ip=ob_cluster.get("db_host"), port=ob_cluster.get("db_port"), username=ob_cluster.get("tenant_sys").get("user"), password=ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100) + self.ob_cluster_name = ob_cluster.get("ob_cluster_name") + self.stdio.print('init cluster config complete') + return True + + def init_ob_version(self): + self.stdio.print('get observer version start') + self.ob_version = get_observer_version(self.context) + self.stdio.print('get observer version complete, version:{0}'.format(self.ob_version)) + return True + + def init_db_connector(self): + if self.db_user: + self.db_connector_provided = True + self.db_connector = 
OBConnector(ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=self.db_user, password=self.db_password, stdio=self.stdio, timeout=100) + else: + self.db_connector = self.sys_connector + + def init_option(self): + self.stdio.print('init option start') + options = self.context.options + self.stdio.verbose('options:[{0}]'.format(options)) + from_option = Util.get_option(options, 'from') + to_option = Util.get_option(options, 'to') + since_option = Util.get_option(options, 'since') + db_user_option = Util.get_option(options, 'user') + if db_user_option: + tenant_name = self.__extract_tenant_name(db_user_option) + if tenant_name: + self.db_user = db_user_option + self.tenant_name = tenant_name + db_password_option = Util.get_option(options, 'password') + self.db_password = db_password_option + tenant_name_option = Util.get_option(options, 'tenant_name') + if tenant_name_option: + self.tenant_name = tenant_name_option + level_option = Util.get_option(options, 'level') + if level_option: + self.level = level_option + store_dir_option = Util.get_option(options, 'store_dir') + if store_dir_option is not None: + if not os.path.exists(os.path.abspath(store_dir_option)): + self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + os.makedirs(os.path.abspath(store_dir_option)) + self.local_stored_parrent_path = os.path.abspath(store_dir_option) + output_option = Util.get_option(options, 'output') + if output_option: + self.output_type = output_option + limit_option = Util.get_option(options, 'limit') + if limit_option: + self.sql_audit_limit = limit_option + elapsed_time_option = Util.get_option(options, 'elapsed_time') + if elapsed_time_option: + self.elapsed_time = elapsed_time_option + if from_option is not None and to_option is not None: + try: + from_timestamp = TimeUtils.parse_time_str(from_option) + to_timestamp = TimeUtils.parse_time_str(to_option) + self.from_time_str = from_option + self.to_time_str = to_option + except: + self.stdio.exception('Error: Datetime is invalid. Must be in format yyyy-mm-dd hh:mm:ss. 
from_datetime={0}, to_datetime={1}'.format(from_option, to_option)) + return False + if to_timestamp <= from_timestamp: + self.stdio.error('Error: from datetime is larger than to datetime, please check.') + return False + elif (from_option is None or to_option is None) and since_option is not None: + now_time = datetime.datetime.now() + self.to_time_str = (now_time + datetime.timedelta(minutes=1)).strftime('%Y-%m-%d %H:%M:%S') + self.from_time_str = (now_time - datetime.timedelta(seconds=TimeUtils.parse_time_length_to_sec(since_option))).strftime('%Y-%m-%d %H:%M:%S') + self.stdio.print('analyze sql from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) + else: + self.stdio.warn('no time option provided, default processing is based on the last 30 minutes') + now_time = datetime.datetime.now() + self.to_time_str = (now_time + datetime.timedelta(minutes=1)).strftime('%Y-%m-%d %H:%M:%S') + if since_option is not None: + self.from_time_str = (now_time - datetime.timedelta(seconds=TimeUtils.parse_time_length_to_sec(since_option))).strftime('%Y-%m-%d %H:%M:%S') + else: + self.from_time_str = (now_time - datetime.timedelta(minutes=30)).strftime('%Y-%m-%d %H:%M:%S') + self.stdio.print('analyze sql from_time: {0}, to_time: {1}'.format(self.from_time_str, self.to_time_str)) + self.from_timestamp = TimeUtils.datetime_to_timestamp(self.from_time_str, self.stdio) + self.to_timestamp = TimeUtils.datetime_to_timestamp(self.to_time_str, self.stdio) + self.stdio.print('init option complete') + return True + + def handle(self): + self.start_time = time.time() + if not self.init_option(): + self.stdio.error('init option failed') + return False + if not self.init_inner_config(): + self.stdio.error('init inner config failed') + return False + if not self.init_config(): + self.stdio.error('init config failed') + return False + if not self.init_ob_version(): + self.stdio.error('init ob version failed') + return False + self.init_db_connector() + self.local_store_path = os.path.join(self.local_stored_parrent_path, "obdiag_analyze_sql_result_{0}_{1}.html".format(TimeUtils.timestamp_to_filename_time(self.from_timestamp), TimeUtils.timestamp_to_filename_time(self.to_timestamp))) + self.stdio.print("use {0} as result store path.".format(self.local_store_path)) + all_tenant_results = {} + if self.tenant_name: + meta = SysTenantMeta(self.sys_connector, self.stdio, self.ob_version) + self.stdio.print('select sql tenant name list start') + tenant_names = meta.get_ob_tenant_name_list() + self.stdio.print('select sql tenant name list end, result:{0}'.format(tenant_names)) + else: + tenant_names = [self.tenant_name] + for tenant_name in tenant_names: + self.stdio.print('select tenant:{0} sql audit start'.format(tenant_name[0])) + inner_results = self.__select_sql_audit(tenant_name[0]) + self.stdio.print('select tenant:{0} sql audit complete'.format(tenant_name[0])) + filter_results = self.__filter_max_elapsed_time_with_same_sql_id(inner_results) + all_tenant_results[tenant_name] = filter_results + for tenant_name, results in all_tenant_results.items(): + for item in results: + item['planCachePlanExplain'] = self.__get_plan_cache_plan_explain(item) + item['diagnosticEntries'] = self.__parse_sql_review(item["querySql"]) + if self.output_type == "html": + data = self.__gather_cluster_info() + html_result = self.__generate_html_result(all_tenant_results, data) + if html_result: + FileUtil.write_append(self.local_store_path, html_result) + self.__print_result() + else: + pass + + def 
__extract_tenant_name(self, username):
+        """
+        Extracts the tenant name from the given OBClient username format.
+        Parameters:
+        username (str): The username portion of the OBClient connection string, formatted as 'user@tenantName' or 'user@tenantName#clusterName' or 'clusterName:tenantName:user'.
+        Returns:
+        The tenant name, or None if parsing fails
+        """
+        # Check for 'user@tenantName' or 'user@tenantName#clusterName' format
+        if "@" in username:
+            parts = username.split('@')
+            if len(parts) == 2:
+                return parts[1].split('#')[0] if '#' in parts[1] else parts[1]
+
+        # Check for 'clusterName:tenantName:user' format
+        elif ":" in username:
+            parts = username.split(':')
+            if len(parts) >= 3:
+                return parts[1]
+
+        self.stdio.error("unable to recognize the user name format")
+        return None
+
+    def __select_sql_audit(self, tenant_name):
+        sql = str(GlobalSqlMeta().get_value(key="get_sql_audit_ob4_for_sql_review"))
+        replacements = {
+            "##REPLACE_TENANT_NAME##": tenant_name,
+            "##REPLACE_REQUEST_FROM_TIME##": str(self.from_timestamp),
+            "##REPLACE_REQUEST_TO_TIME##": str(self.to_timestamp),
+            "##REPLACE_ELAPSED_TIME##": str(self.elapsed_time),
+            "##REPLACE_LIMIT##": str(self.sql_audit_limit),
+        }
+        for old, new in replacements.items():
+            sql = sql.replace(old, new)
+        self.stdio.verbose("execute SQL: {0}".format(sql))
+        columns, rows = self.db_connector.execute_sql_return_columns_and_data(sql)
+        result = []
+        for row in rows:
+            result.append(dict(zip(columns, row)))
+        self.stdio.print("execute select sql_audit SQL complete, the length of raw result is {0}".format(len(result)))
+        return result
+
+    def __get_plan_cache_plan_explain(self, data):
+        meta = SysTenantMeta(self.sys_connector, self.stdio, self.ob_version)
+        column_names, table_data = meta.get_plain_explain_raw(data['tenantId'], data['svrIp'], data['svrPort'], data['planId'])
+        formatted_table = tabulate(table_data, headers=column_names, tablefmt="grid")
+        return formatted_table
+
+    def __filter_max_elapsed_time_with_same_sql_id(self, data):
+        # Create a dictionary to hold the max elapsed time for each db_id, sql_id pair
+        max_elapsed_times = {}
+        for item in data:
+            key = (item['tenantId'], item['dbId'], item['sqlId'])
+            if key not in max_elapsed_times or item['elapsedTime'] > max_elapsed_times[key]['elapsedTime']:
+                max_elapsed_times[key] = item
+        # Extract the values which are the filtered list
+        filtered_data = list(max_elapsed_times.values())
+        self.stdio.print("filter max elapsed time with same sql_id complete, raw data length:{0}, filter data length:{1}".format(len(data), len(filtered_data)))
+        return filtered_data
+
+    def __parse_sql_review(self, sql):
+        rules = SQLReviewRuleManager()
+        result = rules.manager.analyze_sql_statement(sql, self.stdio, self.level)
+        return result
+
+    def __generate_current_row_selected_keys(self, diagnostics, keys, rowspan_length):
+        current_row = [f"<td rowspan={rowspan_length}>{diagnostics[key]}</td>" for key in keys]
+        return current_row
+
+    def __generate_html_table(self, diagnostics):
+        rows = []
+        rowspan_length = len(diagnostics['diagnosticEntries'])
+        current_row = self.__generate_current_row_selected_keys(diagnostics, self.sql_audit_keys, rowspan_length)
+        table_head = ''.join(current_row)
+        for idx, diag in enumerate(diagnostics['diagnosticEntries']):
+            if idx == 0:
+                # Start a new row with SQL text having rowspan equal to the number of diagnostics.
+                row = table_head + f"<td>{diag.class_name}</td>", f"<td>{diag.description}</td>", f"<td>{diag.level.string}</td>", f"<td>{diag.suggestion}</td>"
+                rows.append("<tr>" + "".join(row) + "</tr>")
+            else:
+                rows.append("<tr>" + f"<td>{diag.class_name}</td>" + f"<td>{diag.description}</td>" + f"<td>{diag.level.string}</td>" + f"<td>{diag.suggestion}</td>" + "</tr>")
+        return "".join(rows)
+
+    def __generate_table_headers(self):
+        headers_html = "".join([f"<th>{item}</th>" for item in self.sql_audit_keys])
+        return headers_html
+
+    def __generate_cluster_info_html(self, data):
+        result = f"""
+        <div>
+            <h2>
+                Cluster Information
+            </h2>
+            <div>
+                <pre>{data}</pre>
+            </div>
+        </div>
+        """
+        result += GlobalHtmlMeta().get_value(key="html_script_templete")
+        return result
+
+    def __gather_cluster_info(self):
+        handler = GatherSceneHandler(context=self.context, gather_pack_dir=self.local_stored_parrent_path, is_inner=True)
+        return handler.handle()
+
+    def __generate_html_result(self, all_results, cluster_data):
+        if len(all_results) == 0:
+            self.stdio.error('sql audit result is empty, unable to generate HTML')
+            return None
+        self.stdio.print('generate html result start')
+        full_html = ""
+        table_headers = self.__generate_table_headers()
+        cluster_info = self.__generate_cluster_info_html(cluster_data)
+        all_sql_entries_html = ""
+        i = 0
+        for key, value in all_results.items():
+            tenant_sql_entries_html = ""
+            for data in value:
+                i += 1
+                sql_entries_html = "".join(self.__generate_html_table(data))
+                tenant_sql_entries_html += sql_entries_html
+
+            if len(tenant_sql_entries_html) > 0:
+                all_sql_entries_html += f"""
+        <div>
+            <h2>
+                Tenant[{key[0]}] SQL Diagnostic Result
+            </h2>
+            <div>
+                <table>
+                    <thead>
+                        <tr>
+                            {table_headers}
+                            <th>Diagnostic Rule</th>
+                            <th>Rule Description</th>
+                            <th>Rule Level</th>
+                            <th>Suggestion</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        {tenant_sql_entries_html}
+                    </tbody>
+                </table>
+            </div>
+        </div>
+        """
+
+        full_html += (
+            GlobalHtmlMeta().get_value(key="analyze_sql_html_head_template")
+            + f"""
+        <div>
+            <h2>
+                Command Information
+            </h2>
+            <div>
+                <p>
+                    Command: "obdiag analyze sql"
+                </p>
+                <p>
+                    Options: {self.context.options}
+                </p>
+            </div>
+        </div>
+        {cluster_info}
+        """
+            + all_sql_entries_html
+        )
+        full_html += GlobalHtmlMeta().get_value(key="html_footer_temple")
+        self.stdio.print('generate html result complete')
+        return full_html
+
+    def __print_result(self):
+        self.end_time = time.time()
+        elapsed_time = self.end_time - self.start_time
+        data = [["Status", "Result Details", "Time"], ["Completed", self.local_store_path, f"{elapsed_time:.2f} s"]]
+        table = tabulate(data, headers="firstrow", tablefmt="grid")
+        self.stdio.print("\nAnalyze SQL Summary:")
+        self.stdio.print(table)
+        self.stdio.print("\n")
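For reference, the two username formats accepted by `__extract_tenant_name` above behave as follows; a standalone mirror of its logic, for illustration only:

```python
# Standalone mirror of AnalyzeSQLHandler.__extract_tenant_name.
def extract_tenant_name(username):
    if "@" in username:
        parts = username.split('@')
        if len(parts) == 2:
            # 'user@tenant' or 'user@tenant#cluster'
            return parts[1].split('#')[0]
    elif ":" in username:
        parts = username.split(':')
        if len(parts) >= 3:
            # 'cluster:tenant:user'
            return parts[1]
    return None

assert extract_tenant_name('root@test_tenant') == 'test_tenant'
assert extract_tenant_name('root@test_tenant#obcluster') == 'test_tenant'
assert extract_tenant_name('obcluster:test_tenant:root') == 'test_tenant'
```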
diff --git a/handler/analyzer/analyze_sql_review.py b/handler/analyzer/analyze_sql_review.py
new file mode 100644
index 00000000..1b69f3eb
--- /dev/null
+++ b/handler/analyzer/analyze_sql_review.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/20
+@file: analyze_sql_review.py
+@desc:
+"""
+import os
+import time
+import sqlparse
+from tabulate import tabulate
+from colorama import Fore, Style
+from common.constant import const
+from common.tool import StringUtils, Util
+from common.tool import TimeUtils
+from common.tool import FileUtil
+from common.ob_connector import OBConnector
+from handler.analyzer.sql.rule_manager import SQLReviewRuleManager
+from handler.meta.sql_meta import GlobalSqlMeta
+from handler.meta.html_meta import GlobalHtmlMeta
+
+
+class AnalyzeSQLReviewHandler(object):
+    def __init__(self, context):
+        super(AnalyzeSQLReviewHandler, self).__init__()
+        self.context = context
+        self.stdio = context.stdio
+        self.from_time_str = None
+        self.to_time_str = None
+        self.config_path = const.DEFAULT_CONFIG_PATH
+        self.analyze_files_list = None
+        self.directly_analyze_files = False
+        self.level = 'notice'
+        self.local_store_path = None
+        self.output_type = 'html'
+
+    def init_inner_config(self):
+        self.stdio.print("init inner config start")
+        self.inner_config = self.context.inner_config
+        self.stdio.verbose('inner config: {0}'.format(self.inner_config))
+        basic_config = self.inner_config['obdiag']['basic']
+        self.config_path = basic_config['config_path']
+        self.stdio.print("init inner config complete")
+        return True
+
+    def init_config(self):
+        self.stdio.print('init cluster config start')
+        ob_cluster = self.context.cluster_config
+        self.stdio.verbose('cluster config: {0}'.format(StringUtils.mask_passwords(ob_cluster)))
+        self.ob_cluster = ob_cluster
+        self.sys_connector = OBConnector(ip=ob_cluster.get("db_host"), port=ob_cluster.get("db_port"), username=ob_cluster.get("tenant_sys").get("user"), password=ob_cluster.get("tenant_sys").get("password"), stdio=self.stdio, timeout=100)
+        self.ob_cluster_name = ob_cluster.get("ob_cluster_name")
+        self.stdio.print('init cluster config complete')
+        return True
+
+    def init_db_connector(self):
+        if self.db_user:
+            self.stdio.verbose("init db connector start")
+            self.db_connector_provided = True
+            self.db_connector = OBConnector(ip=self.ob_cluster.get("db_host"), port=self.ob_cluster.get("db_port"), username=self.db_user,
password=self.db_password, stdio=self.stdio, timeout=100) + self.stdio.verbose("init db connector complete") + else: + self.db_connector = self.sys_connector + + def init_option(self): + self.stdio.print('init option start') + options = self.context.options + self.stdio.verbose('options:[{0}]'.format(options)) + files_option = Util.get_option(options, 'files') + if files_option: + self.directly_analyze_files = True + self.analyze_files_list = files_option + db_user_option = Util.get_option(options, 'user') + db_password_option = Util.get_option(options, 'password') + tenant_name_option = Util.get_option(options, 'tenant_name') + if tenant_name_option is not None: + self.tenant_name = tenant_name_option + level_option = Util.get_option(options, 'level') + if level_option: + self.level = level_option + store_dir_option = Util.get_option(options, 'store_dir') + if store_dir_option is not None: + if not os.path.exists(os.path.abspath(store_dir_option)): + self.stdio.warn('Error: args --store_dir [{0}] incorrect: No such directory, Now create it'.format(os.path.abspath(store_dir_option))) + os.makedirs(os.path.abspath(store_dir_option)) + self.local_stored_parrent_path = os.path.abspath(store_dir_option) + output_option = Util.get_option(options, 'output') + if output_option: + self.output_type = output_option + self.db_user = db_user_option + self.db_password = db_password_option + self.stdio.print('init option complete') + return True + + def handle(self): + self.start_time = time.time() + if not self.init_option(): + self.stdio.error('init option failed') + return False + if not self.init_inner_config(): + self.stdio.error('init inner config failed') + return False + if not self.init_config(): + self.stdio.error('init config failed') + return False + self.init_db_connector() + self.local_store_path = os.path.join(self.local_stored_parrent_path, "obdiag_sql_review_result_{0}.html".format(TimeUtils.timestamp_to_filename_time(TimeUtils.get_current_us_timestamp()))) + self.stdio.print("use {0} as result store path.".format(self.local_store_path)) + all_results = self.__directly_analyze_files() + results = self.__parse_results(all_results) + if self.output_type == "html": + html_result = self.__generate_html_result(results) + FileUtil.write_append(self.local_store_path, html_result) + else: + pass + self.__print_result() + + def __directly_analyze_files(self): + sql_files = self.__get_sql_file_list() + if len(sql_files) == 0: + self.stdio.warn("failed to find SQL files from the --files option provided") + return None + file_results = {} + sql_results = {} + for file in sql_files: + sql_list = self.__parse_sql_file(file) + for sql in sql_list: + rules = SQLReviewRuleManager() + result = rules.manager.analyze_sql_statement(sql, self.stdio, self.level) + sql_results[sql] = result + file_results[file] = sql_results + return file_results + + def __get_sql_file_list(self): + """ + :param: + :return: sql_files + """ + sql_files = [] + if self.analyze_files_list and len(self.analyze_files_list) > 0: + for path in self.analyze_files_list: + if os.path.exists(path): + if os.path.isfile(path): + sql_files.append(path) + else: + sql_file_list = FileUtil.find_all_file(path) + if len(sql_file_list) > 0: + sql_files.extend(sql_file_list) + self.stdio.print("files to be processed: {0}".format(sql_files)) + return sql_files + + def __parse_sql_file(self, file_path): + with open(file_path, 'r') as file: + sql_content = file.read() + statements = sqlparse.split(sql_content) + sql_list = [stmt for stmt in statements 
if stmt.strip()]
+        return sql_list
+
+    def __parse_results(self, results):
+        reports = []
+        for file_name, file_results in results.items():
+            diagnostic_entries = []
+            for sql, sql_results in file_results.items():
+                diagnostics = []
+                for sql_result in sql_results:
+                    diagnostic = {"ruleClassName": sql_result.class_name, "ruleName": sql_result.rule_name, "ruleDescription": sql_result.description, "ruleLevel": sql_result.level.value, "suggestion": sql_result.suggestion}
+                    diagnostics.append(diagnostic)
+                diagnostic_entry = {"sqlText": sql, "diagnostics": diagnostics}
+                diagnostic_entries.append(diagnostic_entry)
+            report = {"command": "obdiag analyze sql_review", "options": {"files": file_name}, "diagnosticEntries": diagnostic_entries}
+            reports.append(report)
+        return reports
+
+    def __generate_html_table(self, sql_entry):
+        diagnostics = sql_entry["diagnostics"]
+        sql_text = sql_entry["sqlText"]
+        rows = []
+        current_row = [f"<td rowspan={len(diagnostics)}>{sql_text}</td>"]
+
+        for idx, diag in enumerate(diagnostics):
+            if idx == 0:
+                # Start a new row with SQL text having rowspan equal to the number of diagnostics.
+                row = current_row + [f"<td>{diag['ruleClassName']}</td>", f"<td>{diag['ruleDescription']}</td>", f"<td>{diag['ruleLevel'][1]}</td>", f"<td>{diag['suggestion']}</td>"]
+                rows.append("<tr>" + "".join(row) + "</tr>")
+            else:
+                rows.append("<tr>" + f"<td>{diag['ruleClassName']}</td>" + f"<td>{diag['ruleDescription']}</td>" + f"<td>{diag['ruleLevel'][1]}</td>" + f"<td>{diag['suggestion']}</td>" + "</tr>")
+        return "".join(rows)
+
+    def __generate_html_result(self, all_results):
+        full_html = ""
+        for data in all_results:
+            diagnostic_entries = data["diagnosticEntries"]
+            sql_entries_html = "".join([self.__generate_html_table(entry) for entry in diagnostic_entries])
+            full_html += (
+                GlobalHtmlMeta().get_value(key="sql_review_html_head_template")
+                + f"""
+            <p>
+                Command: {data["command"]}
+            </p>
+            <p>
+                Files: {data["options"]["files"]}
+            </p>
+            <h2>
+                Diagnostic Result
+            </h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>SQL Text</th>
+                        <th>Diagnostic Rule</th>
+                        <th>Rule Description</th>
+                        <th>Rule Level</th>
+                        <th>Tuning Suggestion</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {sql_entries_html}
+                </tbody>
+            </table>
+            """
+            )
+        full_html += GlobalHtmlMeta().get_value(key="html_footer_temple")
+        return full_html
+
+    def __print_result(self):
+        self.end_time = time.time()
+        elapsed_time = self.end_time - self.start_time
+        data = [["Status", "Result Details", "Time"], ["Completed", self.local_store_path, f"{elapsed_time:.2f} s"]]
+        table = tabulate(data, headers="firstrow", tablefmt="grid")
+        self.stdio.print("\nAnalyze SQL Review Summary:")
+        self.stdio.print(table)
+        self.stdio.print("\n")
diff --git a/handler/analyzer/sql/__init__.py b/handler/analyzer/sql/__init__.py
new file mode 100644
index 00000000..69b7c2fa
--- /dev/null
+++ b/handler/analyzer/sql/__init__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/21
+@file: __init__.py
+@desc:
+"""
diff --git a/handler/analyzer/sql/engine.py b/handler/analyzer/sql/engine.py
new file mode 100644
index 00000000..699406af
--- /dev/null
+++ b/handler/analyzer/sql/engine.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/5/21
+@file: engine.py
+@desc:
+"""
+
+from sqlgpt_parser.parser.oceanbase_parser import parser as oceanbase_parser
+
+
+class Engine(object):
+    def __new__(cls):
+        singleton = cls.__dict__.get('__singleton__')
+        if singleton is not None:
+            return singleton
+
+        cls.__singleton__ = singleton = object.__new__(cls)
+
+        return singleton
+
+    def parse(self, sql, tracking=False):
+        return oceanbase_parser.parse(sql, tracking=tracking)
diff --git a/handler/analyzer/sql/meta/__init__.py b/handler/analyzer/sql/meta/__init__.py
new file mode 100644
index 00000000..69b7c2fa
--- /dev/null
+++ b/handler/analyzer/sql/meta/__init__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+ +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/meta/metadata.py b/handler/analyzer/sql/meta/metadata.py new file mode 100644 index 00000000..faf56b4b --- /dev/null +++ b/handler/analyzer/sql/meta/metadata.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: metadata.py +@desc: +""" + +from typing import List +from enum import Enum, unique + + +@unique +class IndexType(Enum): + PRIMARY = '1.primary' + UNIQUE = '2.unique' + NORMAL = '3.normal' + + +class MetaData(object): + def __init__(self, table_list: List, statistics_list: List): + self.table_list = table_list + self.statistics_list = statistics_list + + +class Table(object): + def __init__(self, database_name, table_name, column_list: List, index_list: List, table_rows): + self.table_name = table_name + self.database_name = database_name + self.column_list = column_list + self.index_list = index_list + self.table_rows = table_rows + + +class Column(object): + def __init__(self, column_name, column_type, column_nullable): + self.column_name = column_name + self.column_type = column_type + self.column_nullable = column_nullable + + +class Index(object): + def __init__( + self, + index_name, + column_list: List, + index_type: IndexType, + index_all_match=None, + index_back=None, + extract_range=None, + has_interesting_order=None, + ): + self.index_name = index_name + self.column_list = column_list + self.column_count = len(column_list) + self.index_type = index_type + self.index_all_match = index_all_match + self.index_back = index_back + self.extract_range = extract_range + self.has_interesting_order = has_interesting_order + + +class Selectivity(object): + def __init__(self, column_name, min_value, max_value, ndv=None): + self.column_name = column_name + self.min_value = min_value + self.max_value = max_value + self.ndv = ndv + + +class Statistics(object): + def __init__(self, database_name, table_name, selectivity_list: List): + self.database_name = database_name + self.table_name = table_name + self.selectivity_list = selectivity_list diff --git a/handler/analyzer/sql/meta/sys_tenant_meta.py b/handler/analyzer/sql/meta/sys_tenant_meta.py new file mode 100644 index 00000000..d1bf40d3 --- /dev/null +++ b/handler/analyzer/sql/meta/sys_tenant_meta.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +""" +@time: 2024/6/21 +@file: sys_tenant_meta.py +@desc: +""" +from handler.meta.sql_meta import GlobalSqlMeta +from common.ob_connector import OBConnector +from common.tool import StringUtils + + +class SysTenantMeta(object): + + def __init__(self, connector: OBConnector, stdio, ob_version='4.0.0.0'): + self.sys_connector = connector + self.stdio = stdio + self.ob_version = ob_version + + def get_tables(self, tenant_id: int, db_name: str): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tables_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tables")) + sql = sql.replace('##REPLACE_DATABASE_NAME##', db_name) + self.stdio.verbose("get tables excute SQL: {0}".format(sql)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = dict(zip(columns, rows)) + return results + + def get_database_name(self, tenant_id, database_id): + sql = str(GlobalSqlMeta().get_value(key="get_database_name")) + sql = sql.replace('##REPLACE_TENANT_ID##', str(tenant_id)).replace('REPLACE_DATABASE_ID', str(database_id)) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = dict(zip(columns, rows)) + return results + + def get_plain_explain(self, tenant_id: int, svr_ip: str, port: int, plan_id: int): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains")) + replacements = {"##REPLACE_TENANT_ID##": str(tenant_id), "##REPLACE_SVR_IP##": svr_ip, "##REPLACE_SVR_PORT##": str(port), "##REPLACE_PLAN_ID##": str(plan_id)} + for old, new in replacements.items(): + sql = sql.replace(old, new) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + results = dict(zip(columns, rows)) + return results + + def get_plain_explain_raw(self, tenant_id: int, svr_ip: str, port: int, plan_id: int): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains_for_ob4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_plan_explains")) + replacements = {"##REPLACE_TENANT_ID##": str(tenant_id), "##REPLACE_SVR_IP##": svr_ip, "##REPLACE_SVR_PORT##": str(port), "##REPLACE_PLAN_ID##": str(plan_id)} + for old, new in replacements.items(): + sql = sql.replace(old, new) + columns, rows = self.sys_connector.execute_sql_return_columns_and_data(sql) + return columns, rows + + def get_ob_tenant_name_list(self): + if StringUtils.compare_versions_greater(self.ob_version, '4.0.0.0'): + sql = str(GlobalSqlMeta().get_value(key="get_tenant_name_list_for_v4")) + else: + sql = str(GlobalSqlMeta().get_value(key="get_tenant_name_list")) + results = self.sys_connector.execute_sql(sql) + return results diff --git a/handler/analyzer/sql/rule_manager.py b/handler/analyzer/sql/rule_manager.py new file mode 100644 index 00000000..c2e7db48 --- /dev/null +++ b/handler/analyzer/sql/rule_manager.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
diff --git a/handler/analyzer/sql/rule_manager.py b/handler/analyzer/sql/rule_manager.py
new file mode 100644
index 00000000..c2e7db48
--- /dev/null
+++ b/handler/analyzer/sql/rule_manager.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/6/11
+@file: rule_manager.py
+@desc:
+"""
+
+from typing import Dict, Type, List
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.abstract_rule import AbstractRule
+from handler.analyzer.sql.rules.result import Result
+from handler.analyzer.sql.rules.review.arithmetic import ArithmeticRule
+from handler.analyzer.sql.rules.review.full_scan import FullScanRule
+from handler.analyzer.sql.rules.review.is_null import IsNullRule
+from handler.analyzer.sql.rules.review.large_in_clause import LargeInClauseAdjustedRule
+from handler.analyzer.sql.rules.review.multi_table_join import MultiTableJoinRule
+from handler.analyzer.sql.rules.review.select_all import SelectAllRule
+from handler.analyzer.sql.rules.review.update_delete_multi_table import UpdateDeleteMultiTableRule
+from handler.analyzer.sql.rules.review.update_delete_without_where_or_true_condition import UpdateDeleteWithoutWhereOrTrueConditionRule
+from handler.analyzer.sql.rules.level import Level
+from common.tool import SQLUtil
+
+
+class RuleManager(object):
+    def __init__(self):
+        self._registered_rules: Dict[str, Type[AbstractRule]] = {}
+
+    def register_rule(self, rule_class: Type[AbstractRule]):
+        """
+        Registers a new rule class.
+        :param rule_class: The type of the rule class.
+        """
+        self._registered_rules[rule_class.rule_name] = rule_class
+
+    def analyze_sql_statement(self, sql, stdio, level_str='notice') -> List[Result]:
+        """
+        Applies all registered rules to the given SQL statement and collects the results.
+        :param sql: The SQL statement to analyze.
+        :return: A list of rule check results for the statement.
+        """
+        try:
+            sql = SQLUtil().remove_sql_text_affects_parser(sql)
+            sql_statement = parser.parse(sql)
+            stdio.verbose("sql [{0}]; sql_statement:[{1}]".format(sql, sql_statement))
+        except Exception as e:
+            stdio.verbose("parse sql Exception : {0}".format(e))
+            return []
+        level = Level.from_string(level_str)
+        rule_results = []
+        for rule_class in self._registered_rules.values():
+            rule_instance = rule_class()
+            result = rule_instance.match(sql_statement)
+            suggestion = rule_instance.suggestion(sql_statement)
+            if result:
+                if suggestion.level >= level:
+                    stdio.verbose("rule_name:{0}, suggestion_level:{1}, suggestion:{2}".format(suggestion.rule_name, suggestion.level, suggestion.suggestion))
+                    rule_results.append(suggestion)
+            else:
+                if level <= Level.OK:
+                    suggestion = Result(rule_class.rule_name, Level.OK, "No issues found with this rule.", rule_class.rule_description)
+                    rule_results.append(suggestion)
+        return rule_results
+
+
+class SQLReviewRuleManager(object):
+    def __init__(self):
+        self.manager = RuleManager()
+        self.manager.register_rule(SelectAllRule)
+        self.manager.register_rule(ArithmeticRule)
+        self.manager.register_rule(FullScanRule)
+        self.manager.register_rule(IsNullRule)
+        self.manager.register_rule(LargeInClauseAdjustedRule)
+        self.manager.register_rule(MultiTableJoinRule)
+        self.manager.register_rule(UpdateDeleteMultiTableRule)
+        self.manager.register_rule(UpdateDeleteWithoutWhereOrTrueConditionRule)
diff --git a/handler/analyzer/sql/rules/__init__.py b/handler/analyzer/sql/rules/__init__.py
new file mode 100644
index 00000000..69b7c2fa
--- /dev/null
+++ b/handler/analyzer/sql/rules/__init__.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/rules/abstract_rule.py b/handler/analyzer/sql/rules/abstract_rule.py new file mode 100644 index 00000000..c5897c34 --- /dev/null +++ b/handler/analyzer/sql/rules/abstract_rule.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/22 +@file: abstract_rule.py +@desc: +""" + +from abc import ABCMeta, abstractmethod + +from sqlgpt_parser.parser.tree.statement import Statement + + +class AbstractRule(metaclass=ABCMeta): + def match(self, root: Statement, context=None) -> bool: + return True + + @abstractmethod + def suggestion(self, root: Statement, context=None): + pass diff --git a/handler/analyzer/sql/rules/level.py b/handler/analyzer/sql/rules/level.py new file mode 100644 index 00000000..bba0ec6c --- /dev/null +++ b/handler/analyzer/sql/rules/level.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +""" +@time: 2024/5/22 +@file: level.py +@desc: +""" + +from enum import Enum, unique + + +@unique +class Level(Enum): + OK = (1, 'ok') + NOTICE = (2, 'notice') + WARN = (3, 'warn') + CRITICAL = (4, 'critical') + + def __lt__(self, other): + if self.__class__ is other.__class__: + return self.value[0] < other.value[0] + return NotImplemented + + def __le__(self, other): + if self.__class__ is other.__class__: + return self.value[0] <= other.value[0] + return NotImplemented + + def __gt__(self, other): + if self.__class__ is other.__class__: + return self.value[0] > other.value[0] + return NotImplemented + + def __ge__(self, other): + if self.__class__ is other.__class__: + return self.value[0] >= other.value[0] + return NotImplemented + + @classmethod + def from_string(cls, s): + for member in cls: + if member.value[1] == s: + return member + raise ValueError(f"No such level: {s}") + + @property + def string(self): + return self.value[1] diff --git a/handler/analyzer/sql/rules/result.py b/handler/analyzer/sql/rules/result.py new file mode 100644 index 00000000..9950fc31 --- /dev/null +++ b/handler/analyzer/sql/rules/result.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: result.py +@desc: +""" +import json + + +class Result(object): + def __init__(self, name, level, suggestion, description): + self.class_name = name + self.rule_name = name + self.level = level + self.suggestion = suggestion + self.description = description + + def __str__(self): + return json.dumps({"class_name": self.rule_name, "rule_name": self.rule_name, "level": self.level.value, "suggestion": self.suggestion, "description": self.description}, indent=5) diff --git a/handler/analyzer/sql/rules/review/__init__.py b/handler/analyzer/sql/rules/review/__init__.py new file mode 100644 index 00000000..69b7c2fa --- /dev/null +++ b/handler/analyzer/sql/rules/review/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/rules/review/arithmetic.py b/handler/analyzer/sql/rules/review/arithmetic.py new file mode 100644 index 00000000..550b7811 --- /dev/null +++ b/handler/analyzer/sql/rules/review/arithmetic.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. 
+# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: arithmetic.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.expression import QualifiedNameReference +from sqlgpt_parser.parser.tree.statement import Statement +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class ArithmeticRule(AbstractRule): + rule_name = "arithmetic_rule" + rule_description = """ + Field operations are not recommended. + Example: a + 1 > 2 => a > 2 - 1 + """ + + def match(self, root: Statement, catalog=None) -> bool: + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + + def visit_arithmetic_binary(self, node, context): + if isinstance(node.left, QualifiedNameReference) or isinstance(node.right, QualifiedNameReference): + self.match = True + + try: + visitor = Visitor() + visitor.process(root, None) + except Exception as e: + pass + + return visitor.match + + def suggestion(self, root: Statement, catalog=None): + suggest_text = 'Consider simplifying your expressions by moving constants out of comparisons.' + if not self.match(root, catalog): + return Result(self.rule_name, Level.OK, "No improper field operations detected, query is optimized.", self.rule_description) + else: + return Result(self.rule_name, Level.NOTICE, suggest_text, self.rule_description) diff --git a/handler/analyzer/sql/rules/review/full_scan.py b/handler/analyzer/sql/rules/review/full_scan.py new file mode 100644 index 00000000..9a747de8 --- /dev/null +++ b/handler/analyzer/sql/rules/review/full_scan.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: select_all.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.result import Result +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from sqlgpt_parser.parser.tree.literal import StringLiteral +from sqlgpt_parser.parser.tree.statement import Statement + + +class FullScanRule(AbstractRule): + rule_name = "full_scan_rule" + rule_description = """ + Online query full table scan is not recommended. + Exceptions are: + 1. very small table + 2. very low frequency + 3. the table/result set returned is very small (within 100 records / 100 KB). 
+ """ + + def match(self, root: Statement, catalog=None) -> bool: + """ + match: + select 1 from a + select 1 from a where b != / <> + select 1 from a where b not like + select 1 from a where b not in + select 1 from a where not exists + select 1 from a where b like %a / %a% + + not match: + select * from a left join b on (a.id = b.id) and a.c=1 + + :param root: + :param catalog: + :return: + """ + + # Remove clauses such as exists / != / <> / not in / not like / like %a + class Remove_Visitor(DefaultTraversalVisitor): + def visit_comparison_expression(self, node, context): + type = node.type + if type in ('!=', '<>'): + node.left = None + node.right = None + node.type = None + else: + self.process(node.left, context) + self.process(node.right, context) + return None + + def visis_in_predicate(self, node, context): + if node.is_not: + node.is_not = None + node.value = None + node.value_list = None + return None + + def visit_like_predicate(self, node, context): + process_flag = True + + pattern = node.pattern + + if isinstance(pattern, StringLiteral): + value = pattern.value + if value.startswith('%') or node.is_not: + process_flag = False + node.pattern = None + node.value = None + node.escape = None + + if process_flag: + self.process(node.value, context) + self.process(node.pattern, context) + if node.escape is not None: + self.process(node.escape, context) + return None + + # Determine whether there is a expression that can extract query range, if there is, it is not a full table scan + class Query_Range_Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = True + + def visit_comparison_expression(self, node, context): + type = node.type + if type and type in ('=', '>', '<', '>=', '<='): + self.match = False + if node.left: + self.process(node.left, context) + if node.right: + self.process(node.right, context) + return None + + def visit_in_predicate(self, node, context): + if node.is_not: + # Even though it's a NOT IN, it still suggests a non-full scan attempt + self.match = False + else: + self.process(node.value, context) + self.process(node.value_list, context) + + def visit_like_predicate(self, node, context): + if node.pattern and node.value: + pattern = node.pattern + if isinstance(pattern, StringLiteral): + value = pattern.value + if value.endswith('%'): + self.match = False + + if node.value: + self.process(node.value, context) + if node.pattern: + self.process(node.pattern, context) + if node.escape: + self.process(node.escape, context) + return None + + # Add handling for NOT EXISTS + def visit_exists_predicate(self, node, context): + if node.is_not: + # NOT EXISTS can also imply a specific range consideration + self.match = False + else: + self.process(node.subquery, context) + + def visit_between_predicate(self, node, context): + if not node.is_not: + self.match = False + + self.process(node.value, context) + self.process(node.min, context) + self.process(node.max, context) + + return None + + def visit_not_expression(self, node, context): + node.value = None + self.match = True + return None + + try: + remove_visitor = Remove_Visitor() + remove_visitor.process(root, None) + + query_range_visitor = Query_Range_Visitor() + query_range_visitor.process(root, None) + return query_range_visitor.match + except Exception as e: + pass + + return False + + def suggestion(self, root: Statement, catalog=None) -> Result: + if self.match(root, catalog): + suggestion_text = "Detected a potential full table scan which may impact performance. 
" "Consider adding indexes, refining WHERE clauses, or restructuring the query to utilize existing indexes." + return Result(self.rule_name, Level.WARN, suggestion_text, self.rule_description) + else: + suggestion_text = "The query does not involve a full table scan. It appears to be well-optimized for the given conditions." + return Result(self.rule_name, Level.OK, suggestion_text, self.rule_description) diff --git a/handler/analyzer/sql/rules/review/is_null.py b/handler/analyzer/sql/rules/review/is_null.py new file mode 100644 index 00000000..76a537ce --- /dev/null +++ b/handler/analyzer/sql/rules/review/is_null.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: is_null.py +@desc: +""" + +from sqlgpt_parser.parser.tree.literal import NullLiteral +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.statement import Statement +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class IsNullRule(AbstractRule): + rule_name = "is_null_rule" + rule_description = """ + Use IS NULL to determine whether it is a NULL value + A direct comparison of NULL to any value is NULL. +  1) The return result of NULL<>NULL is NULL, not false. +  2) The return result of NULL=NULL is NULL, not true. +  3) The return result of NULL<>1 is NULL, not true. + """ + + def match(self, root: Statement, catalog=None) -> bool: + """ + NULL<>、<>NULL、=NULL、NULL= + :param root: + :param catalog: + :return: + """ + + # NULL<>、<>NULL、=NULL、NULL=、!=NULL、 NULL!= + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + + def visit_comparison_expression(self, node, context): + if isinstance(node.left, NullLiteral): + self.match = True + if isinstance(node.right, NullLiteral): + self.match = True + return None + + try: + visitor = Visitor() + visitor.process(root, None) + return visitor.match + except Exception as e: + pass + + return False + + def suggestion(self, root: Statement, catalog=None): + if self.match(root, catalog): + # 如果发现不正确的NULL比较,提供具体的修改建议 + suggestion_text = "Detected comparison with NULL using =, !=, or <>. " "Use 'IS NULL' or 'IS NOT NULL' for correct NULL checks." + return Result(self.rule_name, Level.WARN, suggestion_text, self.rule_description) + else: + # 如果没有发现不正确比较,返回OK状态 + return Result(self.rule_name, Level.OK, "No improper NULL comparisons found.", self.rule_description) diff --git a/handler/analyzer/sql/rules/review/large_in_clause.py b/handler/analyzer/sql/rules/review/large_in_clause.py new file mode 100644 index 00000000..2d17411c --- /dev/null +++ b/handler/analyzer/sql/rules/review/large_in_clause.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. 
+# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + + +""" +@time: 2024/5/24 +@file: large_in_clause_adjusted.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result +from sqlgpt_parser.parser.tree.statement import Statement + + +class LargeInClauseAdjustedRule(AbstractRule): + rule_name = "large_in_clause_rule_adjusted" + rule_description = """ + Avoid using IN clauses with more than 200 elements as it may lead to performance issues. + """ + + MAX_IN_ELEMENTS = 200 + + def match(self, root: Statement, catalog=None) -> bool: + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + + def visit_in_predicate(self, node, context): + # Assuming node.values holds the list of values directly or indirectly; adjust based on actual implementation + if hasattr(node, 'value_list'): + if len(node.value_list.values) > LargeInClauseAdjustedRule.MAX_IN_ELEMENTS: + self.match = True + return self.match + return self.match + + try: + visitor = Visitor() + visitor.process(root, None) + except Exception as e: + pass + + return visitor.match + + def suggestion(self, root: Statement, catalog=None): + if self.match(root, catalog): + return Result( + self.rule_name, + Level.WARN, + f"The IN clause contains more than {LargeInClauseAdjustedRule.MAX_IN_ELEMENTS} elements, which may degrade query performance. " "Consider alternative strategies like breaking the query into smaller chunks or using EXISTS/JOIN clauses.", + self.rule_description, + ) + else: + return Result( + self.rule_name, + Level.OK, + "The IN clause does not exceed the recommended number of elements.", + self.rule_description, + ) diff --git a/handler/analyzer/sql/rules/review/multi_table_join.py b/handler/analyzer/sql/rules/review/multi_table_join.py new file mode 100644 index 00000000..b925086f --- /dev/null +++ b/handler/analyzer/sql/rules/review/multi_table_join.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
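The 200-element threshold in large_in_clause.py above can be demonstrated with a sketch like the following (assumes the sqlgpt-parser entry point and that `node.value_list.values` is populated the way the visitor expects):

from sqlgpt_parser.parser.mysql_parser import parser  # assumed entry point
from handler.analyzer.sql.rules.review.large_in_clause import LargeInClauseAdjustedRule

rule = LargeInClauseAdjustedRule()
placeholders = ",".join(str(i) for i in range(201))  # one element over MAX_IN_ELEMENTS
over_limit = parser.parse(f"select id from t where id in ({placeholders})")
under_limit = parser.parse("select id from t where id in (1, 2, 3)")
print(rule.suggestion(over_limit).level)   # expected Level.WARN
print(rule.suggestion(under_limit).level)  # expected Level.OK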
+ +""" +@time: 2024/5/20 +@file: multi_table_join.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.join_criteria import JoinOn, JoinUsing +from sqlgpt_parser.parser.tree.statement import Statement +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class MultiTableJoinRule(AbstractRule): + rule_name = "multi_table_join_rule" + rule_description = """ + The number of association tables is not recommended to exceed 5 + """ + + def match(self, root: Statement, catalog=None) -> bool: + + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + self.join_count = 0 + + def visit_join(self, node, context): + self.join_count = self.join_count + 1 + + if self.join_count >= 5: + self.match = True + else: + self.process(node.left, context) + self.process(node.right, context) + + if isinstance(node.criteria, JoinOn): + self.process(node.criteria.expression, context) + elif isinstance(node.criteria, JoinUsing): + self.process(node.criteria.columns) + + return None + + try: + visitor = Visitor() + visitor.process(root, None) + except Exception as e: + pass + + return visitor.match + + def get_join_count(self, root: Statement) -> int: + """Helper method to count the number of JOIN operations in the statement.""" + + class CountJoinVisitor(DefaultTraversalVisitor): + def __init__(self): + self.join_count = 0 + + def visit_join(self, node, context): + self.join_count += 1 + self.process(node.left, context) + self.process(node.right, context) + + if isinstance(node.criteria, JoinOn): + self.process(node.criteria.expression, context) + elif isinstance(node.criteria, JoinUsing): + self.process(node.criteria.columns) + + try: + visitor = CountJoinVisitor() + visitor.process(root, None) + except Exception as e: + pass + return visitor.join_count + + def suggestion(self, root: Statement, catalog=None) -> Result: + join_count = self.get_join_count(root) + if join_count > 5: + # 如果关联表数量超过5,提供具体的改进建议 + suggestion_text = ( + f"The query involves {join_count} tables in JOIN operations, exceeding the recommended limit of 3.\n" + "Consider the following optimizations:\n" + "- Break the query into smaller, simpler queries and use application-side processing to combine results.\n" + "- Review the schema design; denormalization or indexed views might reduce the need for complex joins.\n" + "- Ensure all joined columns are properly indexed for involved tables.\n" + "- If applicable, consider using materialized views or caching strategies for frequently accessed subsets of data." + ) + return Result(self.rule_name, Level.WARN, suggestion_text, self.rule_description) + else: + # 如果没有超过,说明查询在推荐范围内 + suggestion_text = "The number of joined tables is within the recommended limit. No further action needed." + return Result(self.rule_name, Level.OK, suggestion_text, self.rule_description) diff --git a/handler/analyzer/sql/rules/review/select_all.py b/handler/analyzer/sql/rules/review/select_all.py new file mode 100644 index 00000000..f8706336 --- /dev/null +++ b/handler/analyzer/sql/rules/review/select_all.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: full_scan.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.statement import Statement +from sqlgpt_parser.parser.tree.expression import QualifiedNameReference +from sqlgpt_parser.parser.tree.qualified_name import QualifiedName +from sqlgpt_parser.parser.tree.select_item import SingleColumn +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.result import Result +from handler.analyzer.sql.rules.abstract_rule import AbstractRule + + +class SelectAllRule(AbstractRule): + rule_name = "select_all_rule" + rule_description = """ + select * + """ + + def match(self, root: Statement, catalog=None) -> bool: + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.is_select_all = False + + def visit_select(self, node, context): + for item in node.select_items: + if isinstance(item, SingleColumn) and isinstance(item.expression, QualifiedNameReference) and isinstance(item.expression.name, QualifiedName): + parts = item.expression.name.parts + for part in parts: + if part == '*': + self.is_select_all = True + break + + try: + visitor = Visitor() + visitor.process(root, None) + except Exception as e: + pass + + return visitor.is_select_all + + def suggestion(self, root: Statement, catalog=None) -> Result: + if self.match(root, catalog): + suggestion_text = "Using 'SELECT *' can lead to unnecessary data retrieval and potentially impact query performance. " "Consider specifying only the necessary columns explicitly to optimize your query." + return Result(self.rule_name, Level.WARN, suggestion_text, self.rule_description) + else: + return Result(self.rule_name, Level.OK, "No 'SELECT *' usage detected, query is optimized for column selection.", self.rule_description) diff --git a/handler/analyzer/sql/rules/review/update_delete_multi_table.py b/handler/analyzer/sql/rules/review/update_delete_multi_table.py new file mode 100644 index 00000000..ee76e22f --- /dev/null +++ b/handler/analyzer/sql/rules/review/update_delete_multi_table.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
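The select_all.py rule above is the simplest to exercise; a minimal sketch, assuming the sqlgpt-parser entry point:

from sqlgpt_parser.parser.mysql_parser import parser  # assumed entry point
from handler.analyzer.sql.rules.review.select_all import SelectAllRule

rule = SelectAllRule()
print(rule.match(parser.parse("select * from t")))         # expected True  -> WARN suggestion
print(rule.match(parser.parse("select id, name from t")))  # expected False -> OK suggestion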
+ +""" +@time: 2024/5/20 +@file: update_delete_multi_table.py +@desc: +""" + +from sqlgpt_parser.parser.tree.relation import Join +from sqlgpt_parser.parser.tree.statement import Statement +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.level import Level +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class UpdateDeleteMultiTableRule(AbstractRule): + rule_name = "update_delete_multi_table_rule" + rule_description = """ + UPDATE / DELETE does not recommend using multiple tables + """ + + def match(self, root: Statement, catalog=None) -> bool: + """ + :param root: + :param catalog: + :return: + """ + + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + + def visit_delete(self, node, context): + table = node.table + if table and isinstance(table[0], Join): + self.match = True + + def visit_update(self, node, context): + table = node.table + if table and isinstance(table[0], Join): + self.match = True + + try: + visitor = Visitor() + visitor.process(root, None) + except Exception as e: + pass + + return visitor.match + + def suggestion(self, root: Statement, catalog=None) -> Result: + if self.match(root, catalog): + suggestion_text = ( + "The use of multiple tables in UPDATE or DELETE operation is not recommended. " "Consider breaking down the operation into separate single-table statements or " "using transactions to manage the update/delete across multiple tables safely." + ) + return Result(self.rule_name, Level.WARN, suggestion_text, self.rule_description) + else: + return Result(self.rule_name, Level.OK, "No multi-table UPDATE or DELETE operation detected, following best practices.", self.rule_description) diff --git a/handler/analyzer/sql/rules/review/update_delete_without_where_or_true_condition.py b/handler/analyzer/sql/rules/review/update_delete_without_where_or_true_condition.py new file mode 100644 index 00000000..abf3d1de --- /dev/null +++ b/handler/analyzer/sql/rules/review/update_delete_without_where_or_true_condition.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/20 +@file: update_delete_without_where_or_true_condition.py +@desc: +""" + +from sqlgpt_parser.parser.tree.statement import Statement +from sqlgpt_parser.parser.tree.expression import ComparisonExpression +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.level import Level +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result + + +class UpdateDeleteWithoutWhereOrTrueConditionRule(AbstractRule): + rule_name = "update_delete_without_where_or_true_condition_rule" + rule_description = """ + UPDATE or DELETE statements should not be executed without a WHERE clause or with a always-true WHERE condition. 
+ """ + + def match(self, root: Statement, catalog=None) -> bool: + """ + :param root: + :param catalog: + :return: + """ + + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + self.visited_where = False + + def visit_update(self, node, context): + self.check_for_where(node.where) + + def visit_delete(self, node, context): + self.check_for_where(node.where) + + def check_for_where(self, where): + if where is None: + # No WHERE clause found + self.match = True + elif isinstance(where, ComparisonExpression) and where.left == where.right: + # WHERE clause exists but is always true + self.match = True + else: + # Valid WHERE clause found + self.visited_where = True + + try: + visitor = Visitor() + visitor.process(root, None) + except Exception as e: + pass + + # Only consider it a match if there was no valid WHERE clause encountered + return visitor.match and not visitor.visited_where + + def suggestion(self, root: Statement, catalog=None) -> Result: + if self.match(root, catalog): + suggestion_text = ( + "Executing UPDATE or DELETE statements without a WHERE clause or with an always-true WHERE condition " + "can be extremely dangerous, potentially affecting all rows in the table. Please ensure a proper and " + "specific WHERE condition is used to limit the scope of the operation." + ) + return Result(self.rule_name, Level.CRITICAL, suggestion_text, self.rule_description) + else: + return Result(self.rule_name, Level.OK, "UPDATE or DELETE operations include a WHERE clause with a specific condition, adhering to best practices.", self.rule_description) diff --git a/handler/analyzer/sql/rules/tunning/__init__.py b/handler/analyzer/sql/rules/tunning/__init__.py new file mode 100644 index 00000000..69b7c2fa --- /dev/null +++ b/handler/analyzer/sql/rules/tunning/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/5/21 +@file: __init__.py +@desc: +""" diff --git a/handler/analyzer/sql/rules/tunning/index_column_fuzzy_match.py b/handler/analyzer/sql/rules/tunning/index_column_fuzzy_match.py new file mode 100644 index 00000000..73f27a66 --- /dev/null +++ b/handler/analyzer/sql/rules/tunning/index_column_fuzzy_match.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +""" +@time: 2024/5/21 +@file: index_column_fuzzy_search.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.expression import QualifiedNameReference +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result +from sqlgpt_parser.parser.tree.statement import Statement + + +class IndexColumnFuzzyMatchRule(AbstractRule): + rule_name = "index_column_fuzzy_match_rule" + rule_description = """ + Avoid using fuzzy or left fuzzy matches on indexed columns in query conditions + as it may lead to performance degradation. + """ + + def match(self, root: Statement, catalog=None) -> bool: + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + self.fuzzy_matches_on_indexed_columns = [] + + def visit_like_expression(self, node, context): + # Assuming we have a mechanism to identify indexed columns, e.g., via `catalog` + if catalog and isinstance(node.expression, QualifiedNameReference): + column_name = str(node.expression.name) + if catalog.is_column_indexed(column_name): + if node.pattern.startswith('%'): # Left fuzzy match + self.fuzzy_matches_on_indexed_columns.append(column_name) + self.match = True + elif '%' in node.pattern and not node.pattern.endswith('%'): # Fuzzy match but not left + self.fuzzy_matches_on_indexed_columns.append(column_name) + self.match = True + return self.match + + visitor = Visitor() + visitor.process(root, catalog) + + if visitor.match: + self._fuzzy_matched_columns = visitor.fuzzy_matches_on_indexed_columns + return visitor.match + + def suggestion(self, root: Statement, catalog=None): + if hasattr(self, '_fuzzy_matched_columns') and self._fuzzy_matched_columns: + column_list = ", ".join(self._fuzzy_matched_columns) + detailed_suggestion = f"Avoid using fuzzy or left fuzzy matches on these indexed columns: {column_list}" + return Result(self.rule_name, Level.WARN, detailed_suggestion, self.rule_description) + else: + return Result(self.rule_name, Level.OK, "No issues found with indexed column fuzzy matching.", self.rule_description) diff --git a/handler/analyzer/sql/rules/tunning/index_column_implicit_conversion.py b/handler/analyzer/sql/rules/tunning/index_column_implicit_conversion.py new file mode 100644 index 00000000..b3c1fdca --- /dev/null +++ b/handler/analyzer/sql/rules/tunning/index_column_implicit_conversion.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+ +""" +@time: 2024/5/23 +@file: index_column_implicit_conversion.py +@desc: +""" +from handler.analyzer.sql.rules.level import Level +from sqlgpt_parser.parser.tree.expression import QualifiedNameReference +from sqlgpt_parser.parser.tree.visitor import DefaultTraversalVisitor +from handler.analyzer.sql.rules.abstract_rule import AbstractRule +from handler.analyzer.sql.rules.result import Result +from sqlgpt_parser.parser.tree.statement import Statement + + +class IndexColumnImplicitConversionRule(AbstractRule): + rule_name = "index_column_implicit_conversion_rule" + rule_description = """ + Detect potential implicit type conversions on indexed columns in query conditions + due to comparison or arithmetic operations with different types, which may degrade index efficiency. + """ + + def match(self, root: Statement, catalog=None) -> bool: + class Visitor(DefaultTraversalVisitor): + def __init__(self): + self.match = False + self.conversion_warnings = [] + + def visit_comparison_expression(self, node, context): + if catalog: + left_is_col = isinstance(node.left, QualifiedNameReference) and catalog.is_column_indexed(str(node.left.name)) + right_is_col = isinstance(node.right, QualifiedNameReference) and catalog.is_column_indexed(str(node.right.name)) + + if left_is_col or right_is_col: + # Check for type mismatch that could lead to implicit conversion + if left_is_col and not isinstance(node.right, type(node.left)) or right_is_col and not isinstance(node.left, type(node.right)): + col_name = left_is_col and str(node.left.name) or str(node.right.name) + self.conversion_warnings.append(f"Implicit type conversion warning on indexed column '{col_name}'.") + self.match = True + + return self.match + + def visit_arithmetic_binary_expression(self, node, context): + if catalog: + for expr in [node.left, node.right]: + if isinstance(expr, QualifiedNameReference) and catalog.is_column_indexed(str(expr.name)): + # If the other side is not the same type, it might suggest an implicit conversion + if not isinstance(node.left, type(expr)) or not isinstance(node.right, type(expr)): + col_name = str(expr.name) + self.conversion_warnings.append(f"Implicit type conversion warning on indexed column '{col_name}' due to arithmetic operation.") + self.match = True + + return self.match + + visitor = Visitor() + visitor.process(root, catalog) + + if visitor.match: + self._conversion_warnings_details = visitor.conversion_warnings + return visitor.match + + def suggestion(self, root: Statement, catalog=None): + if hasattr(self, '_conversion_warnings_details') and self._conversion_warnings_details: + issue_list = "\n".join(self._conversion_warnings_details) + detailed_suggestion = f"The following indexed columns may be involved in implicit type conversions due to comparison or arithmetic operations:\n{issue_list}\nReview these to ensure optimal index usage." 
+ return Result(self.rule_name, Level.WARN, detailed_suggestion, self.rule_description) + else: + return Result(self.rule_name, Level.OK, "No implicit type conversion warnings found for indexed columns.", self.rule_description) diff --git a/handler/gather/gather_scenes.py b/handler/gather/gather_scenes.py index 073c112a..b782c672 100644 --- a/handler/gather/gather_scenes.py +++ b/handler/gather/gather_scenes.py @@ -33,7 +33,7 @@ class GatherSceneHandler(SafeStdio): - def __init__(self, context, gather_pack_dir='./', tasks_base_path="~/.obdiag/gather/tasks/", task_type="observer"): + def __init__(self, context, gather_pack_dir='./', tasks_base_path="~/.obdiag/gather/tasks/", task_type="observer", is_inner=False): self.context = context self.stdio = context.stdio self.is_ssh = True @@ -43,10 +43,11 @@ def __init__(self, context, gather_pack_dir='./', tasks_base_path="~/.obdiag/gat self.yaml_tasks = {} self.code_tasks = [] self.env = {} - self.scene = None + self.scene = "observer.base" self.tasks_base_path = tasks_base_path self.task_type = task_type self.variables = {} + self.is_inner = is_inner if self.context.get_variable("gather_timestamp", None): self.gather_timestamp = self.context.get_variable("gather_timestamp") else: @@ -72,7 +73,11 @@ def handle(self): self.__init_report_path() self.__init_task_names() self.execute() - self.__print_result() + if self.is_inner: + result = self.__get_sql_result() + return result + else: + self.__print_result() def execute(self): try: @@ -209,12 +214,20 @@ def init_option(self): self.gather_pack_dir = os.path.abspath(store_dir_option) if scene_option: self.scene = scene_option - else: - return False if env_option: env_dict = StringUtils.parse_env(env_option) self.env = env_dict return True + def __get_sql_result(self): + try: + file_path = os.path.join(self.report_path, "sql_result.txt") + with open(file_path, 'r', encoding='utf-8') as f: + data = f.read() + return data + except Exception as e: + self.stdio.error(e) + return None + def __print_result(self): self.stdio.print(Fore.YELLOW + "\nGather scene results stored in this directory: {0}\n".format(self.report_path) + Style.RESET_ALL) diff --git a/handler/gather/tasks/observer/backup.yaml b/handler/gather/tasks/observer/backup.yaml index 5fabd6d7..6e7cc91f 100644 --- a/handler/gather/tasks/observer/backup.yaml +++ b/handler/gather/tasks/observer/backup.yaml @@ -67,13 +67,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as 
DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/backup_clean.yaml b/handler/gather/tasks/observer/backup_clean.yaml index 88fef329..699ced1d 100644 --- a/handler/gather/tasks/observer/backup_clean.yaml +++ b/handler/gather/tasks/observer/backup_clean.yaml @@ -73,13 +73,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/base.yaml b/handler/gather/tasks/observer/base.yaml index e75c9c7a..ef1fb488 100644 --- a/handler/gather/tasks/observer/base.yaml +++ b/handler/gather/tasks/observer/base.yaml @@ -40,29 +40,29 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" + sql: "SELECT * FROM oceanbase.DBA_OB_TENANTS;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_RESOURCE_POOLS;" + sql: "SELECT c.TENANT_ID, e.TENANT_NAME, concat(c.NAME, ': ', d.NAME) `pool:conf`,concat(c.UNIT_COUNT, ' unit: ', d.min_cpu, 'C/', ROUND(d.MEMORY_SIZE/1024/1024/1024,0), 'G') unit_info FROM oceanbase.DBA_OB_RESOURCE_POOLS c, 
oceanbase.DBA_OB_UNIT_CONFIGS d, oceanbase.DBA_OB_TENANTS e WHERE c.UNIT_CONFIG_ID=d.UNIT_CONFIG_ID AND c.TENANT_ID=e.TENANT_ID ORDER BY c.TENANT_ID;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_TENANTS;" + sql: "SELECT a.TENANT_NAME,a.TENANT_ID,b.SVR_IP FROM oceanbase.DBA_OB_TENANTS a, oceanbase.GV$OB_UNITS b WHERE a.TENANT_ID=b.TENANT_ID;" global: true - type: sql - sql: "SELECT c.TENANT_ID, e.TENANT_NAME, concat(c.NAME, ': ', d.NAME) `pool:conf`,concat(c.UNIT_COUNT, ' unit: ', d.min_cpu, 'C/', ROUND(d.MEMORY_SIZE/1024/1024/1024,0), 'G') unit_info FROM oceanbase.DBA_OB_RESOURCE_POOLS c, oceanbase.DBA_OB_UNIT_CONFIGS d, oceanbase.DBA_OB_TENANTS e WHERE c.UNIT_CONFIG_ID=d.UNIT_CONFIG_ID AND c.TENANT_ID=e.TENANT_ID AND c.TENANT_ID>1000 ORDER BY c.TENANT_ID;" + sql: "select /*+read_consistency(weak) QUERY_TIMEOUT(60000000) */ t1.svr_ip, t1.role, t1.tenant_id,t1.database_name,t1.table_name, ifnull(t2.data_size,0) / 1073741824 as total_data_size_gb from (select tenant_id, database_name, table_name, role, svr_ip, table_id, tablet_id from oceanbase.cdb_ob_table_locations) t1 left join (select tenant_id, tablet_id, data_size from oceanbase.cdb_ob_tablet_replicas) t2 on t1.tenant_id = t2.tenant_id and t1.tablet_id = t2.tablet_id order by total_data_size_gb desc limit 50;" global: true - type: sql - sql: "SELECT a.TENANT_NAME,a.TENANT_ID,b.SVR_IP FROM oceanbase.DBA_OB_TENANTS a, oceanbase.GV$OB_UNITS b WHERE a.TENANT_ID=b.TENANT_ID;" + sql: "show parameters" global: true - type: sql - sql: "show parameters" + sql: "show variables" global: true diff --git a/handler/gather/tasks/observer/clog_disk_full.yaml b/handler/gather/tasks/observer/clog_disk_full.yaml index ad246024..ecedeab0 100644 --- a/handler/gather/tasks/observer/clog_disk_full.yaml +++ b/handler/gather/tasks/observer/clog_disk_full.yaml @@ -58,13 +58,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/compaction.yaml b/handler/gather/tasks/observer/compaction.yaml index 3e50ec65..87f9dbbf 100644 --- a/handler/gather/tasks/observer/compaction.yaml +++ b/handler/gather/tasks/observer/compaction.yaml @@ -109,13 +109,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM 
oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml index 0693831d..70a52e65 100644 --- a/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml +++ b/handler/gather/tasks/observer/delay_of_primary_and_backup.yaml @@ -118,13 +118,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/io.yaml b/handler/gather/tasks/observer/io.yaml index 23bab615..fa4a488e 100644 --- a/handler/gather/tasks/observer/io.yaml +++ b/handler/gather/tasks/observer/io.yaml @@ -52,13 +52,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, 
concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/log_archive.yaml b/handler/gather/tasks/observer/log_archive.yaml index 43f128bc..2d2908f0 100644 --- a/handler/gather/tasks/observer/log_archive.yaml +++ b/handler/gather/tasks/observer/log_archive.yaml @@ -73,13 +73,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/long_transaction.yaml b/handler/gather/tasks/observer/long_transaction.yaml index c83afa90..cbd4c9a4 100644 --- a/handler/gather/tasks/observer/long_transaction.yaml +++ b/handler/gather/tasks/observer/long_transaction.yaml @@ -43,13 +43,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as 
DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/memory.yaml b/handler/gather/tasks/observer/memory.yaml index 140aca02..f2650f19 100644 --- a/handler/gather/tasks/observer/memory.yaml +++ b/handler/gather/tasks/observer/memory.yaml @@ -64,13 +64,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/recovery.yaml b/handler/gather/tasks/observer/recovery.yaml index 6009a0d0..1d858159 100644 --- a/handler/gather/tasks/observer/recovery.yaml +++ b/handler/gather/tasks/observer/recovery.yaml @@ -54,13 +54,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/restart.yaml b/handler/gather/tasks/observer/restart.yaml index d8b956c6..9ada284f 100644 --- a/handler/gather/tasks/observer/restart.yaml +++ b/handler/gather/tasks/observer/restart.yaml @@ -49,13 
+49,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/rootservice_switch.yaml b/handler/gather/tasks/observer/rootservice_switch.yaml index efda677f..be72be4e 100644 --- a/handler/gather/tasks/observer/rootservice_switch.yaml +++ b/handler/gather/tasks/observer/rootservice_switch.yaml @@ -79,13 +79,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/suspend_transaction.yaml b/handler/gather/tasks/observer/suspend_transaction.yaml index 25a9edb3..59001c27 100644 --- a/handler/gather/tasks/observer/suspend_transaction.yaml +++ b/handler/gather/tasks/observer/suspend_transaction.yaml @@ -43,13 +43,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT 
SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/unit_data_imbalance.yaml b/handler/gather/tasks/observer/unit_data_imbalance.yaml index 078ec631..b9e7b54e 100644 --- a/handler/gather/tasks/observer/unit_data_imbalance.yaml +++ b/handler/gather/tasks/observer/unit_data_imbalance.yaml @@ -100,13 +100,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/observer/unknown.yaml b/handler/gather/tasks/observer/unknown.yaml index 780a3f75..3d22f9af 100644 --- a/handler/gather/tasks/observer/unknown.yaml +++ b/handler/gather/tasks/observer/unknown.yaml @@ -46,13 +46,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, 
concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/gather/tasks/other/application_error.yaml b/handler/gather/tasks/other/application_error.yaml index 41a421bd..98b87e23 100644 --- a/handler/gather/tasks/other/application_error.yaml +++ b/handler/gather/tasks/other/application_error.yaml @@ -46,13 +46,13 @@ task: sql: "show variables like 'version_comment';" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_ZONES;" + sql: "SELECT * FROM oceanbase.DBA_OB_ZONES ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS;" + sql: "SELECT * FROM oceanbase.DBA_OB_SERVERS ORDER BY ZONE;" global: true - type: sql - sql: "SELECT * FROM oceanbase.GV$OB_SERVERS;" + sql: "SELECT SVR_IP,SVR_PORT,ZONE,SQL_PORT,CPU_CAPACITY,CPU_CAPACITY_MAX,CPU_ASSIGNED,CPU_ASSIGNED_MAX, concat(ROUND(MEM_CAPACITY/1024/1024/1024,0), 'G') as MEM_CAPACITY, concat(ROUND(MEM_ASSIGNED/1024/1024/1024,0), 'G') as MEM_ASSIGNED, concat(ROUND(LOG_DISK_CAPACITY/1024/1024/1024,0), 'G') as LOG_DISK_CAPACITY, concat(ROUND(LOG_DISK_ASSIGNED/1024/1024/1024,0), 'G') as LOG_DISK_ASSIGNED, concat(ROUND(LOG_DISK_IN_USE/1024/1024/1024,0), 'G') as LOG_DISK_IN_USE, concat(ROUND(DATA_DISK_CAPACITY/1024/1024/1024,0), 'G') as DATA_DISK_CAPACITY,concat(ROUND(DATA_DISK_IN_USE/1024/1024/1024,0), 'G') as DATA_DISK_IN_USE,concat(ROUND(MEMORY_LIMIT/1024/1024/1024,0), 'G') as MEMORY_LIMIT,concat(ROUND(DATA_DISK_ALLOCATED/1024/1024/1024,0), 'G') as DATA_DISK_ALLOCATED FROM oceanbase.GV$OB_SERVERS;" global: true - type: sql sql: "SELECT * FROM oceanbase.DBA_OB_UNIT_CONFIGS;" diff --git a/handler/meta/html_meta.py b/handler/meta/html_meta.py index 302de58c..6b6b0867 100644 --- a/handler/meta/html_meta.py +++ b/handler/meta/html_meta.py @@ -287,3 +287,198 @@ def rm_value(self, key): ''', ) + +html_dict.set_value( + "sql_review_html_head_template", + ''' + + + + + + SQL Review报告 + + + +
SQL Review报告
+    ...
+    ''',
+)
+
+
+html_dict.set_value(
+    "analyze_sql_html_head_template",
+    '''
+    租户SQL诊断报告
+    ...
+    SQL Diagnostic Result
+    ''',
+)
+
+html_dict.set_value(
+    "html_footer_temple",
+    '''
+    ''',
+)
+
+html_dict.set_value(
+    "html_script_templete",
+    '''
+    ''',
+)
diff --git a/handler/meta/sql_meta.py b/handler/meta/sql_meta.py
index 7538e7cf..452c6de7 100644
--- a/handler/meta/sql_meta.py
+++ b/handler/meta/sql_meta.py
@@ -1003,3 +1003,432 @@ def rm_value(self, key):
     ##REPLACE_ORDER_BY##;
     ''',
 )
+
+sql_dict.set_value(
+    "select_all_gv_database_view",
+    '''
+    SELECT /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(60000000) */
+        tenant_id,
+        tenant_name,
+        database_id,
+        database_name,
+        `comment`,
+        in_recyclebin
+    FROM oceanbase.`gv$database`
+    ''',
+)
+
+sql_dict.set_value(
+    "select_cdb_database",
+    '''
+    SELECT/*+ QUERY_TIMEOUT(10000000) */
+        con_id as tenant_id,
+        object_id as database_id,
+        object_name as database_name
+    FROM oceanbase.cdb_objects
+    where
+        con_id = ##REPLACE_CON_ID##
+        and OBJECT_TYPE = 'DATABASE'
+    ''',
+)
+
+sql_dict.set_value(
+    "select_tenant_cdb_database",
+    '''
+    SELECT/*+ QUERY_TIMEOUT(10000000) */
+        con_id as tenant_id,
+        object_id as database_id,
+        object_name as database_name
+    FROM oceanbase.DBA_OBJECTS
+    where OBJECT_TYPE = 'DATABASE'
+    ''',
+)
+
+sql_dict.set_value(
+    "select_tenant_gv_database_view",
+    '''
+    SELECT /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(60000000) */
+        tenant_id,
+        tenant_name,
+        database_id,
+        database_name,
+        `comment`,
+        in_recyclebin
+    FROM oceanbase.`gv$database`
+    WHERE tenant_id = ##REPLACE_TENANT_ID##
+    ''',
+)
+
+sql_dict.set_value(
+    "get_table_id",
+    '''
+    select
+        table_id
+    from oceanbase.gv$table
+    where
+        tenant_id = ##REPLACE_TENANT_ID##
+        and database_id = ##REPLACE_DATABASE_ID##
+        and table_name = '##REPLACE_TABLE_NAME##'
+    limit 1
+    ''',
+)
+
+sql_dict.set_value(
+    "get_table_id_for_ob4",
+    '''
+    select
+        t3.table_id as table_id
+    from
+        (select
+            con_id,
+            owner,
+            table_name,
+            partitioned
+        from oceanbase.CDB_TABLES) t1
+    left join
+        (select
+            con_id,
+            owner,
+            object_name,
+            object_id
+        from oceanbase.CDB_OBJECTS
+        where object_type = 'database'
+        ) t2 ON t1.con_id = t2.con_id and t1.owner = t2.owner
+    left join
+        (select
+            con_id,
+            owner,
+            object_name,
+            object_id as table_id
+        from oceanbase.CDB_OBJECTS where object_type = 'table'
+        ) t3 ON t1.con_id = t3.con_id and t1.owner = t3.owner and t1.table_name = t3.object_name
+    where t1.con_id = ##REPLACE_CON_ID## and t2.object_id = ##REPLACE_OBJECT_ID## and t1.table_name = '##REPLACE_TABLE_NAME##' limit 1
+    ''',
+)
+
+sql_dict.set_value(
+    "get_table_index",
+    '''
+    select
+        key_name as index_name,
+        group_concat(column_name order by seq_in_index separator ',') as column_name
+    from
+        oceanbase.__tenant_virtual_table_index
+    where
+        table_id = ##REPLACE_TABLE_ID##
+    group by key_name
+    ''',
+)
+
+sql_dict.set_value(
+    "get_database_name",
+    '''
+    select
+        database_name
+    from oceanbase.gv$database
+    where
+        tenant_id = ##REPLACE_TENANT_ID##
+        and database_id = ##REPLACE_DATABASE_ID##
+    limit 1
+    ''',
+)
+
+sql_dict.set_value(
+    "get_sql_audit_for_sql_review",
+    '''
+select /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(120000000) */
+    max(case when length(sql_id) > 0 then svr_ip else 0 end) as svrIp,
+    max(case when length(sql_id) > 0 then svr_port else 0 end) as svrPort,
+    max(case when length(sql_id) > 0 then request_id else 0 end) as requestId,
+    max(case when length(sql_id) > 0 then client_ip else 0 end) as clientIp,
+    max(case when length(sql_id) > 0 then tenant_name else 0 end) as tenantName,
+    max(case when length(sql_id) > 0 then tenant_id else 0 end) as tenantId,
+    max(case when
length(sql_id) > 0 then db_name else 0 end) as dbName, + max(case when length(sql_id) > 0 then db_id else 0 end) as dbId, + max(case when length(sql_id) > 0 then query_sql else 0 end) as querySql, + max(case when length(sql_id) > 0 then plan_id else 0 end) as planId, + max(case when length(sql_id) > 0 then sql_id else '' end) as sqlId, + max(case when length(sql_id) > 0 then trace_id else '' end) as traceId, + min(request_time) as requestTime, + sum(case when length(sql_id) > 0 then return_rows else 0 end) as returnRows, + sum(case when length(sql_id) > 0 then affected_rows else 0 end) as affectedRows, + sum(partition_cnt) as partitionCount, + sum(case when length(sql_id) > 0 then ret_code else 0 end) as retCode, + sum(case event when 'system internal wait' then WAIT_TIME_MICRO else 0 end) as event0WaitTimeUs, + sum(case event when 'mysql response wait client' then WAIT_TIME_MICRO else 0 end) as event1WaitTimeUs, + sum(case event when 'sync rpc' then WAIT_TIME_MICRO else 0 end) as event2WaitTimeUs, + sum(case event when 'db file data read' then WAIT_TIME_MICRO else 0 end) as event3WaitTimeUs, + sum(total_wait_time_micro) as totalWaitTimeMicro, + sum(total_waits) as totalWaits, + sum(rpc_count) as rpcCount, + sum(case when length(sql_id) > 0 then plan_type else 0 end) as planType, + sum(case when length(sql_id) > 0 then is_inner_sql else 0 end) as isInnerSql, + sum(case when length(sql_id) > 0 then is_executor_rpc else 0 end) as isExecutorRpc, + sum(case when length(sql_id) > 0 then is_hit_plan else 0 end) as isHitPlan, + sum(case when length(sql_id) > 0 then elapsed_time else 0 end) as elapsedTime, + sum(execute_time)-sum(total_wait_time_micro)+sum(get_plan_time) as cpuTime, + sum(net_time) as netTime, + sum(net_wait_time) as netWaitTime, + sum(queue_time) as queueTime, + sum(decode_time) as decodeTime, + sum(get_plan_time) as getPlanTime, + sum(execute_time) as executeTime, + sum(application_wait_time) as applicationWaitTime, + sum(concurrency_wait_time) as concurrencyWaitTime, + sum(user_io_wait_time) as userIoWaitTime, + sum(schedule_time) as scheduleTime, + sum(row_cache_hit) as rowCacheHit, + sum(bloom_filter_cache_hit) as bloomFilterCacheHit, + sum(block_cache_hit) as blockCacheHit, + sum(block_index_cache_hit) as blockIndexCacheHit, + sum(disk_reads) as diskReads, + sum(case when length(sql_id) > 0 then retry_cnt else 0 end) as retryCount, + sum(case when length(sql_id) > 0 then table_scan else 0 end) as tableScan, + sum(case when length(sql_id) > 0 then consistency_level else 0 end) as consistencyLevel, + sum(memstore_read_row_count) as memstoreReadRowCount, + sum(ssstore_read_row_count) as ssstoreReadRowCount + from oceanbase.gv$sql_audit + where request_time >= ##REPLACE_REQUEST_FROM_TIME## + and request_time <= ##REPLACE_REQUEST_TO_TIME## + and length(sql_id) > 0 + and length(query_sql) > 0 + and length(db_name) > 0 + and query_sql not like 'show%' + and query_sql not like 'alter%' + and query_sql not like 'set%' + and query_sql not like 'commit%' + and query_sql not like 'roll%' + and query_sql not like 'begin%' + and query_sql not like 'end%' + and query_sql not like 'drop%' + group by trace_id + having elapsedTime >= ##REPLACE_ELAPSED_TIME## + and length(sqlId) > 0 + limit ##REPLACE_LIMIT## + ''', +) + +sql_dict.set_value( + "get_sql_audit_ob4_for_sql_review", + ''' +select /*+ READ_CONSISTENCY(WEAK) QUERY_TIMEOUT(120000000) */ + max(case when length(sql_id) > 0 then svr_ip else 0 end) as svrIp, + max(case when length(sql_id) > 0 then svr_port else 0 end) as svrPort, + 
max(case when length(sql_id) > 0 then request_id else 0 end) as requestId,
+    max(case when length(sql_id) > 0 then client_ip else 0 end) as clientIp,
+    max(case when length(sql_id) > 0 then tenant_name else 0 end) as tenantName,
+    max(case when length(sql_id) > 0 then tenant_id else 0 end) as tenantId,
+    max(case when length(sql_id) > 0 then db_name else 0 end) as dbName,
+    max(case when length(sql_id) > 0 then db_id else 0 end) as dbId,
+    max(case when length(sql_id) > 0 then query_sql else 0 end) as querySql,
+    max(case when length(sql_id) > 0 then plan_id else 0 end) as planId,
+    max(case when length(sql_id) > 0 then sql_id else '' end) as sqlId,
+    max(case when length(sql_id) > 0 then trace_id else '' end) as traceId,
+    min(request_time) as requestTime,
+    sum(case when length(sql_id) > 0 then return_rows else 0 end) as returnRows,
+    sum(case when length(sql_id) > 0 then affected_rows else 0 end) as affectedRows,
+    sum(partition_cnt) as partitionCount,
+    sum(case when length(sql_id) > 0 then ret_code else 0 end) as retCode,
+    sum(case event when 'system internal wait' then WAIT_TIME_MICRO else 0 end) as event0WaitTimeUs,
+    sum(case event when 'mysql response wait client' then WAIT_TIME_MICRO else 0 end) as event1WaitTimeUs,
+    sum(case event when 'sync rpc' then WAIT_TIME_MICRO else 0 end) as event2WaitTimeUs,
+    sum(case event when 'db file data read' then WAIT_TIME_MICRO else 0 end) as event3WaitTimeUs,
+    sum(total_wait_time_micro) as totalWaitTimeMicro,
+    sum(total_waits) as totalWaits,
+    sum(rpc_count) as rpcCount,
+    sum(case when length(sql_id) > 0 then plan_type else 0 end) as planType,
+    sum(case when length(sql_id) > 0 then is_inner_sql else 0 end) as isInnerSql,
+    sum(case when length(sql_id) > 0 then is_executor_rpc else 0 end) as isExecutorRpc,
+    sum(case when length(sql_id) > 0 then is_hit_plan else 0 end) as isHitPlan,
+    max(case when length(sql_id) > 0 then elapsed_time else 0 end) as elapsedTime,
+    sum(execute_time) - sum(total_wait_time_micro) + max(get_plan_time) as cpuTime,
+    sum(net_time) as netTime,
+    sum(net_wait_time) as netWaitTime,
+    sum(queue_time) as queueTime,
+    sum(decode_time) as decodeTime,
+    sum(get_plan_time) as getPlanTime,
+    sum(execute_time) as executeTime,
+    sum(application_wait_time) as applicationWaitTime,
+    sum(concurrency_wait_time) as concurrencyWaitTime,
+    sum(user_io_wait_time) as userIoWaitTime,
+    sum(schedule_time) as scheduleTime,
+    sum(row_cache_hit) as rowCacheHit,
+    sum(bloom_filter_cache_hit) as bloomFilterCacheHit,
+    sum(block_cache_hit) as blockCacheHit,
+    0 as blockIndexCacheHit,
+    sum(disk_reads) as diskReads,
+    sum(case when length(sql_id) > 0 then retry_cnt else 0 end) as retryCount,
+    sum(case when length(sql_id) > 0 then table_scan else 0 end) as tableScan,
+    sum(case when length(sql_id) > 0 then consistency_level else 0 end) as consistencyLevel,
+    sum(memstore_read_row_count) as memstoreReadRowCount,
+    sum(ssstore_read_row_count) as ssstoreReadRowCount
+    from oceanbase.gv$ob_sql_audit
+    where tenant_name = '##REPLACE_TENANT_NAME##'
+    and request_time >= ##REPLACE_REQUEST_FROM_TIME##
+    and request_time <= ##REPLACE_REQUEST_TO_TIME##
+    and length(sql_id) > 0
+    and length(query_sql) > 0
+    and length(db_name) > 0
+    and query_sql not like 'show%'
+    and query_sql not like 'alter%'
+    and query_sql not like 'set%'
+    and query_sql not like 'commit%'
+    and query_sql not like 'roll%'
+    and query_sql not like 'begin%'
+    and query_sql not like 'end%'
+    and query_sql not like 'drop%'
+    and query_sql not like
'select 1%' + group by trace_id + having elapsedTime >= ##REPLACE_ELAPSED_TIME## + and length(sqlId) > 0 + limit ##REPLACE_LIMIT## + ''', +) + +sql_dict.set_value( + "get_plan_explains", + ''' +select /*+ READ_CONSISTENCY(WEAK) */ + plan_depth as planDepth, + plan_line_id as planLineId, + operator, name as objectName + from + oceanbase.gv$plan_cache_plan_explain + where + tenant_id = ##REPLACE_TENANT_ID## and ip = '##REPLACE_SVR_IP##' and port = ##REPLACE_SVR_PORT## and plan_id = ##REPLACE_PLAN_ID## + ''', +) + +sql_dict.set_value( + "get_plan_explains_for_ob4", + ''' +select /*+ READ_CONSISTENCY(WEAK) */ + plan_depth as planDepth, + plan_line_id as planLineId, + operator, + name as objectName, + rows, + cost + from + oceanbase.gv$ob_plan_cache_plan_explain + where + tenant_id = ##REPLACE_TENANT_ID## and svr_ip = '##REPLACE_SVR_IP##' and svr_port = ##REPLACE_SVR_PORT## and + plan_id = ##REPLACE_PLAN_ID## + ''', +) + +sql_dict.set_value( + "get_tables", + ''' +select + table_name as tableName, + table_id as tableId + from oceanbase.gv$table + where database_name = '##REPLACE_DATABASE_NAME##' limit 1 + ''', +) + +sql_dict.set_value( + "get_tables_for_ob4", + ''' +select + table_schema databaseName, + table_name tableName + from information_schema.tables + where table_schema = '##REPLACE_DATABASE_NAME##' and table_type='BASE TABLE' limit 1 + ''', +) + +sql_dict.set_value( + "get_tenants", + ''' +select + tenant_name as tenantName, + tenant_id as tenantId + from oceanbase.gv$tenant + ''', +) + +sql_dict.set_value( + "get_colum_list_on_lower_version", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + b.data_type dataType, + a.column_id columnId, + b.column_name columnName , + max(a.num_distinct) ndvCount + FROM + oceanbase.__all_column_statistic a, + oceanbase.__all_column b + WHERE + a.tenant_id=b.tenant_id + and a.table_id=b.table_id + and a.column_id=b.column_id + and b.column_name not like '%__substr%' + and a.tenant_id=##REPLACE_TENANT_ID## and a.table_id=##REPLACE_TABLE_ID## + ''', +) + +sql_dict.set_value( + "get_colum_list", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + b.data_type dataType, + a.column_id columnId, + b.column_name columnName, + max(a.num_distinct) ndvCount + FROM + oceanbase.__all_virtual_column_statistic a, + oceanbase.__all_virtual_column b + WHERE + a.tenant_id=b.tenant_id + and a.table_id=b.table_id + and a.column_id=b.column_id + and b.column_name not like '%__substr%' + and a.tenant_id=##REPLACE_TENANT_ID## and a.table_id=##REPLACE_TABLE_ID## + GROUP BY b.column_name + ''', +) + +sql_dict.set_value( + "get_column_min_and_max_value_list_on_lower_version", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + column_id columnId, + des_hex_str(case min_value when '19070000FDFFFFFFFFFFFFFFFF01' then '0' else min_value end) `minValue`, + des_hex_str(case max_value when '19070000FEFFFFFFFFFFFFFFFF01' then '0' else max_value end) `maxValue` + from + oceanbase.__all_column_statistic + where tenant_id = ##REPLACE_TENANT_ID## and table_id = ##REPLACE_TABLE_ID## + ''', +) + +sql_dict.set_value( + "get_column_min_and_max_value_list", + ''' +select /*+ READ_CONSISTENCY(weak),leading(a,b) use_hash(a,b) */ + column_id columnId, + des_hex_str(case min_value when '19070000FDFFFFFFFFFFFFFFFF01' then '0' else min_value end) `minValue`, + des_hex_str(case max_value when '19070000FEFFFFFFFFFFFFFFFF01' then '0' else max_value end) `maxValue` + from + oceanbase.__all_virtual_column_statistic + where + tenant_id = 
##REPLACE_TENANT_ID## and table_id = ##REPLACE_TABLE_ID##
+    ''',
+)
+
+sql_dict.set_value(
+    "get_tenant_name_list",
+    '''
+    select tenant_name from oceanbase.__all_tenant;
+    ''',
+)
+
+sql_dict.set_value(
+    "get_tenant_name_list_for_v4",
+    '''
+    SELECT tenant_name FROM oceanbase.DBA_OB_TENANTS where TENANT_TYPE != 'META';
+    ''',
+)
diff --git a/init_obdiag_cmd.sh b/init_obdiag_cmd.sh
index d784fac6..38247c22 100644
--- a/init_obdiag_cmd.sh
+++ b/init_obdiag_cmd.sh
@@ -18,7 +18,7 @@ _obdiag_completion() {
             fi
             ;;
         analyze)
-            type_list="log flt_trace"
+            type_list="log flt_trace sql sql_review"
            ;;
        rca)
            type_list="list run"
diff --git a/requirements3.txt b/requirements3.txt
index 386ea4c1..2b4c46b6 100644
--- a/requirements3.txt
+++ b/requirements3.txt
@@ -34,5 +34,6 @@ ruamel.yaml==0.17.4
 progressbar==2.5
 halo==0.0.31
 inspect2==0.1.2
+sqlgpt-parser>=0.0.1a5
 netifaces==0.11.0
-kubernetes==30.1.0
\ No newline at end of file
+kubernetes==30.1.0
diff --git a/test/analyzer/log/test_tree.py b/test/analyzer/log/test_tree.py
new file mode 100644
index 00000000..f26520a9
--- /dev/null
+++ b/test/analyzer/log/test_tree.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2023/12/09
+@file: test_tree.py
+@desc:
+"""
+
+from handler.analyzer.log_parser.tree import Tree
+
+file_datas = [
+    {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "1", "name": "open1", "id": "1", "parent_id": "00000000-0000-0000-0000-000000000000", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233214}},
+    {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open2", "id": "2", "parent_id": "00000000-0000-0000-0000-000000000000", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233214}},
+    {
+        'host_ip': '192.168.1.1',
+        'host_type': 'OBSERVER',
+        'trace_data': {"trace_id": "x", "name": "open3", "id": "3", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662117166233214, "logs": "test log", "tags": "just a test"},
+    },
+    {
+        'host_ip': '192.168.1.1',
+        'host_type': 'OBSERVER',
+        'trace_data': {
+            "trace_id": "x",
+            "name": "open6",
+            "id": "6",
+            "parent_id": "3",
+            "is_follow": "false",
+            "start_ts": 1662107166232204,
+            "end_ts": 1662108166233214,
+            "logs": "null",
+            "tags": [{"sql_text": "select * from t where c1=1"}, {"hit_plan": "false"}, {"sql_id": "XXXXXXXXXXXXXXXXXXXXXX"}, {"database_id": 111111111}],
+        },
+    },
+    {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open7", "id": "7", "parent_id": "6", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166433214, "logs": [{"end_ts": 1662107166433214}]}},
+    {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open11", "id": "11", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107167233214}},
+    {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY',
'trace_data': {"trace_id": "x", "name": "open22", "id": "22", "parent_id": "2", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107173233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open12", "id": "12", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662117166233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBPROXY', 'trace_data': {"trace_id": "x", "name": "open13", "id": "13", "parent_id": "1", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233314}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open23", "id": "23", "parent_id": "2", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166233314}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open32", "id": "32", "parent_id": "11", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166235214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open33", "id": "33", "parent_id": "11", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166283214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open41", "id": "41", "parent_id": "12", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166293214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open55", "id": "55", "parent_id": "32", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107166291214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open56", "id": "56", "parent_id": "32", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107167233214}}, + {'host_ip': '192.168.1.1', 'host_type': 'OBSERVER', 'trace_data': {"trace_id": "x", "name": "open66", "id": "66", "parent_id": "41", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107266233214}}, + { + 'host_ip': '192.168.1.1', + 'host_type': 'OBSERVER', + 'trace_data': {"trace_id": "x", "name": "open67", "id": "67", "parent_id": "999999", "is_follow": "false", "start_ts": 1662107166232204, "end_ts": 1662107966233214, "logs": "test log", "tags": "just a test"}, + }, +] + + +def output(tree): + if not tree.nodes: + print("The analysis result is empty") + return + for line in tree.traverse(10, 5): + print(line) + + +if __name__ == '__main__': + tree = Tree() + tree.build(file_datas) + tree.traverse(5, 5) + output(tree) diff --git a/test/analyzer/sql/test_arithmetic_rule.py b/test/analyzer/sql/test_arithmetic_rule.py new file mode 100644 index 00000000..345fd5d1 --- /dev/null +++ b/test/analyzer/sql/test_arithmetic_rule.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+
+"""
+@time: 2024/06/05
+@file: test_arithmetic_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.arithmetic import ArithmeticRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestArithmeticRuleWithRealSQL(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = ArithmeticRule()
+        self.parser = parser
+
+    def test_arithmetic_operation_detected(self):
+        # SQL statement that contains an arithmetic operation
+        sql_with_arithmetic = "SELECT * FROM table1 WHERE column1 + 1 > 2"
+        parsed_stmt = self.parser.parse(sql_with_arithmetic)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertTrue(result)
+
+    def test_no_arithmetic_operation(self):
+        # SQL statement without an arithmetic operation
+        sql_no_arithmetic = "SELECT * FROM table1 WHERE column1 > 2"
+        parsed_stmt = self.parser.parse(sql_no_arithmetic)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertFalse(result)
+
+    def test_suggestion_for_arithmetic_operation(self):
+        sql_with_arithmetic = "SELECT * FROM table1 WHERE column1 + 1 > 2"
+        parsed_stmt = self.parser.parse(sql_with_arithmetic)
+        result = self.rule.suggestion(parsed_stmt, None)
+        self.assertEqual(result.level, Level.NOTICE)
+
+    def test_suggestion_without_arithmetic_operation(self):
+        sql_no_arithmetic = "SELECT * FROM table1 WHERE column1 > 2"
+        parsed_stmt = self.parser.parse(sql_no_arithmetic)
+        result = self.rule.suggestion(parsed_stmt, None)
+        self.assertEqual(result.level, Level.OK)
+
+    def test_complex_arithmetic_operation_detected(self):
+        # Complex SQL with an arithmetic operation nested inside a subquery
+        sql_complex = """
+        SELECT t1.id
+        FROM table1 t1
+        JOIN (
+            SELECT id, column1 - column2 + 1 AS derived_col
+            FROM table2
+            WHERE column3 * 2 < 10
+        ) t2 ON t1.id = t2.id
+        WHERE t2.derived_col > 5
+        """
+        parsed_stmt = self.parser.parse(sql_complex)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertTrue(result, "Should detect arithmetic operation in complex SQL statement.")
+
+    def test_complex_no_arithmetic_operation(self):
+        # Complex SQL with a JOIN and a subquery but no arithmetic operation
+        sql_complex_no_arithmetic = """
+        SELECT t1.id
+        FROM table1 t1
+        JOIN (
+            SELECT id, column1
+            FROM table2
+            WHERE column3 < 10
+        ) t2 ON t1.id = t2.id
+        WHERE t2.column1 > 5
+        """
+        parsed_stmt = self.parser.parse(sql_complex_no_arithmetic)
+        result = self.rule.match(parsed_stmt, None)
+        self.assertFalse(result, "Should not detect arithmetic operation in complex SQL statement.")
+
+
+if __name__ == '__main__':
+    unittest.main()
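These rule tests all lean on the same small contract: match(parsed_stmt, context) returns True when a statement violates the rule, and suggestion(parsed_stmt, context) returns a result whose level comes from handler.analyzer.sql.rules.level. A minimal sketch of that contract; RuleResult and ExampleRule are hypothetical names for illustration, not part of this patch:

from dataclasses import dataclass
from handler.analyzer.sql.rules.level import Level

@dataclass
class RuleResult:
    # Hypothetical stand-in for the result object the real rules return.
    level: Level
    suggestion: str = ""

class ExampleRule:
    # Illustrative rule: flag any statement whose text contains "SELECT *".
    def match(self, parsed_stmt, catalog=None) -> bool:
        return "select *" in str(parsed_stmt).lower()

    def suggestion(self, parsed_stmt, catalog=None) -> RuleResult:
        if self.match(parsed_stmt, catalog):
            return RuleResult(Level.NOTICE, "enumerate the columns you actually need")
        return RuleResult(Level.OK)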
diff --git a/test/analyzer/sql/test_full_scan_rule.py b/test/analyzer/sql/test_full_scan_rule.py
new file mode 100644
index 00000000..ef44b4d0
--- /dev/null
+++ b/test/analyzer/sql/test_full_scan_rule.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_full_scan_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.full_scan import FullScanRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestFullScanRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = FullScanRule()
+
+    def test_full_scan_with_negation_but_filtered(self):
+        # Sample query with a negated range condition; a full table scan is expected
+        sql_filtered_negation = "SELECT * FROM users WHERE NOT (id BETWEEN 1 AND 10)"
+        parsed_stmt = parser.parse(sql_filtered_negation)
+        print(parsed_stmt)
+        self.assertTrue(self.rule.match(parsed_stmt))
+
+    def test_full_scan_with_like_pattern_full(self):
+        # Sample query using LIKE with a leading % pattern; a full table scan is expected
+        sql_like_full = "SELECT * FROM users WHERE username LIKE '%zhangsan'"
+        parsed_stmt = parser.parse(sql_like_full)
+        print(parsed_stmt)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        # suggestion = self.rule.suggestion(parsed_stmt)
+        # self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_not_in_doesnt_hide_full_scan(self):
+        # Sample query using NOT IN; a full table scan is possible
+        sql_not_in = "SELECT * FROM orders WHERE customerId NOT IN (SELECT customerId FROM active_customers)"
+        parsed_stmt = parser.parse(sql_not_in)
+        self.assertFalse(self.rule.match(parsed_stmt))
+
+    def test_optimized_not_conditions(self):
+        # Sample query using a NOT BETWEEN condition
+        sql_optimized_not = "SELECT * FROM users WHERE age NOT BETWEEN 18 AND 25"
+        parsed_stmt = parser.parse(sql_optimized_not)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+
+if __name__ == '__main__':
+    unittest.main()
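The full-scan cases above come down to predicate sargability, which is generic SQL behavior rather than anything obdiag-specific; a quick illustration:

# A leading wildcard forces every row's username to be examined.
full_scan_sql = "SELECT * FROM users WHERE username LIKE '%zhangsan'"

# A fixed prefix lets an index on username be range-scanned instead.
indexable_sql = "SELECT * FROM users WHERE username LIKE 'zhangsan%'"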
diff --git a/test/analyzer/sql/test_is_null_rule.py b/test/analyzer/sql/test_is_null_rule.py
new file mode 100644
index 00000000..a23627c5
--- /dev/null
+++ b/test/analyzer/sql/test_is_null_rule.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_is_null_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.is_null import IsNullRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestIsNullRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = IsNullRule()
+
+    def test_improper_null_comparison(self):
+        # Improper NULL comparisons
+        sqls = ["SELECT * FROM table1 WHERE column1 = NULL", "SELECT * FROM table1 WHERE column1 <> NULL", "SELECT * FROM table1 WHERE NULL = column1", "SELECT * FROM table1 WHERE NULL <> column1"]
+
+        for sql in sqls:
+            parsed_stmt = parser.parse(sql)
+            self.assertTrue(self.rule.match(parsed_stmt), f"Expected to match for SQL: {sql}")
+            suggestion = self.rule.suggestion(parsed_stmt)
+            self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_proper_null_check(self):
+        # Proper NULL checks
+        proper_sqls = ["SELECT * FROM table1 WHERE column1 IS NULL", "SELECT * FROM table1 WHERE column1 IS NOT NULL"]
+
+        for sql in proper_sqls:
+            parsed_stmt = parser.parse(sql)
+            self.assertFalse(self.rule.match(parsed_stmt), f"Should not match for SQL: {sql}")
+            suggestion = self.rule.suggestion(parsed_stmt)
+            self.assertEqual(suggestion.level, Level.OK)
+
+    def test_mixed_query(self):
+        # A mix of proper and improper NULL comparisons
+        sql = "SELECT * FROM table1 WHERE column1 IS NULL OR column2 = NULL"
+        parsed_stmt = parser.parse(sql)
+        self.assertTrue(self.rule.match(parsed_stmt), "Expected to match due to improper NULL comparison")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+
+if __name__ == '__main__':
+    unittest.main()
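The IS NULL cases rest on SQL three-valued logic: column1 = NULL evaluates to UNKNOWN for every row, so the predicate filters out all rows instead of matching the NULLs. The rewrite the rule pushes toward:

broken_sql = "SELECT * FROM table1 WHERE column1 = NULL"    # matches nothing: NULL = NULL is UNKNOWN
correct_sql = "SELECT * FROM table1 WHERE column1 IS NULL"  # matches rows whose column1 is NULL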
diff --git a/test/analyzer/sql/test_large_in_clause_rule.py b/test/analyzer/sql/test_large_in_clause_rule.py
new file mode 100644
index 00000000..4eb05923
--- /dev/null
+++ b/test/analyzer/sql/test_large_in_clause_rule.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_large_in_clause_rule.py
+@desc:
+"""
+
+import unittest
+from handler.analyzer.sql.rules.review.large_in_clause import LargeInClauseAdjustedRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestLargeInClauseAdjustedRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = LargeInClauseAdjustedRule()
+
+    def test_large_in_clause(self):
+        # Build a SQL statement whose IN clause has more than 200 elements
+        large_in_clause_sql = "SELECT * FROM table1 WHERE id IN (" + ','.join(['?'] * 201) + ")"
+        parsed_stmt = parser.parse(large_in_clause_sql)
+
+        self.assertTrue(self.rule.match(parsed_stmt), "Expected to match for SQL with over 200 IN elements")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_small_in_clause(self):
+        # Build a SQL statement whose IN clause has fewer than 200 elements
+        small_in_clause_sql = "SELECT * FROM table1 WHERE id IN (" + ','.join(['?'] * 199) + ")"
+        parsed_stmt = parser.parse(small_in_clause_sql)
+
+        self.assertFalse(self.rule.match(parsed_stmt), "Should not match for SQL within the limit of 200 IN elements")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_no_in_clause(self):
+        # Build a SQL statement without an IN clause
+        no_in_clause_sql = "SELECT * FROM table1 WHERE column = 'value'"
+        parsed_stmt = parser.parse(no_in_clause_sql)
+
+        self.assertFalse(self.rule.match(parsed_stmt), "Should not match for SQL without an IN clause")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+
+if __name__ == '__main__':
+    unittest.main()
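The usual remedy this rule points at is batching the value list; a small sketch using parameterized queries and the 200-element threshold these tests assume:

def run_in_batches(cursor, ids, batch_size=200):
    # Execute one parameterized query per batch instead of a single huge IN list.
    for i in range(0, len(ids), batch_size):
        batch = ids[i:i + batch_size]
        placeholders = ",".join(["%s"] * len(batch))
        cursor.execute(f"SELECT * FROM table1 WHERE id IN ({placeholders})", batch)
        yield from cursor.fetchall()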
diff --git a/test/analyzer/sql/test_multi_table_join_rule.py b/test/analyzer/sql/test_multi_table_join_rule.py
new file mode 100644
index 00000000..d758620e
--- /dev/null
+++ b/test/analyzer/sql/test_multi_table_join_rule.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_multi_table_join_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.multi_table_join import MultiTableJoinRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestMultiTableJoinRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = MultiTableJoinRule()
+
+    def test_excessive_joins_detected(self):
+        # This SQL is assumed to have more than 5 JOINs
+        sql_with_excessive_joins = """
+        SELECT *
+        FROM table1
+        JOIN table2 ON table1.id = table2.table1_id
+        JOIN table3 ON table2.id = table3.table2_id
+        JOIN table4 ON table3.id = table4.table3_id
+        JOIN table5 ON table4.id = table5.table4_id
+        JOIN table6 ON table5.id = table6.table5_id
+        JOIN table7 ON table6.id = table7.table6_id
+        """
+        parsed_stmt = parser.parse(sql_with_excessive_joins)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect excessive joins in SQL statement.")
+
+    def test_no_excessive_joins(self):
+        # Normal SQL with at most 5 JOINs
+        sql_no_excessive_joins = """
+        SELECT *
+        FROM table1
+        JOIN table2 ON table1.id = table2.table1_id
+        JOIN table3 ON table2.id = table3.table2_id
+        """
+        parsed_stmt = parser.parse(sql_no_excessive_joins)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect excessive joins in SQL statement.")
+
+    def test_complex_query_with_subqueries_no_excessive_joins(self):
+        # Complex query with subqueries but not exceeding join limit (e.g., 7 tables but only 4 joins)
+        sql_complex = """
+        SELECT t1.*, t2.col
+        FROM table1 t1
+        JOIN (
+            SELECT t2.id, t3.col
+            FROM table2 t2
+            JOIN table3 t3 ON t2.id = t3.table2_id
+            WHERE t3.col IN (SELECT col FROM table4 WHERE condition)
+        ) t2 ON t1.id = t2.id
+        JOIN table5 t5 ON t1.id = t5.table1_id
+        JOIN table6 t6 ON t5.id = t6.table5_id;
+        """
+        parsed_stmt = parser.parse(sql_complex)
+        self.assertFalse(self.rule.match(parsed_stmt))  # Assuming subqueries don't increment join count
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_complex_query_with_excessive_joins_and_subqueries(self):
+        # Complex query exceeding join limit due to multiple explicit joins and possibly join in subqueries
+        sql_complex_excessive = """
+        SELECT t1.*, t2.col
+        FROM table1 t1
+        JOIN table2 t2 ON t1.id = t2.table1_id
+        JOIN table3 t3 ON t2.id = t3.table2_id
+        JOIN table4 t4 ON t3.id = t4.table3_id
+        JOIN table5 t5 ON t4.id = t5.table4_id
+        JOIN (
+            SELECT t6.id, t7.col
+            FROM table6 t6
+            JOIN table7 t7 ON t6.id = t7.table6_id
+        ) subquery ON t5.id = subquery.id;
+        """
+        parsed_stmt = parser.parse(sql_complex_excessive)
+        self.assertTrue(self.rule.match(parsed_stmt))
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_parse.py b/test/analyzer/sql/test_parse.py
new file mode 100644
index 00000000..6889b566
--- /dev/null
+++ b/test/analyzer/sql/test_parse.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/06/05 +@file: test_parse.py +@desc: +""" + +import unittest +from sqlgpt_parser.parser.oceanbase_parser import parser + + +class MyTestCase(unittest.TestCase): + def test_create_table(self): + sql = """ + CREATE TABLE tbl1 (c1 INT, c2 VARCHAR(50)) + """ + statment = "{'type': 'create_table', " "'table_name': 'tbl1', " "'element_list': [('c1', FieldType(), False), ('c2', FieldType(), False)]}" + result = parser.parse(sql) + self.assertEqual(str(result), statment) + self.assertEqual(result["type"], "create_table") + self.assertEqual(result["table_name"], "tbl1") + self.assertEqual(str(result["element_list"][0][1]), "INT") + + def test_create_table_1(self): + sql = """ +CREATE TABLE `ob_hist_sql_audit_stat_0` ( + `ob_cluster_id` bigint(20) NOT NULL COMMENT 'OB的集群Id', + `cluster_name` varchar(128) NOT NULL COMMENT 'OB的集群名称', + `ob_tenant_id` bigint(20) NOT NULL COMMENT 'OB的租户Id', + `ob_server_id` bigint(20) NOT NULL DEFAULT '0' COMMENT 'OB的服务Id', + `ob_db_id` bigint(20) NOT NULL DEFAULT '0' COMMENT 'OB的数据库Id', + `ob_user_id` bigint(20) NOT NULL DEFAULT '0' COMMENT 'OB的用户Id', + `sql_id` varchar(32) NOT NULL DEFAULT '0' COMMENT 'SQL_ID', + `begin_interval_time` bigint(20) NOT NULL COMMENT '统计指标的区间开始时间', + `end_interval_time` bigint(20) NOT NULL COMMENT '统计指标的区间结束时间', + `executions` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)执行次数', + `affected_rows` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)更新行数', + `return_rows` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)查询返回行数', + `partition_cnt` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)访问分区数', + `fail_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码不为0的发生次数', + `ret_code_4012_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-4012(OB_TIMEOUT)的发生次数', + `ret_code_4013_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-4013(OB_ALLOCATE_MEMORY_FAILED)的发生次数', + `ret_code_5001_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-5001(OB_ERR_PARSE_SQL)的发生次数', + `ret_code_5024_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-5024(OB_ERR_PRIMARY_KEY_DUPLICATE)的发生次数', + `ret_code_5167_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-5167(OB_ERR_DATA_TOO_LONG)的发生次数', + `ret_code_5217_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-5217(OB_ERR_BAD_FIELD_ERROR)的发生次数', + `ret_code_6002_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)结果码-6002(OB_TRANS_ROLLBACKED)的发生次数', + `last_fail_info` bigint(20) DEFAULT NULL COMMENT '最后一次错误信息,前46存储错误时间信息,后18位存储错误码信息', + `event_0_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"system internal wait\"等待事件的累计时间(微秒)(sum(case event when \"system internal wait\" then wait_time_micro else 0 end))', + `event_1_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"mysql response wait client\"等待事件的累计时间(微秒)(sum(case event when \"mysql response wait client\" then wait_time_micro else 0 end))', + `event_2_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"sync rpc\"等待事件的累计时间(微秒)(sum(case event when \"sync rpc\" then wait_time_micro else 0 end))', + `event_3_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"db file data read\"等待事件的累计时间(微秒)(sum(case 
event when \"db file data read\" then wait_time_micro else 0 end))', + `event_4_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"\"等待事件的累计时间(微秒)(sum(case event when \"\" then wait_time_micro else 0 end))', + `event_5_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)\"\"等待事件的累计时间(微秒)(sum(case event when \"\" then wait_time_micro else 0 end))', + `total_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)总等待时间(微秒)', + `total_waits` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)总等待次数', + `rpc_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)发送RPC次数', + `plan_type_local_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)本地执行计划次数(sum(case plan_type when 1 then 1 else 0 end))', + `plan_type_remote_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)远程执行计划次数(sum(case plan_type when 2 then 1 else 0 end))', + `plan_type_dist_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)分布式执行计划次数(sum(case plan_type when 3 then 1 else 0 end))', + `inner_sql_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)内部SQL次数(count is_inner_sql=1)', + `executor_rpc_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)执行RPC请求次数', + `miss_plan_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)未命中计划缓存的次数', + `elapsed_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)响应时间 —— 接受到请求到执行结束的总时间(微秒)', + `max_elapsed_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间忙内的最大值)最大响应时间', + `net_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)客户端请求传输到Observer的时间(微秒)', + `net_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)请求在Observer从网络进入队列的时间(微秒)s', + `queue_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)请求在队列中的等待时间(微秒)', + `decode_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)语法解析时间(微秒)', + `get_plan_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)生成计划的时间(微秒)', + `execute_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)执行计划的时间(微秒)', + `cpu_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)CPU时间(微秒)(execute_time+get_plan_time-total_wait_time_micro)', + `max_cpu_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的最大值)最大CPU时间(微秒)', + `application_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有Application类事件的总时间(微秒)', + `concurrency_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有Concurrency类事件的总时间(微秒)', + `user_io_wait_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有UserIO类事件的总时间(微秒)', + `schedule_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)所有Schedule类事件的总时间(微秒)', + `row_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Row Cache命中次数', + `bloom_filter_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Bloom Filter Cache命中次数', + `block_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Block Cache命中次数', + `block_index_cache_hit` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Block Index Cache命中次数', + `disk_reads` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)磁盘读次数', + `retry_cnt` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)重试次数', + `table_scan` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)表扫描次数', + `consistency_level_strong` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)强一致性级别次数(sum(case consistency_level when 3 then 1 else 0 end))', + `consistency_level_weak` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)弱一致性级别次数(sum(case consistency_level when 2 then 1 else 0 end))', + `memstore_read_row_count` bigint(20) NOT NULL DEFAULT '0' COMMENT '(区间内的总计)Memstore读行数', + `ssstore_read_row_count` bigint(20) NOT NULL DEFAULT '0' 
COMMENT '(区间内的总计)Ssstore读行数', + `min_request_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '区间内最小的执行时间', + `max_request_time` bigint(20) NOT NULL DEFAULT '0' COMMENT '区间内最大的执行时间', + `sql_type` bigint(20) NOT NULL DEFAULT '-1' COMMENT 'SQL的类型,1:select, 2:select for update, 3:insert,4:update, 5: delete, 6: replace', + `max_affected_rows` bigint(20) NOT NULL DEFAULT '0' COMMENT '区间内最大的更新行数', + `max_return_rows` bigint(20) NOT NULL DEFAULT '0' COMMENT '区间内最大的查询返回行数', + `max_partition_cnt` bigint(20) NOT NULL DEFAULT '0' COMMENT '区间内最大的访问分区数', + `user_client_ip_of_max_affected_rows` varchar(32) NOT NULL DEFAULT '' COMMENT '区间内最大的影响行数对应的user_client_ip', + `user_client_ip_of_max_return_rows` varchar(32) NOT NULL DEFAULT '' COMMENT '区间内最大的返回行数对应的user_client_ip', + `user_client_ip_of_max_partition_cnt` varchar(32) NOT NULL DEFAULT '' COMMENT '区间内最大的分区数对应的user_client_ip', + `create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (`ob_cluster_id`, `cluster_name`, `ob_tenant_id`, `begin_interval_time`, `ob_server_id`, `ob_db_id`, `ob_user_id`, `sql_id`), + KEY `idx_ob_hist_sql_audit_stat_cluster_server_time` (`ob_cluster_id`, `cluster_name`, `ob_server_id`, `begin_interval_time`), + KEY `idx_sql_id_begin_interval_time` (`sql_id`, `begin_interval_time`) +) + """ + result = parser.parse(sql) + self.assertEqual(len(result["index_list"]), 3) + + def test_select(self): + sql = """ + SELECT * FROM T1 + """ + statment = '''{'line': 0, 'pos': 0, 'query_body': QuerySpecification(select=Select(distinct=False, select_items=[SingleColumn(expression=QualifiedNameReference(name=QualifiedName.of("*")))]), from_=Table(name=QualifiedName.of("T1"), for_update=False), order_by=[], limit=0, offset=0, for_update=False, nowait_or_wait=False), 'order_by': [], 'limit': 0, 'offset': 0}''' + result = parser.parse(sql) + self.assertEqual(str(result), statment) + + sql = """ + SELECT t1 FROM T1 + """ + statment = '''{'line': 0, 'pos': 0, 'query_body': QuerySpecification(select=Select(distinct=False, select_items=[SingleColumn(alias=[], expression=QualifiedNameReference(name=QualifiedName.of("t1")))]), from_=Table(name=QualifiedName.of("T1"), for_update=False), order_by=[], limit=0, offset=0, for_update=False, nowait_or_wait=False), 'order_by': [], 'limit': 0, 'offset': 0}''' + result = parser.parse(sql) + self.assertEqual(str(result), statment) + + sql = """ + SELECT t1 FROM T1 where t1 > 12 + """ + statment = '''{'line': 0, 'pos': 0, 'query_body': QuerySpecification(select=Select(distinct=False, select_items=[SingleColumn(alias=[], expression=QualifiedNameReference(name=QualifiedName.of("t1")))]), from_=Table(name=QualifiedName.of("T1"), for_update=False), where=ComparisonExpression(type='>', left=QualifiedNameReference(name=QualifiedName.of("t1")), right=LongLiteral(value=12)), order_by=[], limit=0, offset=0, for_update=False, nowait_or_wait=False), 'order_by': [], 'limit': 0, 'offset': 0}''' + result = parser.parse(sql) + print(result) + self.assertEqual(str(result), statment) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/analyzer/sql/test_sellect_all_rule.py b/test/analyzer/sql/test_sellect_all_rule.py new file mode 100644 index 00000000..2a3f1468 --- /dev/null +++ b/test/analyzer/sql/test_sellect_all_rule.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. 
+# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. + +""" +@time: 2024/06/05 +@file: test_sellect_all_rule.py +@desc: +""" +import unittest +from sqlgpt_parser.parser.oceanbase_parser import parser +from handler.analyzer.sql.rules.review.select_all import SelectAllRule + + +class TestSelectAllCase(unittest.TestCase): + def test_select_all_rule_true(self): + statement = parser.parse("SELECT * FROM T1") + result_match = SelectAllRule().match(statement) + self.assertTrue(result_match) + result_suggestion = SelectAllRule().suggestion(statement) + print(result_suggestion) + # self.assertIsNotNone(result_suggestion) + + def test_select_all_rule_false(self): + statement = parser.parse("SELECT 1 FROM T1") + result_match = SelectAllRule().match(statement) + self.assertFalse(result_match) + result_suggestion = SelectAllRule().suggestion(statement) + self.assertIsNotNone(result_suggestion) + + def test_select_all_rule_false_1(self): + statement = parser.parse("SELECT count(*) FROM T1") + result_match = SelectAllRule().match(statement) + self.assertFalse(result_match) + result_suggestion = SelectAllRule().suggestion(statement) + self.assertIsNotNone(result_suggestion) + + def test_select_all_rule_true_1(self): + sql = ''' + SELECT * + FROM Employees e + JOIN Departments d ON e.DepartmentID = d.DepartmentID + LEFT JOIN ( + SELECT EmployeeID, ProjectID, COUNT(*) AS NumberOfProjects + FROM Projects_Employees_Pivot + GROUP BY EmployeeID, ProjectID + ) pe ON e.EmployeeID = pe.EmployeeID + WHERE d.DepartmentName = 'Sales' + ORDER BY e.EmployeeName + ''' + statement = parser.parse(sql) + result_match = SelectAllRule().match(statement) + self.assertTrue(result_match) + result_suggestion = SelectAllRule().suggestion(statement) + self.assertIsNotNone(result_suggestion) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/analyzer/sql/test_update_delete_multi_table_rule.py b/test/analyzer/sql/test_update_delete_multi_table_rule.py new file mode 100644 index 00000000..1e044492 --- /dev/null +++ b/test/analyzer/sql/test_update_delete_multi_table_rule.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -* +# Copyright (c) 2022 OceanBase +# OceanBase Diagnostic Tool is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +# See the Mulan PSL v2 for more details. 
+
+"""
+@time: 2024/06/05
+@file: test_update_delete_multi_table_rule.py
+@desc:
+"""
+import unittest
+from handler.analyzer.sql.rules.review.update_delete_multi_table import UpdateDeleteMultiTableRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestUpdateDeleteMultiTableRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = UpdateDeleteMultiTableRule()
+
+    def test_update_multi_table_detected(self):
+        # This SQL is assumed to perform a multi-table UPDATE
+        sql_with_multi_table_update = """
+        UPDATE table1
+        INNER JOIN table2 ON table1.id = table2.table1_id
+        SET table1.column = 'new_value'
+        """
+        parsed_stmt = parser.parse(sql_with_multi_table_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect multi-table UPDATE operation.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_delete_multi_table_detected(self):
+        # This SQL is assumed to perform a multi-table DELETE
+        sql_with_multi_table_delete = """
+        DELETE table1
+        FROM table1
+        INNER JOIN table2 ON table1.id = table2.table1_id
+        """
+        parsed_stmt = parser.parse(sql_with_multi_table_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect multi-table DELETE operation.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_delete_with_subquery_and_join(self):
+        """Multi-table DELETE that contains a subquery and a join"""
+        complex_delete_sql = """
+        DELETE table1
+        FROM table1
+        INNER JOIN (
+            SELECT table1_id
+            FROM table2
+            WHERE some_column = 'some_value'
+            GROUP BY table1_id
+            HAVING COUNT(*) > 1
+        ) subquery ON table1.id = subquery.table1_id
+        """
+        parsed_stmt = parser.parse(complex_delete_sql)
+        self.assertTrue(self.rule.match(parsed_stmt), "Should detect complex multi-table DELETE operation.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.WARN)
+
+    def test_single_table_operation(self):
+        # Single-table UPDATE; should not trigger a warning
+        sql_single_table_update = "UPDATE table1 SET column = 'value' WHERE id = 1"
+        parsed_stmt = parser.parse(sql_single_table_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect single-table UPDATE operation as an issue.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+        # Single-table DELETE; likewise should not trigger a warning
+        sql_single_table_delete = "DELETE FROM table1 WHERE id = 1"
+        parsed_stmt = parser.parse(sql_single_table_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect single-table DELETE operation as an issue.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/sql/test_update_delete_without_where_or_true_condition_rule.py b/test/analyzer/sql/test_update_delete_without_where_or_true_condition_rule.py
new file mode 100644
index 00000000..97c9c0d2
--- /dev/null
+++ b/test/analyzer/sql/test_update_delete_without_where_or_true_condition_rule.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/06/05
+@file: test_update_delete_without_where_or_true_condition_rule.py
+@desc: Unit tests for UpdateDeleteWithoutWhereOrTrueConditionRule.
+"""
+import unittest
+from handler.analyzer.sql.rules.review.update_delete_without_where_or_true_condition import UpdateDeleteWithoutWhereOrTrueConditionRule
+from sqlgpt_parser.parser.oceanbase_parser import parser
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestUpdateDeleteWithoutWhereConditionRule(unittest.TestCase):
+
+    def setUp(self):
+        self.rule = UpdateDeleteWithoutWhereOrTrueConditionRule()
+
+    def test_update_without_where(self):
+        sql_without_where_update = "UPDATE table1 SET column1 = 'new_value'"
+        parsed_stmt = parser.parse(sql_without_where_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect UPDATE without WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.CRITICAL)
+
+    def test_delete_without_where(self):
+        sql_without_where_delete = "DELETE FROM table1"
+        parsed_stmt = parser.parse(sql_without_where_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect DELETE without WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.CRITICAL)
+
+    def test_update_with_always_true_where(self):
+        sql_always_true_update = "UPDATE table1 SET column1 = 'new_value' WHERE 1 = 1"
+        parsed_stmt = parser.parse(sql_always_true_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect UPDATE with always-true WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.CRITICAL)
+
+    def test_delete_with_always_true_where(self):
+        sql_always_true_delete = "DELETE FROM table1 WHERE 1 = 1"
+        parsed_stmt = parser.parse(sql_always_true_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertTrue(result, "Should detect DELETE with always-true WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.CRITICAL)
+
+    def test_valid_update_with_where(self):
+        sql_valid_update = "UPDATE table1 SET column1 = 'new_value' WHERE id = 1"
+        parsed_stmt = parser.parse(sql_valid_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect a valid UPDATE with WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_valid_delete_with_where(self):
+        sql_valid_delete = "DELETE FROM table1 WHERE id = 1"
+        parsed_stmt = parser.parse(sql_valid_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not detect a valid DELETE with WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_update_with_nested_subquery(self):
+        # The UPDATE uses a nested subquery but still has a valid WHERE condition
+        sql_nested_update = """
+        UPDATE table1
+        SET column = (SELECT MAX(sub_col) FROM table2 WHERE table2.id = table1.id)
+        WHERE EXISTS(SELECT 1 FROM table3 WHERE table3.table1_id = table1.id)
+        """
+        parsed_stmt = parser.parse(sql_nested_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag an UPDATE with a nested subquery and a valid WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_delete_with_function_in_where(self):
+        # The WHERE clause calls a function, but it is not an always-true condition
+        sql_function_delete = "DELETE FROM table1 WHERE DATE(column) = CURDATE()"
+        parsed_stmt = parser.parse(sql_function_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag a DELETE with a function in WHERE clause that's not always true.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_complex_delete_with_multi_level_joins(self):
+        # A DELETE with multi-level JOINs, constrained by a reasonable WHERE condition
+        sql_complex_delete = """
+        DELETE t1
+        FROM table1 t1
+        JOIN table2 t2 ON t1.id = t2.t1_id
+        JOIN table3 t3 ON t2.id = t3.t2_id
+        WHERE t3.status = 'active'
+        """
+        parsed_stmt = parser.parse(sql_complex_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag a DELETE with multi-level JOINs and a specific WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_update_with_case_expression(self):
+        # The UPDATE sets a column via a CASE expression and still has a WHERE condition
+        sql_case_update = """
+        UPDATE table1
+        SET column = CASE WHEN column2 = 'value' THEN 'new_val' ELSE column END
+        WHERE column3 IS NOT NULL
+        """
+        parsed_stmt = parser.parse(sql_case_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag an UPDATE using CASE expression and a WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_delete_with_false_condition(self):
+        # The WHERE condition is never true; the rule only flags missing or always-true conditions
+        sql_false_delete = "DELETE FROM table1 WHERE 1 = 0"
+        parsed_stmt = parser.parse(sql_false_delete)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag a DELETE with a never-true WHERE clause.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+    def test_update_with_multiple_conditions(self):
+        # The UPDATE carries several WHERE conditions combined with AND/OR
+        sql_multiple_conditions_update = """
+        UPDATE table1
+        SET column = 'new_value'
+        WHERE column1 = 'value1' AND column2 = 'value2' OR column3 IN ('value3', 'value4')
+        """
+        parsed_stmt = parser.parse(sql_multiple_conditions_update)
+        result = self.rule.match(parsed_stmt)
+        self.assertFalse(result, "Should not flag an UPDATE with multiple, combined WHERE conditions.")
+        suggestion = self.rule.suggestion(parsed_stmt)
+        self.assertEqual(suggestion.level, Level.OK)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/analyzer/test_level.py b/test/analyzer/test_level.py
new file mode 100644
index 00000000..e0c696f9
--- /dev/null
+++ b/test/analyzer/test_level.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# Copyright (c) 2022 OceanBase
+# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
+# You can use this software according to the terms and conditions of the Mulan PSL v2.
+# You may obtain a copy of Mulan PSL v2 at:
+# http://license.coscl.org.cn/MulanPSL2
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+# See the Mulan PSL v2 for more details.
+
+"""
+@time: 2024/07/02
+@file: test_level.py
+@desc: Unit tests for the Level severity enum.
+"""
+import unittest
+from handler.analyzer.sql.rules.level import Level
+
+
+class TestLevelEnum(unittest.TestCase):
+
+    def test_enum_creation_and_access(self):
+        self.assertEqual(Level.OK.name, 'OK')
+        self.assertEqual(Level.OK.value, (1, 'ok'))
+        self.assertEqual(Level.CRITICAL.string, 'critical')
+
+    def test_comparison_operators(self):
+        self.assertTrue(Level.OK < Level.NOTICE)
+        self.assertTrue(Level.NOTICE <= Level.NOTICE)
+        self.assertFalse(Level.WARN <= Level.OK)
+        self.assertTrue(Level.CRITICAL > Level.WARN)
+        self.assertTrue(Level.CRITICAL >= Level.CRITICAL)
+
+    def test_from_string(self):
+        self.assertEqual(Level.from_string('ok'), Level.OK)
+        self.assertEqual(Level.from_string('warn'), Level.WARN)
+
+        with self.assertRaises(ValueError) as context:
+            Level.from_string('error')
+        self.assertEqual(str(context.exception), "No such level: error")
+
+    def test_invalid_string(self):
+        with self.assertRaises(ValueError) as context:
+            Level.from_string('unknown')
+        self.assertEqual(str(context.exception), "No such level: unknown")
+
+
+if __name__ == '__main__':
+    unittest.main()
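
For readers without the repository checked out, the assertions in test_level.py pin down the Level contract fairly tightly. The sketch below is not the code in handler/analyzer/sql/rules/level.py; it is a minimal reconstruction that satisfies those assertions, and the numeric ranks for NOTICE and WARN (2 and 3) are assumptions inferred only from the required ordering.

#!/usr/bin/env python
# Minimal sketch of a Level enum consistent with test_level.py (assumed, not the repo code).
from enum import Enum
from functools import total_ordering


@total_ordering
class Level(Enum):
    # Each member stores a (rank, display string) tuple, matching Level.OK.value == (1, 'ok').
    OK = (1, 'ok')
    NOTICE = (2, 'notice')
    WARN = (3, 'warn')
    CRITICAL = (4, 'critical')

    @property
    def string(self):
        # Satisfies Level.CRITICAL.string == 'critical'
        return self.value[1]

    def __lt__(self, other):
        # Ordering by rank; total_ordering derives <=, >, >= from this and Enum's __eq__
        if isinstance(other, Level):
            return self.value[0] < other.value[0]
        return NotImplemented

    @classmethod
    def from_string(cls, s):
        # Satisfies Level.from_string('warn') == Level.WARN and the ValueError message format
        for member in cls:
            if member.string == s:
                return member
        raise ValueError("No such level: {0}".format(s))

Keeping the rank and display string together in the enum value puts ordering and presentation in one place, which is what the .value and .string assertions above suggest the real implementation does.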
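Similarly, the tests for UpdateDeleteWithoutWhereOrTrueConditionRule define the observable behavior: flag an UPDATE or DELETE that has no WHERE clause or only an always-true WHERE 1 = 1, and leave everything else alone, including a never-true WHERE 1 = 0. The real rule inspects the sqlgpt-parser AST; the regex heuristic below, with the hypothetical name naive_match, is only a compact illustration of that contract and would misjudge SQL the AST-based rule handles correctly (for example, a WHERE that appears only inside a subquery).

#!/usr/bin/env python
# Naive stand-in illustrating the contract tested above (assumed, not the repo rule).
import re

IS_WRITE = re.compile(r"^\s*(update|delete)\b", re.IGNORECASE)
HAS_WHERE = re.compile(r"\bwhere\b", re.IGNORECASE)
ALWAYS_TRUE = re.compile(r"\bwhere\s+1\s*=\s*1\b", re.IGNORECASE)


def naive_match(sql):
    """Return True when an UPDATE/DELETE lacks a WHERE clause or uses WHERE 1 = 1."""
    if not IS_WRITE.match(sql):
        return False
    if not HAS_WHERE.search(sql):
        return True
    return bool(ALWAYS_TRUE.search(sql))


if __name__ == '__main__':
    # Mirrors the expectations of the unit tests above
    assert naive_match("DELETE FROM table1")
    assert naive_match("UPDATE table1 SET column1 = 'new_value' WHERE 1 = 1")
    assert not naive_match("DELETE FROM table1 WHERE id = 1")
    assert not naive_match("DELETE FROM table1 WHERE 1 = 0")
    print("naive checks passed")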