Skip to content

Commit

Permalink
merge bitcoin#24932: Convert lint-locale-dependence.sh to Python
Browse files Browse the repository at this point in the history
  • Loading branch information
kwvg committed Nov 24, 2024
1 parent 2e81b3c commit 544bc0d
Show file tree
Hide file tree
Showing 2 changed files with 264 additions and 246 deletions.
264 changes: 264 additions & 0 deletions test/lint/lint-locale-dependence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
#!/usr/bin/env python3
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
# This can cause a conflict when using POSIX functions, for instance, when
# converting between data types such as floats and strings, since the notation
# may differ between locales. To get around this problem, call the POSIX function
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
# or QCoreApplication to reset the locale that is used for number formatting to
# "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
#
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent stoul/strtol with locale
# independent ToIntegral<T>(...).
# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.

import re
import sys

from subprocess import check_output, CalledProcessError


KNOWN_VIOLATIONS = [
"src/bitcoin-tx.cpp.*stoul",
"src/dbwrapper.cpp:.*vsnprintf",
"src/test/dbwrapper_tests.cpp:.*snprintf",
"src/test/fuzz/locale.cpp",
"src/test/fuzz/string.cpp",
"src/util/strencodings.cpp:.*strtoll",
"src/util/system.cpp:.*fprintf"
]

REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
"src/crypto/ctaes/",
"src/leveldb/",
"src/secp256k1/",
"src/minisketch/",
"src/tinyformat.h",
"src/univalue/",
"src/dashbls/",
"src/immer/"
]

LOCALE_DEPENDENT_FUNCTIONS = [
"alphasort", # LC_COLLATE (via strcoll)
"asctime", # LC_TIME (directly)
"asprintf", # (via vasprintf)
"atof", # LC_NUMERIC (via strtod)
"atoi", # LC_NUMERIC (via strtol)
"atol", # LC_NUMERIC (via strtol)
"atoll", # (via strtoll)
"atoq",
"btowc", # LC_CTYPE (directly)
"ctime", # (via asctime or localtime)
"dprintf", # (via vdprintf)
"fgetwc",
"fgetws",
"fold_case", # boost::locale::fold_case
"fprintf", # (via vfprintf)
"fputwc",
"fputws",
"fscanf", # (via __vfscanf)
"fwprintf", # (via __vfwprintf)
"getdate", # via __getdate_r => isspace // __localtime_r
"getwc",
"getwchar",
"is_digit", # boost::algorithm::is_digit
"is_space", # boost::algorithm::is_space
"isalnum", # LC_CTYPE
"isalpha", # LC_CTYPE
"isblank", # LC_CTYPE
"iscntrl", # LC_CTYPE
"isctype", # LC_CTYPE
"isdigit", # LC_CTYPE
"isgraph", # LC_CTYPE
"islower", # LC_CTYPE
"isprint", # LC_CTYPE
"ispunct", # LC_CTYPE
"isspace", # LC_CTYPE
"isupper", # LC_CTYPE
"iswalnum", # LC_CTYPE
"iswalpha", # LC_CTYPE
"iswblank", # LC_CTYPE
"iswcntrl", # LC_CTYPE
"iswctype", # LC_CTYPE
"iswdigit", # LC_CTYPE
"iswgraph", # LC_CTYPE
"iswlower", # LC_CTYPE
"iswprint", # LC_CTYPE
"iswpunct", # LC_CTYPE
"iswspace", # LC_CTYPE
"iswupper", # LC_CTYPE
"iswxdigit", # LC_CTYPE
"isxdigit", # LC_CTYPE
"localeconv", # LC_NUMERIC + LC_MONETARY
"mblen", # LC_CTYPE
"mbrlen",
"mbrtowc",
"mbsinit",
"mbsnrtowcs",
"mbsrtowcs",
"mbstowcs", # LC_CTYPE
"mbtowc", # LC_CTYPE
"mktime",
"normalize", # boost::locale::normalize
"printf", # LC_NUMERIC
"putwc",
"putwchar",
"scanf", # LC_NUMERIC
"setlocale",
"snprintf",
"sprintf",
"sscanf",
"std::locale::global",
"std::to_string",
"stod",
"stof",
"stoi",
"stol",
"stold",
"stoll",
"stoul",
"stoull",
"strcasecmp",
"strcasestr",
"strcoll", # LC_COLLATE
#"strerror",
"strfmon",
"strftime", # LC_TIME
"strncasecmp",
"strptime",
"strtod", # LC_NUMERIC
"strtof",
"strtoimax",
"strtol", # LC_NUMERIC
"strtold",
"strtoll",
"strtoq",
"strtoul", # LC_NUMERIC
"strtoull",
"strtoumax",
"strtouq",
"strxfrm", # LC_COLLATE
"swprintf",
"to_lower", # boost::locale::to_lower
"to_title", # boost::locale::to_title
"to_upper", # boost::locale::to_upper
"tolower", # LC_CTYPE
"toupper", # LC_CTYPE
"towctrans",
"towlower", # LC_CTYPE
"towupper", # LC_CTYPE
"trim", # boost::algorithm::trim
"trim_left", # boost::algorithm::trim_left
"trim_right", # boost::algorithm::trim_right
"ungetwc",
"vasprintf",
"vdprintf",
"versionsort",
"vfprintf",
"vfscanf",
"vfwprintf",
"vprintf",
"vscanf",
"vsnprintf",
"vsprintf",
"vsscanf",
"vswprintf",
"vwprintf",
"wcrtomb",
"wcscasecmp",
"wcscoll", # LC_COLLATE
"wcsftime", # LC_TIME
"wcsncasecmp",
"wcsnrtombs",
"wcsrtombs",
"wcstod", # LC_NUMERIC
"wcstof",
"wcstoimax",
"wcstol", # LC_NUMERIC
"wcstold",
"wcstoll",
"wcstombs", # LC_CTYPE
"wcstoul", # LC_NUMERIC
"wcstoull",
"wcstoumax",
"wcswidth",
"wcsxfrm", # LC_COLLATE
"wctob",
"wctomb", # LC_CTYPE
"wctrans",
"wctype",
"wcwidth",
"wprintf"
]


def find_locale_dependent_function_uses():
regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
git_grep_command = ["git", "grep", "-E", "[^a-zA-Z0-9_\\`'\"<>](" + regexp_locale_dependent_functions + "(_r|_s)?)[^a-zA-Z0-9_\\`'\"<>]", "--", "*.cpp", "*.h"] + exclude_args
git_grep_output = list()

try:
git_grep_output = check_output(git_grep_command, universal_newlines=True, encoding="utf8").splitlines()
except CalledProcessError as e:
if e.returncode > 1:
raise e

return git_grep_output


def main():
exit_code = 0

regexp_ignore_known_violations = "|".join(KNOWN_VIOLATIONS)
git_grep_output = find_locale_dependent_function_uses()

for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
matches = [line for line in git_grep_output
if re.search("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]", line)
and not re.search("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function, line)
and not re.search(regexp_ignore_known_violations, line)]
if matches:
print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
for match in matches:
print(match)
print("")
exit_code = 1

if exit_code == 1:
print("Unnecessary locale depedence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

sys.exit(exit_code)


if __name__ == "__main__":
main()
Loading

0 comments on commit 544bc0d

Please sign in to comment.