Chocobo1
3 years ago
committed by
GitHub
11 changed files with 170 additions and 135 deletions
@ -0,0 +1,68 @@ |
|||||||
|
#!/usr/bin/env python3 |
||||||
|
|
||||||
|
# A pre-commit hook for detecting problematic <translation> tags |
||||||
|
# Copyright (C) 2021 Mike Tzou (Chocobo1) |
||||||
|
# |
||||||
|
# This program is free software; you can redistribute it and/or |
||||||
|
# modify it under the terms of the GNU General Public License |
||||||
|
# as published by the Free Software Foundation; either version 2 |
||||||
|
# of the License, or (at your option) any later version. |
||||||
|
# |
||||||
|
# This program is distributed in the hope that it will be useful, |
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
# GNU General Public License for more details. |
||||||
|
# |
||||||
|
# You should have received a copy of the GNU General Public License |
||||||
|
# along with this program; if not, write to the Free Software |
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
||||||
|
# |
||||||
|
# In addition, as a special exception, the copyright holders give permission to |
||||||
|
# link this program with the OpenSSL project's "OpenSSL" library (or with |
||||||
|
# modified versions of it that use the same license as the "OpenSSL" library), |
||||||
|
# and distribute the linked executables. You must obey the GNU General Public |
||||||
|
# License in all respects for all of the code used other than "OpenSSL". If you |
||||||
|
# modify file(s), you may extend this exception to your version of the file(s), |
||||||
|
# but you are not obligated to do so. If you do not wish to do so, delete this |
||||||
|
# exception statement from your version. |
||||||
|
|
||||||
|
from typing import Optional, Sequence |
||||||
|
import argparse |
||||||
|
import re |
||||||
|
|
||||||
|
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Report lines on which a ``</translation>`` closing tag starts the line.

    A line is flagged when it begins with optional whitespace followed
    directly by ``</translation>``, i.e. the closing tag was pushed onto its
    own line. For each hit the file name, 1-based line number, matched column
    span and matched text are collected; the combined report is printed to
    stdout once all files have been scanned.

    Args:
        argv: Command-line arguments (the file names to check). ``None``
            makes ``argparse`` fall back to ``sys.argv[1:]``.

    Returns:
        ``1`` if at least one offending line was found, ``0`` otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    regex = re.compile(r"\s*</translation>")
    reports = []

    for filename in args.filenames:
        file_reports = []

        try:
            # Decode explicitly as UTF-8 so the outcome does not depend on
            # the locale's default encoding of the machine running the hook.
            with open(filename, encoding="utf-8") as file:
                for line_number, line in enumerate(file, start=1):
                    if (match := regex.match(line)) is not None:
                        file_reports.append(
                            f"Defect file: \"{filename}\"\n"
                            f"Line: {line_number}\n"
                            f"Column span: {match.span()}\n"
                            f"Part: \"{match.group()}\"\n\n")
        except UnicodeDecodeError:
            # Not a text file: skip it, discarding any findings gathered
            # before the undecodable bytes were reached.
            continue

        reports.extend(file_reports)

    if reports:
        print("".join(reports))
        return 1

    return 0
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Raise SystemExit directly instead of calling the `exit` helper: that
    # helper is injected by the `site` module and is absent when the
    # interpreter is started without it (e.g. `python -S`).
    raise SystemExit(main())
@ -1,85 +0,0 @@ |
|||||||
#!/usr/bin/env zsh

# File health check: runs a series of independent checks over the working
# tree (encoding, BOM, CR bytes, trailing whitespace, trailing newlines,
# translation closing tags), prints the offending files for each check and
# exits with status 1 if any check reported something, 0 otherwise.

set -o nounset

# Assumption: file names don't contain `:` (for the `cut` invocation).
# Safe to assume, as such a character in a filename would cause trouble on Windows, a platform we support

# Any regression turns this non-zero. After each check below, the exit status
# of its pipeline is added to this counter.
regressions=0

# exclusions (these are just grep extended regular expressions to match against paths relative to the root of the repository)
exclusions_nonutf8='(.*(7z|gif|ic(ns|o)|png|qm|zip))'
exclusions_bom='src/base/unicodestrings.h'
exclusions_tw='(*.ts)|src/webui/www/private/scripts/lib/*'
exclusions_trailing_newline='configure'
exclusions_no_lf='(*.ts)|(.*svg)|compile_commands.json|src/webui/www/private/scripts/lib/*'

# Every check below ends with the same two stages:
#   * `tee >(...)` prints a summary line with the number of reported paths;
#   * `xargs -0 ... bash -c '...'` echoes the list and tests that it contains
#     no newline. NOTE(review): the list is newline-separated while xargs is
#     told to split on NUL, so the whole list presumably arrives as a single
#     item and a non-empty list makes the test (and the pipeline) fail -
#     confirm against the xargs in use.

echo -e "\n*** Detect files not encoded in UTF-8 ***\n"

# Ask `file --mime` for the charset of every regular file outside ./build and
# ./.git, keep those that are neither us-ascii nor utf-8, reduce each line to
# the path (text before the first `:`) and drop the excluded paths.
find . -path ./build -prune -false -o -path ./.git -prune -false -o -type f -exec file --mime {} \; | sort \
    | grep -v -e "charset=us-ascii" -e "charset=utf-8" | cut -d ":" -f 1 \
    | grep -E -v -e "${exclusions_nonutf8}" \
    | tee >(echo -e "--> Files not encoded in UTF-8: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))

echo -e "\n*** Detect files encoded in UTF-8 with BOM ***\n"

# List text files (binary files are skipped by -I) that contain the byte
# sequence EF BB BF, minus the excluded paths.
grep --exclude-dir={.git,build} -rIl $'\xEF\xBB\xBF' | sort \
    | grep -E -v -e "${exclusions_bom}" \
    | tee >(echo -e "--> Files encoded in UTF-8 with BOM: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))

echo -e "\n*** Detect usage of CR byte ***\n"

# List text files that contain a carriage-return byte (0x0D). This check has
# no exclusion list.
grep --exclude-dir={.git,build} -rIlU $'\x0D' | sort \
    | tee >(echo -e "--> Usage of CR byte: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))

echo -e "\n*** Detect trailing whitespace in lines ***\n"

# List text files that have at least one line ending in a blank character,
# minus the excluded paths.
grep --exclude-dir={.git,build} -rIl "[[:blank:]]$" | sort \
    | grep -E -v -e "${exclusions_tw}" \
    | tee >(echo -e "--> Trailing whitespace in lines: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0';
regressions=$((regressions+$?))

echo -e "\n*** Detect too many trailing newlines ***\n"

# Among files reported as us-ascii or utf-8 (minus exclusions), print those
# whose last two bytes are both LF (0a 0a).
find . -path ./build -prune -false -o -path ./.git -prune -false -o -type f -exec file --mime {} \; | sort \
    | grep -e "charset=us-ascii" -e "charset=utf-8" | cut -d ":" -f 1 \
    | grep -E -v -e "${exclusions_trailing_newline}" \
    | xargs -L1 -I my_input bash -c 'test "$(tail -q -c2 "my_input" | hexdump -C | grep "0a 0a")" && echo "my_input"' \
    | tee >(echo -e "--> Too many trailing newlines: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))

echo -e "\n*** Detect no trailing newline ***\n"

# Among files reported as us-ascii or utf-8 (minus exclusions), print those
# whose last byte is not LF (0a).
find . -path ./build -prune -false -o -path ./.git -prune -false -o -type f -exec file --mime {} \; | sort \
    | grep -e "charset=us-ascii" -e "charset=utf-8" | cut -d ":" -f 1 \
    | grep -E -v -e "${exclusions_no_lf}" \
    | xargs -L1 -I my_input bash -c 'test "$(tail -q -c1 "my_input" | hexdump -C | grep "0a")" || echo "my_input"' \
    | tee >(echo -e "--> No trailing newline: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))

echo -e "\n*** Detect translation closing tag in new line ***\n"

# Case-insensitively find lines that start with `</translation>`; `cut` keeps
# only the first two `:`-separated fields of grep's output (path and line
# number).
grep --exclude-dir={.git,build} -nri "^</translation>" | sort \
    | cut -d ":" -f 1,2 \
    | tee >(echo -e "--> Translation closing tag in new line: found" "$(wc -l < /dev/stdin)" "regression(s)\n") \
    | xargs -I my_input -0 bash -c 'echo "my_input"; test "$(echo -n "my_input" | wc -l)" -eq 0'
regressions=$((regressions+$?))

# Collapse the accumulated pipeline statuses into a plain 0/1 exit code.
if [ "$regressions" -ne 0 ]; then
    regressions=1
    echo "\nFile health regressions found. Please fix them (or add them as exclusions)."
else
    echo "All OK, no file health regressions found."
fi

exit $regressions;
|
@ -0,0 +1,55 @@ |
|||||||
|
# pre-commit configuration: one repository-local hook plus a set of hooks
# taken from the pre-commit-hooks project.
repos:
  # Hook implemented inside this repository.
  - repo: local
    hooks:
      # Runs the script given in `entry` on files carrying the `ts` type tag.
      - id: check-translation-tag
        name: Check newline characters in <translation> tag
        entry: .github/workflows/check_translation_tag.py
        language: script
        types_or:
          - ts

  # Hooks from the pre-commit-hooks project, pinned to the tag in `rev`.
  - repo: https://github.com/pre-commit/pre-commit-hooks.git
    rev: v4.0.1
    hooks:
      - id: check-json
        name: Check JSON files

      - id: check-yaml
        name: Check YAML files

      # Each `exclude` below is a verbose-mode (`(?x)`) regular expression
      # anchored with `^(` ... `)$`; alternatives are separated by `|`, one
      # path pattern per line.
      - id: fix-byte-order-marker
        name: Check file encoding (UTF-8 without BOM)
        exclude: |
          (?x)^(
            src/base/unicodestrings.h
          )$

      - id: mixed-line-ending
        name: Check line ending character (LF)
        args: ["--fix=lf"]
        exclude: |
          (?x)^(
            compile_commands.json |
            src/webui/www/private/scripts/lib/.*
          )$

      # `exclude_types` skips files by type tag in addition to the path
      # patterns listed in `exclude`.
      - id: end-of-file-fixer
        name: Check trailing newlines
        exclude: |
          (?x)^(
            compile_commands.json |
            configure |
            src/webui/www/private/scripts/lib/.*
          )$
        exclude_types:
          - svg
          - ts

      - id: trailing-whitespace
        name: Check trailing whitespaces
        exclude: |
          (?x)^(
            src/webui/www/private/scripts/lib/.*
          )$
        exclude_types:
          - ts
Loading…
Reference in new issue