[doc] lgspell: allow spell checking arbitrary text

This commit is contained in:
Quantum 2021-09-01 19:48:38 -04:00 committed by Geoffrey McRae
parent ce091fd4e4
commit 909a9a903f

32
doc/lgspell.py Normal file → Executable file
View File

@ -1,14 +1,21 @@
#!/usr/bin/env python3
import re
from enchant.tokenize import Filter
# Patterns used by the spell-check filters in this module to recognise tokens
# that are not ordinary prose. Each name is bound exactly once; the earlier,
# narrower definitions of recamel and repath that were immediately overwritten
# have been dropped.

# Upper-case letters with an optional trailing lowercase "s".
reacronym = re.compile(r'^[A-Z]+s?$')
# Lowercase letters, a colon, then word characters.
reoption = re.compile(r'^[a-z]+:\w+$')
# One or more letters, then a capital letter, then more word characters.
recamel = re.compile(r'^[A-Za-z]+[A-Z]\w+$')
# Names ending in -dev or -bin, fonts-<name>-ttf, or exactly virt-manager.
repackage = re.compile(r'^[\w-]+-(?:dev|bin)$|^fonts-[\w-]+-ttf$|^virt-manager$')
# Anything starting with /dev/, or any token ending in ".<word characters>".
repath = re.compile(r'^/dev/|.*\.\w+$')
# A 1 or 3 followed by 25-34 alphanumerics, or 0x followed by 40 hex digits
# (presumably cryptocurrency addresses, going by the name).
recrypto = re.compile(r'^[13][A-Za-z0-9]{25,34}$|^0x[0-9a-fA-F]{40}$')
class AcronymFilter(Filter):
    """Filter whose skip test is the ``reacronym`` pattern."""

    def _skip(self, word):
        # The match object (or None) is returned as-is; callers only need
        # its truthiness.
        acronym_match = reacronym.match(word)
        return acronym_match
class OptionFilter(Filter):
    """Filter whose skip test is ``reoption`` or, failing that, ``recamel``."""

    def _skip(self, word):
        # Try the key:value form first; fall back to the mixed-case form.
        option_match = reoption.match(word)
        if option_match:
            return option_match
        return recamel.match(word)
@ -27,3 +34,24 @@ class PathFilter(Filter):
class CryptoAddressFilter(Filter):
    """Filter whose skip test is the ``recrypto`` pattern."""

    def _skip(self, word):
        # The match object (or None) is returned as-is; callers only need
        # its truthiness.
        address_match = recrypto.match(word)
        return address_match
if __name__ == '__main__':
    # Stand-alone mode: spell check the text read from stdin, print every
    # flagged word with some surrounding context, and exit non-zero if any
    # word was flagged.
    import os
    import sys

    from enchant.checker import SpellChecker

    checker = SpellChecker('en_US', sys.stdin.read(), filters=[
        AcronymFilter, OptionFilter, PackageFilter, PathFilter,
    ])
    # NOTE(review): CryptoAddressFilter is defined in this file but is not in
    # the filter list above - confirm that the omission is intentional.

    # words.txt sits next to this script and holds one accepted word per line.
    words_path = os.path.join(os.path.dirname(__file__), 'words.txt')
    # Explicit encoding so that decoding does not depend on the locale.
    with open(words_path, encoding='utf-8') as f:
        for line in f:
            word = line.strip()
            # Skip blank lines so an empty string is never registered.
            if word:
                checker.add(word)

    has_error = False
    for error in checker:
        print(f'Spelling error: {error.word}')
        print(f'Context: {error.leading_context(30)}{error.word}{error.trailing_context(30)}')
        has_error = True

    # sys.exit(True) yields exit status 1, sys.exit(False) yields 0.
    sys.exit(has_error)