You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
235 lines
9.2 KiB
235 lines
9.2 KiB
#!/usr/bin/env python3
|
|
#-*- coding: utf-8 -*-
|
|
|
|
# Copyright (C) 2018 The Android Open Source Project
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the 'License');
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an 'AS IS' BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""
|
|
Enforces common Android string best-practices. It ignores lint messages from
|
|
a previous strings file, if provided.
|
|
|
|
Usage: stringslint.py strings.xml
|
|
Usage: stringslint.py strings.xml old_strings.xml
|
|
|
|
In general:
|
|
* Errors signal issues that must be fixed before submitting, and are only
|
|
used when there are no false-positives.
|
|
* Warnings signal issues that might need to be fixed, but need manual
|
|
inspection due to risk of false-positives.
|
|
* Info signal issues that should be fixed to match best-practices, such
|
|
as providing comments to aid translation.
|
|
"""
|
|
|
|
import re, sys, codecs
|
|
import lxml.etree as ET
|
|
|
|
# ANSI colour indices, usable as the ``fg``/``bg`` arguments of format().
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)


def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI SGR escape sequence for terminal styling.

    NOTE: the name shadows the ``format`` builtin; it is kept because the
    rest of the file calls it by this name.

    Args:
        fg: Foreground colour index (0-7), or None to leave it unchanged.
        bg: Background colour index (0-7), or None to leave it unchanged.
        bright: Use the bright background range (``10x``) instead of ``4x``.
            Only affects ``bg``.
        bold: Emit the bold code; takes precedence over ``dim``.
        dim: Emit the dim code when ``bold`` is not set.
        reset: Emit a plain reset and ignore every other argument.

    Returns:
        The escape sequence as a string, e.g. ``"\\033[31;1m"``.
    """
    # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes
    if reset:
        return "\033[0m"

    codes = []
    # Compare against None explicitly: colour index 0 (BLACK) is valid.
    if fg is not None:
        codes.append("3%d" % fg)
    if bg is not None:
        codes.append(("10%d" if bright else "4%d") % bg)
    if bold:
        codes.append("1")
    elif dim:
        codes.append("2")
    else:
        # 22 = normal intensity (neither bold nor dim).
        codes.append("22")
    return "\033[%sm" % ";".join(codes)
|
|
|
|
# Messages collected so far, keyed by "<string name>:<message>".
# lint() rebinds this to a fresh dict at the start of every run.
warnings = None


def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a lint message for *tag* in the module-level ``warnings`` dict.

    Args:
        tag: Element the message refers to; its ``name`` attribute and
            ``sourceline`` are used in the key and the rendered text.
        msg: Human-readable description of the problem.
        actual: Offending text to show under "Actual", or None to omit.
        expected: Sample of correct usage to show under "Example", or None.
        color: ANSI colour index used for the message header.

    A later message with the same string name and *msg* replaces the
    earlier one.
    """
    global warnings
    if warnings is None:
        # Allow warn() to be used before any lint() call has reset the dict.
        warnings = {}

    name = tag.attrib["name"]
    # Key on the message text itself rather than hash(msg): str hashes are
    # salted per process, which made the sorted output order vary run to run.
    key = "%s:%s" % (name, msg)

    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                       tag.sourceline,
                                       name,
                                       format(reset=True),
                                       msg)
    if actual is not None:
        value += "\n\tActual: %s%s%s" % (format(dim=True),
                                         actual,
                                         format(reset=True))
    if expected is not None:
        value += "\n\tExample: %s%s%s" % (format(dim=True),
                                          expected,
                                          format(reset=True))
    warnings[key] = value
|
|
|
|
|
|
def error(tag, msg, actual, expected):
    """Record *msg* for *tag* through warn(), rendered in red."""
    warn(tag, msg, actual, expected, color=RED)
|
|
|
|
def info(tag, msg, actual, expected):
    """Record *msg* for *tag* through warn(), rendered in cyan."""
    warn(tag, msg, actual, expected, color=CYAN)
|
|
|
|
# Escaping logic borrowed from https://stackoverflow.com/a/24519338
# The hex alternatives require real hex digits so that a stray "\x" or "\u"
# cannot swallow the characters (or a valid escape) that follow it.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9A-Fa-f]{8}   # 8-digit hex escapes
    | \\u[0-9A-Fa-f]{4}   # 4-digit hex escapes
    | \\x[0-9A-Fa-f]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'"abfnrtv]     # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Approximate how a raw string-resource fragment looks once rendered.

    Steps, in order:
      1. A newline plus any following whitespace collapses to one space.
      2. Backslash escapes matched by ESCAPE_SEQUENCE_RE are decoded.
      3. Format arguments (``%s``, ``%1$d``) and ``^1``-style placeholders
         become ``____``.
      4. ``<br>`` / ``<br/>`` become a newline; other simple lowercase tags
         such as ``<b>`` / ``</b>`` are removed.

    Args:
        s: Raw text taken from the XML.

    Returns:
        The transformed string.
    """
    def decode_match(match):
        escape = match.group(0)
        try:
            return codecs.decode(escape, 'unicode-escape')
        except UnicodeDecodeError:
            # e.g. unknown \N{...} name or out-of-range \U value: keep the
            # text verbatim instead of aborting the whole lint run.
            return escape

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s
|
|
|
|
def sample_iter(tag):
    """Yield the text fragments that make up a rendered sample of *tag*.

    For an xliff ``g`` element that carries an ``example`` attribute the
    example is yielded in place of the element's own text; for any other
    element its text is yielded after decode_escapes().  Children are then
    visited recursively, each followed by its decoded tail text.

    Comments and processing instructions contribute no text of their own
    (their tail text is still yielded by the parent), so an inline comment
    does not inflate the sample length.

    Args:
        tag: Element (or comment/PI node) to walk.

    Yields:
        str fragments in document order.
    """
    # Real elements have a string ``tag``; comments and processing
    # instructions expose a factory function there instead, which would
    # also make re.match() raise.
    if isinstance(tag.tag, str):
        if re.match(r"{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
            yield tag.attrib["example"]
        elif tag.text:
            yield decode_escapes(tag.text)

    for e in tag:
        yield from sample_iter(e)
        if e.tail:
            yield decode_escapes(e.tail)
|
|
|
|
# Attribute misspellings seen on <string>, as (misspelling, correct) pairs.
_MISSPELLED_ATTRIBUTES = [
    ("translateable", "translatable"),
]

_XLIFF_EXAMPLE = "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>"


def _check_glyphs(child, text):
    """Report common typographic mistakes/substitutions found in *text*."""
    lowered = text.lower()

    if "'" in text:
        error(child, "Turned quotation mark glyphs are more polished",
              text, "This doesn\u2019t need to \u2018happen\u2019 today")
    # Skip only strings wrapped in double quotes from start to end
    # (presumably resource-level quoting rather than a visible glyph).
    # The previous condition `not a and b` fired only when the text ended
    # but did not start with a quote, missing quotes in the middle.
    if '"' in text and not (text.startswith('"') and text.endswith('"')):
        error(child, "Turned quotation mark glyphs are more polished",
              text, "This needs to \u201chappen\u201d today")
    if "..." in text:
        error(child, "Ellipsis glyph is more polished",
              text, "Loading\u2026")
    if "wi-fi" in lowered:
        error(child, "Non-breaking glyph is more polished",
              text, "Wi\u2011Fi")
    if "wifi" in lowered:
        error(child, "Using non-standard spelling",
              text, "Wi\u2011Fi")
    if re.search(r"\d-\d", text):
        warn(child, "Ranges should use en dash glyph",
             text, "You will find this material in chapters 8\u201312")
    if "--" in text:
        warn(child, "Phrases should use em dash glyph",
             text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
    # Two spaces after the period: a single space is the correct form and
    # must not be flagged.
    if ".  " in text:
        warn(child, "Only use single space between sentences",
             text, "First idea. Second idea.")
    if re.match(r"^[A-Z\s]{5,}$", text):
        warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
             text, "Refresh data")
    if " phone " in text and "product" not in child.attrib:
        warn(child, "Strings mentioning phones should have variants for tablets",
             text, None)


def _check_substitutions(child, text):
    """Report unindexed substitutions and ones outside xliff ``g`` tags."""
    # When more than one substitution, require indexes
    if len(re.findall(r"%[^%]", text)) > 1:
        if re.search(r"%[^\d]", text):
            error(child, "Substitutions must be indexed",
                  text, "Add %1$s to %2$s")

    # Require xliff substitutions
    for gc in child.iter():
        badsub = bool(gc.tail and re.search(r"%[^%]", gc.tail))
        # Comments / processing instructions have a non-string ``tag``;
        # passing it to re.match() would raise, and their own text is not
        # part of the string, so only their tail is inspected.
        if isinstance(gc.tag, str):
            if re.match(r"{.*xliff.*}g", gc.tag):
                if "id" not in gc.attrib:
                    error(child, "Substitutions must define id attribute",
                          None, _XLIFF_EXAMPLE)
                if "example" not in gc.attrib:
                    error(child, "Substitutions must define example attribute",
                          None, _XLIFF_EXAMPLE)
            elif gc.text and re.search(r"%[^%]", gc.text):
                badsub = True
        if badsub:
            error(child, "Substitutions must be inside xliff tags",
                  text, _XLIFF_EXAMPLE)


def lint(path):
    """Lint the string resources in the XML file at *path*.

    Resets the module-level ``warnings`` dict, walks the root element's
    direct children and records a message (via info/warn/error) for every
    ``<string>`` that violates a check.  A comment node is remembered and
    consumed by the next ``<string>``; strings whose comment contains
    "do not translate" or that set ``translatable="false"`` are skipped.

    Args:
        path: Path of the strings XML file.

    Returns:
        dict mapping message key to formatted message text; empty when the
        file contains only whitespace.
    """
    global warnings
    warnings = {}

    # Read raw bytes so the XML parser decodes the document itself instead
    # of depending on the locale's default text encoding.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
            continue
        if child.tag != "string":
            # Other nodes neither consume nor clear the pending comment.
            continue

        # We always consume comment
        comment, last_comment = last_comment, None

        # Prepare string for analysis
        text = "".join(child.itertext())
        sample = "".join(sample_iter(child)).strip().strip("'\"")

        # Validate comment
        if comment is None:
            info(child, "Missing string comment to aid translation",
                 None, None)
            continue
        comment_text = comment.text or ""
        if "do not translate" in comment_text.lower():
            continue
        if child.attrib.get("translatable", "").lower() == "false":
            continue

        for misspelling, expected in _MISSPELLED_ATTRIBUTES:
            if misspelling in child.attrib:
                error(child, "Misspelled <string> attribute.", misspelling, expected)

        limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment_text)
        if limit is None:
            info(child, "Missing CHAR LIMIT to aid translation",
                 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
        elif re.match(r"\d+", limit.group(1)):
            max_chars = int(limit.group(1))
            if len(sample) > max_chars:
                warn(child, "Expanded string length is larger than CHAR LIMIT",
                     sample, None)

        # Look for common mistakes/substitutions
        _check_glyphs(child, text)
        _check_substitutions(child, text)

    return warnings
|
|
|
|
def main(argv):
    """Command-line entry point.

    Lints ``argv[1]``; when ``argv[2]`` is given, messages that the old file
    also produces are dropped.  Prints each remaining message followed by a
    blank line and exits with status 1 if any remain.

    Args:
        argv: Full argument vector, program name first (as ``sys.argv``).
    """
    if len(argv) < 2:
        # Previously this fell through to an IndexError traceback.
        sys.exit("Usage: stringslint.py strings.xml [old_strings.xml]")

    # The old file is linted first; each lint() call returns its own dict.
    before = lint(argv[2]) if len(argv) > 2 else {}
    after = lint(argv[1])

    # Ignore messages already reported for the previous strings file.
    for key in before:
        after.pop(key, None)

    if after:
        for key in sorted(after):
            print(after[key])
            print()
        sys.exit(1)


# Guarded so that importing this module does not run the linter.
if __name__ == "__main__":
    main(sys.argv)
|