You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

209 lines
7.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/python
# coding=UTF-8
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a curated subset of NotoSansSymbols for Android."""
__author__ = 'roozbeh@google.com (Roozbeh Pournader)'
import contextlib
import os
import sys
import tempfile
from fontTools import ttLib
from pathlib import PurePosixPath
from nototools import subset
from nototools import unicode_data
# Unicode blocks that we want to include in the font.
# Each non-empty line has the form "FIRST..LAST; Block Name", where FIRST and
# LAST are hexadecimal code points. main() parses this text with
# unicode_data._parse_code_ranges() and treats every range as inclusive of
# both end points.
BLOCKS_TO_INCLUDE = """
20D0..20FF; Combining Diacritical Marks for Symbols
2100..214F; Letterlike Symbols
2190..21FF; Arrows
2200..22FF; Mathematical Operators
2300..23FF; Miscellaneous Technical
2400..243F; Control Pictures
2440..245F; Optical Character Recognition
2460..24FF; Enclosed Alphanumerics
2500..257F; Box Drawing
2580..259F; Block Elements
25A0..25FF; Geometric Shapes
2600..26FF; Miscellaneous Symbols
2700..27BF; Dingbats
27C0..27EF; Miscellaneous Mathematical Symbols-A
27F0..27FF; Supplemental Arrows-A
2800..28FF; Braille Patterns
2900..297F; Supplemental Arrows-B
2980..29FF; Miscellaneous Mathematical Symbols-B
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
4DC0..4DFF; Yijing Hexagram Symbols
10140..1018F; Ancient Greek Numbers
10190..101CF; Ancient Symbols
101D0..101FF; Phaistos Disc
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F0A0..1F0FF; Playing Cards
1F700..1F77F; Alchemical Symbols
"""
# One-off characters to be included on top of BLOCKS_TO_INCLUDE. At the
# moment, these are the Bitcoin sign (since it's not supported in Roboto yet)
# and the Japanese TV symbols of Unicode 9.
ONE_OFF_ADDITIONS = {
    0x20BF, # ₿ BITCOIN SIGN
    0x1F19B, # 🆛 SQUARED THREE D
    0x1F19C, # 🆜 SQUARED SECOND SCREEN
    0x1F19D, # 🆝 SQUARED TWO K
    0x1F19E, # 🆞 SQUARED FOUR K
    0x1F19F, # 🆟 SQUARED EIGHT K
    0x1F1A0, # 🆠 SQUARED FIVE POINT ONE
    0x1F1A1, # 🆡 SQUARED SEVEN POINT ONE
    0x1F1A2, # 🆢 SQUARED TWENTY-TWO POINT TWO
    0x1F1A3, # 🆣 SQUARED SIXTY P
    0x1F1A4, # 🆤 SQUARED ONE HUNDRED TWENTY P
    0x1F1A5, # 🆥 SQUARED LATIN SMALL LETTER D
    0x1F1A6, # 🆦 SQUARED HC
    0x1F1A7, # 🆧 SQUARED HDR
    0x1F1A8, # 🆨 SQUARED HI-RES
    0x1F1A9, # 🆩 SQUARED LOSSLESS
    0x1F1AA, # 🆪 SQUARED SHV
    0x1F1AB, # 🆫 SQUARED UHD
    0x1F1AC, # 🆬 SQUARED VOD
    0x1F23B, # 🈻 SQUARED CJK UNIFIED IDEOGRAPH-914D
}
# Letter-based characters that are provided by Roboto. main() removes these
# code points from the subset coverage so that they preferably come from
# Roboto instead of this font.
LETTERLIKE_CHARS_IN_ROBOTO = {
    0x2100, # ℀ ACCOUNT OF
    0x2101, # ℁ ADDRESSED TO THE SUBJECT
    0x2103, # ℃ DEGREE CELSIUS
    0x2105, # ℅ CARE OF
    0x2106, # ℆ CADA UNA
    0x2109, # ℉ DEGREE FAHRENHEIT
    0x2113, # ℓ SCRIPT SMALL L
    0x2116, # № NUMERO SIGN
    0x2117, # ℗ SOUND RECORDING COPYRIGHT
    0x211E, # ℞ PRESCRIPTION TAKE
    0x211F, # ℟ RESPONSE
    0x2120, # ℠ SERVICE MARK
    0x2121, # ℡ TELEPHONE SIGN
    0x2122, # ™ TRADE MARK SIGN
    0x2123, # ℣ VERSICLE
    0x2125, # ℥ OUNCE SIGN
    0x2126, # Ω OHM SIGN
    0x212A, # K KELVIN SIGN
    0x212B, # Å ANGSTROM SIGN
    0x212E, # ℮ ESTIMATED SYMBOL
    0x2132, # Ⅎ TURNED CAPITAL F
    0x213B, # ℻ FACSIMILE SIGN
    0x214D, # ⅍ AKTIESELSKAB
    0x214F, # ⅏ SYMBOL FOR SAMARITAN SOURCE
}
# Characters that are supposed to default to emoji presentation, as reported
# by unicode_data. Computed once at import time; main() excludes them from
# the symbols subset and includes them in the second subset.
DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji()
# Location of the emoji additions data file read by get_android_emoji():
# two directory levels above this script, under unicode/additions/.
EMOJI_ADDITIONS_FILE = os.path.join(
    os.path.dirname(__file__), os.path.pardir, os.path.pardir,
    'unicode', 'additions', 'emoji-data.txt')
# Characters we have decided we are doing as emoji-style in Android,
# despite UTR#51's recommendation
def get_android_emoji():
    """Return additional Android default emojis.

    Reads EMOJI_ADDITIONS_FILE, parses it as semicolon-separated data and
    keeps every entry whose property field is exactly 'Emoji_Presentation'.

    Returns:
        A set of integer code points, converted from the hexadecimal
        code point field of each matching entry.

    Raises:
        OSError: if EMOJI_ADDITIONS_FILE cannot be opened.
        ValueError: if a matching entry's code point field is not a
            hexadecimal number.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the process running the script (a C/ASCII locale would
    # otherwise fail on any non-ASCII character in the data file).
    # NOTE(review): assumes the data file is UTF-8, as Unicode data files
    # conventionally are -- confirm against the checked-in file.
    with open(EMOJI_ADDITIONS_FILE, encoding='utf-8') as emoji_additions:
        contents = emoji_additions.read()

    # The file handle is only needed for the read; parse after closing it.
    data = unicode_data._parse_semicolon_separated_data(contents)
    return {
        int(codepoint, 16)
        for codepoint, prop in data
        if prop == 'Emoji_Presentation'
    }
def rename_postscript_name(source_font, target_font, new_name):
    """Save a copy of a font with its PostScript name record replaced.

    Opens source_font, overwrites the string of every 'name' table record
    that has nameID 6, platformID 3, platEncID 1 and langID 0x0409 with
    new_name, and saves the result to target_font. Records that do not
    match all four fields are left untouched.

    Args:
        source_font: Font file to read.
        target_font: Where to save the modified font.
        new_name: The replacement name string.
    """
    # NOTE(review): per the OpenType 'name' table these values should mean
    # PostScript name / Windows platform / Unicode BMP / US English --
    # confirm against the specification.
    wanted_record = (6, 3, 1, 0x0409)

    font = ttLib.TTFont(source_font)
    try:
        for record in font['name'].names:
            record_key = (record.nameID, record.platformID,
                          record.platEncID, record.langID)
            if record_key == wanted_record:
                record.string = new_name
        font.save(target_font)
    finally:
        # Always release the underlying font file, even if saving fails.
        font.close()
def _write_subset(source_file_name, target_file_name, coverage):
    """Subset a font to the given code points and save it under a new name.

    The subset is first written to a temporary file, then re-saved to
    target_file_name with its PostScript name set to the stem of
    target_file_name (the final path component without its suffix).

    Args:
        source_file_name: Font file to subset.
        target_file_name: Where to save the finished subset.
        coverage: Set of integer code points to keep.
    """
    # The context manager closes the temporary file when done, instead of
    # leaving the handle open until garbage collection.
    with tempfile.NamedTemporaryFile() as tmp:
        subset.subset_font(
            source_file_name,
            tmp.name,
            include=coverage)
        # Use given file name as the PostScript name.
        post_script_name = PurePosixPath(target_file_name).stem
        rename_postscript_name(tmp.name, target_file_name, post_script_name)


def main(argv):
    """Subset the Noto Symbols font.

    Two fonts are written: a symbols subset covering BLOCKS_TO_INCLUDE plus
    ONE_OFF_ADDITIONS (minus the exclusions below), and a second subset
    covering the characters that default to emoji presentation.

    Args:
        argv: Command-line argument list. argv[1] is the source file name,
            argv[2] is the target file name for the symbols subset, and
            argv[3] is the target file name for the second (emoji-default
            characters) subset.

    Raises:
        IndexError: if fewer than three file names are given.
    """
    # Read every argument up front so that a missing one fails before any
    # font has been written.
    source_file_name = argv[1]
    target_file_name = argv[2]
    second_subset_file_name = argv[3]

    # Characters that are supposed to default to emoji: excluded from the
    # first subset, and exactly the coverage of the second one.
    emoji_coverage = DEFAULT_EMOJI | get_android_emoji()

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS

    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO

    # Remove characters that are supposed to default to emoji
    target_coverage -= emoji_coverage

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage.difference_update(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly. discard() rather than remove(): the
    # character may already have been dropped by one of the steps above, and
    # that must not abort the run.
    target_coverage.discard(0x20E3)

    _write_subset(source_file_name, target_file_name, target_coverage)
    _write_subset(source_file_name, second_subset_file_name, emoji_coverage)
# Script entry point: pass the full argument vector (including the program
# name at index 0) to main(), which reads the file names from argv[1] onward.
if __name__ == '__main__':
    main(sys.argv)