#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
|
|
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
|
|
under a meta tag with name 'Emji'.
|
|
|
|
In order to create the final font the followings are used as inputs:
|
|
|
|
- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
|
|
external/noto-fonts/emoji/NotoColorEmoji.ttf
|
|
|
|
- Unicode files: Unicode files that are in the framework, and lists information about all the
|
|
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
|
|
and emoji-variation-sequences.txt. Currently at external/unicode/.
|
|
|
|
- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
|
|
in the Android font. Resides in framework and currently under external/unicode/.
|
|
|
|
- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
|
|
Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
|
|
version that the emoji was added (compatAdded). Updated when the script is executed.
|
|
|
|
- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
|
|
|
|
After execution the following files are generated if they don't exist otherwise, they are updated:
|
|
- font/NotoColorEmojiCompat.ttf
|
|
- supported-emojis/emojis.txt
|
|
- data/emoji_metadata.txt
|
|
- src/java/android/support/text/emoji/flatbuffer/*
|
|
"""
import contextlib
import csv
import hashlib
import itertools
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from fontTools import ttLib
from fontTools.ttLib.tables import otTables
from nototools import font_data

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version
SDK_VERSION = 30
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 7

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join('src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji file for emojis that are not in Unicode files
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in <code>emoji-data.txt</code>.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file
INPUT_META_FILE = OUTPUT_META_FILE
# default flatbuffer module location (if not specified by caller)
FLATBUFFER_MODULE_DIR = os.path.join(SCRIPT_DIR, '..', 'emoji-compat-flatbuffers')
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'emoji_metadata.fbs')
# file path for the Java header; it will be prepended to the flatbuffer Java files
FLATBUFFER_HEADER = os.path.join(FLATBUFFER_MODULE_DIR, 'data', 'flatbuffer_header.txt')
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# temporary directory that contains flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(FLATBUFFER_MODULE_DIR, JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

DEFAULT_EMOJI_ID = 0xF0001
EMOJI_STYLE_VS = 0xFE0F

# The reference code point to be used for filling the metrics of the watermark glyph
WATERMARK_REF_CODE_POINT = 0x1F600
# The code point and glyph name used for the watermark.
WATERMARK_NEW_CODE_POINT = 0x10FF00
WATERMARK_NEW_GLYPH_ID = 'u10FF00'


def to_hex_str(value):
    """Converts given int value to hex without the 0x prefix"""
    return format(value, 'X')


def hex_str_to_int(string):
    """Convert a hex string into int"""
    return int(string, 16)


def codepoint_to_string(codepoints):
    """Converts a list of codepoints into a string separated with space."""
    return ' '.join([to_hex_str(x) for x in codepoints])


def prepend_header_to_file(file_path, header_path):
    """Prepends the header to the file. Used to update flatbuffer java files with header, comments
    and annotations."""
    with open(file_path, "r+") as original_file:
        with open(header_path, "r") as copyright_file:
            original_content = original_file.read()
            original_file.seek(0)
            original_file.write(copyright_file.read() + "\n" + original_content)


def update_flatbuffer_java_files(flatbuffer_java_dir, header_dir, target_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination"""
    tmp_metadata_list = flatbuffer_java_dir + FLATBUFFER_METADATA_LIST_JAVA
    tmp_metadata_item = flatbuffer_java_dir + FLATBUFFER_METADATA_ITEM_JAVA
    prepend_header_to_file(tmp_metadata_list, header_dir)
    prepend_header_to_file(tmp_metadata_item, header_dir)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    shutil.copy(tmp_metadata_list, os.path.join(target_dir, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item, os.path.join(target_dir, FLATBUFFER_METADATA_ITEM_JAVA))


def create_test_data(unicode_path):
    """Read all the emojis in the unicode files and update the test file"""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))

    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)

    # The standardized variants file contains a huge list of sequences; only read the ones that
    # are emojis, i.e. the ones with FE0F (emoji style).
    standardized_variants_lines = read_emoji_lines(
        os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    for line in standardized_variants_lines:
        if STD_VARIANTS_EMOJI_STYLE in line:
            lines.append(line)

    emojis_set = set()
    for line in lines:
        # In Unicode 12.0, "emoji-sequences.txt" contains a "Basic_Emoji" section. We ignore those
        # entries here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
        emojis_set.add(codepoint_to_string(codepoints).upper())

    emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE))
    for line in emoji_data_lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if not emoji_property in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
        if is_emoji_style:
            codepoints = [to_hex_str(x) for x in
                          codepoints_for_emojirange(codepoints_range)]
            emojis_set.update(codepoints)

    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    # finally add the android default emoji exceptions
    emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions])

    emojis_list = list(emojis_set)
    emojis_list.sort()
    with open(TEST_DATA_PATH, "w") as test_file:
        for line in emojis_list:
            test_file.write("%s\n" % line)


class _EmojiData(object):
    """Holds the information about a single emoji."""

    def __init__(self, codepoints, is_emoji_style):
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Updates width/height instance variables with the values given in metrics dictionary.
        :param metrics: a dictionary object that has width and height values.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                              codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Creates the json representation of EmojiData."""
        json_element = {}
        json_element['id'] = self.emoji_id
        json_element['emojiStyle'] = self.emoji_style
        json_element['sdkAdded'] = self.sdk_added
        json_element['compatAdded'] = self.compat_added
        json_element['width'] = self.width
        json_element['height'] = self.height
        json_element['codepoints'] = self.codepoints
        return json_element

    def create_txt_row(self):
        """Creates array of values for CSV of EmojiData."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row += [to_hex_str(x) for x in self.codepoints]
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Updates current EmojiData with the values in a json element"""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Read all lines in a Unicode emoji file into a list of uppercase strings. Ignores empty
    lines and comments.
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    """
    result = []
    try:
        with open(file_path) as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        if optional:
            pass
        else:
            raise

    return result


def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers"""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        codepoint = hex_str_to_int(codepoints_and_emoji_prop(line)[0])
        exceptions.append(codepoint)
    return exceptions


def codepoints_for_emojirange(codepoints_range):
    """Return codepoints given in emoji files. Expand the codepoints that are given as a range
    such as XYZ..UVT
    """
    codepoints = []
    if '..' in codepoints_range:
        range_start, range_end = codepoints_range.split('..')
        codepoints_range = range(hex_str_to_int(range_start),
                                 hex_str_to_int(range_end) + 1)
        codepoints.extend(codepoints_range)
    else:
        codepoints.append(hex_str_to_int(codepoints_range))
    return codepoints


def codepoints_and_emoji_prop(line):
    """For a given emoji file line, return codepoints and emoji property in the line.
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]"""
    line = line.strip()
    if '#' in line:
        line = line[:line.index('#')]
    else:
        raise ValueError("Line is expected to have # in it")
    line = line.split(';')
    codepoints_range = line[0].strip()
    emoji_property = line[1].strip()

    return codepoints_range, emoji_property


def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read the lines of a unicode emoji file in which each line describes a set of codepoint
    intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]"""
    lines = read_emoji_lines(file_path)

    for line in lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if not emoji_property in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
        codepoints = codepoints_for_emojirange(codepoints_range)

        for codepoint in codepoints:
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
            if key in emoji_data_map:
                # since there are multiple definitions of emojis, only update when emoji style is
                # True
                if codepoint_is_emoji_style:
                    emoji_data_map[key].emoji_style = True
            else:
                emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style)
                emoji_data_map[key] = emoji_data


def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    line and puts into emoji_data_map."""
    lines = read_emoji_lines(file_path, optional)
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in lines:
        # In Unicode 12.0, "emoji-sequences.txt" contains a "Basic_Emoji" section. We ignore those
        # entries here since we are already checking the emoji presentations with
        # emoji-variation-sequences.txt.
        if "BASIC_EMOJI" in line:
            continue
        codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
        codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS]
        key = codepoint_to_string(codepoints)
        if not key in emoji_data_map:
            emoji_data = _EmojiData(codepoints, False)
            emoji_data_map[key] = emoji_data


def load_emoji_data_map(unicode_path):
    """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                         emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE))

    # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                         optional=True)
    # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                         optional=True)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added
    fields in emoji_metadata.txt. Returns the smallest available emoji id to use, i.e. if the
    largest emoji id in emoji_metadata.txt is 1, the function returns 2. If emoji_metadata.txt
    does not exist or contains no emojis, returns DEFAULT_EMOJI_ID."""
    current_emoji_id = DEFAULT_EMOJI_ID
    if os.path.isfile(INPUT_META_FILE):
        with open(INPUT_META_FILE) as csvfile:
            reader = csv.reader(csvfile, delimiter=' ')
            for row in reader:
                if row[0].startswith('#'):
                    continue
                emoji_id = hex_str_to_int(row[0])
                sdk_added = int(row[1])
                compat_added = int(row[2])
                key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
                if key in emoji_data_map:
                    emoji_data = emoji_data_map[key]
                    emoji_data.update(emoji_id, sdk_added, compat_added)
                    if emoji_data.emoji_id >= current_emoji_id:
                        current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id


def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first."""
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        tag_list.remove('meta')
        tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Injects the metadata binary into the font."""
    if not 'meta' in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path, flatbuffer_path):
    """Validate the existence of font file and the unicode files"""
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory: " + unicode_path)

    emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE),
                       os.path.join(unicode_path, EMOJI_ZWJ_FILE),
                       os.path.join(unicode_path, EMOJI_SEQ_FILE)]
    for emoji_filename in emoji_filenames:
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)

    if not os.path.isdir(flatbuffer_path):
        raise ValueError(
            "Flatbuffer directory does not exist or is not a directory: " + flatbuffer_path)

    flatbuffer_filenames = [os.path.join(flatbuffer_path, FLATBUFFER_SCHEMA),
                            os.path.join(flatbuffer_path, FLATBUFFER_HEADER)]
    for flatbuffer_filename in flatbuffer_filenames:
        if not os.path.isfile(flatbuffer_filename):
            raise ValueError("Flatbuffer file does not exist: " + flatbuffer_filename)


def add_file_to_sha(sha_algo, file_path):
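    """Feeds the content of the file at file_path into the given hash object in 8 KiB chunks."""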
    with open(file_path, 'rb') as input_file:
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)


def create_sha_from_source_files(font_paths):
    """Creates a SHA from the given font files"""
    sha_algo = hashlib.sha256()
    for file_path in font_paths:
        add_file_to_sha(sha_algo, file_path)
    return sha_algo.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font"""

    def __init__(self, font_path, unicode_path):
        validate_input_files(font_path, unicode_path, FLATBUFFER_MODULE_DIR)

        self.font_path = font_path
        self.unicode_path = unicode_path
        self.emoji_data_map = {}
        self.remapped_codepoints = {}
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height"""
        key = codepoint_to_string(codepoints)
        if key in self.emoji_data_map:
            # add emoji to final data
            emoji_data = self.emoji_data_map[key]
            emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
            if emoji_data.emoji_id == 0:
                emoji_data.emoji_id = self.emoji_id
                self.emoji_id = self.emoji_id + 1
            self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and
        returns the cmap format 12 table."""
        cmap = ttf['cmap']
        for table in cmap.tables:
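            # Look only at the format 12 subtable for Windows (platformID 3) / Unicode full
            # repertoire (platEncID 10), since it can map supplementary-plane code points.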
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB"""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if sub_class_set:
                for sub_class_rule in sub_class_set.SubClassRule:
                    # prepare holder for substitution list. each rule will have a list that is
                    # added to the subs_list.
                    subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                    for record in sub_class_rule.SubstLookupRecord:
                        subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list,
                                                                                 record.LookupListIndex)
                    # create combinations of all lists. the combinations will be filtered by
                    # emoji_data_map. the first element that contains a valid glyph will be used
                    # as the final glyph
                    combinations = list(itertools.product(*subs_list))
                    for seq in combinations:
                        glyph_names = [x["input"] for x in seq]
                        codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                        outputs = [x["output"] for x in seq if x["output"]]
                        nonempty_outputs = list(filter(lambda x: x.strip(), outputs))
                        if len(nonempty_outputs) == 0:
                            print("Warning: no output glyph is set for " + str(glyph_names))
                            continue
                        elif len(nonempty_outputs) > 1:
                            print(
                                "Warning: multiple output glyphs are set for "
                                + str(glyph_names) + ", will use the first one")

                        glyph = nonempty_outputs[0]
                        self.update_emoji_data(codepoints, glyph)

    def get_substitutions(self, lookup_list, index):
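        """Returns the input to output glyph mappings defined in the lookup at the given index."""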
        result = []
        for x in lookup_list.Lookup[index].SubTable:
            for input, output in x.mapping.items():
                result.append({"input": input, "output": output})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
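        """Updates the EmojiData entries for the emoji sequences defined as OpenType Ligature
        Substitutions (the leading glyph plus its components)."""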
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file"""
        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
        output_json['list'] = []

        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        total_emoji_count = 0
        for emoji_data in emoji_data_list:
            element = emoji_data.create_json_element()
            output_json['list'].append(element)
            total_emoji_count = total_emoji_count + 1

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return total_emoji_count

    def write_metadata_csv(self):
        """Writes emoji metadata into a space separated file"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def add_watermark(self, ttf):
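        """Adds a watermark glyph that renders the font's major.minor version string, so the font
        version can be verified manually. The new glyph copies the metrics of
        WATERMARK_REF_CODE_POINT and is expanded into the version digits through a 'ccmp'
        multiple substitution lookup."""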
        cmap = ttf.getBestCmap()
        gsub = ttf['GSUB'].table

        # Obtain Version string
        m = re.search(r'^Version (\d*)\.(\d*)', font_data.font_version(ttf))
        if not m:
            raise ValueError('The font does not have a proper version string.')
        major = m.group(1)
        minor = m.group(2)
        # Replace the dot with a space since NotoColorEmoji does not have a glyph for the dot.
        glyphs = [cmap[ord(x)] for x in '%s %s' % (major, minor)]

        # Update Glyph metrics
        ttf.getGlyphOrder().append(WATERMARK_NEW_GLYPH_ID)
        refGlyphId = cmap[WATERMARK_REF_CODE_POINT]
        ttf['hmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['hmtx'].metrics[refGlyphId]
        ttf['vmtx'].metrics[WATERMARK_NEW_GLYPH_ID] = ttf['vmtx'].metrics[refGlyphId]

        # Add new Glyph to cmap
        font_data.add_to_cmap(ttf, { WATERMARK_NEW_CODE_POINT : WATERMARK_NEW_GLYPH_ID })

        # Add lookup table for the version string.
        lookups = gsub.LookupList.Lookup
        new_lookup = otTables.Lookup()
        new_lookup.LookupType = 2  # Multiple Substitution Subtable.
        new_lookup.LookupFlag = 0
        new_subtable = otTables.MultipleSubst()
        new_subtable.mapping = { WATERMARK_NEW_GLYPH_ID : tuple(glyphs) }
        new_lookup.SubTable = [ new_subtable ]
        new_lookup_index = len(lookups)
        lookups.append(new_lookup)

        # Add feature
        feature = next(x for x in gsub.FeatureList.FeatureRecord if x.FeatureTag == 'ccmp')
        if not feature:
            raise ValueError("Font doesn't contain ccmp feature.")

        feature.Feature.LookupListIndex.append(new_lookup_index)

    def create_font(self):
        """Creates the EmojiCompat font using the inputs given to the constructor: the Android
        NotoColorEmoji font and the directory that contains the unicode files."""

        tmp_dir = tempfile.mkdtemp()

        # create emoji codepoints to EmojiData map
        self.emoji_data_map = load_emoji_data_map(self.unicode_path)

        # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
        # returned is either default or 1 greater than the largest id in previous data
        self.emoji_id = load_previous_metadata(self.emoji_data_map)

        # recalcTimestamp parameter will keep the modified field the same as in the original font.
        # Changing the modified field in the font causes the font ttf file to change, which makes
        # it harder to understand if something really changed in the font.
        with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
            # read image size data
            self.read_cbdt(ttf)

            # glyph name to codepoint map
            glyph_to_codepoint_map = {}

            # read single codepoint emojis under cmap12
            cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

            # read emoji sequences from gsub
            self.read_gsub(ttf, glyph_to_codepoint_map)

            # add all new codepoint to glyph mappings
            cmap12_table.cmap.update(self.remapped_codepoints)

            # final metadata csv will be used to generate the sha, therefore write it before
            # metadata json is written.
            self.write_metadata_csv()

            output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
            flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
            flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

            total_emoji_count = self.write_metadata_json(output_json_file)

            # create the flatbuffers binary and java classes
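            # flatc flags: -o sets the output directory for generated files, -b serializes the
            # json input into a flatbuffer binary, -j generates the Java classes for the schema.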
            flatc_command = ['flatc',
                             '-o',
                             tmp_dir,
                             '-b',
                             '-j',
                             FLATBUFFER_SCHEMA,
                             output_json_file]
            subprocess.check_output(flatc_command)

            # inject metadata binary into font
            inject_meta_into_font(ttf, flatbuffer_bin_file)

            # add watermark glyph for manual verification.
            self.add_watermark(ttf)

            # update CBDT and CBLC versions since older android versions cannot read > 2.0
            ttf['CBDT'].version = 2.0
            ttf['CBLC'].version = 2.0

            # save the new font
            ttf.save(FONT_PATH)

        update_flatbuffer_java_files(flatbuffer_java_dir,  # tmp dir
                                     FLATBUFFER_HEADER,
                                     FLATBUFFER_JAVA_TARGET)

        create_test_data(self.unicode_path)

        # clear the tmp output directory
        shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints how to use the script."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path")


def parse_args(argv):
    # parse manually to avoid any extra dependencies
    if len(argv) < 3:
        print_usage()
        sys.exit(1)
    return argv[1], argv[2]


def main():
    font_file, unicode_dir = parse_args(sys.argv)
    EmojiFontCreator(font_file, unicode_dir).create_font()


if __name__ == '__main__':
    main()