Skip to content

Commit e2befff

Browse files
committed
Use glyph indices for font tracking in vector formats
With libraqm, string layout produces glyph indices rather than character codes, and font features may even select different glyphs for the same character code (e.g., by picking a different Stylistic Set). A character code therefore does not uniquely identify a glyph within a font, so font tracking must move to glyph indices everywhere.
1 parent 9766cbd commit e2befff

File tree

9 files changed: 129 additions (+129) and 133 deletions (-133).

lib/matplotlib/_mathtext.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838

3939
if T.TYPE_CHECKING:
4040
from collections.abc import Iterable
41-
from .ft2font import CharacterCodeType, Glyph
41+
from .ft2font import CharacterCodeType, Glyph, GlyphIndexType
4242

4343

4444
ParserElement.enable_packrat()
@@ -87,7 +87,7 @@ class VectorParse(NamedTuple):
8787
width: float
8888
height: float
8989
depth: float
90-
glyphs: list[tuple[FT2Font, float, CharacterCodeType, float, float]]
90+
glyphs: list[tuple[FT2Font, float, GlyphIndexType, float, float]]
9191
rects: list[tuple[float, float, float, float]]
9292

9393
VectorParse.__module__ = "matplotlib.mathtext"
@@ -132,7 +132,7 @@ def __init__(self, box: Box):
132132
def to_vector(self) -> VectorParse:
133133
w, h, d = map(
134134
np.ceil, [self.box.width, self.box.height, self.box.depth])
135-
gs = [(info.font, info.fontsize, info.num, ox, h - oy + info.offset)
135+
gs = [(info.font, info.fontsize, info.glyph_id, ox, h - oy + info.offset)
136136
for ox, oy, info in self.glyphs]
137137
rs = [(x1, h - y2, x2 - x1, y2 - y1)
138138
for x1, y1, x2, y2 in self.rects]
@@ -214,7 +214,7 @@ class FontInfo(NamedTuple):
214214
fontsize: float
215215
postscript_name: str
216216
metrics: FontMetrics
217-
num: CharacterCodeType
217+
glyph_id: GlyphIndexType
218218
glyph: Glyph
219219
offset: float
220220

@@ -375,7 +375,8 @@ def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
375375
dpi: float) -> FontInfo:
376376
font, num, slanted = self._get_glyph(fontname, font_class, sym)
377377
font.set_size(fontsize, dpi)
378-
glyph = font.load_char(num, flags=self.load_glyph_flags)
378+
glyph_id = font.get_char_index(num)
379+
glyph = font.load_glyph(glyph_id, flags=self.load_glyph_flags)
379380

380381
xmin, ymin, xmax, ymax = (val / 64 for val in glyph.bbox)
381382
offset = self._get_offset(font, glyph, fontsize, dpi)
@@ -397,7 +398,7 @@ def _get_info(self, fontname: str, font_class: str, sym: str, fontsize: float,
397398
fontsize=fontsize,
398399
postscript_name=font.postscript_name,
399400
metrics=metrics,
400-
num=num,
401+
glyph_id=glyph_id,
401402
glyph=glyph,
402403
offset=offset
403404
)
@@ -427,8 +428,7 @@ def get_kern(self, font1: str, fontclass1: str, sym1: str, fontsize1: float,
427428
info1 = self._get_info(font1, fontclass1, sym1, fontsize1, dpi)
428429
info2 = self._get_info(font2, fontclass2, sym2, fontsize2, dpi)
429430
font = info1.font
430-
return font.get_kerning(font.get_char_index(info1.num),
431-
font.get_char_index(info2.num),
431+
return font.get_kerning(info1.glyph_id, info2.glyph_id,
432432
Kerning.DEFAULT) / 64
433433
return super().get_kern(font1, fontclass1, sym1, fontsize1,
434434
font2, fontclass2, sym2, fontsize2, dpi)

lib/matplotlib/_text_helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
class LayoutItem:
1515
ft_object: FT2Font
1616
char: str
17-
glyph_idx: GlyphIndexType
17+
glyph_index: GlyphIndexType
1818
x: float
1919
prev_kern: float
2020

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,27 @@ def _cached_get_afm_from_fname(fname):
2020
return AFM(fh)
2121

2222

23-
def get_glyphs_subset(fontfile, characters):
23+
def get_glyphs_subset(fontfile, glyphs):
2424
"""
25-
Subset a TTF font
25+
Subset a TTF font.
2626
27-
Reads the named fontfile and restricts the font to the characters.
27+
Reads the named fontfile and restricts the font to the glyphs.
2828
2929
Parameters
3030
----------
3131
fontfile : str
3232
Path to the font file
33-
characters : str
34-
Continuous set of characters to include in subset
33+
glyphs : set[int]
34+
Set of glyph IDs to include in subset.
3535
3636
Returns
3737
-------
3838
fontTools.ttLib.ttFont.TTFont
3939
An open font object representing the subset, which needs to
4040
be closed by the caller.
4141
"""
42-
43-
options = subset.Options(glyph_names=True, recommended_glyphs=True)
42+
options = subset.Options(glyph_names=True, recommended_glyphs=True,
43+
retain_gids=True)
4444

4545
# Prevent subsetting extra tables.
4646
options.drop_tables += [
@@ -71,7 +71,7 @@ def get_glyphs_subset(fontfile, characters):
7171

7272
font = subset.load_font(fontfile, options)
7373
subsetter = subset.Subsetter(options=options)
74-
subsetter.populate(text=characters)
74+
subsetter.populate(gids=glyphs)
7575
subsetter.subset(font)
7676
return font
7777

@@ -97,10 +97,10 @@ def font_as_file(font):
9797

9898
class CharacterTracker:
9999
"""
100-
Helper for font subsetting by the pdf and ps backends.
100+
Helper for font subsetting by the PDF and PS backends.
101101
102-
Maintains a mapping of font paths to the set of character codepoints that
103-
are being used from that font.
102+
Maintains a mapping of font paths to the set of glyphs that are being used from that
103+
font.
104104
"""
105105

106106
def __init__(self):
@@ -110,10 +110,11 @@ def track(self, font, s):
110110
"""Record that string *s* is being typeset using font *font*."""
111111
char_to_font = font._get_fontmap(s)
112112
for _c, _f in char_to_font.items():
113-
self.used.setdefault(_f.fname, set()).add(ord(_c))
113+
glyph_index = _f.get_char_index(ord(_c))
114+
self.used.setdefault(_f.fname, set()).add(glyph_index)
114115

115116
def track_glyph(self, font, glyph):
116-
"""Record that codepoint *glyph* is being typeset using font *font*."""
117+
"""Record that glyph index *glyph* is being typeset using font *font*."""
117118
self.used.setdefault(font.fname, set()).add(glyph)
118119

119120

lib/matplotlib/backends/backend_cairo.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import functools
1010
import gzip
11+
import itertools
1112
import math
1213

1314
import numpy as np
@@ -248,13 +249,12 @@ def _draw_mathtext(self, gc, x, y, s, prop, angle):
248249
if angle:
249250
ctx.rotate(np.deg2rad(-angle))
250251

251-
for font, fontsize, idx, ox, oy in glyphs:
252+
for (font, fontsize), font_glyphs in itertools.groupby(
253+
glyphs, key=lambda x: (x[0], x[1])):
252254
ctx.new_path()
253-
ctx.move_to(ox, -oy)
254-
ctx.select_font_face(
255-
*_cairo_font_args_from_font_prop(ttfFontProperty(font)))
255+
ctx.select_font_face(*_cairo_font_args_from_font_prop(ttfFontProperty(font)))
256256
ctx.set_font_size(self.points_to_pixels(fontsize))
257-
ctx.show_text(chr(idx))
257+
ctx.show_glyphs([(idx, ox, -oy) for _, _, idx, ox, oy in font_glyphs])
258258

259259
for ox, oy, w, h in rects:
260260
ctx.new_path()

lib/matplotlib/backends/backend_pdf.py

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -960,9 +960,9 @@ def writeFonts(self):
960960
else:
961961
# a normal TrueType font
962962
_log.debug('Writing TrueType font.')
963-
chars = self._character_tracker.used.get(filename)
964-
if chars:
965-
fonts[Fx] = self.embedTTF(filename, chars)
963+
glyphs = self._character_tracker.used.get(filename)
964+
if glyphs:
965+
fonts[Fx] = self.embedTTF(filename, glyphs)
966966
self.writeObject(self.fontObject, fonts)
967967

968968
def _write_afm_font(self, filename):
@@ -1136,9 +1136,8 @@ def _get_xobject_glyph_name(self, filename, glyph_name):
11361136
end
11371137
end"""
11381138

1139-
def embedTTF(self, filename, characters):
1139+
def embedTTF(self, filename, glyphs):
11401140
"""Embed the TTF font from the named file into the document."""
1141-
11421141
font = get_font(filename)
11431142
fonttype = mpl.rcParams['pdf.fonttype']
11441143

@@ -1153,7 +1152,7 @@ def cvt(length, upe=font.units_per_EM, nearest=True):
11531152
else:
11541153
return math.ceil(value)
11551154

1156-
def embedTTFType3(font, characters, descriptor):
1155+
def embedTTFType3(font, glyphs, descriptor):
11571156
"""The Type 3-specific part of embedding a Truetype font"""
11581157
widthsObject = self.reserveObject('font widths')
11591158
fontdescObject = self.reserveObject('font descriptor')
@@ -1200,15 +1199,13 @@ def get_char_width(charcode):
12001199
# Make the "Differences" array, sort the ccodes < 255 from
12011200
# the multi-byte ccodes, and build the whole set of glyph ids
12021201
# that we need from this font.
1203-
glyph_ids = []
12041202
differences = []
12051203
multi_byte_chars = set()
1206-
for c in characters:
1207-
ccode = c
1208-
gind = font.get_char_index(ccode)
1209-
glyph_ids.append(gind)
1204+
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1205+
for gind in glyphs:
12101206
glyph_name = font.get_glyph_name(gind)
1211-
if ccode <= 255:
1207+
ccode = charmap.get(gind)
1208+
if ccode is not None and ccode <= 255:
12121209
differences.append((ccode, glyph_name))
12131210
else:
12141211
multi_byte_chars.add(glyph_name)
@@ -1222,7 +1219,7 @@ def get_char_width(charcode):
12221219
last_c = c
12231220

12241221
# Make the charprocs array.
1225-
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
1222+
rawcharprocs = _get_pdf_charprocs(filename, glyphs)
12261223
charprocs = {}
12271224
for charname in sorted(rawcharprocs):
12281225
stream = rawcharprocs[charname]
@@ -1259,7 +1256,7 @@ def get_char_width(charcode):
12591256

12601257
return fontdictObject
12611258

1262-
def embedTTFType42(font, characters, descriptor):
1259+
def embedTTFType42(font, glyphs, descriptor):
12631260
"""The Type 42-specific part of embedding a Truetype font"""
12641261
fontdescObject = self.reserveObject('font descriptor')
12651262
cidFontDictObject = self.reserveObject('CID font dictionary')
@@ -1269,9 +1266,8 @@ def embedTTFType42(font, characters, descriptor):
12691266
wObject = self.reserveObject('Type 0 widths')
12701267
toUnicodeMapObject = self.reserveObject('ToUnicode map')
12711268

1272-
subset_str = "".join(chr(c) for c in characters)
1273-
_log.debug("SUBSET %s characters: %s", filename, subset_str)
1274-
with _backend_pdf_ps.get_glyphs_subset(filename, subset_str) as subset:
1269+
_log.debug("SUBSET %s characters: %s", filename, glyphs)
1270+
with _backend_pdf_ps.get_glyphs_subset(filename, glyphs) as subset:
12751271
fontdata = _backend_pdf_ps.font_as_file(subset)
12761272
_log.debug(
12771273
"SUBSET %s %d -> %d", filename,
@@ -1319,11 +1315,11 @@ def embedTTFType42(font, characters, descriptor):
13191315
cid_to_gid_map = ['\0'] * 65536
13201316
widths = []
13211317
max_ccode = 0
1322-
for c in characters:
1323-
ccode = c
1324-
gind = font.get_char_index(ccode)
1325-
glyph = font.load_char(ccode,
1326-
flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
1318+
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1319+
for gind in glyphs:
1320+
glyph = font.load_glyph(gind,
1321+
flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
1322+
ccode = charmap[gind]
13271323
widths.append((ccode, cvt(glyph.horiAdvance)))
13281324
if ccode < 65536:
13291325
cid_to_gid_map[ccode] = chr(gind)
@@ -1361,11 +1357,10 @@ def embedTTFType42(font, characters, descriptor):
13611357
(len(unicode_groups), b"\n".join(unicode_bfrange)))
13621358

13631359
# Add XObjects for unsupported chars
1364-
glyph_ids = []
1365-
for ccode in characters:
1366-
if not _font_supports_glyph(fonttype, ccode):
1367-
gind = full_font.get_char_index(ccode)
1368-
glyph_ids.append(gind)
1360+
glyph_ids = [
1361+
gind for gind in glyphs
1362+
if not _font_supports_glyph(fonttype, charmap[gind])
1363+
]
13691364

13701365
bbox = [cvt(x, nearest=False) for x in full_font.bbox]
13711366
rawcharprocs = _get_pdf_charprocs(filename, glyph_ids)
@@ -1450,9 +1445,9 @@ def embedTTFType42(font, characters, descriptor):
14501445
}
14511446

14521447
if fonttype == 3:
1453-
return embedTTFType3(font, characters, descriptor)
1448+
return embedTTFType3(font, glyphs, descriptor)
14541449
elif fonttype == 42:
1455-
return embedTTFType42(font, characters, descriptor)
1450+
return embedTTFType42(font, glyphs, descriptor)
14561451

14571452
def alphaState(self, alpha):
14581453
"""Return name of an ExtGState that sets alpha to the given value."""
@@ -2215,28 +2210,32 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
22152210
oldx, oldy = 0, 0
22162211
unsupported_chars = []
22172212

2213+
font_charmaps = {}
22182214
self.file.output(Op.begin_text)
2219-
for font, fontsize, num, ox, oy in glyphs:
2220-
self.file._character_tracker.track_glyph(font, num)
2215+
for font, fontsize, glyph_index, ox, oy in glyphs:
2216+
self.file._character_tracker.track_glyph(font, glyph_index)
22212217
fontname = font.fname
2222-
if not _font_supports_glyph(fonttype, num):
2218+
if font not in font_charmaps:
2219+
font_charmaps[font] = {gind: ccode
2220+
for ccode, gind in font.get_charmap().items()}
2221+
ccode = font_charmaps[font].get(glyph_index)
2222+
if ccode is None or not _font_supports_glyph(fonttype, ccode):
22232223
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
22242224
# Type 42) must be emitted separately (below).
2225-
unsupported_chars.append((font, fontsize, ox, oy, num))
2225+
unsupported_chars.append((font, fontsize, ox, oy, glyph_index))
22262226
else:
22272227
self._setup_textpos(ox, oy, 0, oldx, oldy)
22282228
oldx, oldy = ox, oy
22292229
if (fontname, fontsize) != prev_font:
22302230
self.file.output(self.file.fontName(fontname), fontsize,
22312231
Op.selectfont)
22322232
prev_font = fontname, fontsize
2233-
self.file.output(self.encode_string(chr(num), fonttype),
2233+
self.file.output(self.encode_string(chr(ccode), fonttype),
22342234
Op.show)
22352235
self.file.output(Op.end_text)
22362236

2237-
for font, fontsize, ox, oy, num in unsupported_chars:
2238-
self._draw_xobject_glyph(
2239-
font, fontsize, font.get_char_index(num), ox, oy)
2237+
for font, fontsize, ox, oy, glyph_index in unsupported_chars:
2238+
self._draw_xobject_glyph(font, fontsize, glyph_index, ox, oy)
22402239

22412240
# Draw any horizontal lines in the math layout
22422241
for ox, oy, width, height in rects:
@@ -2399,7 +2398,7 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
23992398
singlebyte_chunks[-1][2].append(item.char)
24002399
prev_was_multibyte = False
24012400
else:
2402-
multibyte_glyphs.append((item.ft_object, item.x, item.glyph_idx))
2401+
multibyte_glyphs.append((item.ft_object, item.x, item.glyph_index))
24032402
prev_was_multibyte = True
24042403
# Do the rotation and global translation as a single matrix
24052404
# concatenation up front
@@ -2409,7 +2408,6 @@ def draw_text(self, gc, x, y, s, prop, angle, ismath=False, mtext=None):
24092408
-math.sin(a), math.cos(a),
24102409
x, y, Op.concat_matrix)
24112410
# Emit all the 1-byte characters in a BT/ET group.
2412-
24132411
self.file.output(Op.begin_text)
24142412
prev_start_x = 0
24152413
for ft_object, start_x, kerns_or_chars in singlebyte_chunks:

0 commit comments

Comments
 (0)