-
Notifications
You must be signed in to change notification settings - Fork 3
/
luametalatex-pdf-font-cff.lua
591 lines (589 loc) · 19.3 KB
/
luametalatex-pdf-font-cff.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
local readfile = require'luametalatex-readfile'
local sfnt = require'luametalatex-font-sfnt'
local stdStrings = require'luametalatex-font-cff-data'
-- string.unpack format template for a big-endian offset of `offSize` bytes.
local offsetfmt = ">I%i"

--- Parses the count and offset array of a CFF INDEX structure.
-- @param buf string holding the CFF data
-- @param i position (1-based) of the first byte of the INDEX in buf
-- @return table with count+1 absolute positions in buf: entry j is the first
--   byte of object j and entry j+1 is one past its last byte. An empty INDEX
--   yields an empty table.
-- @return position of the first byte following the INDEX
local function parse_index(buf, i)
  local count
  count, i = string.unpack(">I2", buf, i)
  -- An empty INDEX consists of the two count bytes only. Do not try to read
  -- an offSize byte, it does not exist and might lie beyond the buffer.
  if count == 0 then return {}, i end
  local offsize
  offsize, i = string.unpack("B", buf, i)
  local fmt = offsetfmt:format(offsize)
  local offsets = {}
  for j = 1, count+1 do
    offsets[j], i = string.unpack(fmt, buf, i)
  end
  -- The stored offsets are relative to the byte preceding the object data,
  -- which is the last byte of the offset array.
  local base = i - 1
  for j = 1, count+1 do
    offsets[j] = offsets[j] + base
  end
  return offsets, offsets[#offsets]
end
-- Text represented by each nibble of a packed real number in a CFF DICT.
-- Nibbles without an entry (0xD and 0xF) end the number.
local real_mapping = {
  [0x0] = '0', [0x1] = '1', [0x2] = '2', [0x3] = '3', [0x4] = '4',
  [0x5] = '5', [0x6] = '6', [0x7] = '7', [0x8] = '8', [0x9] = '9',
  [0xA] = '.', [0xB] = 'E', [0xC] = 'E-', [0xE] = '-',
}

--- Decodes the nibbles of a packed real number starting at byte `offset`.
-- Returns the textual pieces of the number as separate values, followed by
-- the offset of the byte in which decoding stopped (or the first offset past
-- the end of `cs` if the string ended first). If the terminating nibble is
-- the high nibble of a byte, an additional trailing nil is returned.
local function parse_real(cs, offset)
  local pieces, n = {}, 0
  while true do
    local byte = cs:byte(offset)
    if not byte then
      -- Ran off the end of the string
      n = n + 1
      pieces[n] = offset
      break
    end
    local high, low = real_mapping[byte >> 4], real_mapping[byte & 0xF]
    if not high then
      -- Terminator in the high nibble: offset followed by a nil
      pieces[n + 1] = offset
      n = n + 2
      break
    elseif not low then
      -- Terminator in the low nibble: last piece, then the offset
      pieces[n + 1], pieces[n + 2] = high, offset
      n = n + 2
      break
    end
    pieces[n + 1], pieces[n + 2] = high, low
    n = n + 2
    offset = offset + 1
  end
  return table.unpack(pieces, 1, n)
end
--- Pops the single pending operand from a DICT operand stack.
-- @param result table whose array part holds the operands collected so far
-- @return the operand; the array part of `result` is empty afterwards
-- Raises an error if the stack does not hold exactly one operand.
local function get_number(result)
  local operands = #result
  if operands ~= 1 then
    -- Report the problem directly instead of depending on an optional
    -- third-party debugging module.
    error(string.format("CFF DICT operator expects exactly 1 operand, got %d", operands))
  end
  local num = result[1]
  result[1] = nil
  return num
end
--- Pops the single pending operand and interprets it as a boolean.
-- Only the value 1 counts as true.
local function get_bool(result)
  local value = get_number(result)
  return value == 1
end
--- Pops the single pending operand and resolves it as a string id (SID).
-- The id is looked up in `stdStrings` first; ids not found there are looked
-- up in `strings` after subtracting `#stdStrings`.
local function get_string(result, strings)
  local sid = get_number(result)
  local standard = stdStrings[sid]
  if standard then
    return standard
  end
  return strings[sid - #stdStrings]
end
--- Moves all pending operands into a new array.
-- The array part of `result` is empty afterwards; other fields are untouched.
local function get_array(result)
  local arr = {}
  for idx = 1, #result do
    arr[idx] = result[idx]
    result[idx] = nil
  end
  return arr
end
--- Pops all pending operands of a delta-encoded array and returns the
-- decoded values: every entry is the sum of all operands up to its position.
local function get_delta(result)
  local values = get_array(result)
  local running = 0
  for idx, delta in ipairs(values) do
    running = delta + running
    values[idx] = running
  end
  return values
end
--- Pops the operands of the Private operator, which must be exactly two.
-- The caller in this file reads entry 1 as the size and entry 2 as the
-- offset of the Private DICT.
local function get_private(result)
  local size_and_offset = get_array(result)
  assert(#size_and_offset == 2)
  return size_and_offset
end
--- Pops the three operands of the ROS operator.
-- The first two operands are string ids and get replaced by the strings
-- they refer to; the third one is returned unchanged.
local function get_ros(result, strings)
  local ros = get_array(result)
  assert(#ros == 3)
  for idx = 1, 2 do
    -- get_string reads its SID from the operand stack, so push it back
    result[1] = ros[idx]
    ros[idx] = get_string(result, strings)
  end
  return ros
end
--- Applies the six element transformation matrix `m` to the point (x, y)
-- and scales both resulting coordinates by 1000.
local function apply_matrix(m, x, y)
  local a, b, c, d, tx, ty = m[1], m[2], m[3], m[4], m[5], m[6]
  local new_x = a * x + c * y + tx
  local new_y = b * x + d * y + ty
  return new_x*1000, new_y*1000
end
-- DICT operators: maps an operator code to {key, reader}. The reader takes
-- the pending operands off the operand stack and returns the value which
-- parse_dict stores under `key`.
-- One byte operators use their byte value as code. Two byte operators
-- (escape byte 12 followed by a second byte b) use the code -b-1.
local operators = {
  -- One byte operators
  [0] = {'version', get_string},
  [1] = {'Notice', get_string},
  [2] = {'FullName', get_string},
  [3] = {'FamilyName', get_string},
  [4] = {'Weight', get_string},
  [5] = {'FontBBox', get_array},
  [6] = {'BlueValues', get_delta},
  [7] = {'OtherBlues', get_delta},
  [8] = {'FamilyBlues', get_delta},
  [9] = {'FamilyOtherBlues', get_delta},
  [10] = {'StdHW', get_number},
  [11] = {'StdVW', get_number},
  -- 12 is the escape byte and therefore has no entry
  [13] = {'UniqueID', get_number},
  [14] = {'XUID', get_array},
  [15] = {'charset', get_number},
  [16] = {'Encoding', get_number},
  [17] = {'CharStrings', get_number},
  [18] = {'Private', get_private},
  [19] = {'Subrs', get_number},
  [20] = {'defaultWidthX', get_number},
  [21] = {'nominalWidthX', get_number},
  -- Two byte operators
  [-1] = {'Copyright', get_string},
  [-2] = {'isFixedPitch', get_bool},
  [-3] = {'ItalicAngle', get_number},
  [-4] = {'UnderlinePosition', get_number},
  [-5] = {'UnderlineThickness', get_number},
  [-6] = {'PaintType', get_number},
  [-7] = {'CharstringType', get_number},
  [-8] = {'FontMatrix', get_array},
  [-9] = {'StrokeWidth', get_number},
  [-10] = {'BlueScale', get_number},
  [-11] = {'BlueShift', get_number},
  [-12] = {'BlueFuzz', get_number},
  [-13] = {'StemSnapH', get_delta},
  [-14] = {'StemSnapV', get_delta},
  [-15] = {'ForceBold', get_bool},
  [-18] = {'LanguageGroup', get_number},
  [-19] = {'ExpansionFactor', get_number},
  [-20] = {'initialRandomSeed', get_number},
  [-21] = {'SyntheticBase', get_number},
  [-22] = {'PostScript', get_string},
  [-23] = {'BaseFontName', get_string},
  [-24] = {'BaseFontBlend', get_delta},
  [-31] = {'ROS', get_ros},
  [-32] = {'CIDFontVersion', get_number},
  [-33] = {'CIDFontRevision', get_number},
  [-34] = {'CIDFontType', get_number},
  [-35] = {'CIDCount', get_number},
  [-36] = {'UIDBase', get_number},
  [-37] = {'FDArray', get_number},
  [-38] = {'FDSelect', get_number},
  [-39] = {'FontName', get_string},
}
--- Parses the CFF DICT stored in bytes i..j of buf.
-- Operands are collected in the array part of the returned table until an
-- operator is found. The operator's reader (see `operators`) consumes them
-- and its return value is stored under the operator's name.
-- @param buf string holding the CFF data
-- @param i position of the first byte of the DICT
-- @param j position of the last byte of the DICT
-- @param strings table passed on to the operator readers
-- @return table mapping operator names to their values
local function parse_dict(buf, i, j, strings)
  -- Must be local: a global here would be shared by every caller.
  local result = {}
  while i <= j do
    local cmd = buf:byte(i)
    if cmd == 29 then
      result[#result+1] = (string.unpack(">i4", buf, i+1))
      i = i+4
    elseif cmd == 28 then
      result[#result+1] = (string.unpack(">i2", buf, i+1))
      i = i+2
    elseif cmd >= 251 then -- Actually "and cmd ~= 255", but 255 is reserved
      result[#result+1] = -((cmd-251)*256)-string.byte(buf, i+1)-108
      i = i+1
    elseif cmd >= 247 then
      result[#result+1] = (cmd-247)*256+string.byte(buf, i+1)+108
      i = i+1
    elseif cmd >= 32 then
      result[#result+1] = cmd-139
    elseif cmd == 30 then -- 31 is reserved again
      -- parse_real returns the pieces of the number followed by the position
      -- of its last byte
      local real = {parse_real(buf, i+1)}
      i = real[#real]
      real[#real] = nil
      result[#result+1] = tonumber(table.concat(real))
    else
      if cmd == 12 then
        -- Two byte operator, these are stored under negative codes
        i = i+1
        cmd = -buf:byte(i)-1
      end
      local op = operators[cmd]
      if not op then error[[Unknown CFF operator]] end
      result[op[1]] = op[2](result, strings)
    end
    i = i+1
  end
  return result
end
-- Called for the first stack-clearing operator of a charstring: inserts a
-- new first entry into `result` and, if the operator's operands start with
-- a width, moves that width into the new entry.
local function split_width(result, operands, has_width)
  table.insert(result, 1, {false})
  if has_width then
    result[1][2] = operands[2]
    table.remove(operands, 2)
  end
end

--- Parses a Type 2 charstring, following subroutine calls.
-- @param cs the charstring
-- @param globalsubrs global subroutines: array of charstrings with the
--   fields `bias` (added to the operand of callgsubr to get the array index)
--   and `used` (set of the indices which have been called)
-- @param subrs local subroutines in the same layout
-- @param result only used internally for subroutine calls
-- @return array of entries {operator, operands...}; two byte operators
--   (escape 12, b) are stored as -b-1 and hintmask/cntrmask entries end
--   with their mask bytes. Once the first stack-clearing operator has been
--   seen, the first entry is {false} or {false, width}, where width is the
--   width operand before nominalWidthX is added. The field `stemcount` is
--   the number of stem hints declared.
local function parse_charstring(cs, globalsubrs, subrs, result)
  result = result or {{false}, stemcount = 0}
  local lastresult = result[#result]
  local i = 1
  while i ~= #cs+1 do
    local cmd = cs:byte(i)
    if cmd == 28 then
      lastresult[#lastresult+1] = string.unpack(">i2", cs:sub(i+1, i+2))
      i = i+2
    elseif cmd == 255 then
      lastresult[#lastresult+1] = string.unpack(">i4", cs:sub(i+1, i+4))/0x10000
      i = i+4
    elseif cmd >= 251 then
      lastresult[#lastresult+1] = -((cmd-251)*256)-string.byte(cs, i+1)-108
      i = i+1
    elseif cmd >= 247 then
      lastresult[#lastresult+1] = (cmd-247)*256+string.byte(cs, i+1)+108
      i = i+1
    elseif cmd >= 32 then
      lastresult[#lastresult+1] = cmd-139
    elseif cmd == 10 then -- callsubr
      local idx = lastresult[#lastresult]+subrs.bias
      local subr = subrs[idx]
      subrs.used[idx] = true
      lastresult[#lastresult] = nil
      parse_charstring(subr, globalsubrs, subrs, result)
      lastresult = result[#result]
    elseif cmd == 29 then -- callgsubr
      local idx = lastresult[#lastresult]+globalsubrs.bias
      local subr = globalsubrs[idx]
      globalsubrs.used[idx] = true
      lastresult[#lastresult] = nil
      parse_charstring(subr, globalsubrs, subrs, result)
      lastresult = result[#result]
    elseif cmd == 11 then
      break -- We do not keep subroutines, so drop returns and continue with the outer commands
    elseif cmd == 12 then
      i = i+1
      cmd = cs:byte(i)
      lastresult[1] = -cmd-1
      lastresult = {false}
      result[#result+1] = lastresult
    elseif cmd == 19 or cmd == 20 then -- hintmask, cntrmask
      -- Operands which are still on the stack are vstem hints whose operator
      -- has been omitted. If this is the first operator, they might be
      -- preceded by the width, in which case their number is odd.
      if #result == 1 then
        split_width(result, lastresult, #lastresult % 2 == 0)
      end
      result.stemcount = result.stemcount + #lastresult//2
      lastresult[1] = cmd
      -- The mask has one bit per stem hint, padded to full bytes
      local newi = i+(result.stemcount+7)//8
      lastresult[#lastresult+1] = cs:sub(i+1, newi)
      i = newi
      lastresult = {false}
      result[#result+1] = lastresult
    else
      local first = #result == 1
      if cmd == 21 then -- rmoveto
        if first then
          split_width(result, lastresult, #lastresult == 4)
        end
      elseif cmd == 4 or cmd == 22 then -- vmoveto, hmoveto
        if first then
          split_width(result, lastresult, #lastresult == 3)
        end
      elseif cmd == 14 then -- endchar
        if first then
          split_width(result, lastresult, #lastresult == 2 or #lastresult == 6)
        end
      elseif cmd == 1 or cmd == 3 or cmd == 18 or cmd == 23 then -- stem hints
        if first then
          split_width(result, lastresult, #lastresult % 2 == 0)
        end
        result.stemcount = result.stemcount + #lastresult//2
      end
      lastresult[1] = cmd
      lastresult = {false}
      result[#result+1] = lastresult
    end
    i = i+1
  end
  return result
end
--- Reads the charset of a font.
-- @param buf string holding the CFF data
-- @param i0 position of the start of the CFF data in buf
-- @param offset value of the `charset` operator: 0 (or nil), 1 and 2 select
--   the predefined ISOAdobe, Expert and ExpertSubset charsets, everything
--   else is the offset of the charset data relative to i0
-- @param strings a table of the font's own strings if the charset entries
--   should be resolved to strings, a false value to keep the numeric values
-- @param num number of glyphs
-- @return table mapping glyph indices (starting at 0) to the charset entry
local function parse_charset(buf, i0, offset, strings, num)
  offset = offset or 0
  local charset = {[0] = 0}
  if offset == 0 then
    -- ISOAdobe: glyph n has SID n, the last entry is SID 228
    for gid = 1, math.min(num-1, 228) do
      charset[gid] = gid
    end
  elseif offset == 1 or offset == 2 then
    -- Fail here with a clear message instead of returning nil
    error[[Predefined Expert and ExpertSubset charsets are not supported]]
  else
    offset = i0+offset
    local format
    format, offset = string.unpack(">B", buf, offset)
    if format == 0 then
      -- One entry for every glyph except glyph 0
      for gid = 1, num-1 do
        charset[gid], offset = string.unpack(">I2", buf, offset)
      end
    elseif format == 1 or format == 2 then
      -- Ranges (first, nLeft); the formats only differ in the size of nLeft
      local rangefmt = format == 1 and ">I2I1" or ">I2I2"
      local gid = 1
      while gid < num do
        local first, nLeft
        first, nLeft, offset = string.unpack(rangefmt, buf, offset)
        for j = 0, nLeft do
          charset[gid+j] = first+j
        end
        gid = gid+1+nLeft
      end
    else
      error[[Invalid Charset format]]
    end
  end
  if strings then -- We are not CID-keyed, so we should use strings instead of numbers
    local string_charset = {}
    for gid = #charset, 0, -1 do
      local sid = charset[gid]
      charset[gid] = nil
      string_charset[gid] = stdStrings[sid] or strings[sid-#stdStrings]
    end
    charset = string_charset
  end
  return charset
end
--- Reads the built-in encoding of a font.
-- @param buf string holding the CFF data
-- @param i0 position of the start of the CFF data in buf
-- @param offset value of the `Encoding` operator: 0 (or nil) and 1 select
--   the predefined Standard and Expert encodings, which are not implemented;
--   everything else is the offset of the encoding data relative to i0
-- @param CharStrings table indexed by glyph index
-- @return table mapping character codes to the entries of CharStrings
local function parse_encoding(buf, i0, offset, CharStrings)
  if not offset then offset = 0 end
  if offset == 0 then
    error[[TODO]] -- should select "StandardEncoding"
  elseif offset == 1 then
    error[[TODO]] -- should select "ExpertEncoding"
  end
  offset = i0+offset
  -- NOTE(review): the high bit of the format byte (supplemental encoding) is
  -- not handled, such encodings are rejected as invalid format.
  local format, num
  format, num, offset = string.unpack(">BB", buf, offset)
  local encoding = {}
  if format == 0 then
    -- num is the number of codes, one for each glyph starting with glyph 1
    for gid = 1, num do
      local code
      code, offset = string.unpack(">B", buf, offset)
      encoding[code] = CharStrings[gid]
    end
  elseif format == 1 then
    -- num is the number of ranges, not the number of glyphs
    local gid = 1
    for _ = 1, num do
      local first, nLeft
      first, nLeft, offset = string.unpack(">BB", buf, offset)
      for j = 0, nLeft do
        encoding[first + j] = CharStrings[gid + j]
      end
      gid = gid+1+nLeft
    end
  else
    error[[Invalid Encoding format]]
  end
  return encoding
end
--- Reads the FDSelect data and stores the index of the font DICT of every
-- glyph in the third entry of its CharStrings entry. The stored index is
-- one more than the FD number in the file to fit Lua's 1-based arrays.
-- @param buf string holding the CFF data
-- @param offset position of the FDSelect data in buf
-- @param CharStrings table of glyph entries with the first glyph at key 0
-- NOTE(review): FDSelect refers to glyph indices. If CharStrings is keyed by
-- CIDs which differ from the glyph indices, the assignment is wrong.
local function parse_fdselect(buf, offset, CharStrings)
  local format
  format, offset = string.unpack(">B", buf, offset)
  if format == 0 then
    -- One byte for every glyph, starting with glyph 0
    for gid = 0, #CharStrings do
      local glyph = CharStrings[gid]
      if glyph then
        glyph[3] = buf:byte(offset + gid) + 1
      end
    end
  elseif format == 3 then
    local count
    count, offset = string.unpack(">I2", buf, offset)
    for _ = 1, count do
      -- A range ends where the next range (or the final sentinel) starts
      local first, code, after = string.unpack(">I2BI2", buf, offset)
      for gid = first, after-1 do
        CharStrings[gid][3] = code + 1
      end
      offset = offset + 3
    end
  else
    error[[Invalid FDSelect format]]
  end
end
--- Maps the used characters to glyph indices based on a glyph name table.
-- The data at position i of buf consists of a two byte glyph count, one two
-- byte name index for every glyph and a sequence of strings with one byte
-- length prefixes. Name indices below 258 are looked up in `stdnames`,
-- bigger ones refer to the strings (258 being the first string).
-- NOTE(review): `stdnames` is not defined in the visible part of this file.
-- @param usedcids array of entries whose first element is a character code
-- @param encoding table mapping character codes to glyph names
-- @return array parallel to usedcids. Entries for encoded characters are
--   {glyph index, old = original entry}, the other ones are {position}.
-- Raises an error if an encoded name is not found among the glyph names.
local function applyencoding(buf, i, usedcids, encoding)
  local numglyphs
  numglyphs, i = string.unpack(">I2", buf, i)
  -- The strings follow the array of name indices
  local stroffset = i + 2*numglyphs
  -- Strings are only decoded when needed, always up to the requested one
  local function load_names(cache, wanted)
    for k = #cache+1, wanted do
      cache[k], stroffset = string.unpack("s1", buf, stroffset)
    end
    return cache[wanted]
  end
  local names = setmetatable({}, {__index = load_names})
  local pending = {} -- glyph name -> entry which still needs a glyph index
  local newusedcids = {}
  for j = 1, #usedcids do
    local used = usedcids[j]
    local name = encoding[used[1]]
    if name then
      local entry = {old = used}
      pending[name] = entry
      newusedcids[j] = entry
    else
      newusedcids[j] = {j} -- FIXME: Someone used a character which does not exists in the encoding.
                           -- This should probably at least trigger a warning.
    end
  end
  for gid = 0, numglyphs-1 do
    local nameindex
    nameindex, i = string.unpack(">I2", buf, i)
    local name
    if nameindex < 258 then
      name = stdnames[nameindex]
    else
      name = names[nameindex-257]
    end
    local entry = pending[name]
    if entry then
      entry[1] = gid
      pending[name] = nil
    end
  end
  if next(pending) then
    error[[Missing character]]
  end
  return newusedcids
end
-- Copies the subroutines described by the INDEX offsets `offsets` out of buf.
-- The result additionally contains the bias to be added to subroutine
-- numbers (already adjusted for 1-based arrays) and an empty `used` set.
local function collect_subrs(buf, offsets)
  local subrs = {}
  for j = 1, #offsets-1 do
    subrs[j] = buf:sub(offsets[j], offsets[j+1]-1)
  end
  subrs.used = {}
  subrs.bias = #subrs < 1240 and 108 or #subrs < 33900 and 1132 or 32769
  return subrs
end

--- Parses a CFF font and optionally subsets it.
-- The encoding parameter might be:
--   an encoding dictionary - Use the supplied encoding
--   true - Use the built-in encoding
--   false - Use GIDs
--   nil - Use CIDs, falling back to GIDs in name-based fonts
-- @param buf string holding the font data
-- @param i0 position of the CFF header in buf
-- @param fontid index of the font in the CFF FontSet, defaults to 1
-- @param usedcids if given, an array of entries {id, width} of the glyphs
--   to keep. The widths are replaced by the widths found in the charstrings.
-- @param encoding see above
-- @param trust_widths currently unused
-- @return the result of passing the parsed font to luametalatex-font-cff
-- @return the font bounding box, transformed by FontMatrix if present
local function myfunc(buf, i0, fontid, usedcids, encoding, trust_widths)
  fontid = fontid or 1
  local major, _, hdrSize = string.unpack(">BBBB", buf, i0)
  if major ~= 1 then error[[Unsupported CFF version]] end
  local nameoffsets, topoffsets, stringoffsets, globalsubrs
  local i = i0+hdrSize
  nameoffsets, i = parse_index(buf, i)
  topoffsets, i = parse_index(buf, i)
  stringoffsets, i = parse_index(buf, i)
  globalsubrs, i = parse_index(buf, i)
  local strings = {}
  for j=1,#stringoffsets-1 do
    strings[j] = buf:sub(stringoffsets[j], stringoffsets[j+1]-1)
  end
  if #nameoffsets ~= #topoffsets then error[[Inconsistant size of FontSet]] end
  if fontid >= #nameoffsets then error[[Invalid font id]] end
  local top = parse_dict(buf, topoffsets[fontid], topoffsets[fontid+1]-1, strings)
  top.FontName = buf:sub(nameoffsets[fontid], nameoffsets[fontid+1]-1)
  top.GlobalSubrs = collect_subrs(buf, globalsubrs)
  local CharStrings = parse_index(buf, i0+top.CharStrings)
  -- If we use the built-in encoding *or* GIDs (encoding is a boolean), we do
  -- not need to waste our time making sense of the charset
  if type(encoding) ~= "boolean" and (encoding or top.ROS) then
    local charset = parse_charset(buf, i0, top.charset, not top.ROS and strings, #CharStrings-1)
    local named_charstrings = {}
    for j=1,#CharStrings-1 do
      named_charstrings[charset[j-1]] = {CharStrings[j], CharStrings[j+1]-1}
    end
    CharStrings = named_charstrings
  else
    -- Replace the offsets by {first, last} pairs indexed by GID
    for j=1,#CharStrings-1 do
      CharStrings[j-1] = {CharStrings[j], CharStrings[j+1]-1}
    end
    -- Drop the two offsets which have not been overwritten
    CharStrings[#CharStrings] = nil
    CharStrings[#CharStrings] = nil
  end
  if not top.ROS then
    if encoding == true then -- Use the built-in encoding
      CharStrings = parse_encoding(buf, i0, top.Encoding, CharStrings)
    elseif encoding then
      encoding = require'luametalatex-font-enc'(encoding)
      local encoded = {}
      for code, name in pairs(encoding) do
        encoded[code] = CharStrings[name]
      end
      CharStrings = encoded
    end -- else: Use GIDs
    local private_offset = i0+top.Private[2]
    local private = parse_dict(buf, private_offset, private_offset+top.Private[1]-1, strings)
    top.Privates = {private}
    if private.Subrs then
      -- The Subrs offset is relative to the Private DICT
      private.Subrs = collect_subrs(buf, (parse_index(buf, private_offset+private.Subrs)))
    end
    top.Private = nil
  else
    assert(not encoding) -- FIXME: If we actually get these from OpenType, the glyph names might be hidden there...
                         -- Would that even be allowed?
    local fonts = parse_index(buf, i0+top.FDArray)
    local privates = {}
    top.Privates = privates
    for fd=1,#fonts-1 do
      local fontdir = parse_dict(buf, fonts[fd], fonts[fd+1]-1, strings)
      local private_offset = i0+fontdir.Private[2]
      local private = parse_dict(buf, private_offset, private_offset+fontdir.Private[1]-1, strings)
      private.FontName = fontdir.FontName
      if private.Subrs then
        private.Subrs = collect_subrs(buf, (parse_index(buf, private_offset+private.Subrs)))
      end
      privates[fd] = private
    end
    top.FDArray = nil
    parse_fdselect(buf, i0+top.FDSelect, CharStrings)
  end
  local glyphs = {}
  if usedcids then -- Subsetting
    local usedfonts = {}
    for j=1,#usedcids do
      local cid = usedcids[j][1]
      local cs = CharStrings[cid]
      glyphs[j] = {cs = buf:sub(cs[1], cs[2]), index = cid, cidfont = cs[3], usedcid = usedcids[j]}
      usedfonts[cs[3] or 1] = true
    end
    -- Only keep the Private DICTs which are still needed and renumber them
    local lastfont = 0
    for j=1,#top.Privates do
      if usedfonts[j] then
        lastfont = lastfont + 1
        usedfonts[j] = lastfont
        top.Privates[lastfont] = top.Privates[j]
      end
    end
    for j=lastfont+1,#top.Privates do
      top.Privates[j] = nil
    end
    for j=1,#glyphs do
      glyphs[j].cidfont = usedfonts[glyphs[j].cidfont]
    end
    -- Subrs subsetting... Instead of deleting unused Subrs, we only make them empty.
    -- This avoids problems with renumberings which would have to be consistent across
    -- fonts in some odd way, because they might be used by globalsubrs.
    local m = top.FontMatrix or {.001, 0, 0, .001, 0, 0}
    for j=1,#glyphs do
      local g = glyphs[j]
      local private = top.Privates[g.cidfont or 1]
      -- Parsing marks the called subroutines and gives us the width
      local parsed = parse_charstring(g.cs, top.GlobalSubrs, private.Subrs)
      local width = parsed[1][2]
      if width then
        width = width + (private.nominalWidthX or 0)
      else
        width = private.defaultWidthX or 0
      end
      width = width * m[1] + m[3] -- I really have no idea why m[3] /= 0 might happen, but why not?
      width = math.floor(width*1000+.5) -- That's the rescale into "PDF glyph space"
      if g.usedcid[2] ~= width then print("MISMATCH:", g.usedcid[1], g.usedcid[2], width) end
      g.usedcid[2] = width
    end
    for j=1,#top.GlobalSubrs do
      if not top.GlobalSubrs.used[j] then
        top.GlobalSubrs[j] = ""
      end
    end
    for _, priv in ipairs(top.Privates) do
      local subrs = priv.Subrs
      if subrs then
        for j=1,#subrs do
          if not subrs.used[j] then
            subrs[j] = ""
          end
        end
      end
    end
  else
    for index, cs in pairs(CharStrings) do -- Not subsetting
      -- The font DICT index is stored in the third entry by parse_fdselect
      glyphs[#glyphs+1] = {cs = buf:sub(cs[1], cs[2]), index = index, cidfont = cs[3]}
    end
  end
  top.glyphs = glyphs
  table.sort(glyphs, function(a,b)return a.index<b.index end)
  local bbox
  if top.FontMatrix then
    local x0, y0 = apply_matrix(top.FontMatrix, top.FontBBox[1], top.FontBBox[2])
    local x1, y1 = apply_matrix(top.FontMatrix, top.FontBBox[3], top.FontBBox[4])
    bbox = {x0, y0, x1, y1}
  else
    bbox = top.FontBBox
  end
  return require'luametalatex-font-cff'(top), bbox
end
--- Module entry point.
-- Called with the name of a font file, an optional font id and an optional
-- encoding (see the description of the encoding parameter of myfunc) it
-- returns a function(fontdir, usedcids). That function reads the file,
-- passes the CFF data to myfunc, stores the bounding box in fontdir.bbox
-- and returns the font data.
-- The file might be a bare CFF file or an OpenType font or collection.
return function(filename, fontid, encoding) return function(fontdir, usedcids)
  local file <close> = readfile('opentype', filename)
  local buf = file()
  local i = 1
  local cff_fontid = fontid
  local magic = buf:sub(1, 4)
  if magic == "ttcf" or magic == "OTTO" then
    -- assert(not encoding) -- nil or false
    encoding = encoding or false
    local magic, tables = sfnt.parse(buf, fontid) -- TODO: Interpret widths etc, they might differ from the CFF ones.
    assert(magic == "OTTO")
    -- Also CFF2 would be nice to have
    local cff = tables['CFF ']
    if not cff then error[[OpenType font does not contain a CFF table]] end
    i = cff[1]
    -- fontid selected the font inside of the OpenType file. The CFF table
    -- of an OpenType font contains exactly one font.
    cff_fontid = 1
  end
  local content, bbox = myfunc(buf, i, cff_fontid, usedcids, encoding)
  fontdir.bbox = bbox
  return content
end end