Skip to content

Commit

Permalink
Port some of the binary parsing utilities to BinaryParsingTools
Browse files Browse the repository at this point in the history
  • Loading branch information
serenity4 committed Jun 5, 2024
1 parent 9f90e8c commit 5f82131
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 249 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
name = "OpenType"
uuid = "e5a4412f-5132-4ec5-841c-0d067f41fa94"
authors = ["Cédric Belmant"]
version = "0.1.4"
authors = ["Cédric Belmant"]

[deps]
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
BinaryParsingTools = "6663bfc4-f836-4634-93ec-aee42a53a86a"
BitMasks = "a3e06817-fd65-4797-8291-16f435bc2529"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Expand All @@ -16,6 +17,7 @@ StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
SwapStreams = "0b152c4a-7e29-418b-9258-223db38db9d9"

[compat]
BinaryParsingTools = "0.1"
BitMasks = "0.1"
Colors = "0.12"
Dates = "1"
Expand Down
7 changes: 1 addition & 6 deletions src/OpenType.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module OpenType

using Dates
using SwapStreams
using BinaryParsingTools
using GeometryExperiments
using GeometryExperiments: decompactify
using Colors
Expand Down Expand Up @@ -54,11 +54,6 @@ export GlyphID,
TextLimits, TextOptions,
text_glyphs,

@tag_str,
@tag2_str,
@tag3_str,
@tag4_str,

shape,
ShapingOptions,
ShapingInfo,
Expand Down
13 changes: 6 additions & 7 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,9 @@ A specification for OpenType font files is available
at https://docs.microsoft.com/en-us/typography/opentype/spec/otff
"""

# Parse an in-memory buffer by first wrapping it into an endianness-correcting stream.
function Base.read(io::IOBuffer, ::Type{OpenTypeData}; verify_checksums::Bool = true)
    swapped = correct_endianess(io)
    return read(swapped, OpenTypeData; verify_checksums)
end
# Generic streams are read entirely into memory, then parsed from the resulting `IOBuffer`.
function Base.read(io::IO, ::Type{OpenTypeData}; verify_checksums::Bool = true)
    buffer = IOBuffer(read(io))
    return read(buffer, OpenTypeData; verify_checksums)
end
# Tell whether multi-byte reads on `io` must be byte-swapped to parse an OpenType font.
#
# OpenType data is stored big-endian, so a swap is required whenever the leading
# `sfntVersion`, peeked in host byte order, shows up reversed:
# - `0x00010000` appears as `0x00000100` on a host that needs swapping;
# - `0x4F54544F` ("OTTO") is byte-symmetric and reads identically in both byte orders,
#   so for that signature the decision falls back on the host byte order (`ENDIAN_BOM`).
function BinaryParsingTools.swap_endianness(io::IO, ::Type{OpenTypeData})
    sfnt = peek(io, UInt32)
    sfnt == 0x00000100 && return true
    # `ENDIAN_BOM == 0x04030201` identifies a little-endian host.
    return sfnt == 0x4F54544F && ENDIAN_BOM == 0x04030201
end

function Base.read(io::Union{SwapStream,TracedIO{<:SwapStream}}, ::Type{OpenTypeData}; verify_checksums::Bool = true)
function Base.read(io::Union{BinaryIO, TracedIO{<:BinaryIO}}, ::Type{OpenTypeData}; verify_checksums::Bool = true)
table_directory, nav = TableNavigationMap(io)
if verify_checksums
ret = @__MODULE__().verify_checksums(io, nav)
Expand All @@ -63,7 +62,7 @@ function Base.read(io::Union{SwapStream,TracedIO{<:SwapStream}}, ::Type{OpenType
read(io, OpenTypeData, table_directory, nav)
end

function TableNavigationMap(io::Union{SwapStream,TracedIO{<:SwapStream}})
function TableNavigationMap(io::IO)
sfnt = peek(io, UInt32)
sfnt in (0x00010000, 0x4F54544F) || error_invalid_font("Invalid format: unknown SFNT version (expected 0x00010000 or 0x4F54544F). The provided IO may not describe an OpenType font, or may describe one that is not conform to the OpenType specification.")
table_directory = read(io, TableDirectory)
Expand All @@ -72,7 +71,7 @@ function TableNavigationMap(io::Union{SwapStream,TracedIO{<:SwapStream}})
table_directory, nav
end

function Base.read(io::Union{SwapStream,TracedIO{<:SwapStream}}, ::Type{OpenTypeData}, table_directory::TableDirectory, nav::TableNavigationMap)
function Base.read(io::IO, ::Type{OpenTypeData}, table_directory::TableDirectory, nav::TableNavigationMap)
cmap = read_table(Base.Fix2(read, CharacterToGlyphIndexMappingTable), io, nav, tag"cmap")::CharacterToGlyphIndexMappingTable
head = read_table(Base.Fix2(read, FontHeader), io, nav, tag"head")::FontHeader
hhea = read_table(Base.Fix2(read, HorizontalHeader), io, nav, tag"hhea")::HorizontalHeader
Expand All @@ -99,10 +98,10 @@ end

function OpenTypeData(file::AbstractString; verify_checksums::Bool = true, debug::Bool = false)
open(file) do io
io = correct_endianess(IOBuffer(read(io)))
if !debug
read(io, OpenTypeData; verify_checksums)
read_binary(io, OpenTypeData; verify_checksums)
else
io = BinaryParsingTools.BinaryIO(BinaryParsingTools.swap_endianness(io, OpenTypeData), io)
io = TracedIO(io)
table_directory, nav = TableNavigationMap(io)
read(io, OpenTypeData)
Expand Down
188 changes: 1 addition & 187 deletions src/parse.jl
Original file line number Diff line number Diff line change
@@ -1,190 +1,4 @@
"""
Return an IO that will always read in the right endianness.
"""
function correct_endianess(io::IO)
    # Swapping is requested when the first `UInt32`, peeked without consuming it,
    # equals `0x00000100`.
    needs_swap = peek(io, UInt32) == 0x00000100
    return SwapStream(needs_swap, io)
end

# Round `size` up to the next multiple of 4.
word_align(size) = 4 * cld(size, 4)

# Build the expression which reads the value of a single `@serializable` field from a
# variable named `io`.
#
# Supported forms for `field`:
# - `name::T`: produces `read(io, T)`. `name::NTuple{N, T}` reads `N` values of type `T`
#   into a tuple. Bare `Vector{...}` and `String` types are rejected with an error.
# - `name::Vector{T} => len`: produces a comprehension reading `len` values of type `T`.
# - `name::T << ex`: the custom expression `ex` is returned as is; if it is a
#   comprehension, `linenum` is inserted into its body to keep stack traces readable.
#
# Any other form raises an error.
function read_expr(field, linenum::LineNumberNode)
    if isexpr(field, :(::))
        T = field.args[2]
        if isexpr(T, :curly)
            T.args[1] == :Vector && error("Vectors must have a corresponding length.")
            if T.args[1] == :NTuple
                # `NTuple{N, T}` carries the length first and the element type second.
                N, ElT = T.args[2], T.args[3]
                return :(Tuple(read(io, $ElT) for _ in 1:$N))
            end
        end
        T == :String && error("Strings are not supported yet.")
        return :(read(io, $T))
    elseif isexpr(field, :call) && field.args[1] == :(=>)
        field, len = field.args[2:3]
        if isexpr(field, :(::))
            T = last(field.args)
            if isexpr(T, :curly, 2) && T.args[1] == :Vector
                return :([read(io, $(T.args[2])) for _ in 1:$len])
            end
        end
    elseif isexpr(field, :call) && field.args[1] == :(<<)
        ex = field.args[3]
        # Add linenum info to comprehensions to make stack traces more readable.
        if isexpr(ex, :comprehension)
            ex.args[1].args[1] = Expr(:block, linenum, ex.args[1].args[1])
        end
        return ex
    end
    error("Unexpected expression form: $field")
end

# Turn the struct definition `ex` into a quoted block holding:
# - the struct definition, with the `=>` and `<<` annotations removed from its fields;
# - a `Base.read(io::IO, ::Type{T}, ...)` method reading the fields in declaration order
#   and passing them to the constructor of the struct.
# `source` is inserted at the top of the generated method body.
# Errors if `ex` is not a struct expression, if the type is parametric, or if a field
# has no type annotation.
function serializable(ex, source::LineNumberNode)
!isexpr(ex, :struct) && error("Expected a struct definition, got $(repr(ex))")
typedecl, fields = ex.args[2:3]
fields = isexpr(fields, :block) ? fields.args : [fields]

# Move `@arg ...` declarations out of the field list (in place) and into `argmeta`;
# they are turned into extra arguments of the generated `Base.read` method below.
argmeta = Expr[]
filter!(fields) do ex
if isexpr(ex, :macrocall) && ex.args[1] == Symbol("@arg")
push!(argmeta, ex)
false
else
true
end
end

# Extract the bare type name, dropping any `<: Supertype` part.
t = typedecl
isexpr(t, :(<:)) && (t = first(t.args))
isexpr(t, :curly) && error("Parametric types are not supported.")
@assert t isa Symbol
# NOTE(review): `exprs`, `lengths`, `required_fields` and `fields_withlength` are created
# or filled below but never read again within this function.
exprs = Expr[]
lengths = Dict{Symbol,Any}()
fieldnames = Symbol[]
field_linenums = LineNumberNode[]
# Separate the `LineNumberNode`s from the actual field expressions.
fields_nolinenums = filter(fields) do x
!isa(x, LineNumberNode) && return true
push!(field_linenums, x)
false
end
# Ignore linenums for `@arg x` definitions.
field_linenums = field_linenums[begin + length(argmeta):end]
required_fields = Symbol[]
fields_withlength = Symbol[]
# Collect the name of every field, in order, for each of the supported syntaxes:
# `name::T => length`, `name::T << ex` and plain `name::T`.
for ex in fields_nolinenums
if isexpr(ex, :call) && ex.args[1] == :(=>)
(field, l) = ex.args[2:3]
isexpr(field, :(::)) && (field = first(field.args))
lengths[field] = l
push!(fieldnames, field)
push!(fields_withlength, field)
isa(l, Symbol) && push!(required_fields, l)
continue
else
# For `name::T << ex`, only the `name::T` part matters here.
isexpr(ex, :call) && ex.args[1] == :(<<) && (ex = ex.args[2])
if isexpr(ex, :(::))
push!(fieldnames, first(ex.args))
continue
end
end
error("Field $(repr(ex)) must be typed.")
end

# Body of the generated method: record the starting position as `__origin__`, then
# assign each field to a local variable of the same name, preceded by its line number.
body = Expr(:block, source, :(__origin__ = position(io)))
for (linenum, var, field) in zip(field_linenums, fieldnames, fields_nolinenums)
push!(body.args, linenum, :($var = $(read_expr(field, linenum))))
end
push!(body.args, :($t($(fieldnames...))))
fdecl = :(Base.read(io::IO, ::Type{$t}))
# Append the `@arg` names as extra positional arguments; `@arg name = value` is converted
# into a `:kw` expression, which gives the argument a default value.
for ex in argmeta
if isexpr(ex, :macrocall) && ex.args[1] == Symbol("@arg")
argex = last(ex.args)
Meta.isexpr(argex, :(=)) && (argex.head = :kw)
push!(fdecl.args, argex)
end
end
read_f = Expr(:function, fdecl, body)

# Rebuild the struct with plain fields: for `=>` and `<<` calls, keep only the left-hand side.
fields = map(fields) do ex
isexpr(ex, :call) && return ex.args[2]
ex
end
struct_def = Expr(:struct, ex.args[1:2]..., Expr(:block, fields...))
quote
Core.@__doc__ $struct_def
$read_f
end
end

"""
Mark a given struct as serializable, automatically implementing `Base.read`.
If some of the structure members are vectors, their length
must be specified using a syntax of the form `params::Vector{UInt32} => param_count`
where `param_count` can be any expression, which may depend on other structure members.
Fields can be read in a custom manner by using a syntax of the form
`params::SomeField << ex` where `ex` can be e.g. `read(io, SomeField, other_field.length)`
where `other_field` can refer to any previous field in the struct. This expression may
refer to a special variable `__origin__`, which is the position of the IO before parsing the struct.
Additional arguments required for `Base.read` can be specified with the syntax `@arg name` at the very start of the structure,
before any actual fields. In this way, the definition for `Base.read` will include these extra arguments. Calling code
will then have to provide these extra arguments.
`LineNumberNode`s will be preserved and inserted wherever necessary to keep stack traces informative.
# Examples
```julia
@serializable struct MarkArrayTable
mark_count::UInt16
mark_records::Vector{MarkRecord} => mark_count
end
```
```julia
@serializable struct LigatureAttachTable
@arg mark_class_count # will need to be provided when `Base.read`ing this type.
# Length of `component_records`.
component_count::UInt16
component_records::Vector{Vector{UInt16}} << [[read(io, UInt16) for _ in 1:mark_class_count] for _ in 1:component_count]
end
```
Here is an advanced example which makes use of all the features:
```julia
@serializable struct LigatureArrayTable
@arg mark_class_count # will need to be provided when `Base.read`ing this type.
# Length of `ligature_attach_offsets`.
ligature_count::UInt16
# Offsets in bytes from the origin of the structure to data blocks formatted as `LigatureAttachTable`s.
ligature_attach_offsets::Vector{UInt16} => ligature_count
ligature_attach_tables::Vector{LigatureAttachTable} << [read_at(io, LigatureAttachTable, offset, mark_class_count; start = __origin__) for offset in ligature_attach_offsets]
end
```
"""
macro serializable(ex)
    generated = try
        serializable(ex, __source__)
    catch
        # Report where the macro was invoked before propagating the original error.
        (; file, line) = __source__
        @error "An error happened while parsing an expression at $file:$line"
        rethrow()
    end

    # Escape the whole result so that all names resolve in the caller's module.
    return esc(generated)
end

"""
Read a value of type `T` located at an offset from a given start (defaulting
to the current position), without modifying the stream position.
"""
function read_at(io::IO, @nospecialize(T), offset, args...; start = position(io))
    # Extra `args` are forwarded to `read(io, T, args...)`.
    pos = position(io)
    seek(io, start + offset)
    try
        return read(io, T, args...)
    finally
        # Restore the original position even when reading throws, so that a failed read
        # does not leave the stream at an unexpected location.
        seek(io, pos)
    end
end
# Round `size` up to the next multiple of 4.
function word_align(size)
    words = cld(size, 4)
    return 4 * words
end

include("parsing/table_records.jl")
include("parsing/font_header.jl")
Expand Down
35 changes: 0 additions & 35 deletions src/tags.jl
Original file line number Diff line number Diff line change
@@ -1,38 +1,3 @@
"N-string tag."
struct Tag{N}
    # One byte per character of the tag.
    data::NTuple{N, UInt8}
end

# Build an `N`-character tag from `str`, rejecting strings of any other length as well as
# strings containing non-ASCII characters.
function Tag{N}(str::AbstractString) where {N}
    chars = collect(str)
    if length(chars) != N
        error("Expected $N-character string for tag, got string \"$str\" with length $(length(chars)).")
    end
    bad = findfirst(!isascii, chars)
    if bad !== nothing
        c = chars[bad]
        error("Tags must be ASCII strings, got non-ASCII character '$c' for \"$str\".")
    end
    return Tag(ntuple(i -> UInt8(chars[i]), N))
end

# Without an explicit `N`, the tag length is the number of characters of `str`.
function Tag(str::AbstractString)
    n = length(str)
    return Tag{n}(str)
end

# Aliases for tags of 2, 3 and 4 bytes.
const Tag2 = Tag{2}
const Tag3 = Tag{3}
const Tag4 = Tag{4}

# Case conversion is applied byte by byte, interpreting each byte as a character.
Base.uppercase(tag::Tag) = Tag(map(b -> UInt8(uppercase(Char(b))), tag.data))
Base.lowercase(tag::Tag) = Tag(map(b -> UInt8(lowercase(Char(b))), tag.data))

# String macros constructing tags while the macro is expanded: `tag"..."` takes its length
# from the string, whereas `tag2"..."`, `tag3"..."` and `tag4"..."` go through the
# `Tag2`, `Tag3` and `Tag4` constructors.
macro tag_str(str)
    Tag(str)
end

macro tag2_str(str)
    Tag2(str)
end

macro tag3_str(str)
    Tag3(str)
end

macro tag4_str(str)
    Tag4(str)
end

# Read `N` consecutive bytes from `io` into a tag.
function Base.read(io::IO, T::Type{Tag{N}}) where {N}
    bytes = ntuple(_ -> read(io, UInt8), N)
    return T(bytes)
end

# Tags are displayed as their characters surrounded by double quotes.
function Base.show(io::IO, tag::Tag)
    text = join(map(Char, tag.data))
    print(io, '"', text, '"')
end

# The string form of a tag is the concatenation of its characters.
Base.string(tag::Tag) = join(map(Char, tag.data))

# Strings convert to tags through the string constructors.
function Base.convert(::Type{Tag}, str::AbstractString)
    return Tag(str)
end

function Base.convert(::Type{Tag{N}}, str::AbstractString) where {N}
    return Tag{N}(str)
end

# Tags are ordered like their string forms.
function Base.isless(x::Tag, y::Tag)
    return isless(string(x), string(y))
end

include("generated/tags.jl")

function find_language_tag(tag::Tag4)
Expand Down
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using OpenType, Test
using OpenType: Tag, Tag2, Tag3, Tag4, Text, lines, extract_style_from_text, CharacterStyle
using OpenType: Text, lines, extract_style_from_text, CharacterStyle
using BinaryParsingTools
using GeometryExperiments: Point2
using Accessors: @set, @reset
using HarfBuzz_jll: libharfbuzz
Expand Down
12 changes: 0 additions & 12 deletions test/tags.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,6 @@ using OpenType: find_language_tag, find_script_tag
using Test

@testset "Tags" begin
t = Tag("FRA ")
@test isa(t, Tag4)
@test convert(Tag, "FRA ") === t
@test convert(Tag4, "FRA ") === t
@test tag"FRA " === t
@test tag"FRA" !== tag"FRA "
@test isa(tag"FRA", Tag3)
@test_throws "4-character" Tag4("FRA")
@test_throws "ASCII" Tag("FRAα")
@test uppercase(tag"fr") === tag"FR"
@test lowercase(tag"FR") === tag"fr"

@testset "Language tags" begin
res = tag"FRA "
@test find_language_tag(tag"FRA ") === res
Expand Down

0 comments on commit 5f82131

Please sign in to comment.