Skip to content

Commit

Permalink
[PERFORMANCE] Improve FileSystemFontProvider.scanFonts() performance …
Browse files Browse the repository at this point in the history
…by adding 'only headers' mode to TTF parser:

* only read tables needed for FSFontInfo ('name', 'head', 'OS/2', 'CFF ', 'gcid')
* 'CFF ' and 'head' table parsers finish as soon as they have read all needed headers
  • Loading branch information
bogdiuk committed Jun 30, 2024
1 parent cb29a4a commit 54c4eb0
Show file tree
Hide file tree
Showing 10 changed files with 618 additions and 129 deletions.
107 changes: 91 additions & 16 deletions fontbox/src/main/java/org/apache/fontbox/cff/CFFParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fontbox.ttf.FontHeaders;
import org.apache.pdfbox.io.RandomAccessRead;


Expand Down Expand Up @@ -106,13 +107,61 @@ public List<CFFFont> parse(RandomAccessRead randomAccessRead) throws IOException
}

/**
* Parse CFF font using a DataInput as input.
* Extract "Registry", "Ordering" and "Supplement" properties from the first CFF subfont.
*
* @param input the source to be parsed
* @param randomAccessRead the source to be parsed
* @param outHeaders where to put results
* @return the parsed CFF fonts
* @throws IOException If there is an error reading from the stream
*/
private List<CFFFont> parse(DataInput input) throws IOException
public void parseFirstSubFontROS(RandomAccessRead randomAccessRead, FontHeaders outHeaders) throws IOException
{
    // Condensed, merged variant of parse(RandomAccessRead) > parse(DataInput) > parseFont(...):
    // only the first top DICT is inspected, and problems detected here are reported through
    // outHeaders.setError(...) instead of being thrown.

    // preamble taken from parse(RandomAccessRead): rewind the source and wrap it
    randomAccessRead.seek(0);
    DataInput cffInput = skipHeader(new DataInputRandomAccessRead(randomAccessRead));

    // checks taken from parse(DataInput); the indexes are read in the order they are stored
    if (readStringIndexData(cffInput).length == 0)
    {
        outHeaders.setError("Name index missing in CFF font");
        return;
    }
    byte[][] topDicts = readIndexData(cffInput);
    if (topDicts.length == 0)
    {
        outHeaders.setError("Top DICT INDEX missing in CFF font");
        return;
    }

    // the 'stringIndex' field must be filled before parseROS(), which goes through readString()
    stringIndex = readStringIndexData(cffInput);

    // steps taken from parseFont(...), applied to the first top DICT only
    DictData firstTopDict = readDictData(new DataInputByteArray(topDicts[0]));
    if (firstTopDict.getEntry("SyntheticBase") != null)
    {
        outHeaders.setError("Synthetic Fonts are not supported");
        return;
    }

    CFFCIDFont cidFont = parseROS(firstTopDict);
    if (cidFont == null)
    {
        // no "ROS" entry in the top DICT: outHeaders is left untouched
        return;
    }
    outHeaders.setOtfROS(cidFont.getRegistry(), cidFont.getOrdering(), cidFont.getSupplement());
}

private DataInput skipHeader(DataInput input) throws IOException
{
String firstTag = readTagName(input);
// try to determine which kind of font we have
Expand All @@ -132,6 +181,19 @@ private List<CFFFont> parse(DataInput input) throws IOException

@SuppressWarnings("unused")
Header header = readHeader(input);
return input;
}

/**
* Parse CFF font using a DataInput as input.
*
* @param input the source to be parsed
* @return the parsed CFF fonts
* @throws IOException If there is an error reading from the stream
*/
private List<CFFFont> parse(DataInput input) throws IOException
{
input = skipHeader(input);
String[] nameIndex = readStringIndexData(input);
if (nameIndex.length == 0)
{
Expand Down Expand Up @@ -463,6 +525,28 @@ private static Double readRealNumber(DataInput input) throws IOException
}
}

/**
 * Reads the Registry, Ordering and Supplement values out of {@code topDict["ROS"]}.
 *
 * @param topDict the top DICT data to inspect
 * @return a new {@link CFFCIDFont} with registry, ordering and supplement set, or {@code null}
 * if the dictionary has no "ROS" entry
 * @throws IOException if the "ROS" entry has fewer than 3 elements
 */
private CFFCIDFont parseROS(DictData topDict) throws IOException
{
    DictData.Entry ros = topDict.getEntry("ROS");
    if (ros == null)
    {
        // no "ROS" entry: parseFont(...) takes this as "not a CIDFont"
        return null;
    }
    if (ros.size() < 3)
    {
        throw new IOException("ROS entry must have 3 elements");
    }

    CFFCIDFont cidFont = new CFFCIDFont();
    // registry and ordering are looked up through readString(); supplement is used as a number
    cidFont.setRegistry(readString(ros.getNumber(0).intValue()));
    cidFont.setOrdering(readString(ros.getNumber(1).intValue()));
    cidFont.setSupplement(ros.getNumber(2).intValue());
    return cidFont;
}

private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) throws IOException
{
// top dict
Expand All @@ -476,21 +560,12 @@ private CFFFont parseFont(DataInput input, String name, byte[] topDictIndex) thr
throw new IOException("Synthetic Fonts are not supported");
}

// determine if this is a Type 1-equivalent font or a CIDFont
CFFFont font;
boolean isCIDFont = topDict.getEntry("ROS") != null;
if (isCIDFont)
CFFCIDFont cffCIDFont = parseROS(topDict);
// determine if this is a Type 1-equivalent font or a CIDFont
boolean isCIDFont = cffCIDFont != null;
if (cffCIDFont != null)
{
CFFCIDFont cffCIDFont = new CFFCIDFont();
DictData.Entry rosEntry = topDict.getEntry("ROS");
if (rosEntry == null || rosEntry.size() < 3)
{
throw new IOException("ROS entry must have 3 elements");
}
cffCIDFont.setRegistry(readString(rosEntry.getNumber(0).intValue()));
cffCIDFont.setOrdering(readString(rosEntry.getNumber(1).intValue()));
cffCIDFont.setSupplement(rosEntry.getNumber(2).intValue());

font = cffCIDFont;
}
else
Expand Down
23 changes: 23 additions & 0 deletions fontbox/src/main/java/org/apache/fontbox/ttf/CFFTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.io.IOException;
import org.apache.fontbox.cff.CFFFont;
import org.apache.fontbox.cff.CFFParser;
import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.io.RandomAccessReadBuffer;

/**
* PostScript font program (compact font format).
Expand Down Expand Up @@ -56,6 +58,27 @@ void read(TrueTypeFont ttf, TTFDataStream data) throws IOException
initialized = true;
}

/** {@inheritDoc} */
@Override
void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException
{
    // the sub view (which may be null) is closed by try-with-resources
    try (RandomAccessRead subView = data.createSubView(getLength()))
    {
        RandomAccessRead source = subView;
        if (source == null)
        {
            // fallback when no sub view is provided: copy the table into memory;
            // flagged with an assertion because it defeats the purpose of the headers-only mode
            assert false : "It is inefficient to read TTFDataStream into an array";
            source = new RandomAccessReadBuffer(data.read((int) getLength()));
        }
        // only the ROS of the first CFF sub font is extracted into outHeaders
        new CFFParser().parseFirstSubFontROS(source, outHeaders);
    }
}

/**
* Returns the CFF font, which is a compact representation of a PostScript Type 1, or CIDFont
*
Expand Down
152 changes: 152 additions & 0 deletions fontbox/src/main/java/org/apache/fontbox/ttf/FontHeaders.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.fontbox.ttf;

/**
* To improve performance of {@code FileSystemFontProvider.scanFonts(...)},
* this class is used both as a marker (to skip unused data) and as a storage for collected data.
* <p>
* Tables it needs:<ul>
* <li>NamingTable.TAG
* <li>HeaderTable.TAG
* <li>OS2WindowsMetricsTable.TAG
* <li>CFFTable.TAG (for OTF)
* <li>"gcid" (for non-OTF)
* </ul>
*
* @author Mykola Bohdiuk
*/
public final class FontHeaders
{
    /** Size used for the "gcid" table data; presumably the number of bytes kept - TODO confirm against the reader. */
    static final int BYTES_GCID = 142;

    private String error;
    private String name;
    private Integer headerMacStyle;
    private OS2WindowsMetricsTable os2Windows;
    private String fontFamily;
    private String fontSubFamily;
    private byte[] nonOtfGcid142;
    // OTF-only data
    private boolean isOTFAndPostScript;
    private String otfRegistry;
    private String otfOrdering;
    private int otfSupplement;

    // ---- error -------------------------------------------------------------------------------

    /**
     * @return the error message stored by {@link #setError(String)}, or {@code null} if none was set
     */
    public String getError()
    {
        return error;
    }

    /**
     * Stores an error message describing why the headers could not be collected.
     *
     * @param exception the error message
     */
    public void setError(String exception)
    {
        this.error = exception;
    }

    // ---- name --------------------------------------------------------------------------------

    /**
     * @return the stored name, or {@code null} if none was set
     */
    public String getName()
    {
        return name;
    }

    void setName(String name)
    {
        this.name = name;
    }

    // ---- 'head' table ------------------------------------------------------------------------

    /**
     * null == no HeaderTable, {@code ttf.getHeader().getMacStyle()}
     */
    public Integer getHeaderMacStyle()
    {
        return headerMacStyle;
    }

    void setHeaderMacStyle(Integer headerMacStyle)
    {
        this.headerMacStyle = headerMacStyle;
    }

    // ---- 'OS/2' table ------------------------------------------------------------------------

    /**
     * @return the stored OS/2 table, or {@code null} if none was set
     */
    public OS2WindowsMetricsTable getOS2Windows()
    {
        return os2Windows;
    }

    void setOs2Windows(OS2WindowsMetricsTable os2Windows)
    {
        this.os2Windows = os2Windows;
    }

    // ---- family names ------------------------------------------------------------------------
    // only when LOGGER(FileSystemFontProvider).isTraceEnabled() tracing: FontFamily, FontSubfamily

    public String getFontFamily()
    {
        return fontFamily;
    }

    public String getFontSubFamily()
    {
        return fontSubFamily;
    }

    /**
     * Stores the font family and sub family in one call.
     */
    void setFontFamily(String fontFamily, String fontSubFamily)
    {
        this.fontFamily = fontFamily;
        this.fontSubFamily = fontSubFamily;
    }

    // ---- "gcid" table (non-OTF) --------------------------------------------------------------

    /**
     * @return the stored "gcid" bytes (the array itself, not a copy), or {@code null} if none was set
     */
    public byte[] getNonOtfTableGCID142()
    {
        return nonOtfGcid142;
    }

    void setNonOtfGcid142(byte[] nonOtfGcid142)
    {
        this.nonOtfGcid142 = nonOtfGcid142;
    }

    // ---- OTF / CFF data ----------------------------------------------------------------------

    public boolean isOpenTypePostScript()
    {
        return isOTFAndPostScript;
    }

    void setIsOTFAndPostScript(boolean isOTFAndPostScript)
    {
        this.isOTFAndPostScript = isOTFAndPostScript;
    }

    /**
     * @return the registry stored by {@link #setOtfROS(String, String, int)}, or {@code null}
     */
    public String getOtfRegistry()
    {
        return otfRegistry;
    }

    /**
     * @return the ordering stored by {@link #setOtfROS(String, String, int)}, or {@code null}
     */
    public String getOtfOrdering()
    {
        return otfOrdering;
    }

    /**
     * @return the supplement stored by {@link #setOtfROS(String, String, int)}, 0 if never set
     */
    public int getOtfSupplement()
    {
        return otfSupplement;
    }

    /**
     * Stores Registry, Ordering and Supplement in one call.
     * Public because CFFParser is in a different package.
     */
    public void setOtfROS(String otfRegistry, String otfOrdering, int otfSupplement)
    {
        this.otfRegistry = otfRegistry;
        this.otfOrdering = otfOrdering;
        this.otfSupplement = otfSupplement;
    }
}
10 changes: 10 additions & 0 deletions fontbox/src/main/java/org/apache/fontbox/ttf/HeaderTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ public class HeaderTable extends TTFTable
super();
}

/** {@inheritDoc} */
@Override
void readHeaders(TrueTypeFont ttf, TTFDataStream data, FontHeaders outHeaders) throws IOException
{
    // bytes between the current position and macStyle, field by field (== 44), see read()
    final int bytesBeforeMacStyle = 4 + 4 + 4 + 4 + 2 + 2 + 2 * 8 + 4 * 2;
    data.seek(data.getCurrentPosition() + bytesBeforeMacStyle);

    // only macStyle is read here; the fields in front of it are skipped
    macStyle = data.readUnsignedShort();
    outHeaders.setHeaderMacStyle(macStyle);
}

/**
* This will read the required data from the stream.
*
Expand Down
Loading

0 comments on commit 54c4eb0

Please sign in to comment.