diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSBase.java b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSBase.java index cf96c2cde9e..31e217267e2 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSBase.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSBase.java @@ -29,6 +29,7 @@ public abstract class COSBase implements COSObjectable { private boolean direct; private COSObjectKey key; + private COSObject referencedObject; /** * Constructor. @@ -98,4 +99,13 @@ public void setKey(COSObjectKey key) this.key = key; } + public COSObject getReferencedObject() + { + return referencedObject; + } + + public void setReferencedObject(COSObject referencedObject) + { + this.referencedObject = referencedObject; + } } diff --git a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObject.java b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObject.java index f65b2c57cbc..34217e6c8f1 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObject.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/cos/COSObject.java @@ -106,24 +106,49 @@ public boolean isObjectNull() */ public COSBase getObject() { - if (!isDereferenced && parser != null) + COSBase dereferencedObject = dereferenceObject(); + + if(dereferencedObject != null) + { + isDereferenced = true; + baseObject = dereferencedObject; + } + + return baseObject; + } + + public COSBase getObjectWithoutCaching() { + if(baseObject != null) { + return baseObject; + } + + return dereferenceObject(); + } + + private COSBase dereferenceObject() + { + COSBase dereferencedObject = null; + + if(!isDereferenced && parser != null) { try { // mark as dereferenced to avoid endless recursions - isDereferenced = true; - baseObject = parser.dereferenceCOSObject(this); + dereferencedObject = parser.dereferenceCOSObject(this); + + if(dereferencedObject != null) + { + dereferencedObject.setReferencedObject(this); + } + } - catch (IOException e) + catch(IOException e) { LOG.error("Can't dereference " + this, e); } - finally - { - parser = null; - } } - return baseObject; + + return dereferencedObject; } /** diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java index 5b0732c797e..b5b5681c526 100644 --- a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java @@ -30,9 +30,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Deque; -import java.util.HashMap; import java.util.HashSet; -import java.util.Hashtable; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -50,7 +48,6 @@ import org.apache.pdfbox.cos.COSInteger; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNull; -import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; @@ -185,15 +182,6 @@ public class COSWriter implements ICOSVisitor // indicates whether existing object keys should be reused or not private boolean reuseObjectNumbers = true; - // maps the object to the keys generated in the writer - // these are used for indirect references in other objects - //A hashtable is used on purpose over a hashmap - //so that null entries will not get added. - @SuppressWarnings({"squid:S1149"}) - private final Map objectKeys = new Hashtable<>(); - - private final Map keyObject = new HashMap<>(); - // the list of x ref entries to be made so far private final List xRefEntries = new ArrayList<>(); @@ -231,6 +219,8 @@ public class COSWriter implements ICOSVisitor private final CompressParameters compressParameters; private boolean blockAddingObject = false; + private COSWriterObjectStorage objectStorage; + /** * COSWriter constructor. * @@ -253,6 +243,7 @@ public COSWriter(OutputStream outputStream, CompressParameters compressParameter setOutput(outputStream); setStandardOutput(new COSStandardOutputStream(output)); this.compressParameters = compressParameters; + this.objectStorage = new COSWriterObjectStorage(); } /** @@ -279,6 +270,8 @@ public COSWriter(OutputStream outputStream, RandomAccessRead inputData) throws I incrementalInput = inputData; incrementalOutput = outputStream; incrementalUpdate = true; + + this.objectStorage = new COSWriterObjectStorage(); } /** @@ -323,24 +316,6 @@ public boolean isCompress() { return compressParameters != null && compressParameters.isCompress(); } - - private void prepareIncrement() - { - COSDocument cosDoc = pdDocument.getDocument(); - Set keySet = cosDoc.getXrefTable().keySet(); - for (COSObjectKey cosObjectKey : keySet) - { - COSBase object = cosDoc.getObjectFromPool(cosObjectKey).getObject(); - if (object != null && cosObjectKey != null && !(object instanceof COSNumber)) - { - // FIXME see PDFBOX-4997: objectKeys is (theoretically) risky because a COSName in - // different objects would appear only once. Rev 1092855 considered this - // but only for COSNumber. - objectKeys.put(object, cosObjectKey); - keyObject.put(cosObjectKey, object); - } - } - } /** * add an entry in the x ref table for later dump. @@ -474,21 +449,19 @@ private void doWriteBodyCompressed(COSDocument document) throws IOException { COSBase object = compressionPool.getObject(key); writtenObjects.add(object); - objectKeys.put(object, key); - keyObject.put(key, object); + objectStorage.put(key, object); } // Append top level objects to document. for (COSObjectKey key : compressionPool.getTopLevelObjects()) { COSBase object = compressionPool.getObject(key); writtenObjects.add(object); - objectKeys.put(object, key); - keyObject.put(key, object); + objectStorage.put(key, object); } for (COSObjectKey key : compressionPool.getTopLevelObjects()) { currentObjectKey = key; - doWriteObject(key, keyObject.get(key)); + doWriteObject(key, objectStorage.getObject(key)); } // Append object streams to document. number = compressionPool.getHighestXRefObjectNumber(); @@ -520,8 +493,7 @@ private void doWriteBodyCompressed(COSDocument document) throws IOException COSObjectKey encryptKey = new COSObjectKey(++number, 0); currentObjectKey = encryptKey; writtenObjects.add(encrypt); - keyObject.put(encryptKey, encrypt); - objectKeys.put(encrypt, encryptKey); + objectStorage.put(encryptKey, encrypt); doWriteObject(encryptKey, encrypt); } @@ -543,11 +515,7 @@ private void addObjectToWrite( COSBase object ) { return; } - COSBase actual = object; - if( actual instanceof COSObject ) - { - actual = ((COSObject)actual).getObject(); - } + COSBase actual = objectStorage.convertToActual(object); if (!writtenObjects.contains(object) // && !objectsToWrite.contains(object) // @@ -557,13 +525,13 @@ private void addObjectToWrite( COSBase object ) COSObjectKey cosObjectKey = null; if (actual != null) { - cosObjectKey = objectKeys.get(actual); + cosObjectKey = objectStorage.getKey(actual); } if (cosObjectKey != null) { - cosBase = keyObject.get(cosObjectKey); + cosBase = objectStorage.getObject(cosObjectKey); } - if (actual != null && objectKeys.containsKey(actual) && !isNeedToBeUpdated(object) + if (actual != null && objectStorage.getKey(actual) != null && !isNeedToBeUpdated(object) && !isNeedToBeUpdated(cosBase)) { return; @@ -1052,7 +1020,7 @@ else if (last == -2) } return list.toArray(new Long[list.size()]); } - + /** * This will get the object key for the object. * @@ -1062,25 +1030,42 @@ else if (last == -2) */ private COSObjectKey getObjectKey( COSBase obj ) { - COSBase actual = obj; - if( actual instanceof COSObject ) + if( + reuseObjectNumbers && + obj instanceof COSObject && + obj.getKey() != null + ) { - if (reuseObjectNumbers) - { - COSObjectKey key = obj.getKey(); - if (key != null) - { - objectKeys.put(obj, key); - return key; - } - } - actual = ((COSObject) obj).getObject(); + objectStorage.put(obj.getKey(), obj); + return obj.getKey(); } + + COSBase actual = objectStorage.convertToActual(obj); + // PDFBOX-4540: because objectKeys is accessible from outside, it is possible // that a COSObject obj is already in the objectKeys map. - return objectKeys.computeIfAbsent(actual, k -> new COSObjectKey(++number, 0)); + COSObjectKey key = objectStorage.getKey(obj); + if( key == null && actual != null ) + { + key = objectStorage.getKey(actual); + } + if (key == null) + { + key = new COSObjectKey(++number, 0); + if( actual != null ) + { + objectStorage.put(key, actual); + } + else + { + objectStorage.put(key, obj); + } + } + + return key; } + @Override public Object visitFromArray( COSArray obj ) throws IOException { @@ -1103,7 +1088,7 @@ public Object visitFromArray( COSArray obj ) throws IOException } else if( current instanceof COSObject ) { - COSBase subValue = ((COSObject)current).getObject(); + COSBase subValue = objectStorage.convertToActual(current); if (willEncrypt || incrementalUpdate // || subValue instanceof COSDictionary // || subValue instanceof COSArray // @@ -1208,7 +1193,7 @@ public Object visitFromDictionary(COSDictionary obj) throws IOException } else if( value instanceof COSObject ) { - COSBase subValue = ((COSObject)value).getObject(); + COSBase subValue = objectStorage.convertToActual(value); if (willEncrypt || incrementalUpdate // || subValue instanceof COSDictionary // || subValue instanceof COSArray // @@ -1470,7 +1455,7 @@ public void write(PDDocument doc, SignatureInterface signInterface) throws IOExc number = pdDocument.getDocument().getHighestXRefObjectNumber(); if(incrementalUpdate) { - prepareIncrement(); + objectStorage.setDocument(doc.getDocument()); } Long idTime = pdDocument.getDocumentId() == null ? System.currentTimeMillis() : pdDocument.getDocumentId(); diff --git a/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterObjectStorage.java b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterObjectStorage.java new file mode 100644 index 00000000000..047a3660df2 --- /dev/null +++ b/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriterObjectStorage.java @@ -0,0 +1,100 @@ +package org.apache.pdfbox.pdfwriter; + +import org.apache.pdfbox.cos.*; + +import java.util.HashMap; +import java.util.Map; + +/** + * Bidirectional COSObject-COSObjectKey Storage for COSWriter. + */ +class COSWriterObjectStorage +{ + private final Map objectToKeyMap; + + private final Map keyToObjectMap; + + private COSDocument document; + + public COSWriterObjectStorage() { + this.objectToKeyMap = new HashMap<>(); + this.keyToObjectMap = new HashMap<>(); + } + + public void setDocument(COSDocument document) { + this.document = document; + } + + public COSBase getObject(COSObjectKey key) + { + if(key == null) + { + throw new IllegalArgumentException("The key must not be null."); + } + + if(document != null && document.getXrefTable().containsKey(key)) + { + COSBase object = document.getObjectFromPool(key); + return convertToActual(object); + } + + return keyToObjectMap.get(key); + } + + public COSObjectKey getKey(COSBase object) + { + if(object == null) + { + throw new IllegalArgumentException("The object must not be null."); + } + + if(document != null && object.getReferencedObject() != null) + { + COSObject referencedObject = object.getReferencedObject(); + COSObjectKey key = new COSObjectKey(referencedObject); + + if(referencedObject.equals(keyToObjectMap.get(key))) + { + return key; + } + else if( + document.getXrefTable().containsKey(key) && + document.getObjectFromPool(key).equals(referencedObject) + ) + { + return key; + } + + return null; + } + + return objectToKeyMap.get(object); + } + + public void put(COSObjectKey key, COSBase object) + { + if(key == null) + { + throw new IllegalArgumentException("The key must not be null."); + } + if(object == null) + { + throw new IllegalArgumentException("The object must not be null."); + } + + if(document != null && object.getReferencedObject() != null) + { + object = object.getReferencedObject(); + } + + objectToKeyMap.put(object, key); + keyToObjectMap.put(key, object); + } + + public COSBase convertToActual(COSBase object) + { + return object instanceof COSObject ? + ((COSObject) object).getObjectWithoutCaching() : + object; + } +} diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterObjectStorageTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterObjectStorageTest.java new file mode 100644 index 00000000000..6b58236f68b --- /dev/null +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterObjectStorageTest.java @@ -0,0 +1,158 @@ +package org.apache.pdfbox.pdfwriter; + +import org.apache.pdfbox.cos.*; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import java.util.Collections; +import java.util.HashMap; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.BDDMockito.given; + +class COSWriterObjectStorageTest { + + private COSWriterObjectStorage storage; + private COSObjectKey key1; + private COSObjectKey key2; + private COSObjectKey key3; + + private COSDocument document; + + private COSBase base1; + + private COSObject object1; + private COSObject object2; + private COSObject object3; + + @BeforeEach + void setUp() { + storage = new COSWriterObjectStorage(); + document = Mockito.mock(COSDocument.class); + + key1 = new COSObjectKey(1L, 1); + key2 = new COSObjectKey(2L, 2); + key3 = new COSObjectKey(3L, 3); + + object1 = Mockito.mock(COSObject.class); + object2 = Mockito.mock(COSObject.class); + object3 = Mockito.mock(COSObject.class); + + base1 = Mockito.mock(COSBase.class); + } + + @Test + void testObjectStorage_noDocumentSet_storesKeysAndObjectsBidirectionally() { + storage.put(key1, object1); + storage.put(key2, object2); + + assertAll( + () -> assertEquals(key1, storage.getKey(object1)), + () -> assertEquals(key2, storage.getKey(object2)), + () -> assertEquals(object1, storage.getObject(key1)), + () -> assertEquals(object2, storage.getObject(key2)) + ); + } + + @Test + void testGetObject_documentWasSet_returnsActualObjectFromDocumentObjectPoolIfExistent() { + storage.setDocument(document); + storage.put(key1, object2); + storage.put(key2, object2); + + given(object1.getObjectWithoutCaching()).willReturn(base1); + given(document.getXrefTable()).willReturn(Collections.singletonMap(key1, null)); + given(document.getObjectFromPool(key1)).willReturn(object1); + + assertAll( + // Key1 exists in document. + () -> assertEquals(base1, storage.getObject(key1)), + + // Key2 exists in bidirectional map + () -> assertEquals(object2, storage.getObject(key2)), + + // Key 3 does not exist. + () -> assertNull(storage.getObject(key3)) + ); + } + + @Test + void testGetKey_documentWasSet_returnsKeyStoredInMapOrDocument() { + storage.setDocument(document); + + COSObject referencedObject1 = Mockito.mock(COSObject.class); + storage.put(key1, referencedObject1); + + given(object1.getReferencedObject()).willReturn(referencedObject1); + given(referencedObject1.getObjectNumber()).willReturn(1L); + given(referencedObject1.getGenerationNumber()).willReturn(1); + + COSObject referencedObject2 = Mockito.mock(COSObject.class); + given(object2.getReferencedObject()).willReturn(referencedObject2); + given(referencedObject2.getObjectNumber()).willReturn(2L); + given(referencedObject2.getGenerationNumber()).willReturn(2); + + given(document.getXrefTable()).willReturn(Collections.singletonMap(key2, null)); + given(document.getObjectFromPool(key2)).willReturn(referencedObject2); + + given(object3.getReferencedObject()).willReturn(Mockito.mock(COSObject.class)); + + assertAll( + // Referenced object exists in bidirectional map. + () -> assertEquals(key1, storage.getKey(object1)), + + // Referenced object exists in document. + () -> assertEquals(key2, storage.getKey(object2)), + + // Referenced was de-referenced, but neither exists in map nor document object pool. + () -> assertNull(storage.getKey(object3)) + ); + } + + @Test + void testPut_documentWasSet_putsReferencedObjectIfExistent() { + storage.setDocument(document); + given(object2.getReferencedObject()).willReturn(object3); + + storage.put(key1, object1); + storage.put(key2, object2); + + assertAll( + () -> assertEquals(object1, storage.getObject(key1)), + () -> assertEquals(object3, storage.getObject(key2)), + () -> assertEquals(key1, storage.getKey(object1)), + () -> assertNull(storage.getKey(object2)), + () -> assertEquals(key2, storage.getKey(object3)) + ); + } + + @Test + void testConvertToActual() { + given(object1.getObjectWithoutCaching()).willReturn(base1); + + assertAll( + () -> assertEquals(base1, storage.convertToActual(object1)), + () -> assertEquals(base1, storage.convertToActual(base1)) + ); + } + + @Test + void testGetObject_throwExceptionForNullKey() { + assertThrows(IllegalArgumentException.class, () -> storage.getObject(null)); + } + + @Test + void testGetKey_throwExceptionForNullObject() { + assertThrows(IllegalArgumentException.class, () -> storage.getKey(null)); + } + + @Test + void testPut_throwExceptionForNullObjectOrKey() { + assertAll( + () -> assertThrows(IllegalArgumentException.class, () -> storage.put(key1,null)), + () -> assertThrows(IllegalArgumentException.class, () -> storage.put(null,object1)), + () -> assertThrows(IllegalArgumentException.class, () -> storage.put(null,null)) + ); + } +} \ No newline at end of file diff --git a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java index 01db88f50d4..7100914e4d1 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java @@ -52,7 +52,6 @@ * as the test results need manual inspection. Enable as needed. * */ -@Execution(ExecutionMode.CONCURRENT) class PDAcroFormFlattenTest {