Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Object stream fix #1

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 41 additions & 25 deletions peepdf/PDFCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,14 @@ def setElements(self, newElements):
ret = self.update()
return ret

def getJSCode(self):
    '''
    Accessor for the Javascript code stored on this object

    @return: The object's JSCode attribute (an array of Javascript code sections)
    '''
    jsSections = self.JSCode
    return jsSections


class PDFDictionary(PDFObject):
def __init__(self, rawContent='', elements={}, rawNames={}):
Expand Down Expand Up @@ -3000,29 +3008,31 @@ def update(self, modifiedCompressedObjects=False, onlyElements=False, decrypt=Fa
if self.isEncodedStream:
self.decode()
self.size = len(self.rawStream)
offsetsSection = self.decodedStream[:self.firstObjectOffset]
objectsSection = self.decodedStream[self.firstObjectOffset:]
numbers = re.findall('\d{1,10}', offsetsSection)
if numbers != [] and len(numbers) % 2 == 0:
for i in range(0, len(numbers), 2):
id = int(numbers[i])
offset = int(numbers[i+1])
ret = PDFParser().readObject(objectsSection[offset:])
if ret[0] == -1:
if isForceMode:
object = None
self.addError(ret[1])

if not self.updateNeeded:
offsetsSection = self.decodedStream[:self.firstObjectOffset]
objectsSection = self.decodedStream[self.firstObjectOffset:]
numbers = re.findall('\d{1,10}', offsetsSection)
if numbers != [] and len(numbers) % 2 == 0:
for i in range(0, len(numbers), 2):
id = int(numbers[i])
offset = int(numbers[i+1])
ret = PDFParser().readObject(objectsSection[offset:])
if ret[0] == -1:
if isForceMode:
object = None
self.addError(ret[1])
else:
return ret
else:
return ret
else:
object = ret[1]
self.compressedObjectsDict[id] = [offset, object]
self.indexes.append(id)
else:
if isForceMode:
self.addError('Missing offsets in object stream')
object = ret[1]
self.compressedObjectsDict[id] = [offset, object]
self.indexes.append(id)
else:
return (-1, 'Missing offsets in object stream')
if isForceMode:
self.addError('Missing offsets in object stream')
else:
return (-1, 'Missing offsets in object stream')
elif modifiedCompressedObjects:
tmpStreamObjects = ''
tmpStreamObjectsInfo = ''
Expand Down Expand Up @@ -3249,8 +3259,9 @@ def resolveReferences(self):
numbers = re.findall('\d{1,10}', offsetsSection)
if numbers != [] and len(numbers) % 2 == 0:
for i in range(0, len(numbers), 2):
offset = numbers[i+1]
ret = PDFParser.readObject(objectsSection[offset:])
id = int(numbers[i])
offset = int(numbers[i+1])
ret = PDFParser().readObject(objectsSection[offset:])
if ret[0] == -1:
if isForceMode:
object = None
Expand All @@ -3259,7 +3270,8 @@ def resolveReferences(self):
return ret
else:
object = ret[1]
self.compressedObjectsDict[numbers[i]] = [offset, object]
self.compressedObjectsDict[id] = [offset, object]
self.indexes.append(id)
else:
errorMessage = 'Missing offsets in object stream'
if isForceMode:
Expand Down Expand Up @@ -7845,6 +7857,11 @@ def readObject(self, content, objectType=None, forceMode=False, looseMode=False)
pdfObject = None
oldCounter = self.charCounter
self.charCounter = 0
# skip leading whitespace in case of sloppy reference offsets
self.readSpaces(content)
if self.charCounter > 0:
content = content[self.charCounter:]
self.charCounter = 0
if objectType is not None:
objectsTypeArray = [self.delimiters[i][2] for i in range(len(self.delimiters))]
index = objectsTypeArray.index(objectType)
Expand Down Expand Up @@ -8153,7 +8170,6 @@ def readUntilSymbol(self, string, symbol):

newString = string[self.charCounter:]

self.charCounter = 0
index = newString.find(symbol)
if index == -1:
errorMessage = 'Symbol "'+symbol+'" not found'
Expand Down