Update matching logic: AI scores all candidates, lower threshold, absolute amount, prompt improvements
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
from typing import Dict, List, Union
|
||||
|
||||
from .._utils import StreamType, deprecate_with_replacement
|
||||
from ..constants import OutlineFontFlag
|
||||
from ._annotations import AnnotationBuilder
|
||||
from ._base import (
|
||||
BooleanObject,
|
||||
ByteStringObject,
|
||||
FloatObject,
|
||||
IndirectObject,
|
||||
NameObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
PdfObject,
|
||||
TextStringObject,
|
||||
encode_pdfdocencoding,
|
||||
)
|
||||
from ._data_structures import (
|
||||
ArrayObject,
|
||||
ContentStream,
|
||||
DecodedStreamObject,
|
||||
Destination,
|
||||
DictionaryObject,
|
||||
EncodedStreamObject,
|
||||
Field,
|
||||
StreamObject,
|
||||
TreeObject,
|
||||
read_object,
|
||||
)
|
||||
from ._fit import Fit
|
||||
from ._outline import Bookmark, OutlineItem
|
||||
from ._rectangle import RectangleObject
|
||||
from ._utils import (
|
||||
create_string_object,
|
||||
decode_pdfdocencoding,
|
||||
hex_to_rgb,
|
||||
read_hex_string_from_stream,
|
||||
read_string_from_stream,
|
||||
)
|
||||
|
||||
|
||||
def readHexStringFromStream(
    stream: StreamType,
) -> Union["TextStringObject", "ByteStringObject"]:  # pragma: no cover
    """
    Deprecated camelCase alias of ``read_hex_string_from_stream``.

    Reports the deprecation through ``deprecate_with_replacement`` (naming
    "4.0.0" as the version argument) and then forwards ``stream`` unchanged
    to ``read_hex_string_from_stream``.

    :param StreamType stream: stream handed on to the replacement function
    :return: whatever ``read_hex_string_from_stream`` returns
    """
    old_name = "readHexStringFromStream"
    new_name = "read_hex_string_from_stream"
    deprecate_with_replacement(old_name, new_name, "4.0.0")
    result = read_hex_string_from_stream(stream)
    return result
|
||||
|
||||
|
||||
def readStringFromStream(
    stream: StreamType,
    forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:  # pragma: no cover
    """
    Deprecated camelCase alias of ``read_string_from_stream``.

    Reports the deprecation through ``deprecate_with_replacement`` (naming
    "4.0.0" as the version argument) and then forwards both arguments
    unchanged to ``read_string_from_stream``.

    :param StreamType stream: stream handed on to the replacement function
    :param forced_encoding: passed through untouched as the second argument
    :return: whatever ``read_string_from_stream`` returns
    """
    old_name = "readStringFromStream"
    new_name = "read_string_from_stream"
    deprecate_with_replacement(old_name, new_name, "4.0.0")
    result = read_string_from_stream(stream, forced_encoding)
    return result
|
||||
|
||||
|
||||
def createStringObject(
    string: Union[str, bytes],
    forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
) -> Union[TextStringObject, ByteStringObject]:  # pragma: no cover
    """
    Deprecated camelCase alias of ``create_string_object``.

    Reports the deprecation through ``deprecate_with_replacement`` (naming
    "4.0.0" as the version argument) and then forwards both arguments
    unchanged to ``create_string_object``.

    :param string: text or bytes handed on to the replacement function
    :param forced_encoding: passed through untouched as the second argument
    :return: whatever ``create_string_object`` returns
    """
    old_name = "createStringObject"
    new_name = "create_string_object"
    deprecate_with_replacement(old_name, new_name, "4.0.0")
    result = create_string_object(string, forced_encoding)
    return result
|
||||
|
||||
|
||||
# Module-level constant holding the value returned by ``Fit.fit()``.
PAGE_FIT = Fit.fit()


# Public names of this package, grouped by the kind of object they denote.
__all__ = [
    # Base types
    "BooleanObject",
    "FloatObject",
    "NumberObject",
    "NameObject",
    "IndirectObject",
    "NullObject",
    "PdfObject",
    "TextStringObject",
    "ByteStringObject",
    # Annotations
    "AnnotationBuilder",
    # Fit
    "Fit",
    "PAGE_FIT",
    # Data structures
    "ArrayObject",
    "DictionaryObject",
    "TreeObject",
    "StreamObject",
    "DecodedStreamObject",
    "EncodedStreamObject",
    "ContentStream",
    "RectangleObject",
    "Field",
    "Destination",
    # --- More specific stuff
    # Outline
    "OutlineItem",
    "OutlineFontFlag",
    "Bookmark",
    # Data structures core functions
    "read_object",
    # Utility functions
    "create_string_object",
    "encode_pdfdocencoding",
    "decode_pdfdocencoding",
    "hex_to_rgb",
    "read_hex_string_from_stream",
    "read_string_from_stream",
]
|
||||
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,275 @@
|
||||
from typing import Optional, Tuple, Union
|
||||
|
||||
from ._base import (
|
||||
BooleanObject,
|
||||
FloatObject,
|
||||
NameObject,
|
||||
NumberObject,
|
||||
TextStringObject,
|
||||
)
|
||||
from ._data_structures import ArrayObject, DictionaryObject
|
||||
from ._fit import DEFAULT_FIT, Fit
|
||||
from ._rectangle import RectangleObject
|
||||
from ._utils import hex_to_rgb
|
||||
|
||||
|
||||
class AnnotationBuilder:
    """
    The AnnotationBuilder creates dictionaries representing PDF annotations.

    Those dictionaries can be modified before they are added to a PdfWriter
    instance via `writer.add_annotation`.

    See `adding PDF annotations <../user/adding-pdf-annotations.html>`_ for
    its usage combined with PdfWriter.
    """

    from ..types import FitType, ZoomArgType

    @staticmethod
    def text(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        text: str,
        open: bool = False,
        flags: int = 0,
    ) -> DictionaryObject:
        """
        Add text annotation.

        :param Tuple[int, int, int, int] rect:
            or array of four integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``
        :param str text: stored as the ``/Contents`` entry
        :param bool open: stored as the ``/Open`` entry
        :param int flags: stored as the ``/Flags`` entry
        :return: the ``/Text`` annotation dictionary
        """
        # TABLE 8.23 Additional entries specific to a text annotation
        text_obj = DictionaryObject(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Text"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Contents"): TextStringObject(text),
                NameObject("/Open"): BooleanObject(open),
                NameObject("/Flags"): NumberObject(flags),
            }
        )
        return text_obj

    @staticmethod
    def free_text(
        text: str,
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        font: str = "Helvetica",
        bold: bool = False,
        italic: bool = False,
        font_size: str = "14pt",
        font_color: str = "000000",
        border_color: str = "000000",
        background_color: str = "ffffff",
    ) -> DictionaryObject:
        """
        Add text in a rectangle to a page.

        :param str text: Text to be added
        :param RectangleObject rect: or array of four integers
            specifying the clickable rectangular area ``[xLL, yLL, xUR, yUR]``
        :param str font: Name of the Font, e.g. 'Helvetica'
        :param bool bold: Print the text in bold
        :param bool italic: Print the text in italic
        :param str font_size: How big the text will be, e.g. '14pt'
        :param str font_color: Hex-string for the color
        :param str border_color: Hex-string for the border color
        :param str background_color: Hex-string for the background of the annotation
        :return: the ``/FreeText`` annotation dictionary
        """
        # Style string stored under /DS. The identity checks against True
        # are kept on purpose: only a real ``True`` switches a modifier on.
        style_parts = ["font: "]
        if bold is True:
            style_parts.append("bold ")
        if italic is True:
            style_parts.append("italic ")
        style_parts.append(font + " " + font_size)
        style_parts.append(";text-align:left;color:#" + font_color)
        font_str = "".join(style_parts)

        # String stored under /DA: the components returned by hex_to_rgb for
        # the *border* colour, each followed by a space, then "rg".
        border_color_str = (
            "".join(str(component) + " " for component in hex_to_rgb(border_color))
            + "rg"
        )

        free_text = DictionaryObject()
        free_text.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/FreeText"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Contents"): TextStringObject(text),
                # font size color
                NameObject("/DS"): TextStringObject(font_str),
                # border color
                NameObject("/DA"): TextStringObject(border_color_str),
                # background color
                NameObject("/C"): ArrayObject(
                    [FloatObject(n) for n in hex_to_rgb(background_color)]
                ),
            }
        )
        return free_text

    @staticmethod
    def line(
        p1: Tuple[float, float],
        p2: Tuple[float, float],
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        text: str = "",
        title_bar: str = "",
    ) -> DictionaryObject:
        """
        Draw a line on the PDF.

        :param Tuple[float, float] p1: First point
        :param Tuple[float, float] p2: Second point
        :param RectangleObject rect: or array of four
            integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``
        :param str text: Text to be displayed as the line annotation
        :param str title_bar: Text to be displayed in the title bar of the
            annotation; by convention this is the name of the author
        :return: the ``/Line`` annotation dictionary
        """
        line_obj = DictionaryObject(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Line"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/T"): TextStringObject(title_bar),
                NameObject("/L"): ArrayObject(
                    [
                        FloatObject(p1[0]),
                        FloatObject(p1[1]),
                        FloatObject(p2[0]),
                        FloatObject(p2[1]),
                    ]
                ),
                # Line-ending styles for both ends. The PDF name is "/None";
                # passing the Python ``None`` object would produce the text
                # "None" without the leading slash a PDF name requires.
                NameObject("/LE"): ArrayObject(
                    [
                        NameObject("/None"),
                        NameObject("/None"),
                    ]
                ),
                NameObject("/IC"): ArrayObject(
                    [
                        FloatObject(0.5),
                        FloatObject(0.5),
                        FloatObject(0.5),
                    ]
                ),
                NameObject("/Contents"): TextStringObject(text),
            }
        )
        return line_obj

    @staticmethod
    def rectangle(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        interiour_color: Optional[str] = None,
    ) -> DictionaryObject:
        """
        Draw a rectangle on the PDF.

        :param RectangleObject rect: or array of four
            integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``
        :param str interiour_color: hex-string passed to ``hex_to_rgb``; when
            truthy, the result is stored as the ``/IC`` entry. ``None`` or an
            empty string leaves ``/IC`` out.
        :return: the ``/Square`` annotation dictionary
        """
        square_obj = DictionaryObject(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Square"),
                NameObject("/Rect"): RectangleObject(rect),
            }
        )

        if interiour_color:
            square_obj[NameObject("/IC")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(interiour_color)]
            )

        return square_obj

    @staticmethod
    def link(
        rect: Union[RectangleObject, Tuple[float, float, float, float]],
        border: Optional[ArrayObject] = None,
        url: Optional[str] = None,
        target_page_index: Optional[int] = None,
        fit: Fit = DEFAULT_FIT,
    ) -> DictionaryObject:
        """
        Add a link to the document.

        The link can either be an external link or an internal link.

        An external link requires the URL parameter.
        An internal link requires the target_page_index, fit, and fit args.


        :param RectangleObject rect: or array of four
            integers specifying the clickable rectangular area
            ``[xLL, yLL, xUR, yUR]``
        :param border: if provided, an array describing border-drawing
            properties. See the PDF spec for details. No border will be
            drawn if this argument is omitted.
            - horizontal corner radius,
            - vertical corner radius, and
            - border width
            - Optionally: Dash
        :param str url: Link to a website (if you want to make an external link)
        :param int target_page_index: index of the page to which the link should go
            (if you want to make an internal link)
        :param Fit fit: Page fit or 'zoom' option.
        :raises ValueError: if neither or both of ``url`` and
            ``target_page_index`` are given
        :return: the ``/Link`` annotation dictionary
        """
        from ..types import BorderArrayType

        is_external = url is not None
        is_internal = target_page_index is not None
        # Exactly one of the two link targets must be supplied.
        if not is_external and not is_internal:
            raise ValueError(
                "Either 'url' or 'target_page_index' have to be provided. Both were None."
            )
        if is_external and is_internal:
            raise ValueError(
                f"Either 'url' or 'target_page_index' have to be provided. url={url}, target_page_index={target_page_index}"
            )

        border_arr: BorderArrayType
        if border is not None:
            # The first three entries are taken as-is; a fourth entry, when
            # present, is treated as the dash pattern and wrapped in its own
            # array.
            border_arr = [NameObject(n) for n in border[:3]]
            if len(border) == 4:
                dash_pattern = ArrayObject([NameObject(n) for n in border[3]])
                border_arr.append(dash_pattern)
        else:
            border_arr = [NumberObject(0)] * 3

        link_obj = DictionaryObject(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Link"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Border"): ArrayObject(border_arr),
            }
        )
        if is_external:
            link_obj[NameObject("/A")] = DictionaryObject(
                {
                    NameObject("/S"): NameObject("/URI"),
                    NameObject("/Type"): NameObject("/Action"),
                    NameObject("/URI"): TextStringObject(url),
                }
            )
        if is_internal:
            # This needs to be updated later!
            # The destination is only a placeholder keyed by plain strings;
            # it records the page index and fit data for later resolution.
            dest_deferred = DictionaryObject(
                {
                    "target_page_index": NumberObject(target_page_index),
                    "fit": NameObject(fit.fit_type),
                    "fit_args": fit.fit_args,
                }
            )
            link_obj[NameObject("/Dest")] = dest_deferred
        return link_obj
|
||||
@@ -0,0 +1,648 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import codecs
|
||||
import decimal
|
||||
import hashlib
|
||||
import re
|
||||
from binascii import unhexlify
|
||||
from typing import Any, Callable, List, Optional, Tuple, Union, cast
|
||||
|
||||
from .._codecs import _pdfdoc_encoding_rev
|
||||
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
|
||||
from .._utils import (
|
||||
StreamType,
|
||||
b_,
|
||||
deprecation_with_replacement,
|
||||
hex_str,
|
||||
hexencode,
|
||||
logger_warning,
|
||||
read_non_whitespace,
|
||||
read_until_regex,
|
||||
str_,
|
||||
)
|
||||
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
|
||||
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
|
||||
class PdfObject(PdfObjectProtocol):
    """Common base class of the PDF object types defined in this module."""

    # function for calculating a hash value
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    indirect_reference: Optional["IndirectObject"]

    def hash_value_data(self) -> bytes:
        """Return the string form of the object, encoded to bytes."""
        return ("%s" % self).encode()

    def hash_value(self) -> bytes:
        """
        Return ``b"<ClassName>:<hexdigest>"``.

        The digest is ``hash_func`` applied to :meth:`hash_value_data`.
        """
        digest = self.hash_func(self.hash_value_data()).hexdigest()
        return f"{self.__class__.__name__}:{digest}".encode()

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "PdfObject":
        """
        clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
        force_duplicate: in standard if the object has been already cloned and reference,
                         the copy is returned; when force_duplicate == True, a new copy is always performed
        ignore_fields : list/tuple of Fields names (for dictionaries that will be ignored during cloning (apply also to childs duplication)
        in standard, clone function call _reference_clone (see _reference)

        :raises NotImplementedError: always; subclasses provide the
            implementation.
        """
        # NotImplementedError is a subclass of Exception, so callers that
        # catch Exception keep working; it matches write_to_stream below.
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .clone so far"
        )

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol
    ) -> PdfObjectProtocol:
        """
        reference the object within the _objects of pdf_dest only if
        indirect_reference attribute exists (which means the objects
        was already identified in xref/xobjstm)
        if object has been already referenced do nothing

        :param clone: the freshly created copy of ``self``
        :param pdf_dest: destination whose ``_objects`` and
            ``_id_translated`` tables are updated
        :return: ``clone``, or the object already registered in ``pdf_dest``
            for the same source id
        """
        try:
            if clone.indirect_reference.pdf == pdf_dest:
                return clone
        except Exception:
            # Best effort: the clone may have no usable indirect_reference
            # yet, in which case it simply is not registered so far.
            pass
        if hasattr(self, "indirect_reference"):
            ind = self.indirect_reference
            # Number the clone will receive once appended to pdf_dest._objects.
            i = len(pdf_dest._objects) + 1
            if ind is not None:
                # Translation table: id(source pdf) -> {source idnum: new idnum}
                if id(ind.pdf) not in pdf_dest._id_translated:
                    pdf_dest._id_translated[id(ind.pdf)] = {}
                if ind.idnum in pdf_dest._id_translated[id(ind.pdf)]:
                    obj = pdf_dest.get_object(
                        pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
                    )
                    assert obj is not None
                    return obj
                pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
            pdf_dest._objects.append(clone)
            clone.indirect_reference = IndirectObject(i, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references."""
        return self

    def getObject(self) -> Optional["PdfObject"]:  # pragma: no cover
        """Deprecated alias of :meth:`get_object`."""
        deprecation_with_replacement("getObject", "get_object", "3.0.0")
        return self.get_object()

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """
        Serialise the object to ``stream``.

        :raises NotImplementedError: always; subclasses provide the
            implementation.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
class NullObject(PdfObject):
    """The PDF ``null`` object; it is written to a stream as ``null``."""

    def __repr__(self) -> str:
        return "NullObject"

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "NullObject":
        """clone object into pdf_dest"""
        duplicate = NullObject()
        registered = self._reference_clone(duplicate, pdf_dest)
        return cast("NullObject", registered)

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """
        Consume four bytes from ``stream`` and return a ``NullObject``.

        :raises PdfReadError: if the four bytes are not ``b"null"``
        """
        if stream.read(4) == b"null":
            return NullObject()
        raise PdfReadError("Could not read Null object")

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """Write ``b"null"`` to ``stream``; ``encryption_key`` is not used."""
        stream.write(b"null")

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated alias of :meth:`write_to_stream`."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def readFromStream(stream: StreamType) -> "NullObject":  # pragma: no cover
        """Deprecated alias of :meth:`read_from_stream`."""
        deprecation_with_replacement("readFromStream", "read_from_stream", "3.0.0")
        return NullObject.read_from_stream(stream)
|
||||
|
||||
|
||||
class BooleanObject(PdfObject):
    """PDF boolean; wraps ``value`` and is written as ``true`` or ``false``."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def __eq__(self, __o: object) -> bool:
        """Compare against another ``BooleanObject`` or a plain ``bool``."""
        if isinstance(__o, BooleanObject):
            return self.value == __o.value
        if isinstance(__o, bool):
            return self.value == __o
        # Any other type never compares equal.
        return False

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "BooleanObject":
        """clone object into pdf_dest"""
        duplicate = BooleanObject(self.value)
        registered = self._reference_clone(duplicate, pdf_dest)
        return cast("BooleanObject", registered)

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """
        Read ``true`` or ``false`` from ``stream``.

        Four bytes are read first; when they are ``b"fals"`` one more byte
        is consumed (its value is not inspected).

        :raises PdfReadError: if the four bytes are neither ``b"true"`` nor
            ``b"fals"``
        """
        word = stream.read(4)
        if word == b"true":
            return BooleanObject(True)
        if word == b"fals":
            stream.read(1)
            return BooleanObject(False)
        raise PdfReadError("Could not read Boolean object")

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """Write ``b"true"`` for a truthy value, ``b"false"`` otherwise."""
        token = b"true" if self.value else b"false"
        stream.write(token)

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated alias of :meth:`write_to_stream`."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def readFromStream(stream: StreamType) -> "BooleanObject":  # pragma: no cover
        """Deprecated alias of :meth:`read_from_stream`."""
        deprecation_with_replacement("readFromStream", "read_from_stream", "3.0.0")
        return BooleanObject.read_from_stream(stream)
|
||||
|
||||
|
||||
class IndirectObject(PdfObject):
    """
    Reference to another object, identified by ``idnum`` and ``generation``
    within the document ``pdf``. Written to a stream as ``<idnum> <generation> R``.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "IndirectObject":
        """clone object into pdf_dest"""
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}

        if not force_duplicate and self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # The target was cloned before: reuse the registered copy.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
        else:
            obj = self.get_object()
            assert obj is not None
            dup = obj.clone(pdf_dest, force_duplicate, ignore_fields)
        assert dup is not None
        assert dup.indirect_reference is not None
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        """An indirect object is its own indirect reference."""
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Look the reference up in ``self.pdf`` and resolve the result further."""
        obj = self.pdf.get_object(self)
        if obj is None:
            return None
        return obj.get_object()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: Any) -> bool:
        """Equal when id, generation and the very same ``pdf`` object match."""
        return (
            isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

    def __hash__(self) -> int:
        # Defining __eq__ alone makes instances unhashable. Hash over the
        # same three components __eq__ compares; ``pdf`` is compared by
        # identity there, hence id(self.pdf) here.
        return hash((self.idnum, self.generation, id(self.pdf)))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """Write ``<idnum> <generation> R``; ``encryption_key`` is not used."""
        stream.write(b_(f"{self.idnum} {self.generation} R"))

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated alias of :meth:`write_to_stream`."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        :raises PdfStreamError: if the stream ends while reading either number
        :raises PdfReadError: if the token after the numbers is not ``R``
        """
        # Object number: every byte up to the first whitespace byte.
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        # Generation number: leading whitespace is skipped, then bytes are
        # collected up to the next whitespace byte.
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex_str(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)

    @staticmethod
    def readFromStream(
        stream: StreamType, pdf: Any  # PdfReader
    ) -> "IndirectObject":  # pragma: no cover
        """Deprecated alias of :meth:`read_from_stream`."""
        deprecation_with_replacement("readFromStream", "read_from_stream", "3.0.0")
        return IndirectObject.read_from_stream(stream, pdf)
|
||||
|
||||
|
||||
class FloatObject(decimal.Decimal, PdfObject):
    """PDF real number, stored as a ``decimal.Decimal``."""

    def __new__(
        cls, value: Union[str, Any] = "0", context: Optional[Any] = None
    ) -> "FloatObject":
        """
        Build the decimal from ``str_(value)``.

        Any failure is logged through ``logger_warning`` and a ``0.0`` object
        is returned instead.
        """
        try:
            text = str_(value)
            return decimal.Decimal.__new__(cls, text, context)
        except Exception:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(f"FloatObject ({value}) invalid; use 0.0 instead", __name__)
            return decimal.Decimal.__new__(cls, "0.0")

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "FloatObject":
        """clone object into pdf_dest"""
        duplicate = FloatObject(self)
        registered = self._reference_clone(duplicate, pdf_dest)
        return cast("FloatObject", registered)

    def __repr__(self) -> str:
        if self == self.to_integral():
            # If this is an integer, format it with no decimal place.
            one = decimal.Decimal(1)
            return str(self.quantize(one))
        # Otherwise, format it with a decimal place, taking care to
        # remove any extraneous trailing zeros.
        fixed_point = f"{self:f}"
        return fixed_point.rstrip("0")

    def as_numeric(self) -> float:
        """Return the value as a built-in ``float``, parsed from ``repr(self)``."""
        encoded = repr(self).encode("utf8")
        return float(encoded)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """Write ``repr(self)`` as UTF-8 bytes; ``encryption_key`` is not used."""
        encoded = repr(self).encode("utf8")
        stream.write(encoded)

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated alias of :meth:`write_to_stream`."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)
|
||||
|
||||
|
||||
class NumberObject(int, PdfObject):
    """PDF integer, stored as a built-in ``int``."""

    # Matches the first byte that ends a number token.
    # NOTE(review): inside the class, ``+-.`` is a character *range*, so it
    # also admits ``,`` -- confirm whether that leniency is intended.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        """
        Build the integer from ``int(value)``.

        A ``ValueError`` is logged through ``logger_warning`` and a ``0``
        object is returned instead.
        """
        try:
            parsed = int(value)
            return int.__new__(cls, parsed)
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            return int.__new__(cls, 0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "NumberObject":
        """clone object into pdf_dest"""
        duplicate = NumberObject(self)
        registered = self._reference_clone(duplicate, pdf_dest)
        return cast("NumberObject", registered)

    def as_numeric(self) -> int:
        """Return the value as a built-in ``int``, parsed from ``repr(self)``."""
        encoded = repr(self).encode("utf8")
        return int(encoded)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """Write ``repr(self)`` as UTF-8 bytes; ``encryption_key`` is not used."""
        encoded = repr(self).encode("utf8")
        stream.write(encoded)

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated alias of :meth:`write_to_stream`."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """
        Read a number token from ``stream``.

        The bytes returned by ``read_until_regex`` become a ``FloatObject``
        when they contain a ``.`` and a ``NumberObject`` otherwise.
        """
        num = read_until_regex(stream, NumberObject.NumberPattern)
        if b"." in num:
            return FloatObject(num)
        return NumberObject(num)

    @staticmethod
    def readFromStream(
        stream: StreamType,
    ) -> Union["NumberObject", "FloatObject"]:  # pragma: no cover
        """Deprecated alias of :meth:`read_from_stream`."""
        deprecation_with_replacement("readFromStream", "read_from_stream", "3.0.0")
        return NumberObject.read_from_stream(stream)
|
||||
|
||||
|
||||
class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.
    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "ByteStringObject":
        """clone object into pdf_dest"""
        duplicate = ByteStringObject(bytes(self))
        registered = self._reference_clone(duplicate, pdf_dest)
        return cast("ByteStringObject", registered)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """
        Write the bytes as a hex string, ``<`` + ``hexencode(...)`` + ``>``.

        When ``encryption_key`` is truthy the bytes are first passed through
        ``RC4_encrypt`` with that key.
        """
        payload = self
        if encryption_key:
            from .._security import RC4_encrypt

            payload = RC4_encrypt(encryption_key, payload)  # type: ignore
        # Three separate writes: opening bracket, hex body, closing bracket.
        stream.write(b"<")
        stream.write(hexencode(payload))
        stream.write(b">")

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated alias of :meth:`write_to_stream`."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)
|
||||
|
||||
|
||||
class TextStringObject(str, PdfObject):
    """
    Represents a string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # Flags recording which auto-detection produced this text; they are read
    # by get_original_bytes() to rebuild the encoded form.
    autodetect_pdfdocencoding = False
    autodetect_utf16 = False

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest, carrying over the autodetect flags."""
        obj = TextStringObject(self)
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        return cast("TextStringObject", self._reference_clone(obj, pdf_dest))

    @property
    def original_bytes(self) -> bytes:
        """
        Return the encoded bytes this text string was created from.

        It is occasionally possible that a text string object gets created
        where a byte string object was expected due to the autodetection
        mechanism -- if that occurs, this property can be used to
        back-calculate what the original encoded bytes were.
        """
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """
        Re-encode the text according to the autodetect flag that is set.

        :raises Exception: If neither autodetect flag is set, so the original
            encoding is unknown.
        """
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the
        # autodetect method.
        if self.autodetect_utf16:
            return codecs.BOM_UTF16_BE + self.encode("utf-16be")
        elif self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        else:
            raise Exception("no information about original bytes")

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """
        Write the string to ``stream``.

        Without an encryption key the text is written as a literal string
        ``(...)``; alphanumeric characters and spaces are written as they
        are, every other byte as a three-digit octal escape. With an
        encryption key the encoded bytes are RC4-encrypted and written as a
        hexadecimal string through ``ByteStringObject``.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
        if encryption_key:
            from .._security import RC4_encrypt

            bytearr = RC4_encrypt(encryption_key, bytearr)
            obj = ByteStringObject(bytearr)
            obj.write_to_stream(stream, None)
        else:
            space = ord(" ")
            stream.write(b"(")
            for c in bytearr:
                # Iterating over bytes yields ints, so the space test has to
                # compare against the integer code of " "; comparing with
                # b" " is always unequal and escaped every space as \040.
                if not chr(c).isalnum() and c != space:
                    # This:
                    #   stream.write(b_(rf"\{c:0>3o}"))
                    # gives
                    #   https://github.com/davidhalter/parso/issues/207
                    stream.write(b_("\\%03o" % c))
                else:
                    stream.write(b_(chr(c)))
            stream.write(b")")

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated camelCase spelling of ``write_to_stream``."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)
|
||||
|
||||
|
||||
class NameObject(str, PdfObject):
    """A PDF name object (``/Name``) represented as a ``str``."""

    # Characters that terminate a name while reading it from a stream.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    # First byte every serialized name starts with.
    surfix = b"/"
    # Characters that are written as ``#XX`` escapes by renumber().
    renumber_table = {
        "#": b"#23",
        "(": b"#28",
        ")": b"#29",
        "/": b"#2F",
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[Tuple[str, ...], List[str], None] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast("NameObject", self._reference_clone(NameObject(self), pdf_dest))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """Write the escaped form of the name (see ``renumber``) to ``stream``."""
        stream.write(self.renumber())  # b_(renumber(self)))

    def writeToStream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:  # pragma: no cover
        """Deprecated camelCase spelling of ``write_to_stream``."""
        deprecation_with_replacement("writeToStream", "write_to_stream", "3.0.0")
        self.write_to_stream(stream, encryption_key)

    def renumber(self) -> bytes:
        """
        Return the name encoded as bytes with ``#XX`` escapes applied.

        The first character is emitted as is (a warning is logged when it is
        not ``/``). Of the remaining characters, those above ``~`` are written
        as one ``#XX`` escape per UTF-8 byte, those listed in
        ``renumber_table`` are replaced by their table entry, and everything
        else is UTF-8 encoded unchanged.
        """
        out = self[0].encode("utf-8")
        if out != b"/":
            logger_warning(f"Incorrect first char in NameObject:({self})", __name__)
        for c in self[1:]:
            if c > "~":
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """
        Replace every ``#XX`` escape in ``sin`` by the byte it denotes.

        A ``#`` that is not followed by two hexadecimal digits is left
        untouched, and scanning resumes at the next ``#``.
        """
        i = sin.find(b"#")
        while i >= 0:
            hex_pair = sin[i + 1 : i + 3]
            # Only a complete pair can be an escape; a "#" at the very end
            # (or followed by a single character) is kept as it is.
            if len(hex_pair) == 2:
                try:
                    sin = sin[:i] + unhexlify(hex_pair) + sin[i + 3 :]
                except ValueError:
                    # if the 2 characters after # can not be converted to
                    # hexa we change nothing and carry on
                    pass
            # Continue after position i: this skips a decoded "#" (from #23)
            # and, after a failure, jumps to the next real "#" instead of
            # decoding at an arbitrary following byte.
            i = sin.find(b"#", i + 1)
        return sin

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name object from ``stream``.

        :param stream: Stream positioned at the leading ``/`` of the name.
        :param pdf: Object whose ``strict`` attribute decides whether an
            undecodable name raises or is decoded with ``charmap`` after a
            warning.
        :raises PdfReadError: If the first byte is not ``/``, or if the name
            cannot be decoded and ``pdf.strict`` is true.
        """
        name = stream.read(1)
        if name != NameObject.surfix:
            raise PdfReadError("name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern, ignore_eof=True)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            # Try the candidate encodings in order; the first that decodes wins.
            for enc in ("utf-8", "gbk"):
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in Name Object ({repr(name)})", __name__
                )
                return NameObject(name.decode("charmap"))
            else:
                raise PdfReadError(
                    f"Illegal character in Name Object ({repr(name)})"
                ) from e

    @staticmethod
    def readFromStream(
        stream: StreamType, pdf: Any  # PdfReader
    ) -> "NameObject":  # pragma: no cover
        """Deprecated camelCase spelling of ``read_from_stream``."""
        deprecation_with_replacement("readFromStream", "read_from_stream", "3.0.0")
        return NameObject.read_from_stream(stream, pdf)
|
||||
|
||||
|
||||
def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode ``unicode_string`` using the reverse PDFDocEncoding table.

    :raises UnicodeEncodeError: If a character has no entry in
        ``_pdfdoc_encoding_rev``.
    """
    pieces = []
    for char in unicode_string:
        try:
            encoded = b_(chr(_pdfdoc_encoding_rev[char]))
        except KeyError:
            raise UnicodeEncodeError(
                "pdfdocencoding", char, -1, -1, "does not exist in translation table"
            )
        pieces.append(encoded)
    return b"".join(pieces)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,129 @@
|
||||
from typing import Any, Optional, Tuple, Union
|
||||
|
||||
|
||||
class Fit:
    """A destination fit mode: a fit type name plus its numeric arguments."""

    def __init__(
        self, fit_type: str, fit_args: Tuple[Union[None, float, Any], ...] = tuple()
    ):
        """
        Store ``fit_type`` as a name object and convert ``fit_args``.

        ``None`` and ``NullObject`` arguments become a fresh ``NullObject``;
        every other argument is wrapped in ``FloatObject``.
        """
        from ._base import FloatObject, NameObject, NullObject

        self.fit_type = NameObject(fit_type)
        converted = []
        for arg in fit_args:
            if arg is None or isinstance(arg, NullObject):
                converted.append(NullObject())
            else:
                converted.append(FloatObject(arg))
        self.fit_args = converted

    @classmethod
    def xyz(
        cls,
        left: Optional[float] = None,
        top: Optional[float] = None,
        zoom: Optional[float] = None,
    ) -> "Fit":
        """
        Show the page with the point (left, top) at the upper-left corner of
        the window and the page contents magnified by the factor zoom.

        A null value for left, top or zoom means that the current value of
        that parameter is kept. A zoom of 0 means the same as a null value.
        """
        return Fit("/XYZ", (left, top, zoom))

    @classmethod
    def fit(cls) -> "Fit":
        """
        Show the page magnified just enough that the whole page fits in the
        window both horizontally and vertically.

        If the two required magnification factors differ, the smaller one is
        used and the page is centered in the window in the other dimension.
        """
        return Fit("/Fit")

    @classmethod
    def fit_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Show the page with the vertical coordinate top at the top edge of the
        window, magnified just enough to fit the full page width in the
        window.

        A null value for `top` means that the current value is kept.
        """
        return Fit("/FitH", (top,))

    @classmethod
    def fit_vertically(cls, left: Optional[float] = None) -> "Fit":
        """Build a ``/FitV`` fit whose single argument is ``left``."""
        return Fit("/FitV", (left,))

    @classmethod
    def fit_rectangle(
        cls,
        left: Optional[float] = None,
        bottom: Optional[float] = None,
        right: Optional[float] = None,
        top: Optional[float] = None,
    ) -> "Fit":
        """
        Show the page magnified just enough that the rectangle given by left,
        bottom, right and top fits entirely in the window, both horizontally
        and vertically.

        If the two required magnification factors differ, the smaller one is
        used and the rectangle is centered in the window in the other
        dimension.

        A null value for any of the parameters may result in unpredictable
        behavior.
        """
        return Fit("/FitR", (left, bottom, right, top))

    @classmethod
    def fit_box(cls) -> "Fit":
        """
        Show the page magnified just enough that its bounding box fits
        entirely in the window, both horizontally and vertically.

        If the two required magnification factors differ, the smaller one is
        used and the bounding box is centered in the window in the other
        dimension.
        """
        return Fit("/FitB")

    @classmethod
    def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Show the page with the vertical coordinate top at the top edge of the
        window, magnified just enough to fit the full width of its bounding
        box in the window.

        A null value for top means that the current value is kept.
        """
        return Fit("/FitBH", (top,))

    @classmethod
    def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit":
        """
        Show the page with the horizontal coordinate left at the left edge of
        the window, magnified just enough to fit the full height of its
        bounding box in the window.

        A null value for left means that the current value is kept.
        """
        return Fit("/FitBV", (left,))

    def __str__(self) -> str:
        """Return ``Fit(<type>)``, with the argument list appended when non-empty."""
        if self.fit_args:
            return f"Fit({self.fit_type}, {self.fit_args})"
        return f"Fit({self.fit_type})"


# Module-level default: the plain "/Fit" mode.
DEFAULT_FIT = Fit.fit()
|
||||
@@ -0,0 +1,35 @@
|
||||
from typing import Any, Union
|
||||
|
||||
from .._utils import StreamType, deprecation_with_replacement
|
||||
from ._base import NameObject
|
||||
from ._data_structures import Destination
|
||||
|
||||
|
||||
class OutlineItem(Destination):
    """A destination that serializes itself as an outline item dictionary."""

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes]
    ) -> None:
        """
        Write the outline item to ``stream`` as a ``<< ... >>`` dictionary.

        Of /Title, /Parent, /First, /Last, /Next and /Prev, those present in
        this object are written in that order, followed by a /Dest entry
        holding ``self.dest_array``.
        """
        candidate_names = ("/Title", "/Parent", "/First", "/Last", "/Next", "/Prev")
        present_keys = [
            NameObject(name) for name in candidate_names if name in self
        ]

        stream.write(b"<<\n")
        for entry_key in present_keys:
            entry_key.write_to_stream(stream, encryption_key)
            stream.write(b" ")
            entry_value = self.raw_get(entry_key)
            entry_value.write_to_stream(stream, encryption_key)
            stream.write(b"\n")

        # The destination array is always emitted, after the optional entries.
        dest_key = NameObject("/Dest")
        dest_key.write_to_stream(stream, encryption_key)
        stream.write(b" ")
        dest_value = self.dest_array
        dest_value.write_to_stream(stream, encryption_key)
        stream.write(b"\n")
        stream.write(b">>")
|
||||
|
||||
|
||||
class Bookmark(OutlineItem):  # pragma: no cover
    """Deprecated name for ``OutlineItem``."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Report the deprecation, then initialise exactly like OutlineItem.
        deprecation_with_replacement("Bookmark", "OutlineItem", "3.0.0")
        super().__init__(*args, **kwargs)
|
||||
@@ -0,0 +1,265 @@
|
||||
import decimal
|
||||
from typing import Any, List, Tuple, Union
|
||||
|
||||
from .._utils import deprecation_no_replacement, deprecation_with_replacement
|
||||
from ._base import FloatObject, NumberObject
|
||||
from ._data_structures import ArrayObject
|
||||
|
||||
|
||||
class RectangleObject(ArrayObject):
    """
    Four-number array used to represent *page boxes* in PyPDF2, namely:

    * :attr:`artbox <PyPDF2._page.PageObject.artbox>`
    * :attr:`bleedbox <PyPDF2._page.PageObject.bleedbox>`
    * :attr:`cropbox <PyPDF2._page.PageObject.cropbox>`
    * :attr:`mediabox <PyPDF2._page.PageObject.mediabox>`
    * :attr:`trimbox <PyPDF2._page.PageObject.trimbox>`

    The entries are stored in the order left, bottom, right, top.
    """

    def __init__(
        self, arr: Union["RectangleObject", Tuple[float, float, float, float]]
    ) -> None:
        # A rectangle is made of exactly four coordinates.
        assert len(arr) == 4
        # Wrap plain values so every entry is a NumberObject or FloatObject.
        numbers = [self._ensure_is_number(coord) for coord in arr]
        ArrayObject.__init__(self, numbers)  # type: ignore

    def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
        """Return ``value`` unchanged if it is a PDF number, else as FloatObject."""
        if isinstance(value, (NumberObject, FloatObject)):
            return value
        return FloatObject(value)

    def scale(self, sx: float, sy: float) -> "RectangleObject":
        """Return a new rectangle with x values scaled by sx and y values by sy."""
        scaled = (
            float(self.left) * sx,
            float(self.bottom) * sy,
            float(self.right) * sx,
            float(self.top) * sy,
        )
        return RectangleObject(scaled)

    def ensureIsNumber(
        self, value: Any
    ) -> Union[FloatObject, NumberObject]:  # pragma: no cover
        """Deprecated public spelling of ``_ensure_is_number``."""
        deprecation_no_replacement("ensureIsNumber", "3.0.0")
        return self._ensure_is_number(value)

    def __repr__(self) -> str:
        return f"RectangleObject({list(self)!r})"

    # -- single coordinates -------------------------------------------------

    @property
    def left(self) -> FloatObject:
        """First entry of the array."""
        return self[0]

    @left.setter
    def left(self, f: float) -> None:
        self[0] = FloatObject(f)

    @property
    def bottom(self) -> FloatObject:
        """Second entry of the array."""
        return self[1]

    @bottom.setter
    def bottom(self, f: float) -> None:
        self[1] = FloatObject(f)

    @property
    def right(self) -> FloatObject:
        """Third entry of the array."""
        return self[2]

    @right.setter
    def right(self, f: float) -> None:
        self[2] = FloatObject(f)

    @property
    def top(self) -> FloatObject:
        """Fourth entry of the array."""
        return self[3]

    @top.setter
    def top(self, f: float) -> None:
        self[3] = FloatObject(f)

    # -- deprecated single-coordinate getters -------------------------------

    def getLowerLeft_x(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getLowerLeft_x", "left", "3.0.0")
        return self.left

    def getLowerLeft_y(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getLowerLeft_y", "bottom", "3.0.0")
        return self.bottom

    def getUpperRight_x(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getUpperRight_x", "right", "3.0.0")
        return self.right

    def getUpperRight_y(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getUpperRight_y", "top", "3.0.0")
        return self.top

    def getUpperLeft_x(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getUpperLeft_x", "left", "3.0.0")
        return self.left

    def getUpperLeft_y(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getUpperLeft_y", "top", "3.0.0")
        return self.top

    def getLowerRight_x(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getLowerRight_x", "right", "3.0.0")
        return self.right

    def getLowerRight_y(self) -> FloatObject:  # pragma: no cover
        deprecation_with_replacement("getLowerRight_y", "bottom", "3.0.0")
        return self.bottom

    # -- corner points ------------------------------------------------------

    @property
    def lower_left(self) -> Tuple[decimal.Decimal, decimal.Decimal]:
        """
        The lower left corner of this box as an (x, y) pair; readable and
        writable.
        """
        return self.left, self.bottom

    @lower_left.setter
    def lower_left(self, value: List[Any]) -> None:
        self[0], self[1] = map(self._ensure_is_number, value)

    @property
    def lower_right(self) -> Tuple[decimal.Decimal, decimal.Decimal]:
        """
        The lower right corner of this box as an (x, y) pair; readable and
        writable.
        """
        return self.right, self.bottom

    @lower_right.setter
    def lower_right(self, value: List[Any]) -> None:
        self[2], self[1] = map(self._ensure_is_number, value)

    @property
    def upper_left(self) -> Tuple[decimal.Decimal, decimal.Decimal]:
        """
        The upper left corner of this box as an (x, y) pair; readable and
        writable.
        """
        return self.left, self.top

    @upper_left.setter
    def upper_left(self, value: List[Any]) -> None:
        self[0], self[3] = map(self._ensure_is_number, value)

    @property
    def upper_right(self) -> Tuple[decimal.Decimal, decimal.Decimal]:
        """
        The upper right corner of this box as an (x, y) pair; readable and
        writable.
        """
        return self.right, self.top

    @upper_right.setter
    def upper_right(self, value: List[Any]) -> None:
        self[2], self[3] = map(self._ensure_is_number, value)

    # -- deprecated corner getters ------------------------------------------

    def getLowerLeft(
        self,
    ) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("getLowerLeft", "lower_left", "3.0.0")
        return self.lower_left

    def getLowerRight(
        self,
    ) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("getLowerRight", "lower_right", "3.0.0")
        return self.lower_right

    def getUpperLeft(
        self,
    ) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("getUpperLeft", "upper_left", "3.0.0")
        return self.upper_left

    def getUpperRight(
        self,
    ) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("getUpperRight", "upper_right", "3.0.0")
        return self.upper_right

    # -- deprecated corner setters ------------------------------------------

    def setLowerLeft(self, value: Tuple[float, float]) -> None:  # pragma: no cover
        deprecation_with_replacement("setLowerLeft", "lower_left", "3.0.0")
        self.lower_left = value  # type: ignore

    def setLowerRight(self, value: Tuple[float, float]) -> None:  # pragma: no cover
        deprecation_with_replacement("setLowerRight", "lower_right", "3.0.0")
        self.lower_right = value  # type: ignore

    def setUpperLeft(self, value: Tuple[float, float]) -> None:  # pragma: no cover
        deprecation_with_replacement("setUpperLeft", "upper_left", "3.0.0")
        self.upper_left = value  # type: ignore

    def setUpperRight(self, value: Tuple[float, float]) -> None:  # pragma: no cover
        deprecation_with_replacement("setUpperRight", "upper_right", "3.0.0")
        self.upper_right = value  # type: ignore

    # -- extent ---------------------------------------------------------------

    @property
    def width(self) -> decimal.Decimal:
        """Difference between the right and the left coordinate."""
        return self.right - self.left

    def getWidth(self) -> decimal.Decimal:  # pragma: no cover
        deprecation_with_replacement("getWidth", "width", "3.0.0")
        return self.width

    @property
    def height(self) -> decimal.Decimal:
        """Difference between the top and the bottom coordinate."""
        return self.top - self.bottom

    def getHeight(self) -> decimal.Decimal:  # pragma: no cover
        deprecation_with_replacement("getHeight", "height", "3.0.0")
        return self.height

    # -- deprecated camelCase corner properties -----------------------------

    @property
    def lowerLeft(self) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("lowerLeft", "lower_left", "3.0.0")
        return self.lower_left

    @lowerLeft.setter
    def lowerLeft(
        self, value: Tuple[decimal.Decimal, decimal.Decimal]
    ) -> None:  # pragma: no cover
        deprecation_with_replacement("lowerLeft", "lower_left", "3.0.0")
        self.lower_left = value

    @property
    def lowerRight(self) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("lowerRight", "lower_right", "3.0.0")
        return self.lower_right

    @lowerRight.setter
    def lowerRight(
        self, value: Tuple[decimal.Decimal, decimal.Decimal]
    ) -> None:  # pragma: no cover
        deprecation_with_replacement("lowerRight", "lower_right", "3.0.0")
        self.lower_right = value

    @property
    def upperLeft(self) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("upperLeft", "upper_left", "3.0.0")
        return self.upper_left

    @upperLeft.setter
    def upperLeft(
        self, value: Tuple[decimal.Decimal, decimal.Decimal]
    ) -> None:  # pragma: no cover
        deprecation_with_replacement("upperLeft", "upper_left", "3.0.0")
        self.upper_left = value

    @property
    def upperRight(self) -> Tuple[decimal.Decimal, decimal.Decimal]:  # pragma: no cover
        deprecation_with_replacement("upperRight", "upper_right", "3.0.0")
        return self.upper_right

    @upperRight.setter
    def upperRight(
        self, value: Tuple[decimal.Decimal, decimal.Decimal]
    ) -> None:  # pragma: no cover
        deprecation_with_replacement("upperRight", "upper_right", "3.0.0")
        self.upper_right = value
|
||||
@@ -0,0 +1,172 @@
|
||||
import codecs
|
||||
from typing import Dict, List, Tuple, Union
|
||||
|
||||
from .._codecs import _pdfdoc_encoding
|
||||
from .._utils import StreamType, b_, logger_warning, read_non_whitespace
|
||||
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
|
||||
from ._base import ByteStringObject, TextStringObject
|
||||
|
||||
|
||||
def hex_to_rgb(value: str) -> Tuple[float, float, float]:
    """
    Convert a hexadecimal color such as ``"#336699"`` to an RGB triple.

    Leading ``#`` characters are stripped; the next three pairs of hex digits
    are read as red, green and blue and each is divided by 255.0.
    """
    digits = value.lstrip("#")
    red, green, blue = (
        int(digits[start : start + 2], 16) / 255.0 for start in (0, 2, 4)
    )
    return (red, green, blue)
|
||||
|
||||
|
||||
def read_hex_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """
    Read a hexadecimal string (``<...>``) from ``stream``.

    The first byte is skipped, then non-whitespace tokens are collected in
    pairs until ``>`` is found; each pair is one character code. A single
    leftover digit is completed with a trailing ``0``. The collected data is
    handed to ``create_string_object`` together with ``forced_encoding``.

    :raises PdfStreamError: If the stream ends before ``>`` is found.
    """
    stream.read(1)  # skip the opening delimiter
    chars = []
    pending = b""
    while True:
        tok = read_non_whitespace(stream)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b">":
            break
        pending += tok
        if len(pending) == 2:
            chars.append(chr(int(pending, base=16)))
            pending = b""
    # An odd number of digits: the missing last digit counts as zero.
    if len(pending) == 1:
        pending += b"0"
    if len(pending) == 2:
        chars.append(chr(int(pending, base=16)))
    return create_string_object(b_("".join(chars)), forced_encoding)
|
||||
|
||||
|
||||
def read_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """
    Read a literal string (``(...)``) from ``stream``.

    The first byte is skipped, then bytes are collected until the matching
    closing parenthesis. Nested parentheses are kept, backslash escapes are
    resolved, and an escaped line break contributes nothing. The collected
    bytes are handed to ``create_string_object`` with ``forced_encoding``.

    :raises PdfStreamError: If the stream ends before the string is closed.
    """
    # Built once per call instead of once per backslash.
    escape_dict = {
        b"n": b"\n",
        b"r": b"\r",
        b"t": b"\t",
        b"b": b"\b",
        b"f": b"\f",
        b"c": rb"\c",
        b"(": b"(",
        b")": b")",
        b"/": b"/",
        b"\\": b"\\",
        b" ": b" ",
        b"%": b"%",
        b"<": b"<",
        b">": b">",
        b"[": b"[",
        b"]": b"]",
        b"#": b"#",
        b"_": b"_",
        b"&": b"&",
        b"$": b"$",
    }
    stream.read(1)  # skip the opening delimiter
    parens = 1
    txt = []
    while True:
        tok = stream.read(1)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b"(":
            parens += 1
        elif tok == b")":
            parens -= 1
            if parens == 0:
                break
        elif tok == b"\\":
            tok = stream.read(1)
            try:
                tok = escape_dict[tok]
            except KeyError:
                if b"0" <= tok <= b"7":
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    for _ in range(2):
                        ntok = stream.read(1)
                        if b"0" <= ntok <= b"7":
                            tok += ntok
                        else:
                            # ntok has to be analysed by the main loop; at
                            # end of stream nothing was read, so there is
                            # nothing to push back.
                            if ntok:
                                stream.seek(-1, 1)
                            break
                    # Keep only the low eight bits: values above \377 would
                    # otherwise not fit into a single byte.
                    tok = b_(chr(int(tok, base=8) & 0xFF))
                elif tok in b"\n\r":
                    # This case is hit when a backslash followed by a line
                    # break occurs. If it's a multi-char EOL, consume the
                    # second character:
                    tok = stream.read(1)
                    if tok not in b"\n\r":
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    tok = b""
                else:
                    # The escaped byte need not be valid UTF-8; never let the
                    # warning itself abort parsing.
                    msg = rf"Unexpected escaped string: {tok.decode('utf8', 'replace')}"
                    logger_warning(msg, __name__)
        txt.append(tok)
    return create_string_object(b"".join(txt), forced_encoding)
|
||||
|
||||
|
||||
def create_string_object(
    string: Union[str, bytes],
    forced_encoding: Union[None, str, List[str], Dict[int, str]] = None,
) -> Union[TextStringObject, ByteStringObject]:
    """
    Build the string object that represents ``string``.

    A ``str`` always becomes a TextStringObject. For ``bytes`` the result
    depends on ``forced_encoding``:

    * list or dict: each byte is looked up in it; bytes whose lookup fails
      are decoded with ``charmap``.
    * ``"bytes"``: the data is kept as a ByteStringObject.
    * any other str: the data is decoded with that codec.
    * otherwise: UTF-16 is used when the data starts with the UTF-16BE BOM,
      else PDFDocEncoding; if decoding fails a ByteStringObject is returned.

    :param Union[str, bytes] string: A string

    :raises TypeError: If string is not of type str or bytes.
    """
    if isinstance(string, str):
        return TextStringObject(string)
    if not isinstance(string, bytes):
        raise TypeError("create_string_object should have str or unicode arg")

    if isinstance(forced_encoding, (list, dict)):
        decoded = ""
        for byte_value in string:
            try:
                decoded += forced_encoding[byte_value]
            except Exception:
                decoded += bytes((byte_value,)).decode("charmap")
        return TextStringObject(decoded)

    if isinstance(forced_encoding, str):
        if forced_encoding == "bytes":
            return ByteStringObject(string)
        return TextStringObject(string.decode(forced_encoding))

    try:
        if string.startswith(codecs.BOM_UTF16_BE):
            text_obj = TextStringObject(string.decode("utf-16"))
            text_obj.autodetect_utf16 = True
            return text_obj
        # This is probably a big performance hit here, but we need to
        # convert string objects into the text/unicode-aware version if
        # possible... and the only way to check if that's possible is
        # to try. Some strings are strings, some are just byte arrays.
        text_obj = TextStringObject(decode_pdfdocencoding(string))
        text_obj.autodetect_pdfdocencoding = True
        return text_obj
    except UnicodeDecodeError:
        return ByteStringObject(string)
|
||||
|
||||
|
||||
def decode_pdfdocencoding(byte_array: bytes) -> str:
    """
    Decode ``byte_array`` using the ``_pdfdoc_encoding`` table.

    :raises UnicodeDecodeError: If a byte maps to ``"\\u0000"`` in the table,
        which is treated as "no character defined". The exception carries the
        offending byte as its ``object``.
    """
    chars = []
    for b in byte_array:
        c = _pdfdoc_encoding[b]
        if c == "\u0000":
            raise UnicodeDecodeError(
                "pdfdocencoding",
                # bytearray(b) with an int would create ``b`` zero bytes;
                # wrap the value so the exception holds the byte itself.
                bytearray((b,)),
                -1,
                -1,
                "does not exist in translation table",
            )
        chars.append(c)
    return "".join(chars)
|
||||
Reference in New Issue
Block a user