#include <cpage.h>
Public Types | |
| typedef CPageFonts::FontList | FontList |
| typedef CPageAnnots::Annotations | Annotations |
| typedef observer::BasicChangeContext < CPage > | BasicObserverContext |
| typedef std::vector < boost::shared_ptr < ICPageModule > > | Modules |
Public Member Functions | |
| CPage (boost::shared_ptr< CDict > &pageDict) | |
| ~CPage () | |
| bool | operator== (const CPage &page) |
| void | invalidate () |
| boost::shared_ptr< CDict > | getDictionary () const |
| size_t | getPagePosition () const |
| template<typename T > | |
| void | getAllAnnotations (T &container) const |
| void | addAnnotation (boost::shared_ptr< CAnnotation > annot) |
| bool | delAnnotation (boost::shared_ptr< CAnnotation > annot) |
| boost::shared_ptr< CContentStream > | getContentStream (CContentStream *cc) |
| template<typename Container > | |
| void | getContentStreams (Container &container) |
| template<typename OpContainer > | |
| void | getObjectsAtPosition (OpContainer &opContainer, const libs::Rectangle &rc) |
| template<typename OpContainer > | |
| void | getObjectsAtPosition (OpContainer &opContainer, const Point &pt) |
| template<typename OpContainer , typename PositionComparator > | |
| void | getObjectsAtPosition (OpContainer &opContainer, PositionComparator cmp) |
| template<typename WordEngine , typename LineEngine , typename ColumnEngine > | |
| void | convert (textoutput::OutputBuilder &out) |
| void | getFontIdsAndNames (FontList &cont) const |
| std::string | addSystemType1Font (const std::string &fontname, bool winansienc=true) |
| int | getRotation () const |
| void | setRotation (int rot) |
| libs::Rectangle | getMediabox () const |
| void | setMediabox (const libs::Rectangle &rc) |
| void | setTransformMatrix (double tm[6]) |
| void | setDisplayParams (const DisplayParams &dp) |
| void | displayPage (::OutputDev &out, const DisplayParams &params, int x=-1, int y=-1, int w=-1, int h=-1) |
| void | displayPage (::OutputDev &out, int x=-1, int y=-1, int w=-1, int h=-1) |
| void | displayPage (::OutputDev &out, boost::shared_ptr< CDict > dict=boost::shared_ptr< CDict >(), int x=-1, int y=-1, int w=-1, int h=-1) const |
| template<typename Container > | |
| void | addContentStreamToFront (const Container &cont) |
| template<typename Container > | |
| void | addContentStreamToBack (const Container &cont) |
| void | removeContentStream (size_t csnum) |
| void | getText (std::string &text, const std::string *encoding=NULL, const libs::Rectangle *rc=NULL) const |
| template<typename RectangleContainer > | |
| size_t | findText (std::string text, RectangleContainer &recs, const TextSearchParams &params=TextSearchParams()) const |
| void | moveAbove (boost::shared_ptr< const CContentStream > ct) |
| void | moveAbove (size_t pos) |
| void | moveBelow (boost::shared_ptr< const CContentStream > ct) |
| void | moveBelow (size_t pos) |
| boost::shared_ptr< CContentStream > | getChange (size_t nthchange=0) const |
| template<typename Container > | |
| void | getChanges (Container &cont) const |
| size_t | getChangeCount () const |
| template<typename Container > | |
| void | displayChange (::OutputDev &out, const Container &cont) const |
| void | displayChange (::OutputDev &out, const std::vector< size_t > cs) const |
| void | replaceText (const std::string &what, const std::string &with) |
| void | addText (const std::string &what, const libs::Point &where, const std::string &font_id) |
| void | addInlineImage (const CStream::Buffer &what, const libs::Point &dim, const libs::Point &where) |
| bool | isValid () const |
Private Member Functions | |
| boost::shared_ptr< CPageContents > | contents () const |
| boost::shared_ptr< CPageDisplay > | display () const |
| boost::shared_ptr< CPageFonts > | fonts () const |
| boost::shared_ptr< CPageChanges > | changes () const |
| boost::shared_ptr< CPageAnnots > | annotations () const |
| void | _objectChanged (bool invalid=false) |
| bool | _check_validity (const char *err=NULL) const |
Private Attributes | |
| boost::shared_ptr< CDict > | _dict |
| bool | _valid |
| Modules | _modules |
| boost::shared_ptr< CPageContents > | _contents |
| boost::shared_ptr< CPageDisplay > | _display |
| boost::shared_ptr< CPageFonts > | _fonts |
| boost::shared_ptr< CPageChanges > | _changes |
| boost::shared_ptr< CPageAnnots > | _annots |
Friends | |
| class | CPageContents |
| class | CPageDisplay |
| class | CPageFonts |
| class | CPageChanges |
This object represents page object from pdf specification v1.5. Pdf page object is a dictionary reachable from page tree structure with several required properties. It is responsible just for one single page.
Every pdf page contains all information required for displaying the page (e.g. page metrics, page contents etc.). Page properties can be inherited from its parent in the page tree. The first value encountered during page tree traversal is used. This feature can cause problems because it is not well defined what it means to change a property that is inherited (it is not present in the page dictionary but in a parent).
We display a page using xpdf code. The argument to this function is an output device which can draw graphical objects. The contents of a page is specified by a "Contents" entry in the page dictionary. If empty the page is blank.
CPage is a subject that can be observed. This is important when a change leads to content stream reparsing (e.g. deleting an entry from "Contents" property in the page dictionary)
Content stream consists of a sequence of operators which should be processed sequentially. The operators define what is really on a page. The pdf specification is too general about pdf operators and that is why working with operators is difficult. According to the pdf specification, text is split neither into sentences nor into words. Letters of a word can occur randomly in the content stream because the position of a letter (text) is absolute. (e.g. it is very likely that a word "humor" will be split into "hu" "m" "or" because of the "m" being wider than other letters.) This makes searching and exporting page text a problem. We use xpdf code to perform both actions. Xpdf parses a page into lines and words with a rough approach in which several letters are treated as one word when they are close enough. This algorithm works fine for normal pdf files, but if the pdf creator wanted to prevent text exporting, it could produce such a sequence of pdf operators that hardly any program could export the text correctly.
Pdf operators are in one or more streams. The problem with this approach is that these operators can be split into streams at almost arbitrary places.
Processing pdf operators can be very expensive so they are parsed only on demand. Each operator can be placed in a bounding box. These bounding boxes are used when searching the page for a text, selecting objects, drawing the page.
Each page content stream is a selfcontained entity that can not use resources defined in another page. It can use only inherited resources from a parent in the page tree. Which means we can not simply change fonts on a page to match another page, use images from another page etc.
Type of page observer context.
| typedef std::vector<boost::shared_ptr<ICPageModule> > pdfobjects::CPage::Modules |
| pdfobjects::CPage::CPage | ( | boost::shared_ptr< CDict > & | pageDict | ) |
Constructor.
| pageDict | Dictionary representing pdf page. |
References _annots, _changes, _contents, _dict, _display, _fonts, _modules, pdfobjects::Specification::Page::CONTENTS, CPageChanges, CPageContents, CPageDisplay, CPageFonts, debug::DBG_DBG, kernelPrintDbg, and pdfobjects::CPageAttributes::setInheritable().
| pdfobjects::CPage::~CPage | ( | ) |
Destructor.
References _annots, _changes, _contents, _dict, _display, _fonts, _modules, debug::DBG_INFO, invalidate(), and kernelPrintDbg.
| bool pdfobjects::CPage::_check_validity | ( | const char * | err = NULL |
) | const [inline, private] |
References _dict, _valid, debug::DBG_ERR, pdfobjects::hasValidPdf(), pdfobjects::hasValidRef(), and kernelPrintDbg.
Referenced by addInlineImage(), addText(), getObjectsAtPosition(), and replaceText().
| void pdfobjects::CPage::_objectChanged | ( | bool | invalid = false |
) | [private] |
References _dict, pdfobjects::hasValidPdf(), pdfobjects::hasValidRef(), and observer::ObserverHandler< CPage >::notifyObservers().
Referenced by pdfobjects::CPageContents::change(), and invalidate().
| void pdfobjects::CPage::addAnnotation | ( | boost::shared_ptr< CAnnotation > | annot | ) | [inline] |
Adds new annotation to this page.
Inserts deep copy of given annotation and stores its reference to Annots array in page dictionary (if this doesn't exist, it is created). User has to call getAllAnnotations to get current annotations state (we don't have identifier for annotations - there are some mechanisms how to do it according to the pdf specification, but there is no explicit identifier).
Given annotation may come from different CPdf or may belong to nowhere.
As a result _annotations is updated. New indirect object representing annotation dictionary is added to same pdf (dictionary is same as given one except P field is updated to contain correct reference to this page).
Note that this page must belong to pdf and has to have valid indirect reference. This is necessary because the annotation is an indirect object and the page keeps a reference to it. Reference without pdf doesn't make sense.
| annot | Annotation to add. |
| CObjInvalidObject | if this page doesn't have valid pdf or indirect reference. | |
| ElementBadTypeException | if Annots field from page dictionary is not an array (or reference with array indirect target). |
References _annots.
| void pdfobjects::CPage::addContentStreamToBack | ( | const Container & | cont | ) | [inline] |
Add new content stream to the back. This function adds new entry in the "Contents" property of a page. The container of provided operators must form a valid contentstream. This function should be used when supplied operators should be handled at the end e.g. should be drawn at the end which means they will appear "above" other objects.
This function can be used to separate our changes from original content stream.
| cont | Container of operators to add. |
References _contents.
| void pdfobjects::CPage::addContentStreamToFront | ( | const Container & | cont | ) | [inline] |
Add new content stream to the front. This function adds new entry in the "Contents" property of a page. The container of provided operators must form a valid contentstream. This function should be used when supplied operators should be handled at the beginning e.g. should be drawn first which means they will appear "below" other objects.
This function can be used to separate our changes from original content stream.
| cont | Container of operators to add. |
References _contents.
| void pdfobjects::CPage::addInlineImage | ( | const CStream::Buffer & | what, | |
| const libs::Point & | dim, | |||
| const libs::Point & | where | |||
| ) | [inline] |
References _check_validity(), and _contents.
| std::string pdfobjects::CPage::addSystemType1Font | ( | const std::string & | fontname, | |
| bool | winansienc = true | |||
| ) |
Add new simple type 1 font item to the page resource dictionary.
The id of this font is arbitrary but it has to be unique. It will be generated as PDFEDIT_F#, where # is the lowest free number so that name is unique.
We assume that the font name is a standard system font available to all viewers.
| fontname | Name of the font to add. | |
| winansienc | Set encoding to standard WinAnsiEncoding. |
References _fonts.
| void pdfobjects::CPage::addText | ( | const std::string & | what, | |
| const libs::Point & | where, | |||
| const std::string & | font_id | |||
| ) | [inline] |
Adds text to specified position.
References _check_validity(), and _contents.
| boost::shared_ptr<CPageAnnots> pdfobjects::CPage::annotations | ( | ) | const [inline, private] |
Returns the annotation module.
References _annots.
| boost::shared_ptr<CPageChanges> pdfobjects::CPage::changes | ( | ) | const [inline, private] |
Returns the changes module.
References _changes.
| boost::shared_ptr<CPageContents> pdfobjects::CPage::contents | ( | ) | const [inline, private] |
Returns the contents module.
References _contents.
Referenced by pdfobjects::CPageChanges::displayChange(), pdfobjects::CPageChanges::getChanges(), pdfobjects::CPageContents::moveAbove(), pdfobjects::CPageContents::moveBelow(), pdfobjects::CPageDisplay::setDisplayParams(), and pdfobjects::CPageDisplay::setTransformMatrix().
| void pdfobjects::CPage::convert | ( | textoutput::OutputBuilder & | out | ) | [inline] |
Get text source of a page.
References _contents.
| bool pdfobjects::CPage::delAnnotation | ( | boost::shared_ptr< CAnnotation > | annot | ) | [inline] |
Removes given annotation from page.
Tries to find given annotation and if found, removes it.
As a result, the removed annotation is invalidated and not accessible. User has to call getAllAnnotations method to get current state (the same way as in addAnnotation case).
| annot | Annotation to remove. |
References _annots.
| boost::shared_ptr<CPageDisplay> pdfobjects::CPage::display | ( | ) | const [inline, private] |
Returns the display module.
References _display.
Referenced by pdfobjects::CPageContents::_xpdf_display_params(), pdfobjects::CPageFonts::addSystemType1Font(), pdfobjects::CPageChanges::displayChange(), and pdfobjects::CPageContents::findText().
| void pdfobjects::CPage::displayChange | ( | ::OutputDev & | out, | |
| const std::vector< size_t > | cs | |||
| ) | const [inline] |
References _changes.
| void pdfobjects::CPage::displayChange | ( | ::OutputDev & | out, | |
| const Container & | cont | |||
| ) | const [inline] |
Draw nth change on an output device with last used display parameters.
| out | Output device. | |
| cont | Container of content streams to display |
References _changes.
| void pdfobjects::CPage::displayPage | ( | ::OutputDev & | out, | |
| boost::shared_ptr< CDict > | dict = boost::shared_ptr<CDict> (), |
|||
| int | x = -1, |
|||
| int | y = -1, |
|||
| int | w = -1, |
|||
| int | h = -1 | |||
| ) | const |
Draw page on an output device with last used display parameters.
| out | Output device. | |
| dict | If not null, page is created from dict otherwise this page dictionary is used. But still some information is gathered from this page dictionary. |
References _display.
| void pdfobjects::CPage::displayPage | ( | ::OutputDev & | out, | |
| int | x = -1, |
|||
| int | y = -1, |
|||
| int | w = -1, |
|||
| int | h = -1 | |||
| ) |
Draw page on an output device. Use old display params.
References _display.
| void pdfobjects::CPage::displayPage | ( | ::OutputDev & | out, | |
| const DisplayParams & | params, | |||
| int | x = -1, |
|||
| int | y = -1, |
|||
| int | w = -1, |
|||
| int | h = -1 | |||
| ) |
Draw page on an output device.
We use xpdf code to draw a page. It uses insane global parameters and many local parameters.
| out | Output device. | |
| params | Display parameters. |
References _display.
| size_t pdfobjects::CPage::findText | ( | std::string | text, | |
| RectangleContainer & | recs, | |||
| const TextSearchParams & | params = TextSearchParams() | |||
| ) | const [inline] |
Find all occurrences of a text on this page.
It uses xpdf TextOutputDevice to get the bounding box of found text.
| text | Text to find. | |
| recs | Output container of rectangles of all occurrences of the text. | |
| params | Search parameters. |
References _contents.
| boost::shared_ptr<CPageFonts> pdfobjects::CPage::fonts | ( | ) | const [inline, private] |
Returns the fonts module.
References _fonts.
| void pdfobjects::CPage::getAllAnnotations | ( | T & | container | ) | const [inline] |
Fills given container with all page's annotations.
Copies _annotations content to given container (which is cleared at first).
Given container must support clear and insert operations and store shared_ptr<CAnnotation> elements.
| container | Container which is filled in. |
References _annots.
| boost::shared_ptr<CContentStream> pdfobjects::CPage::getChange | ( | size_t | nthchange = 0 |
) | const [inline] |
Get n-th change. Higher change means older change.
References _changes.
| size_t pdfobjects::CPage::getChangeCount | ( | ) | const [inline] |
Get count of our changes.
References _changes.
| void pdfobjects::CPage::getChanges | ( | Container & | cont | ) | const [inline] |
Get our changes sorted. The first change is the last change. If there are no changes container is empty.
References _changes.
| boost::shared_ptr<CContentStream> pdfobjects::CPage::getContentStream | ( | CContentStream * | cc | ) | [inline] |
Returns shared pointer to the specified content stream.
References _contents.
| void pdfobjects::CPage::getContentStreams | ( | Container & | container | ) | [inline] |
Fills container with contents streams.
References _contents.
Referenced by pdfobjects::CPageFonts::addSystemType1Font().
| boost::shared_ptr<CDict> pdfobjects::CPage::getDictionary | ( | ) | const [inline] |
Returns the dictionary representing this object.
References _dict.
Referenced by pdfobjects::CPageAnnots::add(), pdfobjects::CPageFonts::addSystemType1Font(), pdfobjects::CPageAnnots::CPageAnnots(), pdfobjects::CPageContents::CPageContents(), pdfobjects::CPageAnnots::del(), pdfobjects::CPageChanges::displayChange(), pdfobjects::CPageDisplay::displayPage(), pdfobjects::CPageFonts::getFontIdsAndNames(), pdfobjects::CPageDisplay::getMediabox(), pdfobjects::CPageDisplay::getRotation(), pdfobjects::CPageDisplay::setMediabox(), pdfobjects::CPageDisplay::setRotation(), and pdfobjects::CPageAnnots::unreg_observers().
| void pdfobjects::CPage::getFontIdsAndNames | ( | FontList & | cont | ) | const |
Get all font ids and base names that are in the resource dictionary of a page.
The resource can be inherited from a parent in the page tree dictionary. Base names should be human readable or at least standard system fonts defined in the pdf specification. We must choose from these items to make a font change valid. Otherwise, we have to add standard system font or manually a font object.
| cont | Output container of font id and basename pairs (FontList container type should be preferred). |
References _fonts.
| libs::Rectangle pdfobjects::CPage::getMediabox | ( | ) | const |
Return media box of this page.
It is a required item in page dictionary (spec p.119) but can be inherited from a parent in the page tree.
References _display.
Referenced by pdfobjects::CPageDisplay::setDisplayParams().
| void pdfobjects::CPage::getObjectsAtPosition | ( | OpContainer & | opContainer, | |
| PositionComparator | cmp | |||
| ) | [inline] |
Get pdf operators at specified position. This call will be delegated to content stream object.
| opContainer | Operator container where operators in the specified area will be stored. | |
| cmp | Null if default kernel area comparator should be used otherwise points to an object which will decide whether an operator is "near" a point. |
References _check_validity(), and _contents.
| void pdfobjects::CPage::getObjectsAtPosition | ( | OpContainer & | opContainer, | |
| const Point & | pt | |||
| ) | [inline] |
Get pdf operators at position specified by point.
References getObjectsAtPosition().
Referenced by getObjectsAtPosition().
| void pdfobjects::CPage::getObjectsAtPosition | ( | OpContainer & | opContainer, | |
| const libs::Rectangle & | rc | |||
| ) | [inline] |
Get pdf operators at position specified by rectangle.
References getObjectsAtPosition().
Referenced by getObjectsAtPosition().
| size_t pdfobjects::CPage::getPagePosition | ( | ) | const |
Returns page position.
References _dict, and pdfobjects::hasValidPdf().
Referenced by pdfobjects::CPageContents::_page_pos().
| int pdfobjects::CPage::getRotation | ( | ) | const |
Returns rotation in degrees.
References _display.
| void pdfobjects::CPage::getText | ( | std::string & | text, | |
| const std::string * | encoding = NULL, |
|||
| const libs::Rectangle * | rc = NULL | |||
| ) | const [inline] |
Returns plain text extracted from a page using xpdf code.
| text | Output string where the text will be saved. | |
| encoding | Encoding format. | |
| rc | Rectangle from which to extract the text. |
References _contents.
| void pdfobjects::CPage::invalidate | ( | ) |
Inform all observers that this page is not valid.
References _objectChanged(), and _valid.
Referenced by ~CPage().
| bool pdfobjects::CPage::isValid | ( | ) | const [inline] |
Returns page object valid flag value.
References _valid.
| void pdfobjects::CPage::moveAbove | ( | size_t | pos | ) | [inline] |
References _contents.
| void pdfobjects::CPage::moveAbove | ( | boost::shared_ptr< const CContentStream > | ct | ) | [inline] |
Move contentstream up one level. Which means it will be repainted by fewer objects.
References _contents.
| void pdfobjects::CPage::moveBelow | ( | size_t | pos | ) | [inline] |
References _contents.
| void pdfobjects::CPage::moveBelow | ( | boost::shared_ptr< const CContentStream > | ct | ) | [inline] |
Move contentstream below one level. Which means it will be repainted by more objects.
References _contents.
| bool pdfobjects::CPage::operator== | ( | const CPage & | page | ) | [inline] |
Equality operator.
| page | Another page object. |
| void pdfobjects::CPage::removeContentStream | ( | size_t | csnum | ) | [inline] |
Remove content stream. This function removes all objects from "Contents" entry which form specified contentstream.
| csnum | Number of content stream to remove. |
References _contents.
| void pdfobjects::CPage::replaceText | ( | const std::string & | what, | |
| const std::string & | with | |||
| ) | [inline] |
Replaces text in the whole page.
References _check_validity(), and _contents.
| void pdfobjects::CPage::setDisplayParams | ( | const DisplayParams & | dp | ) |
Set display params.
References _display.
| void pdfobjects::CPage::setMediabox | ( | const libs::Rectangle & | rc | ) |
Sets media box of this page.
References _display.
| void pdfobjects::CPage::setRotation | ( | int | rot | ) |
Sets rotation in degrees.
References _display.
| void pdfobjects::CPage::setTransformMatrix | ( | double | tm[6] | ) |
Set transform matrix of a page. This operator will be preceding first cm operator (see pdf specification), if not found it will be the first operator.
| tm | Six numbers representing transform matrix. |
References _display.
friend class CPageChanges [friend] |
Referenced by CPage().
friend class CPageContents [friend] |
Referenced by CPage().
friend class CPageDisplay [friend] |
Referenced by CPage().
friend class CPageFonts [friend] |
Referenced by CPage().
boost::shared_ptr<CPageAnnots> pdfobjects::CPage::_annots [private] |
Object managing annotations.
Referenced by addAnnotation(), annotations(), CPage(), delAnnotation(), getAllAnnotations(), and ~CPage().
boost::shared_ptr<CPageChanges> pdfobjects::CPage::_changes [private] |
Object managing changes.
Referenced by changes(), CPage(), displayChange(), getChange(), getChangeCount(), getChanges(), and ~CPage().
boost::shared_ptr<CPageContents> pdfobjects::CPage::_contents [private] |
Object managing Contents entry.
Referenced by addContentStreamToBack(), addContentStreamToFront(), addInlineImage(), addText(), contents(), convert(), CPage(), findText(), getContentStream(), getContentStreams(), getObjectsAtPosition(), getText(), moveAbove(), moveBelow(), removeContentStream(), replaceText(), and ~CPage().
boost::shared_ptr<CDict> pdfobjects::CPage::_dict [private] |
Pdf dictionary representing a page.
Referenced by _check_validity(), _objectChanged(), CPage(), getDictionary(), getPagePosition(), and ~CPage().
boost::shared_ptr<CPageDisplay> pdfobjects::CPage::_display [private] |
Object managing page display.
Referenced by CPage(), display(), displayPage(), getMediabox(), getRotation(), setDisplayParams(), setMediabox(), setRotation(), setTransformMatrix(), and ~CPage().
boost::shared_ptr<CPageFonts> pdfobjects::CPage::_fonts [private] |
Object managing page fonts.
Referenced by addSystemType1Font(), CPage(), fonts(), getFontIdsAndNames(), and ~CPage().
Modules pdfobjects::CPage::_modules [private] |
bool pdfobjects::CPage::_valid [private] |
Is page valid. Modifications of page object are not allowed and no observers can be registered on this object if the flag is false.
Referenced by _check_validity(), invalidate(), and isValid().