[go: nahoru, domu]

blob: 31c2ffefa51485d53ff0d5fde0ab046bc949898a [file] [log] [blame]
// Copyright 2010 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef PDF_PDFIUM_PDFIUM_PAGE_H_
6#define PDF_PDFIUM_PDFIUM_PAGE_H_
7
#include <stddef.h>
#include <stdint.h>

#include <map>
#include <optional>
#include <set>
#include <string>
#include <vector>

#include "base/functional/callback.h"
#include "base/functional/callback_forward.h"
#include "base/gtest_prod_util.h"
#include "base/memory/raw_ptr.h"
#include "pdf/page_orientation.h"
#include "pdf/pdf_engine.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_doc.h"
#include "third_party/pdfium/public/fpdf_formfill.h"
#include "third_party/pdfium/public/fpdf_text.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "ui/gfx/geometry/point_f.h"
#include "ui/gfx/geometry/rect.h"
jam@chromium.org1b1e9eff2014-05-20 01:56:4027
// Forward declarations of the gfx geometry types that this header only uses
// by reference or as return types in declarations.
namespace gfx {
class Point;
class RectF;
}  // namespace gfx
32
jam@chromium.org1b1e9eff2014-05-20 01:56:4033namespace chrome_pdf {
34
// Forward declarations for types that appear below only as pointers or in
// function signatures.
class PDFiumEngine;
class Thumbnail;
struct AccessibilityHighlightInfo;
struct AccessibilityImageInfo;
struct AccessibilityLinkInfo;
struct AccessibilityTextFieldInfo;
struct AccessibilityTextRunInfo;
jam@chromium.org1b1e9eff2014-05-20 01:56:4042
43// Wrapper around a page from the document.
44class PDFiumPage {
45 public:
Lei Zhange250e8eb2023-09-19 20:13:2846 class ScopedUnloadPreventer {
47 public:
48 explicit ScopedUnloadPreventer(PDFiumPage* page);
49 ScopedUnloadPreventer(const ScopedUnloadPreventer& that);
50 ScopedUnloadPreventer& operator=(const ScopedUnloadPreventer& that);
51 ~ScopedUnloadPreventer();
52
53 private:
54 raw_ptr<PDFiumPage> page_;
55 };
56
K Moonb15a6a02019-08-30 01:18:4057 PDFiumPage(PDFiumEngine* engine, int i);
Hui Yingsteb451e42020-08-11 18:41:2458 PDFiumPage(const PDFiumPage&) = delete;
59 PDFiumPage& operator=(const PDFiumPage&) = delete;
Tom Sepezb0048f12018-05-14 22:56:4760 PDFiumPage(PDFiumPage&& that);
jam@chromium.org1b1e9eff2014-05-20 01:56:4061 ~PDFiumPage();
thestigccb5fc8f2016-01-05 05:32:5162
jam@chromium.org1b1e9eff2014-05-20 01:56:4063 // Unloads the PDFium data for this page from memory.
64 void Unload();
65 // Gets the FPDF_PAGE for this page, loading and parsing it if necessary.
66 FPDF_PAGE GetPage();
jam@chromium.org1b1e9eff2014-05-20 01:56:4067
68 // Returns FPDF_TEXTPAGE for the page, loading and parsing it if necessary.
69 FPDF_TEXTPAGE GetTextPage();
70
Lei Zhangf2aefe22019-08-27 22:08:0271 // See definition of PDFEngine::GetTextRunInfo().
Arthur Sonzogni59ac8222023-11-10 09:46:5472 std::optional<AccessibilityTextRunInfo> GetTextRunInfo(int start_char_index);
Lei Zhangae5fbcd2021-11-19 02:15:5573
dmazzonic3547a32016-06-02 05:47:1574 // Get a unicode character from the page.
75 uint32_t GetCharUnicode(int char_index);
Lei Zhangae5fbcd2021-11-19 02:15:5576
dmazzonid48d9322016-06-13 19:37:4277 // Get the bounds of a character in page pixels.
Ankit Kumar 🌪️1f081d802020-09-18 19:29:0178 gfx::RectF GetCharBounds(int char_index);
Lei Zhangae5fbcd2021-11-19 02:15:5579
80 // Get the bounds of the page with the crop box applied, in page pixels.
81 gfx::RectF GetCroppedRect();
82
Andy Phanfef93ce2023-04-06 00:18:2583 // Get the bounding box of the page in page pixels. The bounding box is the
84 // largest rectangle containing all visible content in the effective crop box.
85 // If the bounding box can't be calculated, returns the effective crop box.
86 // The resulting bounding box is relative to the effective crop box.
87 gfx::RectF GetBoundingBox();
88
Lei Zhangcb83ff12022-07-11 22:12:1389 // Returns if the character at `char_index` is within `page_bounds`.
90 bool IsCharInPageBounds(int char_index, const gfx::RectF& page_bounds);
91
Lei Zhang99ce91952019-11-18 20:57:5692 // For all the links on the page, get their urls, underlying text ranges and
93 // bounding boxes.
Ankit Kumar 🌪️ab90ffc2021-02-12 10:41:3394 std::vector<AccessibilityLinkInfo> GetLinkInfo(
95 const std::vector<AccessibilityTextRunInfo>& text_runs);
Nektarios Paisios87429712022-03-15 06:10:5396 // For all the images on the page, get their alt texts and bounding boxes. If
97 // the alt text is empty or unavailable, and if the user has requested that
98 // the OCR service tag the PDF so that it is made accessible, transfer the raw
99 // image pixels in the `image_data` field. Otherwise do not populate the
100 // `image_data` field.
Ankit Kumar 🌪️76b1b722021-02-12 11:32:59101 std::vector<AccessibilityImageInfo> GetImageInfo(uint32_t text_run_count);
Lei Zhangae5fbcd2021-11-19 02:15:55102
Kyungjun Lee8209e2c2023-08-11 23:06:47103 // Returns the image as a 32-bit bitmap format for OCR.
104 SkBitmap GetImageForOcr(int page_object_index);
105
Kalpak Tapasd99d6762019-12-03 09:12:43106 // For all the highlights on the page, get their underlying text ranges and
107 // bounding boxes.
Ankit Kumar 🌪️d6469b102021-02-12 16:54:06108 std::vector<AccessibilityHighlightInfo> GetHighlightInfo(
109 const std::vector<AccessibilityTextRunInfo>& text_runs);
Lei Zhangae5fbcd2021-11-19 02:15:55110
Mansi Awasthi3297cfb2020-03-17 19:19:32111 // For all the text fields on the page, get their properties like name,
112 // value, bounding boxes, etc.
Ankit Kumar 🌪️371d77b2021-02-12 17:00:40113 std::vector<AccessibilityTextFieldInfo> GetTextFieldInfo(
114 uint32_t text_run_count);
dmazzonic3547a32016-06-02 05:47:15115
jam@chromium.org1b1e9eff2014-05-20 01:56:40116 enum Area {
117 NONSELECTABLE_AREA,
drgagec32fae262017-06-24 00:17:49118 TEXT_AREA, // Area contains regular, selectable text not
119 // within form fields.
120 WEBLINK_AREA, // Area is a hyperlink.
121 DOCLINK_AREA, // Area is a link to a different part of the same
122 // document.
123 FORM_TEXT_AREA, // Area is a form text field or form combobox text
124 // field.
jam@chromium.org1b1e9eff2014-05-20 01:56:40125 };
126
127 struct LinkTarget {
Henrique Nakashima9d9e0632017-10-06 21:38:18128 LinkTarget();
129 LinkTarget(const LinkTarget& other);
130 ~LinkTarget();
131
132 // Valid for WEBLINK_AREA only.
133 std::string url;
134
135 // Valid for DOCLINK_AREA only.
136 int page;
Ankit Kumar 🌪️51cc8c492019-09-24 19:10:39137 // Valid for DOCLINK_AREA only. From the top-left of the page.
Arthur Sonzogni59ac8222023-11-10 09:46:54138 std::optional<float> x_in_pixels;
139 std::optional<float> y_in_pixels;
Ankit Kumar 🌪️495acdd2019-10-04 03:44:43140 // Valid for DOCLINK_AREA only.
Arthur Sonzogni59ac8222023-11-10 09:46:54141 std::optional<float> zoom;
jam@chromium.org1b1e9eff2014-05-20 01:56:40142 };
143
Daniel Hosseiniane257d962021-04-23 21:18:35144 // Given a `link_index`, returns the type of underlying area and the link
145 // target. `target` must be valid. Returns NONSELECTABLE_AREA if
146 // `link_index` is invalid.
Mansi Awasthi6f4aa4a2019-09-07 05:34:07147 Area GetLinkTargetAtIndex(int link_index, LinkTarget* target);
148
Badhri Ravikumar670c4362020-05-14 19:29:55149 // Returns link type and fills target associated with a link. Returns
150 // NONSELECTABLE_AREA if link detection failed.
151 Area GetLinkTarget(FPDF_LINK link, LinkTarget* target);
152
Hui Yingst5c0a05852021-01-19 04:23:39153 // Fills the output params with the in-page coordinates and the zoom value of
154 // the destination.
Ankit Kumar 🌪️495acdd2019-10-04 03:44:43155 void GetPageDestinationTarget(FPDF_DEST destination,
Arthur Sonzogni59ac8222023-11-10 09:46:54156 std::optional<float>* dest_x,
157 std::optional<float>* dest_y,
158 std::optional<float>* zoom_value);
Henrique Nakashima97f071c2018-01-11 19:56:02159
Hui Yingsta7fe60e42021-01-22 19:36:19160 // For a named destination with "XYZ" view fit type, pre-processes the in-page
Lei Zhang2065dd02021-03-24 19:59:28161 // x/y coordinate in case it's out of the range of the page dimension. Then
162 // transform it to a screen coordinate.
163 float PreProcessAndTransformInPageCoordX(float x);
164 float PreProcessAndTransformInPageCoordY(float y);
Hui Yingsta7fe60e42021-01-22 19:36:19165
Henrique Nakashima97f071c2018-01-11 19:56:02166 // Transforms an (x, y) position in page coordinates to screen coordinates.
Henrique Nakashima1a49dbc2018-02-08 21:00:33167 gfx::PointF TransformPageToScreenXY(const gfx::PointF& xy);
Henrique Nakashima9d9e0632017-10-06 21:38:18168
Hui Yingst5c0a05852021-01-19 04:23:39169 // Transforms an in-page x coordinate to its value in screen coordinates.
170 float TransformPageToScreenX(float x);
171
172 // Transforms an in-page y coordinate to its value in screen coordinates.
173 float TransformPageToScreenY(float y);
174
jam@chromium.org1b1e9eff2014-05-20 01:56:40175 // Given a point in the document that's in this page, returns its character
176 // index if it's near a character, and also the type of text.
177 // Target is optional. It will be filled in for WEBLINK_AREA or
178 // DOCLINK_AREA only.
Ankit Kumar 🌪️aecc9f92020-08-18 19:11:22179 Area GetCharIndex(const gfx::Point& point,
K Moon9a62bf42019-08-07 20:05:36180 PageOrientation orientation,
thestig98913ba2017-04-21 19:03:25181 int* char_index,
182 int* form_type,
183 LinkTarget* target);
jam@chromium.org1b1e9eff2014-05-20 01:56:40184
drgagec32fae262017-06-24 00:17:49185 // Converts a form type to its corresponding Area.
186 static Area FormTypeToArea(int form_type);
187
jam@chromium.org1b1e9eff2014-05-20 01:56:40188 // Gets the character at the given index.
Jan Wilken Dörriea17b04b2021-03-09 20:51:01189 char16_t GetCharAtIndex(int index);
jam@chromium.org1b1e9eff2014-05-20 01:56:40190
191 // Gets the number of characters in the page.
192 int GetCharCount();
193
Daniel Hosseiniane257d962021-04-23 21:18:35194 // Returns true if the given `char_index` lies within the character range
Mansi Awasthiabbdd542019-12-21 11:17:54195 // of the page.
196 bool IsCharIndexInBounds(int char_index);
197
Pratish Kumare70329732019-07-22 18:07:08198 // Given a rectangle in page coordinates, computes the range of continuous
199 // characters which lie inside that rectangle. Returns false without
200 // modifying the out parameters if no character lies inside the rectangle.
Ankit Kumar 🌪️1f081d802020-09-18 19:29:01201 bool GetUnderlyingTextRangeForRect(const gfx::RectF& rect,
Pratish Kumare70329732019-07-22 18:07:08202 int* start_index,
Lei Zhangcf993012019-08-22 18:24:23203 int* char_len);
Pratish Kumare70329732019-07-22 18:07:08204
jam@chromium.org1b1e9eff2014-05-20 01:56:40205 // Converts from page coordinates to screen coordinates.
Ankit Kumar 🌪️8cd111c2020-08-22 00:02:46206 gfx::Rect PageToScreen(const gfx::Point& page_point,
207 double zoom,
208 double left,
209 double top,
210 double right,
211 double bottom,
212 PageOrientation orientation) const;
jam@chromium.org1b1e9eff2014-05-20 01:56:40213
Daniel Hosseinian6ed110f2020-09-25 21:43:43214 // Sets the callbacks for sending the thumbnail.
215 void RequestThumbnail(float device_pixel_ratio,
216 SendThumbnailCallback send_callback);
217
Daniel Hosseiniane257d962021-04-23 21:18:35218 // Generates a page thumbnail accommodating a specific `device_pixel_ratio`.
Daniel Hosseinian4222f88d2020-09-10 23:12:38219 Thumbnail GenerateThumbnail(float device_pixel_ratio);
220
jam@chromium.org1b1e9eff2014-05-20 01:56:40221 int index() const { return index_; }
K Moonb15a6a02019-08-30 01:18:40222
Ankit Kumar 🌪️8cd111c2020-08-22 00:02:46223 const gfx::Rect& rect() const { return rect_; }
224 void set_rect(const gfx::Rect& r) { rect_ = r; }
K Moonb15a6a02019-08-30 01:18:40225
226 // Availability is a one-way transition: A page can become available, but it
227 // cannot become unavailable (unless deleted entirely).
jam@chromium.org1b1e9eff2014-05-20 01:56:40228 bool available() const { return available_; }
Daniel Hosseinian6ed110f2020-09-25 21:43:43229 void MarkAvailable();
K Moonb15a6a02019-08-30 01:18:40230
jam@chromium.org1b1e9eff2014-05-20 01:56:40231 void set_calculated_links(bool calculated_links) {
thestig98913ba2017-04-21 19:03:25232 calculated_links_ = calculated_links;
jam@chromium.org1b1e9eff2014-05-20 01:56:40233 }
234
Tom Sepezb0048f12018-05-14 22:56:47235 FPDF_PAGE page() const { return page_.get(); }
236 FPDF_TEXTPAGE text_page() const { return text_page_.get(); }
237
jam@chromium.org1b1e9eff2014-05-20 01:56:40238 private:
Virender Singh5bee21042019-08-13 07:10:01239 friend class PDFiumPageLinkTest;
Pratish Kumar7e49da292019-08-21 19:10:30240 friend class PDFiumTestBase;
241
Lei Zhang72b2a8aa22021-11-13 00:58:37242 FRIEND_TEST_ALL_PREFIXES(PDFiumPageButtonTest, PopulateButtons);
243 FRIEND_TEST_ALL_PREFIXES(PDFiumPageChoiceFieldTest, PopulateChoiceFields);
244 FRIEND_TEST_ALL_PREFIXES(PDFiumPageHighlightTest, PopulateHighlights);
245 FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, CalculateImages);
246 FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageTest, ImageAltText);
Nektarios Paisios87429712022-03-15 06:10:53247 FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, ImageData);
Kyungjun Lee8209e2c2023-08-11 23:06:47248 FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, ImageDataForNonImage);
Ramin Halavati6e0174a2023-08-07 06:05:13249 FRIEND_TEST_ALL_PREFIXES(PDFiumPageImageDataTest, RotatedPageImageData);
Lei Zhang72b2a8aa22021-11-13 00:58:37250 FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, AnnotLinkGeneration);
251 FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetLinkTarget);
Lei Zhang96e99c32022-12-09 05:29:46252 FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, GetUTF8LinkTarget);
Lei Zhang72b2a8aa22021-11-13 00:58:37253 FRIEND_TEST_ALL_PREFIXES(PDFiumPageLinkTest, LinkGeneration);
Ankit Kumar9eafc3a2020-04-24 22:23:06254 FRIEND_TEST_ALL_PREFIXES(PDFiumPageOverlappingTest, CountCompleteOverlaps);
Hui Yingstb5147062021-07-23 00:18:29255 FRIEND_TEST_ALL_PREFIXES(PDFiumPageOverlappingTest, CountPartialOverlaps);
Lei Zhang72b2a8aa22021-11-13 00:58:37256 FRIEND_TEST_ALL_PREFIXES(PDFiumPageTextFieldTest, PopulateTextFields);
Virender Singh5bee21042019-08-13 07:10:01257
jam@chromium.org1b1e9eff2014-05-20 01:56:40258 struct Link {
259 Link();
thestigfa6edbc72016-08-23 08:07:00260 Link(const Link& that);
jam@chromium.org1b1e9eff2014-05-20 01:56:40261 ~Link();
262
Virender Singh5bee21042019-08-13 07:10:01263 // Represents start index of underlying text range. Should be -1 if the link
264 // is not over text.
265 int32_t start_char_index = -1;
266 // Represents the number of characters that the link overlaps with.
267 int32_t char_count = 0;
Ankit Kumar 🌪️8cd111c2020-08-22 00:02:46268 std::vector<gfx::Rect> bounding_rects;
Mansi Awasthi9f374ba12019-09-10 18:59:09269 LinkTarget target;
jam@chromium.org1b1e9eff2014-05-20 01:56:40270 };
271
Pratish Kumar7e49da292019-08-21 19:10:30272 // Represents an Image inside the page.
273 struct Image {
274 Image();
275 Image(const Image& other);
276 ~Image();
277
Kyungjun Lee8209e2c2023-08-11 23:06:47278 // Index of the object in its page.
Nektarios Paisios87429712022-03-15 06:10:53279 int page_object_index;
Kyungjun Lee8209e2c2023-08-11 23:06:47280
Nektarios Paisios87429712022-03-15 06:10:53281 // Alt text is available only for PDFs that are tagged for accessibility.
Pratish Kumar8f9d6d6a2019-08-29 03:26:29282 std::string alt_text;
Nektarios Paisios87429712022-03-15 06:10:53283 gfx::Rect bounding_rect;
Pratish Kumar7e49da292019-08-21 19:10:30284 };
285
Kalpak Tapas6eb2e8d2019-11-22 10:49:59286 // Represents a highlight within the page.
287 struct Highlight {
288 Highlight();
289 Highlight(const Highlight& other);
290 ~Highlight();
291
292 // Start index of underlying text range. -1 indicates invalid value.
293 int32_t start_char_index = -1;
294 // Number of characters encompassed by this highlight.
295 int32_t char_count = 0;
Ankit Kumar 🌪️8cd111c2020-08-22 00:02:46296 gfx::Rect bounding_rect;
Ankit Kumar 🌪️221befc2020-01-30 07:13:21297
298 // Color of the highlight in ARGB. Alpha is stored in the first 8 MSBs. RGB
299 // follows after it with each using 8 bytes.
300 uint32_t color;
Ankit Kumare390bd62020-05-22 09:13:33301
302 // Text of the popup note associated with highlight.
303 std::string note_text;
Kalpak Tapas6eb2e8d2019-11-22 10:49:59304 };
305
Mansi Awasthicfc83f82020-06-22 17:37:52306 // Represents a form field within the page.
307 struct FormField {
308 FormField();
309 FormField(const FormField& other);
310 ~FormField();
311
Ankit Kumar 🌪️8cd111c2020-08-22 00:02:46312 gfx::Rect bounding_rect;
Mansi Awasthicfc83f82020-06-22 17:37:52313 // Represents the name of form field as defined in the field dictionary.
314 std::string name;
315 // Represents the flags of form field as defined in the field dictionary.
316 int flags;
317 };
318
Mansi Awasthid6afb932020-02-27 13:02:39319 // Represents a text field within the page.
Mansi Awasthicfc83f82020-06-22 17:37:52320 struct TextField : FormField {
Mansi Awasthid6afb932020-02-27 13:02:39321 TextField();
322 TextField(const TextField& other);
323 ~TextField();
324
Mansi Awasthid6afb932020-02-27 13:02:39325 std::string value;
Mansi Awasthid6afb932020-02-27 13:02:39326 };
327
Mansi Awasthib62d1d12020-06-23 01:34:34328 // Represents a choice field option.
329 struct ChoiceFieldOption {
330 ChoiceFieldOption();
331 ChoiceFieldOption(const ChoiceFieldOption& other);
332 ~ChoiceFieldOption();
333
334 std::string name;
335 bool is_selected;
336 };
337
338 // Represents a choice field within the page.
339 struct ChoiceField : FormField {
340 ChoiceField();
341 ChoiceField(const ChoiceField& other);
342 ~ChoiceField();
343
344 std::vector<ChoiceFieldOption> options;
345 };
346
Mansi Awasthi98c39cd2020-07-01 15:15:36347 // Represents a button within the page.
348 struct Button : FormField {
349 Button();
350 Button(const Button& other);
351 ~Button();
352
353 std::string value;
354 // A button can be of type radio, checkbox or push button.
355 int type;
356 // Represents if the radio button or checkbox is checked.
357 bool is_checked = false;
358 // Represents count of controls in the control group. A group of
359 // interactive form annotations is collectively called a form control
360 // group. Here an interactive form annotation should be either a radio
361 // button or a checkbox.
362 uint32_t control_count = 0;
363 // Represents index of the control in the control group. A group of
364 // interactive form annotations is collectively called a form control
365 // group. Here an interactive form annotation should be either a radio
Daniel Hosseiniane257d962021-04-23 21:18:35366 // button or a checkbox. Value of `control_index` is -1 for push button.
Mansi Awasthi98c39cd2020-07-01 15:15:36367 int control_index = -1;
368 };
369
Mansi Awasthi0850d412020-06-22 16:59:43370 // Returns a link index if the given character index is over a link, or -1
371 // otherwise.
372 int GetLink(int char_index, LinkTarget* target);
373 // Calculate the locations of any links on the page.
374 void CalculateLinks();
375 // Populates weblinks on the page.
376 void PopulateWebLinks();
377 // Populates annotation links on the page.
378 void PopulateAnnotationLinks();
379 // Calculate the locations of images on the page.
380 void CalculateImages();
381 // Populate annotations like highlight and text field on the page.
382 void PopulateAnnotations();
Daniel Hosseiniane257d962021-04-23 21:18:35383 // Populate `highlights_` with `annot`.
Mansi Awasthi0850d412020-06-22 16:59:43384 void PopulateHighlight(FPDF_ANNOTATION annot);
Daniel Hosseiniane257d962021-04-23 21:18:35385 // Populate `text_fields_` with `annot`.
Mansi Awasthi0850d412020-06-22 16:59:43386 void PopulateTextField(FPDF_ANNOTATION annot);
Daniel Hosseiniane257d962021-04-23 21:18:35387 // Populate `choice_fields_` with `annot`.
Mansi Awasthib62d1d12020-06-23 01:34:34388 void PopulateChoiceField(FPDF_ANNOTATION annot);
Daniel Hosseiniane257d962021-04-23 21:18:35389 // Populate `buttons_` with `annot`.
Mansi Awasthi98c39cd2020-07-01 15:15:36390 void PopulateButton(FPDF_ANNOTATION annot);
391 // Populate form fields like text field, choice field and button on the page.
Mansi Awasthicfc83f82020-06-22 17:37:52392 void PopulateFormField(FPDF_ANNOTATION annot);
Mansi Awasthi0850d412020-06-22 16:59:43393 // Returns link type and fills target associated with a destination. Returns
394 // NONSELECTABLE_AREA if detection failed.
395 Area GetDestinationTarget(FPDF_DEST destination, LinkTarget* target);
396 // Returns link type and fills target associated with a URI action. Returns
397 // NONSELECTABLE_AREA if detection failed.
398 Area GetURITarget(FPDF_ACTION uri_action, LinkTarget* target) const;
399 // Calculates the set of character indices on which text runs need to be
400 // broken for page objects such as links and images.
401 void CalculatePageObjectTextRunBreaks();
Mansi Awasthi0850d412020-06-22 16:59:43402
403 // Key : Marked content id for the image element as specified in the
404 // struct tree.
Daniel Hosseiniane257d962021-04-23 21:18:35405 // Value : Index of image in the `images_` vector.
Mansi Awasthi0850d412020-06-22 16:59:43406 using MarkedContentIdToImageMap = std::map<int, size_t>;
407 // Traverses the entire struct tree of the page recursively and extracts the
408 // alt text from struct tree elements corresponding to the marked content IDs
Daniel Hosseiniane257d962021-04-23 21:18:35409 // present in `marked_content_id_image_map`.
Mansi Awasthi0850d412020-06-22 16:59:43410 void PopulateImageAltText(
411 const MarkedContentIdToImageMap& marked_content_id_image_map);
412 // Traverses a struct element and its sub-tree recursively and extracts the
413 // alt text from struct elements corresponding to the marked content IDs
Daniel Hosseiniane257d962021-04-23 21:18:35414 // present in `marked_content_id_image_map`. Uses `visited_elements` to guard
Mansi Awasthi0850d412020-06-22 16:59:43415 // against malformed struct trees.
416 void PopulateImageAltTextForStructElement(
417 const MarkedContentIdToImageMap& marked_content_id_image_map,
418 FPDF_STRUCTELEMENT current_element,
419 std::set<FPDF_STRUCTELEMENT>* visited_elements);
Mansi Awasthicfc83f82020-06-22 17:37:52420 bool PopulateFormFieldProperties(FPDF_ANNOTATION annot,
421 FormField* form_field);
Daniel Hosseiniane257d962021-04-23 21:18:35422 // Generates and sends the thumbnail using `send_callback`.
Daniel Hosseinian6ed110f2020-09-25 21:43:43423 void GenerateAndSendThumbnail(float device_pixel_ratio,
424 SendThumbnailCallback send_callback);
Ankit Kumar9eafc3a2020-04-24 22:23:06425
Keishi Hattori0e45c022021-11-27 09:25:52426 raw_ptr<PDFiumEngine> engine_;
Tom Sepezb0048f12018-05-14 22:56:47427 ScopedFPDFPage page_;
428 ScopedFPDFTextPage text_page_;
jam@chromium.org1b1e9eff2014-05-20 01:56:40429 int index_;
Artem Stryginfd53f2f2018-07-13 13:21:05430 int preventing_unload_count_ = 0;
Ankit Kumar 🌪️8cd111c2020-08-22 00:02:46431 gfx::Rect rect_;
Tom Sepezb0048f12018-05-14 22:56:47432 bool calculated_links_ = false;
jam@chromium.org1b1e9eff2014-05-20 01:56:40433 std::vector<Link> links_;
Pratish Kumar7e49da292019-08-21 19:10:30434 bool calculated_images_ = false;
435 std::vector<Image> images_;
Mansi Awasthid6afb932020-02-27 13:02:39436 bool calculated_annotations_ = false;
Kalpak Tapas6eb2e8d2019-11-22 10:49:59437 std::vector<Highlight> highlights_;
Mansi Awasthid6afb932020-02-27 13:02:39438 std::vector<TextField> text_fields_;
Mansi Awasthib62d1d12020-06-23 01:34:34439 std::vector<ChoiceField> choice_fields_;
Mansi Awasthi98c39cd2020-07-01 15:15:36440 std::vector<Button> buttons_;
Pratish Kumar860e8832019-08-29 00:13:56441 bool calculated_page_object_text_run_breaks_ = false;
442 // The set of character indices on which text runs need to be broken for page
443 // objects.
444 std::set<int> page_object_text_run_breaks_;
Daniel Hosseinian6ed110f2020-09-25 21:43:43445 base::OnceClosure thumbnail_callback_;
jam@chromium.org1b1e9eff2014-05-20 01:56:40446 bool available_;
jam@chromium.org1b1e9eff2014-05-20 01:56:40447};
448
K Moon9a62bf42019-08-07 20:05:36449// Converts page orientations to the PDFium equivalents, as defined by
450// FPDF_RenderPage().
Lei Zhang38b99bcf2022-02-17 20:27:55451constexpr int ToPDFiumRotation(PageOrientation orientation) {
452 // Could use static_cast<int>(orientation), but using an exhaustive switch
453 // will trigger an error if we ever change the definition of
454 // `PageOrientation`.
455 switch (orientation) {
456 case PageOrientation::kOriginal:
457 return 0;
458 case PageOrientation::kClockwise90:
459 return 1;
460 case PageOrientation::kClockwise180:
461 return 2;
462 case PageOrientation::kClockwise270:
463 return 3;
464 }
465}
K Moon9a62bf42019-08-07 20:05:36466
// Packs four color channels into a single 32-bit ARGB value: `a` occupies
// bits 31-24, `r` bits 23-16, `g` bits 15-8 and `b` bits 7-0.
//
// The channels are shifted and OR-ed together without masking, so each
// argument is expected to be in the range [0, 255]; larger values would spill
// into the neighbouring channel.
constexpr uint32_t MakeARGB(unsigned int a,
                            unsigned int r,
                            unsigned int g,
                            unsigned int b) {
  return (a << 24) | (r << 16) | (g << 8) | b;
}
473
jam@chromium.org1b1e9eff2014-05-20 01:56:40474} // namespace chrome_pdf
475
476#endif // PDF_PDFIUM_PDFIUM_PAGE_H_