1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117

pdf / accessibility.cc [blame]

// Copyright 2019 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "pdf/accessibility.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "base/check_op.h"
#include "base/numerics/safe_math.h"
#include "pdf/accessibility_helper.h"
#include "pdf/accessibility_structs.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "ui/gfx/geometry/rect_f.h"

namespace chrome_pdf {

namespace {

AccessibilityFormFieldInfo GetAccessibilityFormFieldInfo(
    PDFiumPage* page,
    uint32_t text_run_count) {
  AccessibilityFormFieldInfo form_field_info;
  form_field_info.text_fields = page->GetTextFieldInfo(text_run_count);
  return form_field_info;
}

}  // namespace

void GetAccessibilityInfo(PDFiumEngine* engine,
                          int32_t page_index,
                          AccessibilityPageInfo& page_info,
                          std::vector<AccessibilityTextRunInfo>& text_runs,
                          std::vector<AccessibilityCharInfo>& chars,
                          AccessibilityPageObjects& page_objects) {
  PDFiumPage* page = engine->GetPage(page_index);
  CHECK(page);

  const int raw_char_count = page->GetCharCount();

  // Treat a char count of -1 (error) as 0 (an empty page), since
  // other pages might have valid content.
  const uint32_t char_count = std::max<uint32_t>(raw_char_count, 0);

  page_info.page_index = page_index;
  page_info.bounds = page->rect();
  page_info.char_count = char_count;
  page_info.is_searchified = page->IsPageSearchified();

  chars.resize(page_info.char_count);
  for (uint32_t i = 0; i < char_count; ++i) {
    chars[i].unicode_character = page->GetCharUnicode(i);
  }

  uint32_t char_index = 0;
  while (char_index < char_count) {
    std::optional<AccessibilityTextRunInfo> text_run_info_result =
        page->GetTextRunInfo(char_index);
    CHECK(text_run_info_result.has_value());
    const auto& text_run_info = text_run_info_result.value();
    uint32_t text_run_end = char_index + text_run_info.len;
    CHECK_LE(text_run_end, char_count);
    text_runs.push_back(text_run_info);

    // We need to provide enough information to draw a bounding box
    // around any arbitrary text range, but the bounding boxes of characters
    // we get from PDFium don't necessarily "line up".
    // Example for LTR text direction: walk through the
    // characters in each text run and let the width of each character be
    // the difference between the x coordinate of one character and the
    // x coordinate of the next. The rest of the bounds of each character
    // can be computed from the bounds of the text run.
    // The same idea is used for RTL, TTB and BTT text direction.
    gfx::RectF char_bounds = page->GetCharBounds(char_index);
    for (uint32_t i = char_index; i < text_run_end - 1; i++) {
      CHECK_LT(i + 1, char_count);
      gfx::RectF next_char_bounds = page->GetCharBounds(i + 1);
      double& char_width = chars[i].char_width;
      switch (text_run_info.direction) {
        case AccessibilityTextDirection::kNone:
        case AccessibilityTextDirection::kLeftToRight:
          char_width = next_char_bounds.x() - char_bounds.x();
          break;
        case AccessibilityTextDirection::kTopToBottom:
          char_width = next_char_bounds.y() - char_bounds.y();
          break;
        case AccessibilityTextDirection::kRightToLeft:
          char_width = char_bounds.right() - next_char_bounds.right();
          break;
        case AccessibilityTextDirection::kBottomToTop:
          char_width = char_bounds.bottom() - next_char_bounds.bottom();
          break;
      }
      char_bounds = next_char_bounds;
    }
    double& char_width = chars[text_run_end - 1].char_width;
    if (text_run_info.direction == AccessibilityTextDirection::kBottomToTop ||
        text_run_info.direction == AccessibilityTextDirection::kTopToBottom) {
      char_width = char_bounds.height();
    } else {
      char_width = char_bounds.width();
    }

    char_index += text_run_info.len;
  }

  page_info.text_run_count = text_runs.size();
  page_objects.links = page->GetLinkInfo(text_runs);
  page_objects.images = page->GetImageInfo(page_info.text_run_count);
  page_objects.highlights = page->GetHighlightInfo(text_runs);
  page_objects.form_fields =
      GetAccessibilityFormFieldInfo(page, page_info.text_run_count);
}

}  // namespace chrome_pdf