// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_
#define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_

#include <stddef.h>

#include <string>
#include <vector>

#include "base/base_export.h"
#include "base/strings/string16.h"
#include "base/strings/string_piece.h"

namespace base {

mpearson@chromium.orga97376e2014-04-18 20:54:4419// A helper class and associated data structures to adjust offsets into a
20// string in response to various adjustments one might do to that string
21// (e.g., eliminating a range). For details on offsets, see the comments by
22// the AdjustOffsets() function below.
23class BASE_EXPORT OffsetAdjuster {
24 public:
25 struct BASE_EXPORT Adjustment {
26 Adjustment(size_t original_offset,
27 size_t original_length,
28 size_t output_length);
29
30 size_t original_offset;
31 size_t original_length;
32 size_t output_length;
33 };
34 typedef std::vector<Adjustment> Adjustments;
35
36 // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments
tommycli7a4241c2017-07-13 01:37:3237 // recorded in |adjustments|. Adjusted offsets greater than |limit| will be
38 // set to string16::npos.
mpearson@chromium.orga97376e2014-04-18 20:54:4439 //
40 // Offsets represents insertion/selection points between characters: if |src|
41 // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the
42 // end of the string. Valid input offsets range from 0 to |src_len|. On
43 // exit, each offset will have been modified to point at the same logical
44 // position in the output string. If an offset cannot be successfully
45 // adjusted (e.g., because it points into the middle of a multibyte sequence),
46 // it will be set to string16::npos.
47 static void AdjustOffsets(const Adjustments& adjustments,
tommycli7a4241c2017-07-13 01:37:3248 std::vector<size_t>* offsets_for_adjustment,
49 size_t limit = string16::npos);
mpearson@chromium.orga97376e2014-04-18 20:54:4450
51 // Adjusts the single |offset| to reflect the adjustments recorded in
52 // |adjustments|.
53 static void AdjustOffset(const Adjustments& adjustments,
tommycli7a4241c2017-07-13 01:37:3254 size_t* offset,
55 size_t limit = string16::npos);
mpearson@chromium.orga97376e2014-04-18 20:54:4456
mpearson@chromium.org529d4b52014-04-28 19:37:2357 // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse
58 // of the adjustments recorded in |adjustments|. In other words, the offsets
59 // provided represent offsets into an adjusted string and the caller wants
60 // to know the offsets they correspond to in the original string. If an
61 // offset cannot be successfully unadjusted (e.g., because it points into
62 // the middle of a multibyte sequence), it will be set to string16::npos.
63 static void UnadjustOffsets(const Adjustments& adjustments,
64 std::vector<size_t>* offsets_for_unadjustment);
65
66 // Adjusts the single |offset| to reflect the reverse of the adjustments
67 // recorded in |adjustments|.
68 static void UnadjustOffset(const Adjustments& adjustments,
69 size_t* offset);
70
mpearson@chromium.orga97376e2014-04-18 20:54:4471 // Combines two sequential sets of adjustments, storing the combined revised
72 // adjustments in |adjustments_on_adjusted_string|. That is, suppose a
73 // string was altered in some way, with the alterations recorded as
74 // adjustments in |first_adjustments|. Then suppose the resulting string is
75 // further altered, with the alterations recorded as adjustments scored in
76 // |adjustments_on_adjusted_string|, with the offsets recorded in these
77 // adjustments being with respect to the intermediate string. This function
78 // combines the two sets of adjustments into one, storing the result in
79 // |adjustments_on_adjusted_string|, whose offsets are correct with respect
80 // to the original string.
81 //
82 // Assumes both parameters are sorted by increasing offset.
83 //
84 // WARNING: Only supports |first_adjustments| that involve collapsing ranges
85 // of text, not expanding ranges.
86 static void MergeSequentialAdjustments(
87 const Adjustments& first_adjustments,
88 Adjustments* adjustments_on_adjusted_string);
89};
90
91// Like the conversions in utf_string_conversions.h, but also fills in an
92// |adjustments| parameter that reflects the alterations done to the string.
93// It may be NULL.
94BASE_EXPORT bool UTF8ToUTF16WithAdjustments(
mrossetti@chromium.org421de2a2011-04-13 18:43:0595 const char* src,
96 size_t src_len,
pkasting@chromium.org04866c42011-05-03 20:03:5097 string16* output,
mpearson@chromium.orga97376e2014-04-18 20:54:4498 base::OffsetAdjuster::Adjustments* adjustments);
99BASE_EXPORT string16 UTF8ToUTF16WithAdjustments(
100 const base::StringPiece& utf8,
101 base::OffsetAdjuster::Adjustments* adjustments);
102// As above, but instead internally examines the adjustments and applies them
tommycli7a4241c2017-07-13 01:37:32103// to |offsets_for_adjustment|. Input offsets greater than the length of the
104// input string will be set to string16::npos. See comments by AdjustOffsets().
darin@chromium.org0bea7252011-08-05 15:34:00105BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets(
mrossetti@chromium.org421de2a2011-04-13 18:43:05106 const base::StringPiece& utf8,
107 std::vector<size_t>* offsets_for_adjustment);
kinaba@chromium.orgcbf35e172011-09-08 02:18:10108BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets(
109 const base::StringPiece16& utf16,
110 std::vector<size_t>* offsets_for_adjustment);

}  // namespace base

#endif  // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_