// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
| 4 | |
brettw@chromium.org | a3f72189 | 2013-02-07 03:59:06 | [diff] [blame] | 5 | #ifndef BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ |
| 6 | #define BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ |
pkasting@chromium.org | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 7 | |
avi | 84f37e1 | 2015-12-25 09:31:42 | [diff] [blame] | 8 | #include <stddef.h> |
| 9 | |
pkasting@chromium.org | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 10 | #include <string> |
mrossetti@chromium.org | 421de2a | 2011-04-13 18:43:05 | [diff] [blame] | 11 | #include <vector> |
pkasting@chromium.org | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 12 | |
darin@chromium.org | 0bea725 | 2011-08-05 15:34:00 | [diff] [blame] | 13 | #include "base/base_export.h" |
avi@chromium.org | c851cfd | 2013-06-10 20:11:14 | [diff] [blame] | 14 | #include "base/strings/string16.h" |
tfarina@chromium.org | eb62f726 | 2013-03-30 14:29:00 | [diff] [blame] | 15 | #include "base/strings/string_piece.h" |
pkasting@chromium.org | b9f9383 | 2009-11-13 19:27:48 | [diff] [blame] | 16 | |
brettw@chromium.org | a3f72189 | 2013-02-07 03:59:06 | [diff] [blame] | 17 | namespace base { |
| 18 | |
mpearson@chromium.org | a97376e | 2014-04-18 20:54:44 | [diff] [blame] | 19 | // A helper class and associated data structures to adjust offsets into a |
| 20 | // string in response to various adjustments one might do to that string |
| 21 | // (e.g., eliminating a range). For details on offsets, see the comments by |
| 22 | // the AdjustOffsets() function below. |
| 23 | class BASE_EXPORT OffsetAdjuster { |
| 24 | public: |
| 25 | struct BASE_EXPORT Adjustment { |
| 26 | Adjustment(size_t original_offset, |
| 27 | size_t original_length, |
| 28 | size_t output_length); |
| 29 | |
| 30 | size_t original_offset; |
| 31 | size_t original_length; |
| 32 | size_t output_length; |
| 33 | }; |
| 34 | typedef std::vector<Adjustment> Adjustments; |
| 35 | |
| 36 | // Adjusts all offsets in |offsets_for_adjustment| to reflect the adjustments |
tommycli | 7a4241c | 2017-07-13 01:37:32 | [diff] [blame] | 37 | // recorded in |adjustments|. Adjusted offsets greater than |limit| will be |
| 38 | // set to string16::npos. |
mpearson@chromium.org | a97376e | 2014-04-18 20:54:44 | [diff] [blame] | 39 | // |
| 40 | // Offsets represents insertion/selection points between characters: if |src| |
| 41 | // is "abcd", then 0 is before 'a', 2 is between 'b' and 'c', and 4 is at the |
| 42 | // end of the string. Valid input offsets range from 0 to |src_len|. On |
| 43 | // exit, each offset will have been modified to point at the same logical |
| 44 | // position in the output string. If an offset cannot be successfully |
| 45 | // adjusted (e.g., because it points into the middle of a multibyte sequence), |
| 46 | // it will be set to string16::npos. |
| 47 | static void AdjustOffsets(const Adjustments& adjustments, |
tommycli | 7a4241c | 2017-07-13 01:37:32 | [diff] [blame] | 48 | std::vector<size_t>* offsets_for_adjustment, |
| 49 | size_t limit = string16::npos); |
mpearson@chromium.org | a97376e | 2014-04-18 20:54:44 | [diff] [blame] | 50 | |
| 51 | // Adjusts the single |offset| to reflect the adjustments recorded in |
| 52 | // |adjustments|. |
| 53 | static void AdjustOffset(const Adjustments& adjustments, |
tommycli | 7a4241c | 2017-07-13 01:37:32 | [diff] [blame] | 54 | size_t* offset, |
| 55 | size_t limit = string16::npos); |
mpearson@chromium.org | a97376e | 2014-04-18 20:54:44 | [diff] [blame] | 56 | |
mpearson@chromium.org | 529d4b5 | 2014-04-28 19:37:23 | [diff] [blame] | 57 | // Adjusts all offsets in |offsets_for_unadjustment| to reflect the reverse |
| 58 | // of the adjustments recorded in |adjustments|. In other words, the offsets |
| 59 | // provided represent offsets into an adjusted string and the caller wants |
| 60 | // to know the offsets they correspond to in the original string. If an |
| 61 | // offset cannot be successfully unadjusted (e.g., because it points into |
| 62 | // the middle of a multibyte sequence), it will be set to string16::npos. |
| 63 | static void UnadjustOffsets(const Adjustments& adjustments, |
| 64 | std::vector<size_t>* offsets_for_unadjustment); |
| 65 | |
| 66 | // Adjusts the single |offset| to reflect the reverse of the adjustments |
| 67 | // recorded in |adjustments|. |
| 68 | static void UnadjustOffset(const Adjustments& adjustments, |
| 69 | size_t* offset); |
| 70 | |
mpearson@chromium.org | a97376e | 2014-04-18 20:54:44 | [diff] [blame] | 71 | // Combines two sequential sets of adjustments, storing the combined revised |
| 72 | // adjustments in |adjustments_on_adjusted_string|. That is, suppose a |
| 73 | // string was altered in some way, with the alterations recorded as |
| 74 | // adjustments in |first_adjustments|. Then suppose the resulting string is |
| 75 | // further altered, with the alterations recorded as adjustments scored in |
| 76 | // |adjustments_on_adjusted_string|, with the offsets recorded in these |
| 77 | // adjustments being with respect to the intermediate string. This function |
| 78 | // combines the two sets of adjustments into one, storing the result in |
| 79 | // |adjustments_on_adjusted_string|, whose offsets are correct with respect |
| 80 | // to the original string. |
| 81 | // |
| 82 | // Assumes both parameters are sorted by increasing offset. |
| 83 | // |
| 84 | // WARNING: Only supports |first_adjustments| that involve collapsing ranges |
| 85 | // of text, not expanding ranges. |
| 86 | static void MergeSequentialAdjustments( |
| 87 | const Adjustments& first_adjustments, |
| 88 | Adjustments* adjustments_on_adjusted_string); |
| 89 | }; |
| 90 | |
| 91 | // Like the conversions in utf_string_conversions.h, but also fills in an |
| 92 | // |adjustments| parameter that reflects the alterations done to the string. |
| 93 | // It may be NULL. |
| 94 | BASE_EXPORT bool UTF8ToUTF16WithAdjustments( |
mrossetti@chromium.org | 421de2a | 2011-04-13 18:43:05 | [diff] [blame] | 95 | const char* src, |
| 96 | size_t src_len, |
pkasting@chromium.org | 04866c4 | 2011-05-03 20:03:50 | [diff] [blame] | 97 | string16* output, |
mpearson@chromium.org | a97376e | 2014-04-18 20:54:44 | [diff] [blame] | 98 | base::OffsetAdjuster::Adjustments* adjustments); |
| 99 | BASE_EXPORT string16 UTF8ToUTF16WithAdjustments( |
| 100 | const base::StringPiece& utf8, |
| 101 | base::OffsetAdjuster::Adjustments* adjustments); |
| 102 | // As above, but instead internally examines the adjustments and applies them |
tommycli | 7a4241c | 2017-07-13 01:37:32 | [diff] [blame] | 103 | // to |offsets_for_adjustment|. Input offsets greater than the length of the |
| 104 | // input string will be set to string16::npos. See comments by AdjustOffsets(). |
darin@chromium.org | 0bea725 | 2011-08-05 15:34:00 | [diff] [blame] | 105 | BASE_EXPORT string16 UTF8ToUTF16AndAdjustOffsets( |
mrossetti@chromium.org | 421de2a | 2011-04-13 18:43:05 | [diff] [blame] | 106 | const base::StringPiece& utf8, |
| 107 | std::vector<size_t>* offsets_for_adjustment); |
kinaba@chromium.org | cbf35e17 | 2011-09-08 02:18:10 | [diff] [blame] | 108 | BASE_EXPORT std::string UTF16ToUTF8AndAdjustOffsets( |
| 109 | const base::StringPiece16& utf16, |
| 110 | std::vector<size_t>* offsets_for_adjustment); |
| 111 | |
brettw@chromium.org | a3f72189 | 2013-02-07 03:59:06 | [diff] [blame] | 112 | } // namespace base |
| 113 | |
| 114 | #endif // BASE_STRINGS_UTF_OFFSET_STRING_CONVERSIONS_H_ |