// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
brettw@chromium.orge7bba5f2013-04-10 20:10:524
tfarina@chromium.org318076b2013-04-18 21:19:455#include "url/url_canon_ip.h"
brettw@chromium.orge7bba5f2013-04-10 20:10:526
tfarina65d26e02015-05-11 08:22:417#include <stdint.h>
brettw@chromium.orge7bba5f2013-04-10 20:10:528#include <stdlib.h>
Matt Menkef21b7242021-08-25 20:43:249
tfarina5595f3b2015-05-07 22:06:3210#include <limits>
brettw@chromium.orge7bba5f2013-04-10 20:10:5211
Hans Wennborg0e223682020-04-27 21:51:2912#include "base/check.h"
tfarina@chromium.org318076b2013-04-18 21:19:4513#include "url/url_canon_internal.h"
Jiacheng Guoed519e442023-02-17 05:46:2314#include "url/url_features.h"
brettw@chromium.orge7bba5f2013-04-10 20:10:5215
vitalybuka@chromium.org0318f922014-04-22 00:09:2316namespace url {
brettw@chromium.orge7bba5f2013-04-10 20:10:5217
18namespace {
19
20// Converts one of the character types that represent a numerical base to the
21// corresponding base.
22int BaseForType(SharedCharTypes type) {
23 switch (type) {
24 case CHAR_HEX:
25 return 16;
26 case CHAR_DEC:
27 return 10;
28 case CHAR_OCT:
29 return 8;
30 default:
31 return 0;
32 }
33}
34
brettw@chromium.orge7bba5f2013-04-10 20:10:5235// Converts an IPv4 component to a 32-bit number, while checking for overflow.
36//
37// Possible return values:
38// - IPV4 - The number was valid, and did not overflow.
39// - BROKEN - The input was numeric, but too large for a 32-bit field.
40// - NEUTRAL - Input was not numeric.
41//
Matt Menkef21b7242021-08-25 20:43:2442// The input is assumed to be ASCII. The components are assumed to be non-empty.
brettw@chromium.orge7bba5f2013-04-10 20:10:5243template<typename CHAR>
vitalybuka@chromium.org0318f922014-04-22 00:09:2344CanonHostInfo::Family IPv4ComponentToNumber(const CHAR* spec,
45 const Component& component,
tfarina65d26e02015-05-11 08:22:4146 uint32_t* number) {
Matt Menkef21b7242021-08-25 20:43:2447 // Empty components are considered non-numeric.
Tom Sepez40fbf43e2022-11-15 00:11:0348 if (component.is_empty())
Matt Menkef21b7242021-08-25 20:43:2449 return CanonHostInfo::NEUTRAL;
50
brettw@chromium.orge7bba5f2013-04-10 20:10:5251 // Figure out the base
52 SharedCharTypes base;
53 int base_prefix_len = 0; // Size of the prefix for this base.
54 if (spec[component.begin] == '0') {
55 // Either hex or dec, or a standalone zero.
56 if (component.len == 1) {
57 base = CHAR_DEC;
58 } else if (spec[component.begin + 1] == 'X' ||
59 spec[component.begin + 1] == 'x') {
60 base = CHAR_HEX;
61 base_prefix_len = 2;
62 } else {
63 base = CHAR_OCT;
64 base_prefix_len = 1;
65 }
66 } else {
67 base = CHAR_DEC;
68 }
69
70 // Extend the prefix to consume all leading zeros.
71 while (base_prefix_len < component.len &&
72 spec[component.begin + base_prefix_len] == '0')
73 base_prefix_len++;
74
75 // Put the component, minus any base prefix, into a NULL-terminated buffer so
qyearsley2bc727d2015-08-14 20:17:1576 // we can call the standard library. Because leading zeros have already been
brettw@chromium.orge7bba5f2013-04-10 20:10:5277 // discarded, filling the entire buffer is guaranteed to trigger the 32-bit
78 // overflow check.
79 const int kMaxComponentLen = 16;
80 char buf[kMaxComponentLen + 1]; // digits + '\0'
81 int dest_i = 0;
Matt Menkef21b7242021-08-25 20:43:2482 bool may_be_broken_octal = false;
brettw@chromium.orge7bba5f2013-04-10 20:10:5283 for (int i = component.begin + base_prefix_len; i < component.end(); i++) {
Matt Menkef21b7242021-08-25 20:43:2484 if (spec[i] >= 0x80)
85 return CanonHostInfo::NEUTRAL;
86
brettw@chromium.orge7bba5f2013-04-10 20:10:5287 // We know the input is 7-bit, so convert to narrow (if this is the wide
88 // version of the template) by casting.
89 char input = static_cast<char>(spec[i]);
90
91 // Validate that this character is OK for the given base.
Matt Menkef21b7242021-08-25 20:43:2492 if (!IsCharOfType(input, base)) {
93 if (IsCharOfType(input, CHAR_DEC)) {
94 // Entirely numeric components with leading 0s that aren't octal are
95 // considered broken.
96 may_be_broken_octal = true;
97 } else {
98 return CanonHostInfo::NEUTRAL;
99 }
100 }
brettw@chromium.orge7bba5f2013-04-10 20:10:52101
qyearsley2bc727d2015-08-14 20:17:15102 // Fill the buffer, if there's space remaining. This check allows us to
brettw@chromium.orge7bba5f2013-04-10 20:10:52103 // verify that all characters are numeric, even those that don't fit.
104 if (dest_i < kMaxComponentLen)
105 buf[dest_i++] = input;
106 }
107
Matt Menkef21b7242021-08-25 20:43:24108 if (may_be_broken_octal)
109 return CanonHostInfo::BROKEN;
110
brettw@chromium.orge7bba5f2013-04-10 20:10:52111 buf[dest_i] = '\0';
112
113 // Use the 64-bit strtoi so we get a big number (no hex, decimal, or octal
114 // number can overflow a 64-bit number in <= 16 characters).
tfarina65d26e02015-05-11 08:22:41115 uint64_t num = _strtoui64(buf, NULL, BaseForType(base));
brettw@chromium.orge7bba5f2013-04-10 20:10:52116
117 // Check for 32-bit overflow.
tfarina5595f3b2015-05-07 22:06:32118 if (num > std::numeric_limits<uint32_t>::max())
brettw@chromium.orge7bba5f2013-04-10 20:10:52119 return CanonHostInfo::BROKEN;
120
qyearsley2bc727d2015-08-14 20:17:15121 // No overflow. Success!
tfarina65d26e02015-05-11 08:22:41122 *number = static_cast<uint32_t>(num);
brettw@chromium.orge7bba5f2013-04-10 20:10:52123 return CanonHostInfo::IPV4;
124}
125
126// See declaration of IPv4AddressToNumber for documentation.
Matt Menkef21b7242021-08-25 20:43:24127template <typename CHAR, typename UCHAR>
brettw@chromium.orge7bba5f2013-04-10 20:10:52128CanonHostInfo::Family DoIPv4AddressToNumber(const CHAR* spec,
Matt Menkef21b7242021-08-25 20:43:24129 Component host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52130 unsigned char address[4],
131 int* num_ipv4_components) {
Matt Menkef21b7242021-08-25 20:43:24132 // Ignore terminal dot, if present.
133 if (host.is_nonempty() && spec[host.end() - 1] == '.')
134 --host.len;
135
136 // Do nothing if empty.
Tom Sepez40fbf43e2022-11-15 00:11:03137 if (host.is_empty())
brettw@chromium.orge7bba5f2013-04-10 20:10:52138 return CanonHostInfo::NEUTRAL;
139
Matt Menkef21b7242021-08-25 20:43:24140 // Read component values. The first `existing_components` of them are
141 // populated front to back, with the first one corresponding to the last
142 // component, which allows for early exit if the last component isn't a
143 // number.
tfarina65d26e02015-05-11 08:22:41144 uint32_t component_values[4];
brettw@chromium.orge7bba5f2013-04-10 20:10:52145 int existing_components = 0;
146
Matt Menkef21b7242021-08-25 20:43:24147 int current_component_end = host.end();
148 int current_position = current_component_end;
149 while (true) {
150 // If this is not the first character of a component, go to the next
151 // component.
152 if (current_position != host.begin && spec[current_position - 1] != '.') {
153 --current_position;
brettw@chromium.orge7bba5f2013-04-10 20:10:52154 continue;
brettw@chromium.orge7bba5f2013-04-10 20:10:52155 }
156
Matt Menkef21b7242021-08-25 20:43:24157 CanonHostInfo::Family family = IPv4ComponentToNumber(
158 spec,
159 Component(current_position, current_component_end - current_position),
160 &component_values[existing_components]);
161
162 // If `family` is NEUTRAL and this is the last component, return NEUTRAL. If
163 // `family` is NEUTRAL but not the last component, this is considered a
164 // BROKEN IPv4 address, as opposed to a non-IPv4 hostname.
165 if (family == CanonHostInfo::NEUTRAL && existing_components == 0)
166 return CanonHostInfo::NEUTRAL;
167
168 if (family != CanonHostInfo::IPV4)
169 return CanonHostInfo::BROKEN;
170
171 ++existing_components;
172
173 // If this is the final component, nothing else to do.
174 if (current_position == host.begin)
175 break;
176
177 // If there are more than 4 components, fail.
178 if (existing_components == 4)
179 return CanonHostInfo::BROKEN;
180
181 current_component_end = current_position - 1;
182 --current_position;
brettw@chromium.orge7bba5f2013-04-10 20:10:52183 }
184
Matt Menkef21b7242021-08-25 20:43:24185 // Use `component_values` to fill out the 4-component IP address.
brettw@chromium.orge7bba5f2013-04-10 20:10:52186
187 // First, process all components but the last, while making sure each fits
188 // within an 8-bit field.
Matt Menkef21b7242021-08-25 20:43:24189 for (int i = existing_components - 1; i > 0; i--) {
tfarina5595f3b2015-05-07 22:06:32190 if (component_values[i] > std::numeric_limits<uint8_t>::max())
brettw@chromium.orge7bba5f2013-04-10 20:10:52191 return CanonHostInfo::BROKEN;
Matt Menkef21b7242021-08-25 20:43:24192 address[existing_components - i - 1] =
193 static_cast<unsigned char>(component_values[i]);
brettw@chromium.orge7bba5f2013-04-10 20:10:52194 }
195
Matt Menkef21b7242021-08-25 20:43:24196 uint32_t last_value = component_values[0];
brettw@chromium.orge7bba5f2013-04-10 20:10:52197 for (int i = 3; i >= existing_components - 1; i--) {
198 address[i] = static_cast<unsigned char>(last_value);
199 last_value >>= 8;
200 }
201
202 // If the last component has residual bits, report overflow.
203 if (last_value != 0)
204 return CanonHostInfo::BROKEN;
205
206 // Tell the caller how many components we saw.
207 *num_ipv4_components = existing_components;
208
209 // Success!
210 return CanonHostInfo::IPV4;
211}
212
213// Return true if we've made a final IPV4/BROKEN decision, false if the result
214// is NEUTRAL, and we could use a second opinion.
215template<typename CHAR, typename UCHAR>
216bool DoCanonicalizeIPv4Address(const CHAR* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23217 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52218 CanonOutput* output,
219 CanonHostInfo* host_info) {
220 host_info->family = IPv4AddressToNumber(
221 spec, host, host_info->address, &host_info->num_ipv4_components);
222
223 switch (host_info->family) {
224 case CanonHostInfo::IPV4:
225 // Definitely an IPv4 address.
226 host_info->out_host.begin = output->length();
227 AppendIPv4Address(host_info->address, output);
228 host_info->out_host.len = output->length() - host_info->out_host.begin;
229 return true;
230 case CanonHostInfo::BROKEN:
231 // Definitely broken.
232 return true;
233 default:
234 // Could be IPv6 or a hostname.
235 return false;
236 }
237}
238
239// Helper class that describes the main components of an IPv6 input string.
240// See the following examples to understand how it breaks up an input string:
241//
242// [Example 1]: input = "[::aa:bb]"
243// ==> num_hex_components = 2
244// ==> hex_components[0] = Component(3,2) "aa"
245// ==> hex_components[1] = Component(6,2) "bb"
246// ==> index_of_contraction = 0
247// ==> ipv4_component = Component(0, -1)
248//
249// [Example 2]: input = "[1:2::3:4:5]"
250// ==> num_hex_components = 5
251// ==> hex_components[0] = Component(1,1) "1"
252// ==> hex_components[1] = Component(3,1) "2"
253// ==> hex_components[2] = Component(6,1) "3"
254// ==> hex_components[3] = Component(8,1) "4"
255// ==> hex_components[4] = Component(10,1) "5"
256// ==> index_of_contraction = 2
257// ==> ipv4_component = Component(0, -1)
258//
259// [Example 3]: input = "[::ffff:192.168.0.1]"
260// ==> num_hex_components = 1
261// ==> hex_components[0] = Component(3,4) "ffff"
262// ==> index_of_contraction = 0
263// ==> ipv4_component = Component(8, 11) "192.168.0.1"
264//
265// [Example 4]: input = "[1::]"
266// ==> num_hex_components = 1
267// ==> hex_components[0] = Component(1,1) "1"
268// ==> index_of_contraction = 1
269// ==> ipv4_component = Component(0, -1)
270//
271// [Example 5]: input = "[::192.168.0.1]"
272// ==> num_hex_components = 0
273// ==> index_of_contraction = 0
274// ==> ipv4_component = Component(8, 11) "192.168.0.1"
275//
276struct IPv6Parsed {
277 // Zero-out the parse information.
278 void reset() {
279 num_hex_components = 0;
280 index_of_contraction = -1;
281 ipv4_component.reset();
282 }
283
284 // There can be up to 8 hex components (colon separated) in the literal.
vitalybuka@chromium.org0318f922014-04-22 00:09:23285 Component hex_components[8];
brettw@chromium.orge7bba5f2013-04-10 20:10:52286
287 // The count of hex components present. Ranges from [0,8].
288 int num_hex_components;
289
290 // The index of the hex component that the "::" contraction precedes, or
291 // -1 if there is no contraction.
292 int index_of_contraction;
293
294 // The range of characters which are an IPv4 literal.
vitalybuka@chromium.org0318f922014-04-22 00:09:23295 Component ipv4_component;
brettw@chromium.orge7bba5f2013-04-10 20:10:52296};
297
298// Parse the IPv6 input string. If parsing succeeded returns true and fills
299// |parsed| with the information. If parsing failed (because the input is
300// invalid) returns false.
301template<typename CHAR, typename UCHAR>
vitalybuka@chromium.org0318f922014-04-22 00:09:23302bool DoParseIPv6(const CHAR* spec, const Component& host, IPv6Parsed* parsed) {
brettw@chromium.orge7bba5f2013-04-10 20:10:52303 // Zero-out the info.
304 parsed->reset();
305
Tom Sepez40fbf43e2022-11-15 00:11:03306 if (host.is_empty())
brettw@chromium.orge7bba5f2013-04-10 20:10:52307 return false;
308
309 // The index for start and end of address range (no brackets).
310 int begin = host.begin;
311 int end = host.end();
312
313 int cur_component_begin = begin; // Start of the current component.
314
315 // Scan through the input, searching for hex components, "::" contractions,
316 // and IPv4 components.
317 for (int i = begin; /* i <= end */; i++) {
318 bool is_colon = spec[i] == ':';
319 bool is_contraction = is_colon && i < end - 1 && spec[i + 1] == ':';
320
321 // We reached the end of the current component if we encounter a colon
322 // (separator between hex components, or start of a contraction), or end of
323 // input.
324 if (is_colon || i == end) {
325 int component_len = i - cur_component_begin;
326
327 // A component should not have more than 4 hex digits.
328 if (component_len > 4)
329 return false;
330
331 // Don't allow empty components.
332 if (component_len == 0) {
333 // The exception is when contractions appear at beginning of the
334 // input or at the end of the input.
335 if (!((is_contraction && i == begin) || (i == end &&
336 parsed->index_of_contraction == parsed->num_hex_components)))
337 return false;
338 }
339
340 // Add the hex component we just found to running list.
341 if (component_len > 0) {
342 // Can't have more than 8 components!
343 if (parsed->num_hex_components >= 8)
344 return false;
345
346 parsed->hex_components[parsed->num_hex_components++] =
vitalybuka@chromium.org0318f922014-04-22 00:09:23347 Component(cur_component_begin, component_len);
brettw@chromium.orge7bba5f2013-04-10 20:10:52348 }
349 }
350
351 if (i == end)
352 break; // Reached the end of the input, DONE.
353
354 // We found a "::" contraction.
355 if (is_contraction) {
356 // There can be at most one contraction in the literal.
357 if (parsed->index_of_contraction != -1)
358 return false;
359 parsed->index_of_contraction = parsed->num_hex_components;
360 ++i; // Consume the colon we peeked.
361 }
362
363 if (is_colon) {
364 // Colons are separators between components, keep track of where the
365 // current component started (after this colon).
366 cur_component_begin = i + 1;
367 } else {
368 if (static_cast<UCHAR>(spec[i]) >= 0x80)
369 return false; // Not ASCII.
370
371 if (!IsHexChar(static_cast<unsigned char>(spec[i]))) {
372 // Regular components are hex numbers. It is also possible for
373 // a component to be an IPv4 address in dotted form.
374 if (IsIPv4Char(static_cast<unsigned char>(spec[i]))) {
375 // Since IPv4 address can only appear at the end, assume the rest
376 // of the string is an IPv4 address. (We will parse this separately
377 // later).
vitalybuka@chromium.org0318f922014-04-22 00:09:23378 parsed->ipv4_component =
379 Component(cur_component_begin, end - cur_component_begin);
brettw@chromium.orge7bba5f2013-04-10 20:10:52380 break;
381 } else {
382 // The character was neither a hex digit, nor an IPv4 character.
383 return false;
384 }
385 }
386 }
387 }
388
389 return true;
390}
391
392// Verifies the parsed IPv6 information, checking that the various components
393// add up to the right number of bits (hex components are 16 bits, while
394// embedded IPv4 formats are 32 bits, and contractions are placeholdes for
395// 16 or more bits). Returns true if sizes match up, false otherwise. On
396// success writes the length of the contraction (if any) to
397// |out_num_bytes_of_contraction|.
398bool CheckIPv6ComponentsSize(const IPv6Parsed& parsed,
399 int* out_num_bytes_of_contraction) {
400 // Each group of four hex digits contributes 16 bits.
401 int num_bytes_without_contraction = parsed.num_hex_components * 2;
402
403 // If an IPv4 address was embedded at the end, it contributes 32 bits.
404 if (parsed.ipv4_component.is_valid())
405 num_bytes_without_contraction += 4;
406
407 // If there was a "::" contraction, its size is going to be:
408 // MAX([16bits], [128bits] - num_bytes_without_contraction).
409 int num_bytes_of_contraction = 0;
410 if (parsed.index_of_contraction != -1) {
411 num_bytes_of_contraction = 16 - num_bytes_without_contraction;
412 if (num_bytes_of_contraction < 2)
413 num_bytes_of_contraction = 2;
414 }
415
416 // Check that the numbers add up.
417 if (num_bytes_without_contraction + num_bytes_of_contraction != 16)
418 return false;
419
420 *out_num_bytes_of_contraction = num_bytes_of_contraction;
421 return true;
422}
423
qyearsley2bc727d2015-08-14 20:17:15424// Converts a hex component into a number. This cannot fail since the caller has
brettw@chromium.orge7bba5f2013-04-10 20:10:52425// already verified that each character in the string was a hex digit, and
426// that there were no more than 4 characters.
tfarina65d26e02015-05-11 08:22:41427template <typename CHAR>
428uint16_t IPv6HexComponentToNumber(const CHAR* spec,
429 const Component& component) {
brettw@chromium.orge7bba5f2013-04-10 20:10:52430 DCHECK(component.len <= 4);
431
432 // Copy the hex string into a C-string.
433 char buf[5];
434 for (int i = 0; i < component.len; ++i)
435 buf[i] = static_cast<char>(spec[component.begin + i]);
436 buf[component.len] = '\0';
437
438 // Convert it to a number (overflow is not possible, since with 4 hex
439 // characters we can at most have a 16 bit number).
tfarina65d26e02015-05-11 08:22:41440 return static_cast<uint16_t>(_strtoui64(buf, NULL, 16));
brettw@chromium.orge7bba5f2013-04-10 20:10:52441}
442
443// Converts an IPv6 address to a 128-bit number (network byte order), returning
444// true on success. False means that the input was not a valid IPv6 address.
445template<typename CHAR, typename UCHAR>
446bool DoIPv6AddressToNumber(const CHAR* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23447 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52448 unsigned char address[16]) {
449 // Make sure the component is bounded by '[' and ']'.
450 int end = host.end();
Tom Sepez40fbf43e2022-11-15 00:11:03451 if (host.is_empty() || spec[host.begin] != '[' || spec[end - 1] != ']')
brettw@chromium.orge7bba5f2013-04-10 20:10:52452 return false;
453
454 // Exclude the square brackets.
vitalybuka@chromium.org0318f922014-04-22 00:09:23455 Component ipv6_comp(host.begin + 1, host.len - 2);
brettw@chromium.orge7bba5f2013-04-10 20:10:52456
457 // Parse the IPv6 address -- identify where all the colon separated hex
458 // components are, the "::" contraction, and the embedded IPv4 address.
459 IPv6Parsed ipv6_parsed;
460 if (!DoParseIPv6<CHAR, UCHAR>(spec, ipv6_comp, &ipv6_parsed))
461 return false;
462
463 // Do some basic size checks to make sure that the address doesn't
464 // specify more than 128 bits or fewer than 128 bits. This also resolves
465 // how may zero bytes the "::" contraction represents.
466 int num_bytes_of_contraction;
467 if (!CheckIPv6ComponentsSize(ipv6_parsed, &num_bytes_of_contraction))
468 return false;
469
470 int cur_index_in_address = 0;
471
472 // Loop through each hex components, and contraction in order.
473 for (int i = 0; i <= ipv6_parsed.num_hex_components; ++i) {
474 // Append the contraction if it appears before this component.
475 if (i == ipv6_parsed.index_of_contraction) {
476 for (int j = 0; j < num_bytes_of_contraction; ++j)
477 address[cur_index_in_address++] = 0;
478 }
479 // Append the hex component's value.
480 if (i != ipv6_parsed.num_hex_components) {
481 // Get the 16-bit value for this hex component.
tfarina65d26e02015-05-11 08:22:41482 uint16_t number = IPv6HexComponentToNumber<CHAR>(
brettw@chromium.orge7bba5f2013-04-10 20:10:52483 spec, ipv6_parsed.hex_components[i]);
484 // Append to |address|, in network byte order.
485 address[cur_index_in_address++] = (number & 0xFF00) >> 8;
486 address[cur_index_in_address++] = (number & 0x00FF);
487 }
488 }
489
490 // If there was an IPv4 section, convert it into a 32-bit number and append
491 // it to |address|.
492 if (ipv6_parsed.ipv4_component.is_valid()) {
493 // Append the 32-bit number to |address|.
Jiacheng Guoed519e442023-02-17 05:46:23494 int num_ipv4_components = 0;
Jiacheng Guo978179502023-03-07 16:46:31495 // IPv4AddressToNumber will remove the trailing dot from the component.
496 bool trailing_dot = ipv6_parsed.ipv4_component.is_nonempty() &&
497 spec[ipv6_parsed.ipv4_component.end() - 1] == '.';
Jiacheng Guoed519e442023-02-17 05:46:23498 // The URL standard requires the embedded IPv4 address to be concisely
Jiacheng Guo978179502023-03-07 16:46:31499 // composed of 4 parts and disallows terminal dots.
500 // See https://url.spec.whatwg.org/#concept-ipv6-parser
brettw@chromium.orge7bba5f2013-04-10 20:10:52501 if (CanonHostInfo::IPV4 !=
Jiacheng Guoed519e442023-02-17 05:46:23502 IPv4AddressToNumber(spec, ipv6_parsed.ipv4_component,
503 &address[cur_index_in_address],
504 &num_ipv4_components)) {
brettw@chromium.orge7bba5f2013-04-10 20:10:52505 return false;
Jiacheng Guoed519e442023-02-17 05:46:23506 }
Hayato Ito38692512023-10-23 04:35:09507 if ((num_ipv4_components != 4 || trailing_dot)) {
Jiacheng Guoed519e442023-02-17 05:46:23508 return false;
509 }
brettw@chromium.orge7bba5f2013-04-10 20:10:52510 }
511
512 return true;
513}
514
515// Searches for the longest sequence of zeros in |address|, and writes the
516// range into |contraction_range|. The run of zeros must be at least 16 bits,
517// and if there is a tie the first is chosen.
518void ChooseIPv6ContractionRange(const unsigned char address[16],
vitalybuka@chromium.org0318f922014-04-22 00:09:23519 Component* contraction_range) {
brettw@chromium.orge7bba5f2013-04-10 20:10:52520 // The longest run of zeros in |address| seen so far.
vitalybuka@chromium.org0318f922014-04-22 00:09:23521 Component max_range;
brettw@chromium.orge7bba5f2013-04-10 20:10:52522
523 // The current run of zeros in |address| being iterated over.
vitalybuka@chromium.org0318f922014-04-22 00:09:23524 Component cur_range;
brettw@chromium.orge7bba5f2013-04-10 20:10:52525
526 for (int i = 0; i < 16; i += 2) {
527 // Test for 16 bits worth of zero.
528 bool is_zero = (address[i] == 0 && address[i + 1] == 0);
529
530 if (is_zero) {
531 // Add the zero to the current range (or start a new one).
532 if (!cur_range.is_valid())
vitalybuka@chromium.org0318f922014-04-22 00:09:23533 cur_range = Component(i, 0);
brettw@chromium.orge7bba5f2013-04-10 20:10:52534 cur_range.len += 2;
535 }
536
537 if (!is_zero || i == 14) {
538 // Just completed a run of zeros. If the run is greater than 16 bits,
539 // it is a candidate for the contraction.
540 if (cur_range.len > 2 && cur_range.len > max_range.len) {
541 max_range = cur_range;
542 }
543 cur_range.reset();
544 }
545 }
546 *contraction_range = max_range;
547}
548
549// Return true if we've made a final IPV6/BROKEN decision, false if the result
550// is NEUTRAL, and we could use a second opinion.
551template<typename CHAR, typename UCHAR>
552bool DoCanonicalizeIPv6Address(const CHAR* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23553 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52554 CanonOutput* output,
555 CanonHostInfo* host_info) {
556 // Turn the IP address into a 128 bit number.
557 if (!IPv6AddressToNumber(spec, host, host_info->address)) {
558 // If it's not an IPv6 address, scan for characters that should *only*
559 // exist in an IPv6 address.
560 for (int i = host.begin; i < host.end(); i++) {
561 switch (spec[i]) {
562 case '[':
563 case ']':
564 case ':':
565 host_info->family = CanonHostInfo::BROKEN;
566 return true;
567 }
568 }
569
qyearsley2bc727d2015-08-14 20:17:15570 // No invalid characters. Could still be IPv4 or a hostname.
brettw@chromium.orge7bba5f2013-04-10 20:10:52571 host_info->family = CanonHostInfo::NEUTRAL;
572 return false;
573 }
574
575 host_info->out_host.begin = output->length();
576 output->push_back('[');
577 AppendIPv6Address(host_info->address, output);
578 output->push_back(']');
579 host_info->out_host.len = output->length() - host_info->out_host.begin;
580
581 host_info->family = CanonHostInfo::IPV6;
582 return true;
583}
584
585} // namespace
586
587void AppendIPv4Address(const unsigned char address[4], CanonOutput* output) {
588 for (int i = 0; i < 4; i++) {
589 char str[16];
590 _itoa_s(address[i], str, 10);
591
592 for (int ch = 0; str[ch] != 0; ch++)
593 output->push_back(str[ch]);
594
595 if (i != 3)
596 output->push_back('.');
597 }
598}
599
600void AppendIPv6Address(const unsigned char address[16], CanonOutput* output) {
601 // We will output the address according to the rules in:
602 // http://tools.ietf.org/html/draft-kawamura-ipv6-text-representation-01#section-4
603
604 // Start by finding where to place the "::" contraction (if any).
vitalybuka@chromium.org0318f922014-04-22 00:09:23605 Component contraction_range;
brettw@chromium.orge7bba5f2013-04-10 20:10:52606 ChooseIPv6ContractionRange(address, &contraction_range);
607
608 for (int i = 0; i <= 14;) {
609 // We check 2 bytes at a time, from bytes (0, 1) to (14, 15), inclusive.
610 DCHECK(i % 2 == 0);
611 if (i == contraction_range.begin && contraction_range.len > 0) {
612 // Jump over the contraction.
613 if (i == 0)
614 output->push_back(':');
615 output->push_back(':');
616 i = contraction_range.end();
617 } else {
618 // Consume the next 16 bits from |address|.
619 int x = address[i] << 8 | address[i + 1];
620
621 i += 2;
622
623 // Stringify the 16 bit number (at most requires 4 hex digits).
624 char str[5];
625 _itoa_s(x, str, 16);
626 for (int ch = 0; str[ch] != 0; ++ch)
627 output->push_back(str[ch]);
628
629 // Put a colon after each number, except the last.
630 if (i < 16)
631 output->push_back(':');
632 }
633 }
634}
635
brettw@chromium.orge7bba5f2013-04-10 20:10:52636void CanonicalizeIPAddress(const char* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23637 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52638 CanonOutput* output,
639 CanonHostInfo* host_info) {
640 if (DoCanonicalizeIPv4Address<char, unsigned char>(
641 spec, host, output, host_info))
642 return;
643 if (DoCanonicalizeIPv6Address<char, unsigned char>(
644 spec, host, output, host_info))
645 return;
646}
647
Jan Wilken Dörrie5aad5c22021-03-08 21:44:12648void CanonicalizeIPAddress(const char16_t* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23649 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52650 CanonOutput* output,
651 CanonHostInfo* host_info) {
Jan Wilken Dörrie5aad5c22021-03-08 21:44:12652 if (DoCanonicalizeIPv4Address<char16_t, char16_t>(spec, host, output,
653 host_info))
brettw@chromium.orge7bba5f2013-04-10 20:10:52654 return;
Jan Wilken Dörrie5aad5c22021-03-08 21:44:12655 if (DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, output,
656 host_info))
brettw@chromium.orge7bba5f2013-04-10 20:10:52657 return;
658}
659
Hayato Ito203c3022023-12-11 04:58:57660void CanonicalizeIPv6Address(const char* spec,
661 const Component& host,
662 CanonOutput& output,
663 CanonHostInfo& host_info) {
664 DoCanonicalizeIPv6Address<char, unsigned char>(spec, host, &output,
665 &host_info);
666}
667
668void CanonicalizeIPv6Address(const char16_t* spec,
669 const Component& host,
670 CanonOutput& output,
671 CanonHostInfo& host_info) {
672 DoCanonicalizeIPv6Address<char16_t, char16_t>(spec, host, &output,
673 &host_info);
674}
675
brettw@chromium.orge7bba5f2013-04-10 20:10:52676CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23677 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52678 unsigned char address[4],
679 int* num_ipv4_components) {
Matt Menkef21b7242021-08-25 20:43:24680 return DoIPv4AddressToNumber<char, unsigned char>(spec, host, address,
681 num_ipv4_components);
brettw@chromium.orge7bba5f2013-04-10 20:10:52682}
683
Jan Wilken Dörrie5aad5c22021-03-08 21:44:12684CanonHostInfo::Family IPv4AddressToNumber(const char16_t* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23685 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52686 unsigned char address[4],
687 int* num_ipv4_components) {
Matt Menkef21b7242021-08-25 20:43:24688 return DoIPv4AddressToNumber<char16_t, char16_t>(spec, host, address,
689 num_ipv4_components);
brettw@chromium.orge7bba5f2013-04-10 20:10:52690}
691
692bool IPv6AddressToNumber(const char* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23693 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52694 unsigned char address[16]) {
695 return DoIPv6AddressToNumber<char, unsigned char>(spec, host, address);
696}
697
Jan Wilken Dörrie5aad5c22021-03-08 21:44:12698bool IPv6AddressToNumber(const char16_t* spec,
vitalybuka@chromium.org0318f922014-04-22 00:09:23699 const Component& host,
brettw@chromium.orge7bba5f2013-04-10 20:10:52700 unsigned char address[16]) {
Jan Wilken Dörrie5aad5c22021-03-08 21:44:12701 return DoIPv6AddressToNumber<char16_t, char16_t>(spec, host, address);
brettw@chromium.orge7bba5f2013-04-10 20:10:52702}
703
vitalybuka@chromium.org0318f922014-04-22 00:09:23704} // namespace url