Introduce 'url::Origin'. https://docs.google.com/document/d/19NACt9PXOUTJi60klT2ZGcFlgHM5wM1Owtcw2GQOKPI/edit describes the plan. BUG=490074 Review URL: https://codereview.chromium.org/1224293002 Cr-Commit-Position: refs/heads/master@{#339841}

commit: 9f2cc898386458c3fab414666eb205ecb5b88277 [log] [tgz]
author: mkwst <mkwst@chromium.org> Wed Jul 22 06:03:25 2015
committer: Commit bot <commit-bot@chromium.org> Wed Jul 22 06:03:58 2015
tree: 20533a6844ab7d3257ea80872bac51bd2ee6aa0f
parent: 5b864132129fe01715e190d14aaa59e2f3c2a9d6 [diff]
diff --git a/url/BUILD.gn b/url/BUILD.gn
index db5a6ad5..2beed041 100644
--- a/url/BUILD.gn
+++ b/url/BUILD.gn

@@ -24,6 +24,8 @@
     "deprecated_serialized_origin.h",
     "gurl.cc",
     "gurl.h",
+    "origin.cc",
+    "origin.h",
     "scheme_host_port.cc",
     "scheme_host_port.h",
     "third_party/mozilla/url_parse.cc",
@@ -97,6 +99,7 @@
     sources = [
       "deprecated_serialized_origin_unittest.cc",
       "gurl_unittest.cc",
+      "origin_unittest.cc",
       "scheme_host_port_unittest.cc",
       "url_canon_icu_unittest.cc",
       "url_canon_unittest.cc",

diff --git a/url/origin.cc b/url/origin.cc
new file mode 100644
index 0000000..e80eb72b
--- /dev/null
+++ b/url/origin.cc

@@ -0,0 +1,69 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin.h"
+
+#include <string.h>
+
+#include "base/logging.h"
+#include "base/strings/string_number_conversions.h"
+#include "url/gurl.h"
+#include "url/url_canon.h"
+#include "url/url_canon_stdstring.h"
+#include "url/url_constants.h"
+#include "url/url_util.h"
+
+namespace url {
+
+Origin::Origin() : unique_(true) {
+}
+
+Origin::Origin(const GURL& url) : unique_(true) {
+  if (!url.is_valid() || (!url.IsStandard() && !url.SchemeIsBlob()))
+    return;
+
+  if (url.SchemeIsFileSystem()) {
+    tuple_ = SchemeHostPort(*url.inner_url());
+  } else if (url.SchemeIsBlob()) {
+    // TODO(mkwst): This relies on the fact that GURL pushes the unparseable
+    // bits and pieces of a non-standard scheme into the GURL's path. It seems
+    // fairly fragile, so it might be worth teaching GURL about blobs' data in
+    // the same way it's been taught about filesystems' inner URLs.
+    tuple_ = SchemeHostPort(GURL(url.path()));
+  } else {
+    tuple_ = SchemeHostPort(url);
+  }
+
+  unique_ = tuple_.IsInvalid();
+}
+
+Origin::~Origin() {
+}
+
+std::string Origin::Serialize() const {
+  if (unique())
+    return "null";
+
+  if (scheme() == kFileScheme)
+    return "file://";
+
+  return tuple_.Serialize();
+}
+
+bool Origin::IsSameOriginWith(const Origin& other) const {
+  if (unique_ || other.unique_)
+    return false;
+
+  return tuple_.Equals(other.tuple_);
+}
+
+bool Origin::operator<(const Origin& other) const {
+  return tuple_ < other.tuple_;
+}
+
+std::ostream& operator<<(std::ostream& out, const url::Origin& origin) {
+  return out << origin.Serialize();
+}
+
+}  // namespace url

diff --git a/url/origin.h b/url/origin.h
new file mode 100644
index 0000000..15fe0ea
--- /dev/null
+++ b/url/origin.h

@@ -0,0 +1,125 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef URL_ORIGIN_H_
+#define URL_ORIGIN_H_
+
+#include <string>
+
+#include "base/strings/string16.h"
+#include "url/scheme_host_port.h"
+#include "url/third_party/mozilla/url_parse.h"
+#include "url/url_canon.h"
+#include "url/url_constants.h"
+#include "url/url_export.h"
+
+class GURL;
+
+namespace url {
+
+// An Origin is a tuple of (scheme, host, port), as described in RFC 6454.
+//
+// TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
+// If you only need to extract the bits of a URL which are relevant for a
+// network connection, use 'url::SchemeHostPort'.
+//
+// STL;SDR: If you aren't making actual network connections, use 'url::Origin'.
+//
+// 'Origin', like 'SchemeHostPort', is composed of a tuple of (scheme, host,
+// port), but contains a number of additional concepts which make it appropriate
+// for use as a security boundary and access control mechanism between contexts.
+//
+// This class ought to be used when code needs to determine if two resources
+// are "same-origin", and when a canonical serialization of an origin is
+// required. Note that some origins are "unique", meaning that they are not
+// same-origin with any other origin (including themselves).
+//
+// There are a few subtleties to note:
+//
+// * Invalid and non-standard GURLs are parsed as unique origins. This includes
+//   non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
+//
+// * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the
+//   internals of the URL. That is, 'filesystem:https://example.com/temporary/f'
+//   is parsed as ('https', 'example.com', 443).
+//
+// * Unique origins all serialize to the string "null"; this means that the
+//   serializations of two unique origins are identical to each other, though
+//   the origins themselves are not "the same". This means that origins'
+//   serializations must not be relied upon for security checks.
+//
+// * GURLs with a 'file' scheme are tricky. They are parsed as ('file', '', 0),
+//   but their behavior may differ from embedder to embedder.
+//
+// * The host component of an IPv6 address includes brackets, just like the URL
+//   representation.
+//
+// Usage:
+//
+// * Origins are generally constructed from an already-canonicalized GURL:
+//
+//     GURL url("https://example.com/");
+//     url::Origin origin(url);
+//     origin.scheme(); // "https"
+//     origin.host(); // "example.com"
+//     origin.port(); // 443
+//     origin.unique(); // false
+//
+// * To answer the question "Are |this| and |that| "same-origin" with each
+//   other?", use |Origin::IsSameOriginWith|:
+//
+//     if (this.IsSameOriginWith(that)) {
+//       // Amazingness goes here.
+//     }
+class URL_EXPORT Origin {
+ public:
+  // Creates a unique Origin.
+  Origin();
+
+  // Creates an Origin from |url|, as described at
+  // https://url.spec.whatwg.org/#origin, with the following additions:
+  //
+  // 1. If |url| is invalid or non-standard, a unique Origin is constructed.
+  // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
+  //    out of everything in the URL which follows the scheme).
+  // 3. 'file' URLs parse as ("file", host, 0), where host is usually empty.
+  explicit Origin(const GURL& url);
+
+  ~Origin();
+
+  // For unique origins, these return ("", "", 0).
+  //
+  // TODO(mkwst): These should be 'const std::string&', along with their
+  // 'url::SchemeHostPort' analogs.
+  std::string scheme() const { return tuple_.scheme(); }
+  std::string host() const { return tuple_.host(); }
+  uint16 port() const { return tuple_.port(); }
+
+  bool unique() const { return unique_; }
+
+  // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
+  // the addition that all Origins with a 'file' scheme serialize to "file://".
+  std::string Serialize() const;
+
+  // Two Origins are "same-origin" if their schemes, hosts, and ports are exact
+  // matches; and neither is unique.
+  bool IsSameOriginWith(const Origin& other) const;
+
+  // Allows Origin to be used as a key in STL containers (for example, a
+  // std::set or std::map).
+  bool operator<(const Origin& other) const;
+
+ private:
+  SchemeHostPort tuple_;
+  bool unique_;
+
+  DISALLOW_COPY_AND_ASSIGN(Origin);
+};
+
+URL_EXPORT std::ostream& operator<<(std::ostream& out,
+                                    const Origin& origin);
+
+}  // namespace url
+
+#endif  // URL_ORIGIN_H_

diff --git a/url/origin_unittest.cc b/url/origin_unittest.cc
new file mode 100644
index 0000000..a774c62
--- /dev/null
+++ b/url/origin_unittest.cc

@@ -0,0 +1,160 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "url/origin.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+
+namespace {
+
+TEST(OriginTest, UniqueOriginComparison) {
+  url::Origin unique_origin;
+  EXPECT_EQ("", unique_origin.scheme());
+  EXPECT_EQ("", unique_origin.host());
+  EXPECT_EQ(0, unique_origin.port());
+  EXPECT_TRUE(unique_origin.unique());
+  EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin));
+
+  const char* const urls[] = {"data:text/html,Hello!",
+                              "javascript:alert(1)",
+                              "file://example.com:443/etc/passwd",
+                              "yay",
+                              "http::///invalid.example.com/"};
+
+  for (const auto& test_url : urls) {
+    SCOPED_TRACE(test_url);
+    GURL url(test_url);
+    url::Origin origin(url);
+    EXPECT_EQ("", origin.scheme());
+    EXPECT_EQ("", origin.host());
+    EXPECT_EQ(0, origin.port());
+    EXPECT_TRUE(origin.unique());
+    EXPECT_FALSE(origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(unique_origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(origin.IsSameOriginWith(unique_origin));
+  }
+}
+
+TEST(OriginTest, ConstructFromGURL) {
+  url::Origin different_origin(GURL("https://not-in-the-list.test/"));
+
+  struct TestCases {
+    const char* const url;
+    const char* const expected_scheme;
+    const char* const expected_host;
+    const uint16 expected_port;
+  } cases[] = {
+      // IP Addresses
+      {"http://192.168.9.1/", "http", "192.168.9.1", 80},
+      {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80},
+
+      // Punycode
+      {"http://☃.net/", "http", "xn--n3h.net", 80},
+      {"blob:http://☃.net/", "http", "xn--n3h.net", 80},
+
+      // Generic URLs
+      {"http://example.com/", "http", "example.com", 80},
+      {"http://example.com:123/", "http", "example.com", 123},
+      {"https://example.com/", "https", "example.com", 443},
+      {"https://example.com:123/", "https", "example.com", 123},
+      {"http://user:pass@example.com/", "http", "example.com", 80},
+      {"http://example.com:123/?query", "http", "example.com", 123},
+      {"https://example.com/#1234", "https", "example.com", 443},
+      {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123},
+
+      // Registered URLs
+      {"ftp://example.com/", "ftp", "example.com", 21},
+      {"gopher://example.com/", "gopher", "example.com", 70},
+      {"ws://example.com/", "ws", "example.com", 80},
+      {"wss://example.com/", "wss", "example.com", 443},
+
+      // file: URLs
+      {"file:///etc/passwd", "file", "", 0},
+      {"file://example.com/etc/passwd", "file", "example.com", 0},
+
+      // Filesystem:
+      {"filesystem:http://example.com/type/", "http", "example.com", 80},
+      {"filesystem:http://example.com:123/type/", "http", "example.com", 123},
+      {"filesystem:https://example.com/type/", "https", "example.com", 443},
+      {"filesystem:https://example.com:123/type/", "https", "example.com", 123},
+
+      // Blob:
+      {"blob:http://example.com/guid-goes-here", "http", "example.com", 80},
+      {"blob:http://example.com:123/guid-goes-here", "http", "example.com", 123},
+      {"blob:https://example.com/guid-goes-here", "https", "example.com", 443},
+      {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    EXPECT_EQ(test_case.expected_scheme, origin.scheme());
+    EXPECT_EQ(test_case.expected_host, origin.host());
+    EXPECT_EQ(test_case.expected_port, origin.port());
+    EXPECT_FALSE(origin.unique());
+    EXPECT_TRUE(origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(different_origin.IsSameOriginWith(origin));
+    EXPECT_FALSE(origin.IsSameOriginWith(different_origin));
+  }
+}
+
+TEST(OriginTest, Serialization) {
+  struct TestCases {
+    const char* const url;
+    const char* const expected;
+  } cases[] = {
+      {"http://192.168.9.1/", "http://192.168.9.1"},
+      {"http://[2001:db8::1]/", "http://[2001:db8::1]"},
+      {"http://☃.net/", "http://xn--n3h.net"},
+      {"http://example.com/", "http://example.com"},
+      {"http://example.com:123/", "http://example.com:123"},
+      {"https://example.com/", "https://example.com"},
+      {"https://example.com:123/", "https://example.com:123"},
+      {"file:///etc/passwd", "file://"},
+      {"file://example.com/etc/passwd", "file://"},
+  };
+
+  for (const auto& test_case : cases) {
+    SCOPED_TRACE(test_case.url);
+    GURL url(test_case.url);
+    EXPECT_TRUE(url.is_valid());
+    url::Origin origin(url);
+    EXPECT_EQ(test_case.expected, origin.Serialize());
+
+    // The '<<' operator should produce the same serialization as Serialize().
+    std::stringstream out;
+    out << origin;
+    EXPECT_EQ(test_case.expected, out.str());
+  }
+}
+
+TEST(OriginTest, Comparison) {
+  // These URLs are arranged in increasing order:
+  const char* const urls[] = {
+      "data:uniqueness",
+      "http://a:80",
+      "http://b:80",
+      "https://a:80",
+      "https://b:80",
+      "http://a:81",
+      "http://b:81",
+      "https://a:81",
+      "https://b:81",
+  };
+
+  for (size_t i = 0; i < arraysize(urls); i++) {
+    GURL current_url(urls[i]);
+    url::Origin current(current_url);
+    for (size_t j = i; j < arraysize(urls); j++) {
+      GURL compare_url(urls[j]);
+      url::Origin to_compare(compare_url);
+      EXPECT_EQ(i < j, current < to_compare) << i << " < " << j;
+      EXPECT_EQ(j < i, to_compare < current) << j << " < " << i;
+    }
+  }
+}
+
+}  // namespace

diff --git a/url/url.gyp b/url/url.gyp
index 198d448..b8355a87 100644
--- a/url/url.gyp
+++ b/url/url.gyp

@@ -46,8 +46,9 @@
         'url_lib',
       ],
       'sources': [
-        'gurl_unittest.cc',
         'deprecated_serialized_origin_unittest.cc',
+        'gurl_unittest.cc',
+        'origin_unittest.cc',
         'scheme_host_port_unittest.cc',
         'url_canon_icu_unittest.cc',
         'url_canon_unittest.cc',

diff --git a/url/url_srcs.gypi b/url/url_srcs.gypi
index 525598b..21d2fe1 100644
--- a/url/url_srcs.gypi
+++ b/url/url_srcs.gypi

@@ -11,6 +11,8 @@
       'deprecated_serialized_origin.h',
       'gurl.cc',
       'gurl.h',
+      'origin.cc',
+      'origin.h',
       'scheme_host_port.cc',
       'scheme_host_port.h',
       'third_party/mozilla/url_parse.cc',
commit	9f2cc898386458c3fab414666eb205ecb5b88277	[log] [tgz]
author	mkwst <mkwst@chromium.org>	Wed Jul 22 06:03:25 2015
committer	Commit bot <commit-bot@chromium.org>	Wed Jul 22 06:03:58 2015
tree	20533a6844ab7d3257ea80872bac51bd2ee6aa0f
parent	5b864132129fe01715e190d14aaa59e2f3c2a9d6 [diff]