[go: nahoru, domu]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigQuery: Raise helpful error when loading table from dataframe with STRUCT columns #9053

Merged
merged 5 commits into from
Aug 21, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Raise error if serializing DF with struct fields
  • Loading branch information
plamut committed Aug 18, 2019
commit effcf7e6fde670eeeae257388af46dfa8329b41e
9 changes: 9 additions & 0 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
from google.cloud.bigquery.retry import DEFAULT_RETRY
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.schema import _STRUCT_TYPES
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import _table_arg_to_table
from google.cloud.bigquery.table import _table_arg_to_table_ref
Expand Down Expand Up @@ -1529,6 +1530,14 @@ def load_table_from_dataframe(
os.close(tmpfd)

try:
if job_config.schema:
for field in job_config.schema:
if field.field_type in _STRUCT_TYPES:
raise ValueError(
"Pyarrow does not support serializing dataframes with "
"struct (record) column types."
)

if pyarrow and job_config.schema:
if parquet_compression == "snappy": # adjust the default value
parquet_compression = parquet_compression.upper()
Expand Down
34 changes: 34 additions & 0 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5328,6 +5328,40 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
assert sent_config is job_config
assert sent_config.source_format == job.SourceFormat.PARQUET

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_struct_fields_error(self):
    """Loading a dataframe with a STRUCT (RECORD) field in the job config
    schema must raise a helpful ``ValueError``.

    The error should be raised *before* any serialization/upload happens,
    which is why ``load_table_from_file`` is patched out — if the client
    ever reached it, the mock would absorb the call and the ``pytest.raises``
    block would fail.
    """
    from google.cloud.bigquery import job
    from google.cloud.bigquery.schema import SchemaField

    client = self._make_client()

    records = [{"float_column": 3.14, "struct_column": [{"foo": 1}, {"bar": -1}]}]
    dataframe = pandas.DataFrame(data=records)

    schema = [
        SchemaField("float_column", "FLOAT"),
        # The RECORD field name must match the dataframe's "struct_column"
        # key so the schema actually describes the data being loaded.
        SchemaField(
            "struct_column",
            "RECORD",
            fields=[SchemaField("foo", "INTEGER"), SchemaField("bar", "INTEGER")],
        ),
    ]
    job_config = job.LoadJobConfig(schema=schema)

    # Stub out the actual upload path; the ValueError must fire before it.
    load_patch = mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    )

    with pytest.raises(ValueError) as exc_info, load_patch:
        client.load_table_from_dataframe(
            dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
        )

    # Assert on message fragments so minor rewording doesn't break the test.
    err_msg = str(exc_info.value)
    assert "struct" in err_msg
    assert "not support" in err_msg

@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_wo_schema_warning(self):
Expand Down