
chore: consolidate PyarrowVersions helpers #1679

Merged: 16 commits, Oct 18, 2023
fix tests
Linchin committed Oct 16, 2023
commit ab266a7b5568f7ebe26789be013629b26940e6eb
8 changes: 2 additions & 6 deletions google/cloud/bigquery/_pandas_helpers.py
@@ -163,9 +163,7 @@ def bq_to_arrow_data_type(field):
     if field_type_upper in schema._STRUCT_TYPES:
         return bq_to_arrow_struct_data_type(field)
 
-    data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(
-        field_type_upper
-    )
+    data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper)
     if data_type_constructor is None:
         return None
     return data_type_constructor()
@@ -511,9 +509,7 @@ def augment_schema(dataframe, current_bq_schema):
             detected_type = "DATETIME"
         else:
             detected_mode = field.mode
-            detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(
-                arrow_table.type.id
-            )
+            detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id)
 
         if detected_type is None:
             unknown_type_fields.append(field)
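For context, the call being reflowed above is the scalar-type lookup inside bq_to_arrow_data_type, which maps BigQuery schema types to pyarrow types. A minimal sketch of how that function behaves for a scalar field, assuming pyarrow is installed; the SchemaField values are illustrative and _pandas_helpers is a private module used here only for demonstration:

    from google.cloud.bigquery import _pandas_helpers
    from google.cloud.bigquery.schema import SchemaField

    # A scalar (non-STRUCT) field falls through to the bq_to_arrow_scalars lookup.
    field = SchemaField("amount", "NUMERIC")
    arrow_type = _pandas_helpers.bq_to_arrow_data_type(field)
    print(arrow_type)  # decimal128(38, 9)

    # An unrecognized type name makes the lookup return None, and
    # bq_to_arrow_data_type propagates that None to the caller.
    unknown = SchemaField("mystery", "SOME_FUTURE_TYPE")
    assert _pandas_helpers.bq_to_arrow_data_type(unknown) is None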
13 changes: 13 additions & 0 deletions google/cloud/bigquery/_pyarrow_helpers.py
@@ -16,6 +16,8 @@
 
 from typing import Any
 
+from packaging import version
+
 try:
     import pyarrow  # type: ignore
 except ImportError:  # pragma: NO COVER
@@ -95,6 +97,16 @@ def pyarrow_timestamp():
         pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
     }
 
+    # Adds bignumeric support only if pyarrow version >= 3.0.0
+    # Decimal256 support was added to arrow 3.0.0
+    # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/
+    if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
+        _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
+        # The exact decimal's scale and precision are not important, as only
+        # the type ID matters, and it's the same for all decimal256 instances.
+        _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
+
+
 def bq_to_arrow_scalars(bq_scalar: str):
     """
     Returns:
@@ -103,6 +115,7 @@ def bq_to_arrow_scalars(bq_scalar: str):
     """
     return _BQ_TO_ARROW_SCALARS.get(bq_scalar)
 
+
 def arrow_scalar_ids_to_bq(arrow_scalar: Any):
     """
     Returns:
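The gist of the _pyarrow_helpers change: the BIGNUMERIC entries are registered only when the installed pyarrow is new enough to have decimal256. A minimal sketch of the resulting lookup behavior, assuming pyarrow >= 3.0.0 is installed (these are private helpers, exercised here only for illustration):

    from google.cloud.bigquery import _pyarrow_helpers

    # Forward lookup: BigQuery type name -> pyarrow type constructor.
    ctor = _pyarrow_helpers.bq_to_arrow_scalars("BIGNUMERIC")
    assert ctor is not None   # would be None on pyarrow < 3.0.0
    arrow_type = ctor()       # decimal256(76, 38)

    # Reverse lookup keys on the pyarrow type ID, which is shared by every
    # decimal256 instance regardless of precision and scale.
    assert _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_type.id) == "BIGNUMERIC"

    # Unknown inputs fall through to None rather than raising, as the
    # tests later in this diff confirm.
    assert _pyarrow_helpers.bq_to_arrow_scalars("NOT_A_TYPE") is None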
1 change: 1 addition & 0 deletions google/cloud/bigquery/_versions_helpers.py
@@ -26,6 +26,7 @@
 
 # https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414
 _PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")])
 
+
 class PyarrowVersions:
     """Version comparisons for pyarrow package."""
 
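The class this whitespace fix touches is the consolidation target of the PR: PyarrowVersions centralizes the pyarrow version checks, including the known-bad versions in _PYARROW_BAD_VERSIONS. A sketch of the intended call pattern, inferred from the tests later in this diff; the return value of try_import is an assumption:

    from google.cloud.bigquery import _versions_helpers
    from google.cloud.bigquery.exceptions import LegacyPyarrowError

    versions = _versions_helpers.PyarrowVersions()

    try:
        # raise_if_error=True makes known-bad versions (e.g. pyarrow 2.0.0,
        # see _PYARROW_BAD_VERSIONS above) raise instead of returning None.
        pyarrow = versions.try_import(raise_if_error=True)
    except LegacyPyarrowError:
        pyarrow = None  # fall back to the no-pyarrow code paths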
1 change: 0 additions & 1 deletion google/cloud/bigquery/client.py
@@ -83,7 +83,6 @@
 from google.cloud.bigquery import enums
 from google.cloud.bigquery.enums import AutoRowIDs
 from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
-from google.cloud.bigquery.exceptions import LegacyPyarrowError
 from google.cloud.bigquery.opentelemetry_tracing import create_span
 from google.cloud.bigquery import job
 from google.cloud.bigquery.job import (
1 change: 0 additions & 1 deletion google/cloud/bigquery/table.py
@@ -60,7 +60,6 @@
 import google.cloud._helpers  # type: ignore
 from google.cloud.bigquery import _helpers
 from google.cloud.bigquery import _pandas_helpers
-from google.cloud.bigquery import _pyarrow_helpers
 from google.cloud.bigquery.enums import DefaultPandasDTypes
 from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
 from google.cloud.bigquery.schema import _build_schema_resource
Expand Down
5 changes: 1 addition & 4 deletions tests/unit/test__pyarrow_helpers.py
@@ -39,8 +39,5 @@ def test_bq_to_arrow_scalars(module_under_test):
 
 @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
 def test_arrow_scalar_ids_to_bq(module_under_test):
-    assert (
-        module_under_test.arrow_scalar_ids_to_bq(pyarrow.bool_().id)
-        == "BOOL"
-    )
+    assert module_under_test.arrow_scalar_ids_to_bq(pyarrow.bool_().id) == "BOOL"
     assert module_under_test.arrow_scalar_ids_to_bq("UNKNOWN_TYPE") is None
5 changes: 3 additions & 2 deletions tests/unit/test__versions_helpers.py
@@ -15,6 +15,7 @@
 import pytest
 
 import mock
+
 try:
     import pyarrow
 except ImportError:  # pragma: NO COVER
@@ -50,7 +51,7 @@ def test_try_import_raises_error_w_legacy_pyarrow():
 
     versions = _versions_helpers.PyarrowVersions()
     with mock.patch("pyarrow.__version__", new="2.0.0"):
-        with pytest.raises(LegacyPyarrowError):
+        with pytest.raises(LegacyPyarrowError):
             versions.try_import(raise_if_error=True)
 
 
@@ -69,4 +70,4 @@ def test_installed_version_returns_parsed_version():
 
     assert version.major == 1
     assert version.minor == 2
-    assert version.micro == 3
\ No newline at end of file
+    assert version.micro == 3
1 change: 0 additions & 1 deletion tests/unit/test_client.py
@@ -8636,7 +8636,6 @@ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self):
             location=self.LOCATION,
         )
 
-
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_nulls(self):