[go: nahoru, domu]

Skip to content

Commit

Permalink
Make conll DatasetBuilder classes config based, while still accepting description and citation specified in code.
Browse files Browse the repository at this point in the history

PiperOrigin-RevId: 489442803
  • Loading branch information
pierrot0 authored and The TensorFlow Datasets Authors committed Nov 18, 2022
1 parent 73e190e commit 2800d1d
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 21 deletions.
4 changes: 0 additions & 4 deletions docs/format_specific_dataset_builders.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,6 @@ class MyCoNNLDataset(tfds.dataset_builders.ConllDatasetBuilder):

def _info(self) -> tfds.core.DatasetInfo:
return self.create_dataset_info(
description="My dataset description",
citation="My dataset citation",
# ...
)

Expand Down Expand Up @@ -329,8 +327,6 @@ class MyCoNNLUDataset(tfds.dataset_builders.ConllUDatasetBuilder):

def _info(self) -> tfds.core.DatasetInfo:
return self.create_dataset_info(
description="My dataset description",
citation="My dataset citation",
# ...
)

Expand Down
13 changes: 8 additions & 5 deletions tensorflow_datasets/core/dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,15 +881,18 @@ def dataset_info_from_configs(self, **kwargs):
Sub-class should call this and add information not present in config files
using kwargs directly passed to tfds.core.DatasetInfo object.
If information is present both in passed arguments and config files, config
files will prevail.
Args:
**kwargs: kw args to pass to DatasetInfo directly.
"""
metadata = self.get_metadata()
return dataset_info.DatasetInfo(
builder=self,
description=metadata.description,
citation=metadata.citation,
**kwargs)
if metadata.description:
kwargs["description"] = metadata.description
if metadata.citation:
kwargs["citation"] = metadata.citation
return dataset_info.DatasetInfo(builder=self, **kwargs)

@abc.abstractmethod
@utils.docs.doc_private
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,28 +90,28 @@ def builder_config(self) -> ConllBuilderConfig:

def create_dataset_info(
self,
description: str,
description: Optional[str] = None,
supervised_keys: Optional[dataset_info.SupervisedKeysType] = None,
homepage: Optional[str] = None,
citation: Optional[str] = None,
) -> dataset_info.DatasetInfo:
"""Initializes `dataset_info.DatasetInfo` for Conll datasets.
Args:
description: A short, markdown-formatted description of the dataset.
description: [DEPRECATED] A short, markdown-formatted description of the
dataset. Prefer placing description in `README.md` file.
supervised_keys: Specifies the input structure for supervised learning,
if applicable for the dataset, used with "as_supervised". Typically this
is a `(input_key, target_key)` tuple.
homepage: The homepage of the dataset, if applicable for this dataset.
citation: The citation to use for this dataset, if applicable for this
dataset.
citation: [DEPRECATED] The citation to use for this dataset, if applicable
for this dataset. Prefer placing citations in `CITATIONS.bib` file.
Returns:
`dataset_info.DatasetInfo` for Conll datasets, populated with the values
from the provided arguments.
"""
return dataset_info.DatasetInfo(
builder=self,
return self.dataset_info_from_configs(
description=description,
features=self.builder_config.features_dict,
supervised_keys=supervised_keys,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,28 +155,28 @@ def builder_config(self) -> ConllUBuilderConfig:

def create_dataset_info(
self,
description: str,
description: Optional[str] = None,
supervised_keys: Optional[dataset_info.SupervisedKeysType] = None,
homepage: Optional[str] = None,
citation: Optional[str] = None,
) -> dataset_info.DatasetInfo:
"""Initializes `dataset_info.DatasetInfo` for Conll-U datasets.
Args:
description: A short, markdown-formatted description of the dataset.
description: [DEPRECATED] A short, markdown-formatted description of the
dataset. Prefer placing description in `README.md` file.
supervised_keys: Specifies the input structure for supervised learning,
if applicable for the dataset, used with "as_supervised". Typically this
is a `(input_key, target_key)` tuple.
homepage: The homepage of the dataset, if applicable for this dataset.
citation: The citation to use for this dataset, if applicable for this
dataset.
citation: [DEPRECATED] The citation to use for this dataset, if applicable
for this dataset. Prefer placing citations in `CITATIONS.bib` file.
Returns:
`dataset_info.DatasetInfo` for Conll-U datasets, populated with the values
from the provided arguments.
"""
return dataset_info.DatasetInfo(
builder=self,
return self.dataset_info_from_configs(
description=description,
features=self.builder_config.features_dict,
supervised_keys=supervised_keys,
Expand Down

0 comments on commit 2800d1d

Please sign in to comment.