[go: up, home]

Skip to content

Commit

Permalink
Replace (in bulk) usage of tf.io.gfile by etils/epath
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 481899777
  • Loading branch information
The TensorFlow Datasets Authors committed Oct 18, 2022
1 parent 5205800 commit ad716e5
Show file tree
Hide file tree
Showing 130 changed files with 385 additions and 269 deletions.
6 changes: 4 additions & 2 deletions tensorflow_datasets/audio/fuss.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
"""FUSS dataset."""

import os

from absl import logging
from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -147,7 +149,7 @@ def _parse_segments(self, path):
# Some segments files are missing in the "unprocessed" set.
logging.info("Missing segments file: %s", path)
return segments
with tf.io.gfile.GFile(path) as f:
with epath.Path(path).open() as f:
for l in f:
try:
start, end, label = l.split()
Expand All @@ -164,7 +166,7 @@ def _generate_examples(self, base_dir, split):
"""Generates examples for the given split."""
path = os.path.join(base_dir, "%s_example_list.txt" % split)
split_dir = os.path.join(base_dir, split)
with tf.io.gfile.GFile(path) as example_list:
with epath.Path(path).open() as example_list:
for line in example_list:
paths = line.split()
key = _basename_without_ext(paths[0])
Expand Down
4 changes: 2 additions & 2 deletions tensorflow_datasets/audio/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

import os

from etils import epath
import tensorflow as tf

import tensorflow_datasets.public_api as tfds

_CITATION = """\
Expand Down Expand Up @@ -90,7 +90,7 @@ def _populate_metadata(self, dirs):

def _read_metadata_file(self, path, field_names):
metadata = {}
with tf.io.gfile.GFile(path) as f:
with epath.Path(path).open() as f:
for line in f:
if line.startswith(";"):
continue
Expand Down
4 changes: 2 additions & 2 deletions tensorflow_datasets/audio/ljspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

import os

from etils import epath
import tensorflow as tf

import tensorflow_datasets.public_api as tfds

_CITATION = """\
Expand Down Expand Up @@ -81,7 +81,7 @@ def _split_generators(self, dl_manager):
def _generate_examples(self, directory):
"""Yields examples."""
metadata_path = os.path.join(directory, "LJSpeech-1.1", "metadata.csv")
with tf.io.gfile.GFile(metadata_path) as f:
with epath.Path(metadata_path).open() as f:
for line in f:
line = line.strip()
key, transcript, transcript_normalized = line.split("|")
Expand Down
6 changes: 3 additions & 3 deletions tensorflow_datasets/audio/tedlium.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

import os
import re
import numpy as np

from etils import epath
import numpy as np
import tensorflow as tf

import tensorflow_datasets.public_api as tfds


Expand Down Expand Up @@ -210,7 +210,7 @@ def _generate_examples_from_stm_file(stm_path):
"""Generate examples from a TED-LIUM stm file."""
stm_dir = os.path.dirname(stm_path)
sph_dir = os.path.join(os.path.dirname(stm_dir), "sph")
with tf.io.gfile.GFile(stm_path) as f:
with epath.Path(stm_path).open() as f:
for line in f:
line = line.strip()
fn, channel, speaker, start, end, label, transcript = line.split(" ", 6)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import csv
import os

from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -51,7 +52,7 @@
def read_metadata_file(path):
"""Reads the tab-separated metadata from the path."""
metadata = {}
with tf.io.gfile.GFile(path) as f:
with epath.Path(path).open() as f:
reader = csv.DictReader(f, delimiter="\t")
for row in reader:
# Collect metadata for each split_userID, for example
Expand Down
4 changes: 3 additions & 1 deletion tensorflow_datasets/audio/voxceleb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

import collections
import os

from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -120,7 +122,7 @@ def _generate_examples(self, extract_path, file_names):
def _calculate_splits(self, iden_splits_path):
"""Read the train/dev/test splits from VoxCeleb's iden_split.txt file."""
data_splits = collections.defaultdict(set)
with tf.io.gfile.GFile(iden_splits_path) as f:
with epath.Path(iden_splits_path).open() as f:
for line in f:
group, path = line.strip().split()
split_name = {1: 'train', 2: 'validation', 3: 'test'}[int(group)]
Expand Down
2 changes: 1 addition & 1 deletion tensorflow_datasets/core/as_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@

"""As dataframe util."""

import dataclasses
import typing
from typing import Any, Callable, Dict, List, Optional, Tuple

import dataclasses
import numpy as np

import tensorflow as tf
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
from typing import List, Optional, OrderedDict, Sequence, Union

from etils import epath
import tensorflow as tf
from tensorflow_datasets.core import dataset_builder
from tensorflow_datasets.core import dataset_info
from tensorflow_datasets.core import split_builder as split_builder_lib
Expand Down Expand Up @@ -139,7 +138,7 @@ def _generate_examples(

example_id = 0
for filepath in path:
with tf.io.gfile.GFile(filepath) as f:
with epath.Path(filepath).open() as f:
for line in f:
if line.startswith("-DOCSTART-") or line == "\n" or not line:
if input_sequences["tokens"]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
from typing import Callable, List, Mapping, Optional, OrderedDict, Sequence, Union

from etils import epath
import tensorflow as tf

from tensorflow_datasets.core import dataset_builder
from tensorflow_datasets.core import dataset_info
from tensorflow_datasets.core import lazy_imports_lib
Expand Down Expand Up @@ -209,7 +207,7 @@ def _generate_examples(

example_id = 0
for filepath in path:
with tf.io.gfile.GFile(filepath) as data_file:
with epath.Path(filepath).open() as data_file:
annotated_sentences = list(conllu.parse_incr(data_file))
for sentence in annotated_sentences:
example = process_example_fn(
Expand Down
3 changes: 2 additions & 1 deletion tensorflow_datasets/core/features/video_feature_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import pathlib

from etils import epath
import numpy as np
import tensorflow as tf
from tensorflow_datasets import testing
Expand Down Expand Up @@ -78,7 +79,7 @@ def test_video_concatenated_frames(self):
def test_video_ffmpeg(self):
video_path = os.path.join(self._test_data_path, 'video.mkv')
video_json_path = os.path.join(self._test_data_path, 'video.json')
with tf.io.gfile.GFile(video_json_path) as fp:
with epath.Path(video_json_path).open() as fp:
video_array = np.asarray(json.load(fp))

self.assertFeature(
Expand Down
3 changes: 2 additions & 1 deletion tensorflow_datasets/core/folder_dataset/translate_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
from typing import Dict, List, Tuple

from etils import epath
import tensorflow as tf
from tensorflow_datasets.core import dataset_builder
from tensorflow_datasets.core import dataset_info
Expand Down Expand Up @@ -154,6 +155,6 @@ def _get_split_language_examples(


def _list_examples(file: str) -> List[str]:
with tf.io.gfile.GFile(file) as f:
with epath.Path(file).open() as f:
sentences = f.read().splitlines()
return sentences
3 changes: 2 additions & 1 deletion tensorflow_datasets/image/arc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import json
import os

from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -140,7 +141,7 @@ def _generate_examples(self, directory):
"""Yields (key, example) tuples from the dataset."""
json_filepaths = tf.io.gfile.glob(os.path.join(directory, "*.json"))
for json_path in sorted(json_filepaths):
with tf.io.gfile.GFile(json_path) as f:
with epath.Path(json_path).open() as f:
task = json.load(f)
task_id = os.path.basename(json_path)[:-len(".json")]
yield task_id, {
Expand Down
5 changes: 3 additions & 2 deletions tensorflow_datasets/image/bccd/bccd.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import xml.etree.ElementTree as ET

from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -103,7 +104,7 @@ def _split_generators(self, dl_manager):
for root, _, filename in tf.io.gfile.walk(splits_dir_path):
for fname in filename:
full_file_name = os.path.join(root, fname)
with tf.io.gfile.GFile(full_file_name) as f:
with epath.Path(full_file_name).open() as f:
for line in f:
if fname == "train.txt":
train_list.append(line)
Expand Down Expand Up @@ -178,7 +179,7 @@ def get_label(attributes, n):

for fname in file_names:
annotation_file_path = get_annotations_file_path(fname)
with tf.io.gfile.GFile(annotation_file_path) as f:
with epath.Path(annotation_file_path).open() as f:
xml_list[fname] = ET.parse(f)
attributes = collections.defaultdict(list)
for element in xml_list[fname].iter():
Expand Down
6 changes: 3 additions & 3 deletions tensorflow_datasets/image/celeba.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

import os

from etils import epath
import tensorflow as tf

import tensorflow_datasets.public_api as tfds

IMG_ALIGNED_DATA = ("https://drive.google.com/uc?export=download&"
Expand Down Expand Up @@ -174,7 +174,7 @@ def _process_celeba_config_file(self, file_path):
values: map from the file name to the list of attribute values for
this file.
"""
with tf.io.gfile.GFile(file_path) as f:
with epath.Path(file_path).open() as f:
data_raw = f.read()
lines = data_raw.split("\n")

Expand All @@ -194,7 +194,7 @@ def _generate_examples(self, file_id, downloaded_dirs, downloaded_images):
landmarks_path = downloaded_dirs["landmarks_celeba"]
attr_path = downloaded_dirs["list_attr_celeba"]

with tf.io.gfile.GFile(img_list_path) as f:
with epath.Path(img_list_path).open() as f:
files = [
line.split()[0]
for line in f.readlines()
Expand Down
5 changes: 3 additions & 2 deletions tensorflow_datasets/image/clevr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import json
import os

from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -133,7 +134,7 @@ def _generate_examples(self, images_dir_path, question_file,
for filename in tf.io.gfile.listdir(images_dir_path)
])

with tf.io.gfile.GFile(question_file) as f:
with epath.Path(question_file).open() as f:
questions_json = json.load(f)
questions = collections.defaultdict(list)
for q in questions_json["questions"]:
Expand All @@ -143,7 +144,7 @@ def _generate_examples(self, images_dir_path, question_file,
})

if tf.io.gfile.exists(scenes_description_file):
with tf.io.gfile.GFile(scenes_description_file) as f:
with epath.Path(scenes_description_file).open() as f:
scenes_json = json.load(f)
else:
# if annotation file does not exist, we create empty annotations
Expand Down
4 changes: 3 additions & 1 deletion tensorflow_datasets/image/duke_ultrasound.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

import csv
import os

from etils import epath
import numpy as np
import tensorflow as tf
import tensorflow_datasets.public_api as tfds
Expand Down Expand Up @@ -136,7 +138,7 @@ def _split_generators(self, dl_manager):
return splits

def _generate_examples(self, datapath, csvpath):
with tf.io.gfile.GFile(csvpath) as f:
with epath.Path(csvpath).open() as f:
reader = csv.DictReader(f)
for row in reader:
data_key = 'mark_data' if row['target'] == 'mark' else 'phantom_data'
Expand Down
5 changes: 3 additions & 2 deletions tensorflow_datasets/image_classification/caltech_birds.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import os
import re

from etils import epath
import numpy as np
import tensorflow as tf
import tensorflow_datasets.public_api as tfds
Expand Down Expand Up @@ -100,10 +101,10 @@ def _split_generators(self, dl_manager):
train_path = os.path.join(extracted_path[0], "lists/train.txt")
test_path = os.path.join(extracted_path[0], "lists/test.txt")

with tf.io.gfile.GFile(train_path) as f:
with epath.Path(train_path).open() as f:
train_list = f.read().splitlines()

with tf.io.gfile.GFile(test_path) as f:
with epath.Path(test_path).open() as f:
test_list = f.read().splitlines()

attributes = collections.defaultdict(list)
Expand Down
3 changes: 2 additions & 1 deletion tensorflow_datasets/image_classification/chexpert.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import csv
import os

from etils import epath
import tensorflow as tf
import tensorflow_datasets.public_api as tfds

Expand Down Expand Up @@ -131,7 +132,7 @@ def _split_generators(self, dl_manager):

def _generate_examples(self, imgs_path, csv_path):
"""Yields examples."""
with tf.io.gfile.GFile(csv_path) as csv_f:
with epath.Path(csv_path).open() as csv_f:
reader = csv.DictReader(csv_f)
# Get keys for each label from csv
label_keys = reader.fieldnames[5:]
Expand Down
3 changes: 2 additions & 1 deletion tensorflow_datasets/image_classification/cifar.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import collections
import os

from etils import epath
import numpy as np
import tensorflow as tf
import tensorflow_datasets.public_api as tfds
Expand Down Expand Up @@ -83,7 +84,7 @@ def _split_generators(self, dl_manager):
for label_key, label_file in zip(cifar_info.label_keys,
cifar_info.label_files):
labels_path = os.path.join(cifar_path, label_file)
with tf.io.gfile.GFile(labels_path) as label_f:
with epath.Path(labels_path).open() as label_f:
label_names = [name for name in label_f.read().split("\n") if name]
self.info.features[label_key].names = label_names

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import collections
import os

from etils import epath
import numpy as np
import tensorflow as tf
import tensorflow_datasets.public_api as tfds
Expand Down Expand Up @@ -132,7 +133,7 @@ def _split_generators(self, dl_manager):
for label_key, label_file in zip(cifar_info.label_keys,
cifar_info.label_files):
labels_path = os.path.join(cifar_path, label_file)
with tf.io.gfile.GFile(labels_path) as label_f:
with epath.Path(labels_path).open() as label_f:
label_names = [name for name in label_f.read().split('\n') if name]
self.info.features[label_key].names = label_names

Expand Down
Loading

0 comments on commit ad716e5

Please sign in to comment.