Replace (in bulk) usage of tf.io.gfile with etils/epath

PiperOrigin-RevId: 481899777
tensorflow · Oct 18, 2022 · ad716e5 · ad716e5
1 parent 5205800
commit ad716e5
Show file tree

Hide file tree

Showing 130 changed files with 385 additions and 269 deletions.
diff --git a/tensorflow_datasets/audio/fuss.py b/tensorflow_datasets/audio/fuss.py
@@ -16,7 +16,9 @@
 """FUSS dataset."""
 
 import os
+
 from absl import logging
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -147,7 +149,7 @@ def _parse_segments(self, path):
       # Some segments files are missing in the "unprocessed" set.
       logging.info("Missing segments file: %s", path)
       return segments
-    with tf.io.gfile.GFile(path) as f:
+    with epath.Path(path).open() as f:
       for l in f:
         try:
           start, end, label = l.split()
@@ -164,7 +166,7 @@ def _generate_examples(self, base_dir, split):
     """Generates examples for the given split."""
     path = os.path.join(base_dir, "%s_example_list.txt" % split)
     split_dir = os.path.join(base_dir, split)
-    with tf.io.gfile.GFile(path) as example_list:
+    with epath.Path(path).open() as example_list:
       for line in example_list:
         paths = line.split()
         key = _basename_without_ext(paths[0])

diff --git a/tensorflow_datasets/audio/librispeech.py b/tensorflow_datasets/audio/librispeech.py
@@ -17,8 +17,8 @@
 
 import os
 
+from etils import epath
 import tensorflow as tf
-
 import tensorflow_datasets.public_api as tfds
 
 _CITATION = """\
@@ -90,7 +90,7 @@ def _populate_metadata(self, dirs):
 
   def _read_metadata_file(self, path, field_names):
     metadata = {}
-    with tf.io.gfile.GFile(path) as f:
+    with epath.Path(path).open() as f:
       for line in f:
         if line.startswith(";"):
           continue

diff --git a/tensorflow_datasets/audio/ljspeech.py b/tensorflow_datasets/audio/ljspeech.py
@@ -17,8 +17,8 @@
 
 import os
 
+from etils import epath
 import tensorflow as tf
-
 import tensorflow_datasets.public_api as tfds
 
 _CITATION = """\
@@ -81,7 +81,7 @@ def _split_generators(self, dl_manager):
   def _generate_examples(self, directory):
     """Yields examples."""
     metadata_path = os.path.join(directory, "LJSpeech-1.1", "metadata.csv")
-    with tf.io.gfile.GFile(metadata_path) as f:
+    with epath.Path(metadata_path).open() as f:
       for line in f:
         line = line.strip()
         key, transcript, transcript_normalized = line.split("|")

diff --git a/tensorflow_datasets/audio/tedlium.py b/tensorflow_datasets/audio/tedlium.py
@@ -17,10 +17,10 @@
 
 import os
 import re
-import numpy as np
 
+from etils import epath
+import numpy as np
 import tensorflow as tf
-
 import tensorflow_datasets.public_api as tfds
 
 
@@ -210,7 +210,7 @@ def _generate_examples_from_stm_file(stm_path):
   """Generate examples from a TED-LIUM stm file."""
   stm_dir = os.path.dirname(stm_path)
   sph_dir = os.path.join(os.path.dirname(stm_dir), "sph")
-  with tf.io.gfile.GFile(stm_path) as f:
+  with epath.Path(stm_path).open() as f:
     for line in f:
       line = line.strip()
       fn, channel, speaker, start, end, label, transcript = line.split(" ", 6)

diff --git a/tensorflow_datasets/audio/userlibri_audio_data/userlibri_audio_data.py b/tensorflow_datasets/audio/userlibri_audio_data/userlibri_audio_data.py
@@ -18,6 +18,7 @@
 import csv
 import os
 
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -51,7 +52,7 @@
 def read_metadata_file(path):
   """Reads the tab-separated metadata from the path."""
   metadata = {}
-  with tf.io.gfile.GFile(path) as f:
+  with epath.Path(path).open() as f:
     reader = csv.DictReader(f, delimiter="\t")
     for row in reader:
       # Collect metadata for each split_userID, for example

diff --git a/tensorflow_datasets/audio/voxceleb.py b/tensorflow_datasets/audio/voxceleb.py
@@ -17,6 +17,8 @@
 
 import collections
 import os
+
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -120,7 +122,7 @@ def _generate_examples(self, extract_path, file_names):
   def _calculate_splits(self, iden_splits_path):
     """Read the train/dev/test splits from VoxCeleb's iden_split.txt file."""
     data_splits = collections.defaultdict(set)
-    with tf.io.gfile.GFile(iden_splits_path) as f:
+    with epath.Path(iden_splits_path).open() as f:
       for line in f:
         group, path = line.strip().split()
         split_name = {1: 'train', 2: 'validation', 3: 'test'}[int(group)]

diff --git a/tensorflow_datasets/core/as_dataframe.py b/tensorflow_datasets/core/as_dataframe.py
@@ -15,10 +15,10 @@
 
 """As dataframe util."""
 
+import dataclasses
 import typing
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
-import dataclasses
 import numpy as np
 
 import tensorflow as tf

diff --git a/tensorflow_datasets/core/dataset_builders/conll/conll_dataset_builder.py b/tensorflow_datasets/core/dataset_builders/conll/conll_dataset_builder.py
@@ -35,7 +35,6 @@ def _split_generators(self, dl_manager: tfds.download.DownloadManager):
 from typing import List, Optional, OrderedDict, Sequence, Union
 
 from etils import epath
-import tensorflow as tf
 from tensorflow_datasets.core import dataset_builder
 from tensorflow_datasets.core import dataset_info
 from tensorflow_datasets.core import split_builder as split_builder_lib
@@ -139,7 +138,7 @@ def _generate_examples(
 
     example_id = 0
     for filepath in path:
-      with tf.io.gfile.GFile(filepath) as f:
+      with epath.Path(filepath).open() as f:
         for line in f:
           if line.startswith("-DOCSTART-") or line == "\n" or not line:
             if input_sequences["tokens"]:

diff --git a/tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder.py b/tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder.py
@@ -21,8 +21,6 @@
 from typing import Callable, List, Mapping, Optional, OrderedDict, Sequence, Union
 
 from etils import epath
-import tensorflow as tf
-
 from tensorflow_datasets.core import dataset_builder
 from tensorflow_datasets.core import dataset_info
 from tensorflow_datasets.core import lazy_imports_lib
@@ -209,7 +207,7 @@ def _generate_examples(
 
     example_id = 0
     for filepath in path:
-      with tf.io.gfile.GFile(filepath) as data_file:
+      with epath.Path(filepath).open() as data_file:
         annotated_sentences = list(conllu.parse_incr(data_file))
         for sentence in annotated_sentences:
           example = process_example_fn(

diff --git a/tensorflow_datasets/core/features/video_feature_test.py b/tensorflow_datasets/core/features/video_feature_test.py
@@ -19,6 +19,7 @@
 import os
 import pathlib
 
+from etils import epath
 import numpy as np
 import tensorflow as tf
 from tensorflow_datasets import testing
@@ -78,7 +79,7 @@ def test_video_concatenated_frames(self):
   def test_video_ffmpeg(self):
     video_path = os.path.join(self._test_data_path, 'video.mkv')
     video_json_path = os.path.join(self._test_data_path, 'video.json')
-    with tf.io.gfile.GFile(video_json_path) as fp:
+    with epath.Path(video_json_path).open() as fp:
       video_array = np.asarray(json.load(fp))
 
     self.assertFeature(

diff --git a/tensorflow_datasets/core/folder_dataset/translate_folder.py b/tensorflow_datasets/core/folder_dataset/translate_folder.py
@@ -19,6 +19,7 @@
 import os
 from typing import Dict, List, Tuple
 
+from etils import epath
 import tensorflow as tf
 from tensorflow_datasets.core import dataset_builder
 from tensorflow_datasets.core import dataset_info
@@ -154,6 +155,6 @@ def _get_split_language_examples(
 
 
 def _list_examples(file: str) -> List[str]:
-  with tf.io.gfile.GFile(file) as f:
+  with epath.Path(file).open() as f:
     sentences = f.read().splitlines()
   return sentences
diff --git a/tensorflow_datasets/image/arc.py b/tensorflow_datasets/image/arc.py
@@ -18,6 +18,7 @@
 import json
 import os
 
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -140,7 +141,7 @@ def _generate_examples(self, directory):
     """Yields (key, example) tuples from the dataset."""
     json_filepaths = tf.io.gfile.glob(os.path.join(directory, "*.json"))
     for json_path in sorted(json_filepaths):
-      with tf.io.gfile.GFile(json_path) as f:
+      with epath.Path(json_path).open() as f:
         task = json.load(f)
       task_id = os.path.basename(json_path)[:-len(".json")]
       yield task_id, {

diff --git a/tensorflow_datasets/image/bccd/bccd.py b/tensorflow_datasets/image/bccd/bccd.py
@@ -19,6 +19,7 @@
 import os
 import xml.etree.ElementTree as ET
 
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -103,7 +104,7 @@ def _split_generators(self, dl_manager):
     for root, _, filename in tf.io.gfile.walk(splits_dir_path):
       for fname in filename:
         full_file_name = os.path.join(root, fname)
-        with tf.io.gfile.GFile(full_file_name) as f:
+        with epath.Path(full_file_name).open() as f:
           for line in f:
             if fname == "train.txt":
               train_list.append(line)
@@ -178,7 +179,7 @@ def get_label(attributes, n):
 
     for fname in file_names:
       annotation_file_path = get_annotations_file_path(fname)
-      with tf.io.gfile.GFile(annotation_file_path) as f:
+      with epath.Path(annotation_file_path).open() as f:
         xml_list[fname] = ET.parse(f)
       attributes = collections.defaultdict(list)
       for element in xml_list[fname].iter():

diff --git a/tensorflow_datasets/image/celeba.py b/tensorflow_datasets/image/celeba.py
@@ -23,8 +23,8 @@
 
 import os
 
+from etils import epath
 import tensorflow as tf
-
 import tensorflow_datasets.public_api as tfds
 
 IMG_ALIGNED_DATA = ("https://drive.google.com/uc?export=download&"
@@ -174,7 +174,7 @@ def _process_celeba_config_file(self, file_path):
       values: map from the file name to the list of attribute values for
               this file.
     """
-    with tf.io.gfile.GFile(file_path) as f:
+    with epath.Path(file_path).open() as f:
       data_raw = f.read()
     lines = data_raw.split("\n")
 
@@ -194,7 +194,7 @@ def _generate_examples(self, file_id, downloaded_dirs, downloaded_images):
     landmarks_path = downloaded_dirs["landmarks_celeba"]
     attr_path = downloaded_dirs["list_attr_celeba"]
 
-    with tf.io.gfile.GFile(img_list_path) as f:
+    with epath.Path(img_list_path).open() as f:
       files = [
           line.split()[0]
           for line in f.readlines()

diff --git a/tensorflow_datasets/image/clevr.py b/tensorflow_datasets/image/clevr.py
@@ -19,6 +19,7 @@
 import json
 import os
 
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -133,7 +134,7 @@ def _generate_examples(self, images_dir_path, question_file,
         for filename in tf.io.gfile.listdir(images_dir_path)
     ])
 
-    with tf.io.gfile.GFile(question_file) as f:
+    with epath.Path(question_file).open() as f:
       questions_json = json.load(f)
     questions = collections.defaultdict(list)
     for q in questions_json["questions"]:
@@ -143,7 +144,7 @@ def _generate_examples(self, images_dir_path, question_file,
       })
 
     if tf.io.gfile.exists(scenes_description_file):
-      with tf.io.gfile.GFile(scenes_description_file) as f:
+      with epath.Path(scenes_description_file).open() as f:
         scenes_json = json.load(f)
     else:
       # if annotation file does not exist, we create empty annotations

diff --git a/tensorflow_datasets/image/duke_ultrasound.py b/tensorflow_datasets/image/duke_ultrasound.py
@@ -17,6 +17,8 @@
 
 import csv
 import os
+
+from etils import epath
 import numpy as np
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
@@ -136,7 +138,7 @@ def _split_generators(self, dl_manager):
     return splits
 
   def _generate_examples(self, datapath, csvpath):
-    with tf.io.gfile.GFile(csvpath) as f:
+    with epath.Path(csvpath).open() as f:
       reader = csv.DictReader(f)
       for row in reader:
         data_key = 'mark_data' if row['target'] == 'mark' else 'phantom_data'

diff --git a/tensorflow_datasets/image_classification/caltech_birds.py b/tensorflow_datasets/image_classification/caltech_birds.py
@@ -19,6 +19,7 @@
 import os
 import re
 
+from etils import epath
 import numpy as np
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
@@ -100,10 +101,10 @@ def _split_generators(self, dl_manager):
     train_path = os.path.join(extracted_path[0], "lists/train.txt")
     test_path = os.path.join(extracted_path[0], "lists/test.txt")
 
-    with tf.io.gfile.GFile(train_path) as f:
+    with epath.Path(train_path).open() as f:
       train_list = f.read().splitlines()
 
-    with tf.io.gfile.GFile(test_path) as f:
+    with epath.Path(test_path).open() as f:
       test_list = f.read().splitlines()
 
     attributes = collections.defaultdict(list)

diff --git a/tensorflow_datasets/image_classification/chexpert.py b/tensorflow_datasets/image_classification/chexpert.py
@@ -19,6 +19,7 @@
 import csv
 import os
 
+from etils import epath
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
 
@@ -131,7 +132,7 @@ def _split_generators(self, dl_manager):
 
   def _generate_examples(self, imgs_path, csv_path):
     """Yields examples."""
-    with tf.io.gfile.GFile(csv_path) as csv_f:
+    with epath.Path(csv_path).open() as csv_f:
       reader = csv.DictReader(csv_f)
       # Get keys for each label from csv
       label_keys = reader.fieldnames[5:]

diff --git a/tensorflow_datasets/image_classification/cifar.py b/tensorflow_datasets/image_classification/cifar.py
@@ -18,6 +18,7 @@
 import collections
 import os
 
+from etils import epath
 import numpy as np
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
@@ -83,7 +84,7 @@ def _split_generators(self, dl_manager):
     for label_key, label_file in zip(cifar_info.label_keys,
                                      cifar_info.label_files):
       labels_path = os.path.join(cifar_path, label_file)
-      with tf.io.gfile.GFile(labels_path) as label_f:
+      with epath.Path(labels_path).open() as label_f:
         label_names = [name for name in label_f.read().split("\n") if name]
       self.info.features[label_key].names = label_names
 

diff --git a/tensorflow_datasets/image_classification/cifar100_n/cifar100_n.py b/tensorflow_datasets/image_classification/cifar100_n/cifar100_n.py
@@ -18,6 +18,7 @@
 import collections
 import os
 
+from etils import epath
 import numpy as np
 import tensorflow as tf
 import tensorflow_datasets.public_api as tfds
@@ -132,7 +133,7 @@ def _split_generators(self, dl_manager):
     for label_key, label_file in zip(cifar_info.label_keys,
                                      cifar_info.label_files):
       labels_path = os.path.join(cifar_path, label_file)
-      with tf.io.gfile.GFile(labels_path) as label_f:
+      with epath.Path(labels_path).open() as label_f:
         label_names = [name for name in label_f.read().split('\n') if name]
       self.info.features[label_key].names = label_names