Updating tensorflow_datasets/audio to remove pylint errors #1646

Open · wants to merge 1 commit into master
4 changes: 4 additions & 0 deletions tensorflow_datasets/audio/groove.py
@@ -117,6 +117,8 @@ class Groove(tfds.core.GeneratorBasedBuilder):
  ]

  def _info(self):
+    """Returns Dataset info."""
+
    features_dict = {
        "id": tf.string,
        "drummer":
@@ -164,6 +166,8 @@ def _split_generators(self, dl_manager):
        for split, split_rows in rows.items()]

  def _generate_examples(self, rows, data_dir):
+    """Yields examples."""
+
    split_bars = self._builder_config.split_bars
    for row in rows:
      split_genre = row["style"].split("/")
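This docstring pattern, repeated across the files below, is what clears pylint's missing-docstring checks (C0114 through C0116 in recent pylint): a one-line docstring on the module, class, or function is sufficient. A minimal sketch of the idea, using a hypothetical builder skeleton (only the docstrings matter here; the method bodies are elided):

```python
"""Module docstring; pylint's C0114 (missing-module-docstring) wants this."""
import tensorflow_datasets as tfds


class DemoAudio(tfds.core.GeneratorBasedBuilder):
  """Hypothetical builder skeleton, not a real dataset."""

  def _info(self):
    """Returns Dataset info."""  # a one-liner clears missing-function-docstring

  def _generate_examples(self, rows, data_dir):
    """Yields examples."""
```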
3 changes: 3 additions & 0 deletions tensorflow_datasets/audio/librispeech.py
@@ -161,6 +161,7 @@ def _populate_metadata(self, dirs):
    ])

  def _read_metadata_file(self, path, field_names):
+    """Returns metadata in standard format."""
    metadata = {}
    with tf.io.gfile.GFile(path) as f:
      for line in f:
@@ -173,6 +174,8 @@ def _read_metadata_file(self, path, field_names):
    return metadata

  def _split_generators(self, dl_manager):
+    """Returns splits."""
+
    extracted_dirs = dl_manager.download_and_extract(_DL_URLS)
    # Generate vocabulary from training data if SubwordTextEncoder configured.
    all_train_dirs = [
2 changes: 2 additions & 0 deletions tensorflow_datasets/audio/librispeech_test.py
@@ -25,6 +25,8 @@


class LibrispeechTest(testing.DatasetBuilderTestCase):
+  """Tests for the librispeech dataset module."""
+
  DATASET_CLASS = librispeech.Librispeech
  BUILDER_CONFIG_NAMES_TO_TEST = ["plain_text", "subwords8k"]
  SPLITS = {
1 change: 1 addition & 0 deletions tensorflow_datasets/audio/libritts.py
@@ -92,6 +92,7 @@ def _info(self):
    )

  def _populate_metadata(self, archive_paths):
+    """Generates metadata for the LibriTTS dataset."""
    # All archives contain the same metadata.
    archive_path = list(archive_paths.values())[0]

2 changes: 2 additions & 0 deletions tensorflow_datasets/audio/libritts_test.py
@@ -25,6 +25,8 @@


class LibriTTSTest(testing.DatasetBuilderTestCase):
+  """Tests for the LibriTTS dataset module."""
+
  DATASET_CLASS = libritts.Libritts
  SPLITS = {
      "train_clean100": 2,
1 change: 1 addition & 0 deletions tensorflow_datasets/audio/nsynth.py
@@ -142,6 +142,7 @@ class Nsynth(tfds.core.BeamBasedBuilder):
  ]

  def _info(self):
+    """Returns Dataset info."""
    features = {
        "id":
            tf.string,
2 changes: 1 addition & 1 deletion tensorflow_datasets/audio/nsynth_test.py
@@ -22,7 +22,7 @@
class NsynthFullTest(tfds_test.DatasetBuilderTestCase):
  DATASET_CLASS = nsynth.Nsynth
  # Make test run faster by using fewer output shards.
-  nsynth._SPLIT_SHARDS = {"train": 1, "valid": 1, "test": 1,}
+  nsynth._SPLIT_SHARDS = {"train": 1, "valid": 1, "test": 1,}  # pylint: disable=protected-access
  BUILDER_CONFIG_NAMES_TO_TEST = ["full"]
  SPLITS = {"train": 3, "test": 3, "valid": 3}
  DL_EXTRACT_RESULT = {
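Assigning through the module-private `_SPLIT_SHARDS` is what trips pylint's protected-access check (W0212); the trailing pragma added above suppresses it for that single statement. A small runnable sketch of the idiom (the class and names here are hypothetical, not from the PR):

```python
class _Settings:
  """Stand-in for a module exposing a 'private' attribute."""
  _split_shards = {"train": 8, "valid": 2, "test": 2}


settings = _Settings()
# The pragma is scoped to this statement alone, so protected-access
# violations elsewhere in the file are still reported.
settings._split_shards = {"train": 1, "valid": 1, "test": 1}  # pylint: disable=protected-access
```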
6 changes: 4 additions & 2 deletions tensorflow_datasets/audio/speech_commands.py
@@ -124,7 +124,7 @@ def _generate_examples(self, archive, file_list):
      example_id = '{}_{}'.format(word, wavname)
      if word in WORDS:
        label = word
-      elif word == SILENCE or word == BACKGROUND_NOISE:
+      elif word in (SILENCE, BACKGROUND_NOISE):
        # The main tar file already contains all of the test files, except for
        # the silence ones. In fact it does not contain silence files at all.
        # So for the test set we take the silence files from the test tar file,
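The `elif` rewrite above is pylint's consider-using-in refactor (R1714): repeated equality tests against the same variable collapse into one membership test. A minimal sketch (the constant values are assumptions; the diff only shows the names):

```python
SILENCE = '_silence_'  # assumed value
BACKGROUND_NOISE = '_background_noise_'  # assumed value

word = '_silence_'

# Before: flagged by pylint R1714 (consider-using-in).
is_noise = word == SILENCE or word == BACKGROUND_NOISE

# After: one membership test against a tuple of the alternatives.
is_noise = word in (SILENCE, BACKGROUND_NOISE)
print(is_noise)  # True
```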
@@ -161,10 +161,11 @@ def _generate_examples(self, archive, file_list):
                label,
        }
        yield example_id, example
-      except lazy_imports_lib.lazy_imports.pydub.exceptions.CouldntDecodeError:
+      except lazy_imports_lib.lazy_imports.pydub.exceptions.CouldntDecodeError:  # pylint: disable=line-too-long
        pass

  def _split_archive(self, train_archive):
+    """Returns training and validation set paths."""
    train_paths = []
    for path, file_obj in train_archive:
      if 'testing_list.txt' in path:
@@ -186,3 +187,4 @@ def _split_archive(self, train_archive):
        set(train_paths) - set(validation_paths) - set(train_test_paths))

    return train_paths, validation_paths
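To confirm the warnings are gone, the touched package can be re-linted. A sketch using pylint's programmatic entry point (the target path is an assumption; running `pylint tensorflow_datasets/audio` from a shell is equivalent):

```python
from pylint.lint import Run

# Lints the audio package, prints the usual report, and exits with
# pylint's status code, just like the command-line invocation.
Run(["tensorflow_datasets/audio"])
```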