Updating tensorflow_datasets/audio to remove pylint errors #1646

Open · wants to merge 1 commit into master
4 changes: 4 additions & 0 deletions tensorflow_datasets/audio/groove.py
@@ -117,6 +117,8 @@ class Groove(tfds.core.GeneratorBasedBuilder):
  ]

  def _info(self):
+    """Returns Dataset info."""
+
    features_dict = {
        "id": tf.string,
        "drummer":
@@ -164,6 +166,8 @@ def _split_generators(self, dl_manager):
        for split, split_rows in rows.items()]

  def _generate_examples(self, rows, data_dir):
+    """Yields examples."""
+
    split_bars = self._builder_config.split_bars
    for row in rows:
      split_genre = row["style"].split("/")
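This docstring pattern, repeated across the files below, is what clears pylint's missing-docstring checks (C0114 through C0116 in recent pylint): a one-line docstring on the module, class, or function is sufficient. A minimal sketch of the idea, using a hypothetical builder skeleton (only the docstrings matter here; the method bodies are elided):

```python
"""Module docstring; pylint's C0114 (missing-module-docstring) wants this."""
import tensorflow_datasets as tfds


class DemoAudio(tfds.core.GeneratorBasedBuilder):
  """Hypothetical builder skeleton, not a real dataset."""

  def _info(self):
    """Returns Dataset info."""  # a one-liner clears missing-function-docstring

  def _generate_examples(self, rows, data_dir):
    """Yields examples."""
```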
3 changes: 3 additions & 0 deletions tensorflow_datasets/audio/librispeech.py
@@ -161,6 +161,7 @@ def _populate_metadata(self, dirs):
    ])

  def _read_metadata_file(self, path, field_names):
+    """Returns metadata in standard format."""
    metadata = {}
    with tf.io.gfile.GFile(path) as f:
      for line in f:
@@ -173,6 +174,8 @@ def _read_metadata_file(self, path, field_names):
    return metadata

  def _split_generators(self, dl_manager):
+    """Returns splits."""
+
    extracted_dirs = dl_manager.download_and_extract(_DL_URLS)
    # Generate vocabulary from training data if SubwordTextEncoder configured.
    all_train_dirs = [
2 changes: 2 additions & 0 deletions tensorflow_datasets/audio/librispeech_test.py
@@ -25,6 +25,8 @@


class LibrispeechTest(testing.DatasetBuilderTestCase):
+  """Tests for the librispeech dataset module."""
+
  DATASET_CLASS = librispeech.Librispeech
  BUILDER_CONFIG_NAMES_TO_TEST = ["plain_text", "subwords8k"]
  SPLITS = {
1 change: 1 addition & 0 deletions tensorflow_datasets/audio/libritts.py
@@ -92,6 +92,7 @@ def _info(self):
    )

  def _populate_metadata(self, archive_paths):
+    """Generates metadata for the LibriTTS dataset."""
    # All archives contain the same metadata.
    archive_path = list(archive_paths.values())[0]

2 changes: 2 additions & 0 deletions tensorflow_datasets/audio/libritts_test.py
@@ -25,6 +25,8 @@


class LibriTTSTest(testing.DatasetBuilderTestCase):
+  """Tests for the LibriTTS dataset module."""
+
  DATASET_CLASS = libritts.Libritts
  SPLITS = {
      "train_clean100": 2,
1 change: 1 addition & 0 deletions tensorflow_datasets/audio/nsynth.py
@@ -142,6 +142,7 @@ class Nsynth(tfds.core.BeamBasedBuilder):
  ]

  def _info(self):
+    """Returns Dataset info."""
    features = {
        "id":
            tf.string,
2 changes: 1 addition & 1 deletion tensorflow_datasets/audio/nsynth_test.py
@@ -22,7 +22,7 @@
class NsynthFullTest(tfds_test.DatasetBuilderTestCase):
  DATASET_CLASS = nsynth.Nsynth
  # Make test run faster by using fewer output shards.
-  nsynth._SPLIT_SHARDS = {"train": 1, "valid": 1, "test": 1,}
+  nsynth._SPLIT_SHARDS = {"train": 1, "valid": 1, "test": 1,}  # pylint: disable=protected-access
  BUILDER_CONFIG_NAMES_TO_TEST = ["full"]
  SPLITS = {"train": 3, "test": 3, "valid": 3}
  DL_EXTRACT_RESULT = {
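Assigning through the module-private `_SPLIT_SHARDS` is what trips pylint's protected-access check (W0212); the trailing pragma added above suppresses it for that single statement. A small runnable sketch of the idiom (the class and names here are hypothetical, not from the PR):

```python
class _Settings:
  """Stand-in for a module exposing a 'private' attribute."""
  _split_shards = {"train": 8, "valid": 2, "test": 2}


settings = _Settings()
# The pragma is scoped to this statement alone, so protected-access
# violations elsewhere in the file are still reported.
settings._split_shards = {"train": 1, "valid": 1, "test": 1}  # pylint: disable=protected-access
```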
6 changes: 4 additions & 2 deletions tensorflow_datasets/audio/speech_commands.py
@@ -124,7 +124,7 @@ def _generate_examples(self, archive, file_list):
      example_id = '{}_{}'.format(word, wavname)
      if word in WORDS:
        label = word
-      elif word == SILENCE or word == BACKGROUND_NOISE:
+      elif word in (SILENCE, BACKGROUND_NOISE):
        # The main tar file already contains all of the test files, except for
        # the silence ones. In fact it does not contain silence files at all.
        # So for the test set we take the silence files from the test tar file,
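The `elif` rewrite above is pylint's consider-using-in refactor (R1714): repeated equality tests against the same variable collapse into one membership test. A minimal sketch (the constant values are assumptions; the diff only shows the names):

```python
SILENCE = '_silence_'  # assumed value
BACKGROUND_NOISE = '_background_noise_'  # assumed value

word = '_silence_'

# Before: flagged by pylint R1714 (consider-using-in).
is_noise = word == SILENCE or word == BACKGROUND_NOISE

# After: one membership test against a tuple of the alternatives.
is_noise = word in (SILENCE, BACKGROUND_NOISE)
print(is_noise)  # True
```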
@@ -161,10 +161,11 @@ def _generate_examples(self, archive, file_list):
                label,
        }
        yield example_id, example
-      except lazy_imports_lib.lazy_imports.pydub.exceptions.CouldntDecodeError:
+      except lazy_imports_lib.lazy_imports.pydub.exceptions.CouldntDecodeError:  # pylint: disable=line-too-long
        pass

  def _split_archive(self, train_archive):
+    """Returns training and validation set paths."""
    train_paths = []
    for path, file_obj in train_archive:
      if 'testing_list.txt' in path:
@@ -186,3 +187,4 @@ def _split_archive(self, train_archive):
        set(train_paths) - set(validation_paths) - set(train_test_paths))

    return train_paths, validation_paths
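To confirm the warnings are gone, the touched package can be re-linted. A sketch using pylint's programmatic entry point (the target path is an assumption; running `pylint tensorflow_datasets/audio` from a shell is equivalent):

```python
from pylint.lint import Run

# Lints the audio package, prints the usual report, and exits with
# pylint's status code, just like the command-line invocation.
Run(["tensorflow_datasets/audio"])
```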