tensorflow · Eshan-Agarwal · Mar 15, 2020 · Mar 15, 2020 · Mar 15, 2020 · Mar 15, 2020
diff --git a/tensorflow_datasets/summarization/__init__.py b/tensorflow_datasets/summarization/__init__.py
@@ -14,8 +14,11 @@
 # limitations under the License.
 
 # Lint as: python3
+
 """Text datasets."""
 
+# pylint: disable=c0103
+
 from tensorflow_datasets.summarization.aeslc import Aeslc
 from tensorflow_datasets.summarization.big_patent import BigPatent
 from tensorflow_datasets.summarization.billsum import Billsum

diff --git a/tensorflow_datasets/summarization/cnn_dailymail.py b/tensorflow_datasets/summarization/cnn_dailymail.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 
 # Lint as: python3
+
 """CNN/DailyMail Summarization dataset, non-anonymized version."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+
 import hashlib
 import os
 from absl import logging
@@ -78,14 +77,6 @@
 
 _HIGHLIGHTS = 'highlights'
 _ARTICLE = 'article'
-_SUPPORTED_VERSIONS = [
-    # Same data as 0.0.2
-    tfds.core.Version('1.0.0',
-                      'New split API (https://tensorflow.org/datasets/splits)'),
-    # Having the model predict newline separators makes it easier to evaluate
-    # using summary-level ROUGE.
-    tfds.core.Version('2.0.0', 'Separate target sentences with newline.')
-]
 
 # Using cased version.
 _DEFAULT_VERSION = tfds.core.Version('3.0.0', 'Using cased version.')
@@ -212,12 +203,13 @@ def fix_missing_period(line):
   for line in lines:
     if not line:
       continue  # empty line
-    elif line.startswith('@highlight'):
+    # Exactly one branch handles each line (marker, highlight or article).
+    if line.startswith('@highlight'):
       next_is_highlight = True
     elif next_is_highlight:
       highlights.append(line)
     else:
       article_lines.append(line)
 
   # Make article into a single string
   article = ' '.join(article_lines)
@@ -277,6 +269,7 @@ def _vocab_text_gen(self, paths):
       yield ' '.join([ex[_ARTICLE], ex[_HIGHLIGHTS]])
 
   def _split_generators(self, dl_manager):
+    """Generate Splits."""
     dl_paths = dl_manager.download_and_extract(_DL_URLS)
     train_files = _subset_filenames(dl_paths, tfds.Split.TRAIN)
     # Generate shared vocabulary

diff --git a/tensorflow_datasets/summarization/cnn_dailymail_test.py b/tensorflow_datasets/summarization/cnn_dailymail_test.py
@@ -14,10 +14,9 @@
 # limitations under the License.
 
 # Lint as: python3
+
 """Tests for tensorflow_datasets.text.cnn_dailymail."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+
 
 import tempfile
 
@@ -43,6 +42,7 @@
 
 
 class CnnDailymailTest(testing.DatasetBuilderTestCase):
+  """Tests for the CnnDailymail dataset builder."""
   DATASET_CLASS = cnn_dailymail.CnnDailymail
   SPLITS = {'train': 3, 'validation': 2, 'test': 2}
   DL_EXTRACT_RESULT = {
@@ -54,10 +54,11 @@ class CnnDailymailTest(testing.DatasetBuilderTestCase):
   }
 
   def test_get_art_abs(self):
+    """Checks _get_art_abs article and abstract extraction."""
     with tempfile.NamedTemporaryFile(delete=True) as f:
       f.write(_STORY_FILE)
       f.flush()
-      article, abstract = cnn_dailymail._get_art_abs(f.name,
+      article, abstract = cnn_dailymail._get_art_abs(f.name,  # pylint: disable=protected-access
                                                      tfds.core.Version('1.0.0'))
       self.assertEqual('Some article. This is some article text.', article)
       # This is a bit weird, but the original code at
@@ -66,7 +67,7 @@ def test_get_art_abs(self):
       self.assertEqual('highlight text . Highlight two . highlight Three .',
                        abstract)
 
-      article, abstract = cnn_dailymail._get_art_abs(f.name,
+      article, abstract = cnn_dailymail._get_art_abs(f.name,  # pylint: disable=protected-access
                                                      tfds.core.Version('2.0.0'))
       self.assertEqual('highlight text .\nHighlight two .\nhighlight Three .',
                        abstract)

diff --git a/tensorflow_datasets/summarization/wikihow.py b/tensorflow_datasets/summarization/wikihow.py
@@ -14,11 +14,9 @@
 # limitations under the License.
 
 # Lint as: python3
+
 """WikiHow Datasets."""
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 
 import csv
 import os
@@ -170,7 +168,7 @@ def _generate_examples(self, path=None, title_set=None):
     """Yields examples."""
     with tf.io.gfile.GFile(path) as f:
       reader = csv.reader(f)
-      headers = next(reader)
+      headers = next(reader)  # pylint: disable=stop-iteration-return
       if self.builder_config.name == "all" and headers != [
           "headline", "title", "text"
       ]:
@@ -210,5 +208,4 @@ def _filter_and_clean(abstract, article):
     # remove extra commas in articles
     article = re.sub(r"[.]+[\n]+[,]", ".\n", article)
     return abstract, article
-  else:
-    return "", ""
+  return "", ""