From 66f69d670db194e1d6b50c70a0940379ff963409 Mon Sep 17 00:00:00 2001 From: Joe Clinton <48254978+Joeclinton1@users.noreply.github.com> Date: Sun, 9 Feb 2020 16:53:23 +0100 Subject: [PATCH 1/2] Fixed None type By filtering out the image objects which had data[0]==2, I have removed the null items and it will no longer give the error: "TypeError: 'NoneType' object is not subscriptable". --- .../google_images_download.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index fd89a3a9..5447de5f 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -271,15 +271,18 @@ def get_all_tabs(self,page): #Format the object in readable format def format_object(self,object): + data = object[1] + main = data[3] + info = data[9] formatted_object = {} - formatted_object['image_format'] = object['ity'] - formatted_object['image_height'] = object['oh'] - formatted_object['image_width'] = object['ow'] - formatted_object['image_link'] = object['ou'] - formatted_object['image_description'] = object['pt'] - formatted_object['image_host'] = object['rh'] - formatted_object['image_source'] = object['ru'] - formatted_object['image_thumbnail_url'] = object['tu'] + formatted_object['image_height'] = main[2] + formatted_object['image_width'] = main[1] + formatted_object['image_link'] = main[0] + formatted_object['image_format']=main[0][-1*(len(main[0])-main[0].rfind(".")-1):] + formatted_object['image_description'] = info['2003'][3] + formatted_object['image_host'] = info['183836587'][0] + formatted_object['image_source'] = info['2003'][2] + formatted_object['image_thumbnail_url'] = data[2][0] return formatted_object @@ -482,7 +485,7 @@ def create_directories(self,main_directory, dir_name,thumbnail,thumbnail_only): try: if not os.path.exists(main_directory): os.makedirs(main_directory) - time.sleep(0.2) + time.sleep(0.15) path = (dir_name) sub_directory = os.path.join(main_directory, path) if not os.path.exists(sub_directory): @@ -740,24 +743,30 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' + def _get_image_objects(self,s): + start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 + start_object = s.find('[', start_line + 1) + end_object = s.find('', start_object + 1) - 4 + object_raw = str(s[start_object:end_object]) + object_decode = bytes(object_raw, "utf-8").decode("unicode_escape") + image_objects = json.loads(object_decode)[31][0][12][2] + image_objects = [x for x in image_objects if x[0]==1] + return image_objects + def _get_all_items(self,page,main_directory,dir_name,limit,arguments): items = [] abs_path = [] errorCount = 0 i = 0 count = 1 - while count < limit+1: - object, end_content = self._get_next_item(page) - if object == "no_links": + image_objects = self._get_image_objects(page) + while count < limit+1 and i Date: Wed, 25 Mar 2020 12:47:31 +0100 Subject: [PATCH 2/2] changed start_line (ds:2 to ds:1) --- google_images_download/google_images_download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google_images_download/google_images_download.py b/google_images_download/google_images_download.py index bbceba8c..345f4c87 100755 --- a/google_images_download/google_images_download.py +++ b/google_images_download/google_images_download.py @@ -745,7 +745,7 @@ def _get_next_item(self,s): # Getting all links with the help of '_images_get_next_image' def _get_image_objects(self,s): - start_line = s.find("AF_initDataCallback({key: \\'ds:2\\'") - 10 + start_line = s.find("AF_initDataCallback({key: \\'ds:1\\'") - 10 start_object = s.find('[', start_line + 1) end_object = s.find('', start_object + 1) - 4 object_raw = str(s[start_object:end_object])