From 558fda89480519de817662e373fbcb6731f3e180 Mon Sep 17 00:00:00 2001
From: cherukuri12 <35265453+cherukuri12@users.noreply.github.com>
Date: Tue, 1 Oct 2019 16:36:28 +0530
Subject: [PATCH 1/2] Create webscraping.py for HTML parsing

---
 scripts/webscraping.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 scripts/webscraping.py

diff --git a/scripts/webscraping.py b/scripts/webscraping.py
new file mode 100644
index 0000000..96f750f
--- /dev/null
+++ b/scripts/webscraping.py
@@ -0,0 +1,22 @@
+import requests
+from bs4 import BeautifulSoup
+
+def cars_brand_links():
+    url = 'https://www.carsprite.com/en/car-prices'
+    source_code = requests.get(url)
+    plain_text = source_code.text
+    soup = BeautifulSoup(plain_text)
+    for link in soup.findAll("a"):
+        href = link.get('href')
+        if "car-prices" not in href:
+           pass
+        else:
+            data = href
+            i = 9
+            while i < 49:
+                print(data[i])
+                i += 1
+
+
+
+cars_brand_links()

From 441058cad1a70c11a35449ed652d04dbd658c53a Mon Sep 17 00:00:00 2001
From: cherukuri12 <35265453+cherukuri12@users.noreply.github.com>
Date: Tue, 1 Oct 2019 16:39:14 +0530
Subject: [PATCH 2/2] webscraping.py: follow car-prices links and print their sub-links

---
 scripts/webscraping.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/scripts/webscraping.py b/scripts/webscraping.py
index 96f750f..2dfd00c 100644
--- a/scripts/webscraping.py
+++ b/scripts/webscraping.py
@@ -2,21 +2,33 @@
 from bs4 import BeautifulSoup
 
 def cars_brand_links():
-    url = 'https://www.carsprite.com/en/car-prices'
+    url = 'https://www.carsprite.com/en/car-prices/'
     source_code = requests.get(url)
     plain_text = source_code.text
     soup = BeautifulSoup(plain_text)
     for link in soup.findAll("a"):
-        href = link.get('href')
-        if "car-prices" not in href:
+        href = "https://www.carsprite.com/en/" + (link.get('href') or "")  # no href -> None
+        if "car-prices/" not in href:
            pass
         else:
             data = href
-            i = 9
-            while i < 49:
-                print(data[i])
-                i += 1
+            get_single_item_data(data)
+def get_single_item_data(brand_url):
+    """Fetch brand_url and print an absolute URL for each relative link.
+
+    A link is printed only when its href contains none of "/en/", "https"
+    and "/car-prices/"; it is prefixed with the car-prices base URL.
+    """
+    page = requests.get(brand_url)
+    # Name the parser explicitly: bs4 otherwise guesses one and warns.
+    soup = BeautifulSoup(page.text, "html.parser")
+    for link in soup.findAll("a"):
+        href = link.get('href')
+        # <a> tags without an href give None, which would break the `in` tests.
+        if href is None or any(s in href for s in ("/en/", "https", "/car-prices/")):
+            continue
+        print('https://www.carsprite.com/en/car-prices/' + href)
 
 
 
 cars_brand_links()