Ejercicio web scraping webs correlativas

2023-04-12 22:36:48 +02:00
parent 57bf0e8777
commit c681de5c61
1 changed file with 20 additions and 28 deletions
--- a/dia_11/03_web_scraping_03.py
+++ b/dia_11/03_web_scraping_03.py
@@ -8,17 +8,12 @@ Con las librerias beautifulsoup4, lxml y requests
 import bs4
 import requests

+# Variables
 raiz = 'http://books.toscrape.com/catalogue/page-'
 extension = '.html'
-
-
-def comprobar_enlaces(http, ext):
-    """ Comprobar si el enlace existe """
-
-    # Variables
-    enlaces = []
 fin_url = True
 page = 0
+lista_titulos = []

 # Bucle para formar url y añadir a una lista
 while fin_url:
@@ -26,22 +21,19 @@ def comprobar_enlaces(http, ext):
    page += 1
    page = str(page)

-        enlace = http + page + ext
+    enlace = raiz + page + extension

    resultado = requests.get(enlace)
+    sopa = bs4.BeautifulSoup(resultado.text, 'lxml')

    if resultado:
-            enlaces.append(enlace)
        page = int(page)
-            # BORRAR CUANDO ESTE LISTO EL RESTO DEL DOCUMENTO
-            break
+
+        # Todos los títulos
+        titulos = sopa.select('.product_pod a')
+        for title in titulos:
+            if title.get('title') != None:
+                lista_titulos.append(title.get('title'))
+
    else:
        fin_url = False
-
-    return enlaces
-
-
-print(comprobar_enlaces(raiz, extension))
-
-
-# sopa = bs4.BeautifulSoup(resultado.text, 'lxml')