Python WebScraping closes without finishing and without giving error









up vote
1
down vote

favorite
2












I'm making a simple WebScraping that download the image of the items of some champions of a site, I put a "for" with 5 characters and it only executes 2 of them and then closes without giving any error!



import bs4 as bs
import sys,os
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl

class Page(QWebEnginePage):
def __init__(self, url):
self.app = QApplication(sys.argv)
QWebEnginePage.__init__(self)
self.html = ''
print("#1 __init__")
self.loadFinished.connect(self._on_load_finished)
self.load(QUrl(url))
self.app.exec_()

def _on_load_finished(self):
self.html = self.toHtml(self.Callable)
print('#2 On Load finished')

def Callable(self, html_str):
print("#3 Callablen")
self.html = html_str
self.app.quit()

def already_exist(image_name):
for _, _, folder in os.walk('Images'):
if image_name in folder:
return False
else:
return True

def ImageDownload(url):
image_name = url.split("/")
try:
if already_exist(image_name[-1]):
full_path = "Images/" + image_name[-1]
urllib.request.urlretrieve(url, full_path)
print("Download %s" % image_name)
else:
print("Image already Downloaded >: %s" % image_name[-1])
except:
print("Error Download")

def main():
champions = ['Amumu','Akali','Zed','Nunu'] #champions
for champ in champions:
try:
print("nDownloading Images >: %s"% champ)
data = Page('https://www.probuilds.net/champions/details/%s' % champ.strip())
soup = bs.BeautifulSoup(data.html, 'html.parser')
items = soup.find_all('div','class':'items')
for photos in items:
images = photos.find_all('img')
for image in images:
ImageDownload(image['src'])
except:
print("Shi...")

main()


i'm getting no error but the program only executes 2 times this is the problem, someone help me !!!










share|improve this question

























    up vote
    1
    down vote

    favorite
    2












    I'm making a simple WebScraping that download the image of the items of some champions of a site, I put a "for" with 5 characters and it only executes 2 of them and then closes without giving any error!



    import bs4 as bs
    import sys,os
    import urllib.request
    from PyQt5.QtWebEngineWidgets import QWebEnginePage
    from PyQt5.QtWidgets import QApplication
    from PyQt5.QtCore import QUrl

    class Page(QWebEnginePage):
    def __init__(self, url):
    self.app = QApplication(sys.argv)
    QWebEnginePage.__init__(self)
    self.html = ''
    print("#1 __init__")
    self.loadFinished.connect(self._on_load_finished)
    self.load(QUrl(url))
    self.app.exec_()

    def _on_load_finished(self):
    self.html = self.toHtml(self.Callable)
    print('#2 On Load finished')

    def Callable(self, html_str):
    print("#3 Callablen")
    self.html = html_str
    self.app.quit()

    def already_exist(image_name):
    for _, _, folder in os.walk('Images'):
    if image_name in folder:
    return False
    else:
    return True

    def ImageDownload(url):
    image_name = url.split("/")
    try:
    if already_exist(image_name[-1]):
    full_path = "Images/" + image_name[-1]
    urllib.request.urlretrieve(url, full_path)
    print("Download %s" % image_name)
    else:
    print("Image already Downloaded >: %s" % image_name[-1])
    except:
    print("Error Download")

    def main():
    champions = ['Amumu','Akali','Zed','Nunu'] #champions
    for champ in champions:
    try:
    print("nDownloading Images >: %s"% champ)
    data = Page('https://www.probuilds.net/champions/details/%s' % champ.strip())
    soup = bs.BeautifulSoup(data.html, 'html.parser')
    items = soup.find_all('div','class':'items')
    for photos in items:
    images = photos.find_all('img')
    for image in images:
    ImageDownload(image['src'])
    except:
    print("Shi...")

    main()


    i'm getting no error but the program only executes 2 times this is the problem, someone help me !!!










    share|improve this question























      up vote
      1
      down vote

      favorite
      2









      up vote
      1
      down vote

      favorite
      2






      2





      I'm making a simple WebScraping that download the image of the items of some champions of a site, I put a "for" with 5 characters and it only executes 2 of them and then closes without giving any error!



      import bs4 as bs
      import sys,os
      import urllib.request
      from PyQt5.QtWebEngineWidgets import QWebEnginePage
      from PyQt5.QtWidgets import QApplication
      from PyQt5.QtCore import QUrl

      class Page(QWebEnginePage):
      def __init__(self, url):
      self.app = QApplication(sys.argv)
      QWebEnginePage.__init__(self)
      self.html = ''
      print("#1 __init__")
      self.loadFinished.connect(self._on_load_finished)
      self.load(QUrl(url))
      self.app.exec_()

      def _on_load_finished(self):
      self.html = self.toHtml(self.Callable)
      print('#2 On Load finished')

      def Callable(self, html_str):
      print("#3 Callablen")
      self.html = html_str
      self.app.quit()

      def already_exist(image_name):
      for _, _, folder in os.walk('Images'):
      if image_name in folder:
      return False
      else:
      return True

      def ImageDownload(url):
      image_name = url.split("/")
      try:
      if already_exist(image_name[-1]):
      full_path = "Images/" + image_name[-1]
      urllib.request.urlretrieve(url, full_path)
      print("Download %s" % image_name)
      else:
      print("Image already Downloaded >: %s" % image_name[-1])
      except:
      print("Error Download")

      def main():
      champions = ['Amumu','Akali','Zed','Nunu'] #champions
      for champ in champions:
      try:
      print("nDownloading Images >: %s"% champ)
      data = Page('https://www.probuilds.net/champions/details/%s' % champ.strip())
      soup = bs.BeautifulSoup(data.html, 'html.parser')
      items = soup.find_all('div','class':'items')
      for photos in items:
      images = photos.find_all('img')
      for image in images:
      ImageDownload(image['src'])
      except:
      print("Shi...")

      main()


      i'm getting no error but the program only executes 2 times this is the problem, someone help me !!!










      share|improve this question













      I'm making a simple WebScraping that download the image of the items of some champions of a site, I put a "for" with 5 characters and it only executes 2 of them and then closes without giving any error!



      import bs4 as bs
      import sys,os
      import urllib.request
      from PyQt5.QtWebEngineWidgets import QWebEnginePage
      from PyQt5.QtWidgets import QApplication
      from PyQt5.QtCore import QUrl

      class Page(QWebEnginePage):
      def __init__(self, url):
      self.app = QApplication(sys.argv)
      QWebEnginePage.__init__(self)
      self.html = ''
      print("#1 __init__")
      self.loadFinished.connect(self._on_load_finished)
      self.load(QUrl(url))
      self.app.exec_()

      def _on_load_finished(self):
      self.html = self.toHtml(self.Callable)
      print('#2 On Load finished')

      def Callable(self, html_str):
      print("#3 Callablen")
      self.html = html_str
      self.app.quit()

      def already_exist(image_name):
      for _, _, folder in os.walk('Images'):
      if image_name in folder:
      return False
      else:
      return True

      def ImageDownload(url):
      image_name = url.split("/")
      try:
      if already_exist(image_name[-1]):
      full_path = "Images/" + image_name[-1]
      urllib.request.urlretrieve(url, full_path)
      print("Download %s" % image_name)
      else:
      print("Image already Downloaded >: %s" % image_name[-1])
      except:
      print("Error Download")

      def main():
      champions = ['Amumu','Akali','Zed','Nunu'] #champions
      for champ in champions:
      try:
      print("nDownloading Images >: %s"% champ)
      data = Page('https://www.probuilds.net/champions/details/%s' % champ.strip())
      soup = bs.BeautifulSoup(data.html, 'html.parser')
      items = soup.find_all('div','class':'items')
      for photos in items:
      images = photos.find_all('img')
      for image in images:
      ImageDownload(image['src'])
      except:
      print("Shi...")

      main()


      i'm getting no error but the program only executes 2 times this is the problem, someone help me !!!







      python web-scraping pyqt pyqt5






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 10 at 0:40









      Tuti Tutors

      82




      82






















          1 Answer
          1






          active

          oldest

          votes

















          up vote
          0
          down vote



          accepted










          What it seems is that the QWebEnginePage does not close correctly, it is also advisable to reuse instead of creating another QWebEnginePage, so using an old answer as a basis I have implemented the following solution:



          import os
          import sys
          import bs4 as bs
          import urllib.request
          from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

          class WebPage(QtWebEngineWidgets.QWebEnginePage):
          def __init__(self):
          super(WebPage, self).__init__()
          self.loadFinished.connect(self.handleLoadFinished)

          def start(self, urls):
          self._urls = iter(urls)
          self.fetchNext()

          def fetchNext(self):
          try:
          url = next(self._urls)
          except StopIteration:
          return False
          else:
          self.load(QtCore.QUrl(url))
          return True

          def processCurrentPage(self, html):
          self.process(self.url(), html)
          if not self.fetchNext():
          QtWidgets.qApp.quit()

          def handleLoadFinished(self):
          self.toHtml(self.processCurrentPage)

          def process(self, url, html):
          print('loaded: [%d chars] %s' % (len(html), url.toString()))

          class ScrapePage(WebPage):
          def __init__(self):
          super(ScrapePage, self).__init__()
          self.results = set()

          def process(self, url, html):
          soup = bs.BeautifulSoup(html, 'html.parser')
          items = soup.find_all('div','class':'items')
          for photos in items:
          images = photos.find_all('img')
          for image in images:
          self.results.add(image['src'])

          def already_exist(image_name):
          for _, _, folder in os.walk('Images'):
          if image_name in folder:
          return False
          else:
          return True

          def ImageDownload(url):
          image_name = url.split("/")
          try:
          if already_exist(image_name[-1]):
          full_path = "Images/" + image_name[-1]
          urllib.request.urlretrieve(url, full_path)
          print("Download %s" % image_name)
          else:
          print("Image already Downloaded >: %s" % image_name[-1])
          except:
          print("Error Download")

          if __name__ == '__main__':

          app = QtWidgets.QApplication(sys.argv)
          webpage = ScrapePage()

          champions = ['Amumu','Akali','Zed','Nunu']
          base_url = 'https://www.probuilds.net/champions/details/'

          urls =
          for champ in champions:
          url = QtCore.QUrl(base_url).resolved(QtCore.QUrl(champ))
          urls.append(url)
          webpage.start(urls)
          app.exec_()
          for url in webpage.results:
          ImageDownload(url)





          share|improve this answer




















          • Can you explain the code to me?
            – Tuti Tutors
            Nov 10 at 4:11










          • @TutiTutors Can you be specific? What part of the code do you not understand?
            – eyllanesc
            Nov 10 at 4:12











          • class "WebPage" !
            – Tuti Tutors
            Nov 10 at 4:51










          • @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
            – eyllanesc
            Nov 10 at 4:55










          • I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
            – Tuti Tutors
            Nov 11 at 19:12










          Your Answer






          StackExchange.ifUsing("editor", function ()
          StackExchange.using("externalEditor", function ()
          StackExchange.using("snippets", function ()
          StackExchange.snippets.init();
          );
          );
          , "code-snippets");

          StackExchange.ready(function()
          var channelOptions =
          tags: "".split(" "),
          id: "1"
          ;
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function()
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled)
          StackExchange.using("snippets", function()
          createEditor();
          );

          else
          createEditor();

          );

          function createEditor()
          StackExchange.prepareEditor(
          heartbeatType: 'answer',
          convertImagesToLinks: true,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: 10,
          bindNavPrevention: true,
          postfix: "",
          imageUploader:
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          ,
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          );



          );













           

          draft saved


          draft discarded


















          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53235004%2fpython-webscraping-closes-without-finishing-and-without-giving-error%23new-answer', 'question_page');

          );

          Post as a guest















          Required, but never shown

























          1 Answer
          1






          active

          oldest

          votes








          1 Answer
          1






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes








          up vote
          0
          down vote



          accepted










          What it seems is that the QWebEnginePage does not close correctly, it is also advisable to reuse instead of creating another QWebEnginePage, so using an old answer as a basis I have implemented the following solution:



          import os
          import sys
          import bs4 as bs
          import urllib.request
          from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

          class WebPage(QtWebEngineWidgets.QWebEnginePage):
          def __init__(self):
          super(WebPage, self).__init__()
          self.loadFinished.connect(self.handleLoadFinished)

          def start(self, urls):
          self._urls = iter(urls)
          self.fetchNext()

          def fetchNext(self):
          try:
          url = next(self._urls)
          except StopIteration:
          return False
          else:
          self.load(QtCore.QUrl(url))
          return True

          def processCurrentPage(self, html):
          self.process(self.url(), html)
          if not self.fetchNext():
          QtWidgets.qApp.quit()

          def handleLoadFinished(self):
          self.toHtml(self.processCurrentPage)

          def process(self, url, html):
          print('loaded: [%d chars] %s' % (len(html), url.toString()))

          class ScrapePage(WebPage):
          def __init__(self):
          super(ScrapePage, self).__init__()
          self.results = set()

          def process(self, url, html):
          soup = bs.BeautifulSoup(html, 'html.parser')
          items = soup.find_all('div','class':'items')
          for photos in items:
          images = photos.find_all('img')
          for image in images:
          self.results.add(image['src'])

          def already_exist(image_name):
          for _, _, folder in os.walk('Images'):
          if image_name in folder:
          return False
          else:
          return True

          def ImageDownload(url):
          image_name = url.split("/")
          try:
          if already_exist(image_name[-1]):
          full_path = "Images/" + image_name[-1]
          urllib.request.urlretrieve(url, full_path)
          print("Download %s" % image_name)
          else:
          print("Image already Downloaded >: %s" % image_name[-1])
          except:
          print("Error Download")

          if __name__ == '__main__':

          app = QtWidgets.QApplication(sys.argv)
          webpage = ScrapePage()

          champions = ['Amumu','Akali','Zed','Nunu']
          base_url = 'https://www.probuilds.net/champions/details/'

          urls =
          for champ in champions:
          url = QtCore.QUrl(base_url).resolved(QtCore.QUrl(champ))
          urls.append(url)
          webpage.start(urls)
          app.exec_()
          for url in webpage.results:
          ImageDownload(url)





          share|improve this answer




















          • Can you explain the code to me?
            – Tuti Tutors
            Nov 10 at 4:11










          • @TutiTutors Can you be specific? What part of the code do you not understand?
            – eyllanesc
            Nov 10 at 4:12











          • class "WebPage" !
            – Tuti Tutors
            Nov 10 at 4:51










          • @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
            – eyllanesc
            Nov 10 at 4:55










          • I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
            – Tuti Tutors
            Nov 11 at 19:12














          up vote
          0
          down vote



          accepted










          What it seems is that the QWebEnginePage does not close correctly, it is also advisable to reuse instead of creating another QWebEnginePage, so using an old answer as a basis I have implemented the following solution:



          import os
          import sys
          import bs4 as bs
          import urllib.request
          from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

          class WebPage(QtWebEngineWidgets.QWebEnginePage):
          def __init__(self):
          super(WebPage, self).__init__()
          self.loadFinished.connect(self.handleLoadFinished)

          def start(self, urls):
          self._urls = iter(urls)
          self.fetchNext()

          def fetchNext(self):
          try:
          url = next(self._urls)
          except StopIteration:
          return False
          else:
          self.load(QtCore.QUrl(url))
          return True

          def processCurrentPage(self, html):
          self.process(self.url(), html)
          if not self.fetchNext():
          QtWidgets.qApp.quit()

          def handleLoadFinished(self):
          self.toHtml(self.processCurrentPage)

          def process(self, url, html):
          print('loaded: [%d chars] %s' % (len(html), url.toString()))

          class ScrapePage(WebPage):
          def __init__(self):
          super(ScrapePage, self).__init__()
          self.results = set()

          def process(self, url, html):
          soup = bs.BeautifulSoup(html, 'html.parser')
          items = soup.find_all('div','class':'items')
          for photos in items:
          images = photos.find_all('img')
          for image in images:
          self.results.add(image['src'])

          def already_exist(image_name):
          for _, _, folder in os.walk('Images'):
          if image_name in folder:
          return False
          else:
          return True

          def ImageDownload(url):
          image_name = url.split("/")
          try:
          if already_exist(image_name[-1]):
          full_path = "Images/" + image_name[-1]
          urllib.request.urlretrieve(url, full_path)
          print("Download %s" % image_name)
          else:
          print("Image already Downloaded >: %s" % image_name[-1])
          except:
          print("Error Download")

          if __name__ == '__main__':

          app = QtWidgets.QApplication(sys.argv)
          webpage = ScrapePage()

          champions = ['Amumu','Akali','Zed','Nunu']
          base_url = 'https://www.probuilds.net/champions/details/'

          urls =
          for champ in champions:
          url = QtCore.QUrl(base_url).resolved(QtCore.QUrl(champ))
          urls.append(url)
          webpage.start(urls)
          app.exec_()
          for url in webpage.results:
          ImageDownload(url)





          share|improve this answer




















          • Can you explain the code to me?
            – Tuti Tutors
            Nov 10 at 4:11










          • @TutiTutors Can you be specific? What part of the code do you not understand?
            – eyllanesc
            Nov 10 at 4:12











          • class "WebPage" !
            – Tuti Tutors
            Nov 10 at 4:51










          • @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
            – eyllanesc
            Nov 10 at 4:55










          • I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
            – Tuti Tutors
            Nov 11 at 19:12












          up vote
          0
          down vote



          accepted







          up vote
          0
          down vote



          accepted






          What it seems is that the QWebEnginePage does not close correctly, it is also advisable to reuse instead of creating another QWebEnginePage, so using an old answer as a basis I have implemented the following solution:



          import os
          import sys
          import bs4 as bs
          import urllib.request
          from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

          class WebPage(QtWebEngineWidgets.QWebEnginePage):
          def __init__(self):
          super(WebPage, self).__init__()
          self.loadFinished.connect(self.handleLoadFinished)

          def start(self, urls):
          self._urls = iter(urls)
          self.fetchNext()

          def fetchNext(self):
          try:
          url = next(self._urls)
          except StopIteration:
          return False
          else:
          self.load(QtCore.QUrl(url))
          return True

          def processCurrentPage(self, html):
          self.process(self.url(), html)
          if not self.fetchNext():
          QtWidgets.qApp.quit()

          def handleLoadFinished(self):
          self.toHtml(self.processCurrentPage)

          def process(self, url, html):
          print('loaded: [%d chars] %s' % (len(html), url.toString()))

          class ScrapePage(WebPage):
          def __init__(self):
          super(ScrapePage, self).__init__()
          self.results = set()

          def process(self, url, html):
          soup = bs.BeautifulSoup(html, 'html.parser')
          items = soup.find_all('div','class':'items')
          for photos in items:
          images = photos.find_all('img')
          for image in images:
          self.results.add(image['src'])

          def already_exist(image_name):
          for _, _, folder in os.walk('Images'):
          if image_name in folder:
          return False
          else:
          return True

          def ImageDownload(url):
          image_name = url.split("/")
          try:
          if already_exist(image_name[-1]):
          full_path = "Images/" + image_name[-1]
          urllib.request.urlretrieve(url, full_path)
          print("Download %s" % image_name)
          else:
          print("Image already Downloaded >: %s" % image_name[-1])
          except:
          print("Error Download")

          if __name__ == '__main__':

          app = QtWidgets.QApplication(sys.argv)
          webpage = ScrapePage()

          champions = ['Amumu','Akali','Zed','Nunu']
          base_url = 'https://www.probuilds.net/champions/details/'

          urls =
          for champ in champions:
          url = QtCore.QUrl(base_url).resolved(QtCore.QUrl(champ))
          urls.append(url)
          webpage.start(urls)
          app.exec_()
          for url in webpage.results:
          ImageDownload(url)





          share|improve this answer












          What it seems is that the QWebEnginePage does not close correctly, it is also advisable to reuse instead of creating another QWebEnginePage, so using an old answer as a basis I have implemented the following solution:



          import os
          import sys
          import bs4 as bs
          import urllib.request
          from PyQt5 import QtCore, QtWidgets, QtWebEngineWidgets

          class WebPage(QtWebEngineWidgets.QWebEnginePage):
          def __init__(self):
          super(WebPage, self).__init__()
          self.loadFinished.connect(self.handleLoadFinished)

          def start(self, urls):
          self._urls = iter(urls)
          self.fetchNext()

          def fetchNext(self):
          try:
          url = next(self._urls)
          except StopIteration:
          return False
          else:
          self.load(QtCore.QUrl(url))
          return True

          def processCurrentPage(self, html):
          self.process(self.url(), html)
          if not self.fetchNext():
          QtWidgets.qApp.quit()

          def handleLoadFinished(self):
          self.toHtml(self.processCurrentPage)

          def process(self, url, html):
          print('loaded: [%d chars] %s' % (len(html), url.toString()))

          class ScrapePage(WebPage):
          def __init__(self):
          super(ScrapePage, self).__init__()
          self.results = set()

          def process(self, url, html):
          soup = bs.BeautifulSoup(html, 'html.parser')
          items = soup.find_all('div','class':'items')
          for photos in items:
          images = photos.find_all('img')
          for image in images:
          self.results.add(image['src'])

          def already_exist(image_name):
          for _, _, folder in os.walk('Images'):
          if image_name in folder:
          return False
          else:
          return True

          def ImageDownload(url):
          image_name = url.split("/")
          try:
          if already_exist(image_name[-1]):
          full_path = "Images/" + image_name[-1]
          urllib.request.urlretrieve(url, full_path)
          print("Download %s" % image_name)
          else:
          print("Image already Downloaded >: %s" % image_name[-1])
          except:
          print("Error Download")

          if __name__ == '__main__':

          app = QtWidgets.QApplication(sys.argv)
          webpage = ScrapePage()

          champions = ['Amumu','Akali','Zed','Nunu']
          base_url = 'https://www.probuilds.net/champions/details/'

          urls =
          for champ in champions:
          url = QtCore.QUrl(base_url).resolved(QtCore.QUrl(champ))
          urls.append(url)
          webpage.start(urls)
          app.exec_()
          for url in webpage.results:
          ImageDownload(url)






          share|improve this answer












          share|improve this answer



          share|improve this answer










          answered Nov 10 at 2:14









          eyllanesc

          69.3k93052




          69.3k93052











          • Can you explain the code to me?
            – Tuti Tutors
            Nov 10 at 4:11










          • @TutiTutors Can you be specific? What part of the code do you not understand?
            – eyllanesc
            Nov 10 at 4:12











          • class "WebPage" !
            – Tuti Tutors
            Nov 10 at 4:51










          • @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
            – eyllanesc
            Nov 10 at 4:55










          • I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
            – Tuti Tutors
            Nov 11 at 19:12
















          • Can you explain the code to me?
            – Tuti Tutors
            Nov 10 at 4:11










          • @TutiTutors Can you be specific? What part of the code do you not understand?
            – eyllanesc
            Nov 10 at 4:12











          • class "WebPage" !
            – Tuti Tutors
            Nov 10 at 4:51










          • @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
            – eyllanesc
            Nov 10 at 4:55










          • I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
            – Tuti Tutors
            Nov 11 at 19:12















          Can you explain the code to me?
          – Tuti Tutors
          Nov 10 at 4:11




          Can you explain the code to me?
          – Tuti Tutors
          Nov 10 at 4:11












          @TutiTutors Can you be specific? What part of the code do you not understand?
          – eyllanesc
          Nov 10 at 4:12





          @TutiTutors Can you be specific? What part of the code do you not understand?
          – eyllanesc
          Nov 10 at 4:12













          class "WebPage" !
          – Tuti Tutors
          Nov 10 at 4:51




          class "WebPage" !
          – Tuti Tutors
          Nov 10 at 4:51












          @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
          – eyllanesc
          Nov 10 at 4:55




          @TutiTutors you have to be more specific, practically the complete solution is that class, but good in general the idea is to create an iterator with the links, so when you get the .html fetchNext() is called to get the new url using the iterator and load the page. If you have a specific question it would be great.
          – eyllanesc
          Nov 10 at 4:55












          I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
          – Tuti Tutors
          Nov 11 at 19:12




          I made an edit in the post and it is now working, can you take a look at the code and say your opinion?
          – Tuti Tutors
          Nov 11 at 19:12

















           

          draft saved


          draft discarded















































           


          draft saved


          draft discarded














          StackExchange.ready(
          function ()
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53235004%2fpython-webscraping-closes-without-finishing-and-without-giving-error%23new-answer', 'question_page');

          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          How to how show current date and time by default on contact form 7 in WordPress without taking input from user in datetimepicker

          Syphilis

          Darth Vader #20