Logo Search packages:      
Sourcecode: webcheck version File versions  Download package

def crawler::Link::fetch (   self  ) 

Attempt to fetch the url (if isyanked is not True) and fill in link
attributes (based on isinternal).

Definition at line 508 of file crawler.py.

00508                    :
        """Attempt to fetch the url (if isyanked is not True) and fill in link
        attributes (based on isinternal)."""
        # fully ignore links that should not be fetched
        if self.isyanked:
            debugio.info('  %s' % self.url)
            debugio.info('    ' + self.isyanked)
            return
        # see if we can import the proper module for this scheme
        schememodule = schemes.get_schememodule(self.scheme)
        if schememodule is None:
            self.isyanked = 'unsupported scheme (' + self.scheme + ')'
            self._ischanged = True
            debugio.info('  %s' % self.url)
            debugio.info('    ' + self.isyanked)
            return
        debugio.info('  %s' % self.url)
        content = schememodule.fetch(self, parsers.get_mimetypes())
        self.isfetched = True
        self._ischanged = True
        # skip parsing of content if we were returned nothing
        if content is None:
            return
        # find a parser for the content-type
        parsermodule = parsers.get_parsermodule(self.mimetype)
        if parsermodule is None:
            debugio.debug('crawler.Link.fetch(): unsupported content-type: %s' % self.mimetype)
            return
        # parse the content
        debugio.debug('crawler.Link.fetch(): parsing using %s' % parsermodule.__name__)
        parsermodule.parse(content, self)

    def follow_link(self, visited=set()):


Generated by  Doxygen 1.6.0   Back to index