Logo Search packages:      
Sourcecode: webcheck version File versions

def crawler::Link::__init__ (   self,
  site,
  url 
)

Creates an instance of the Link class and initializes the
documented properties to some sensible value.

Definition at line 370 of file crawler.py.

00370                                  :
        """Creates an instance of the Link class and initializes the
        documented properties to some sensible value.

        The url is split into scheme, netloc, path and query; the fragment
        is dropped and the url is stored without it. The new object
        registers itself in site.linkMap under that fragment-less url, so
        a Link for that url must not exist yet (checked with an assert).

        site -- object providing linkMap, _is_internal() and _is_yanked()
        url  -- the url this link represents (a fragment, if any, is
                discarded)
        """
        # store a reference to the site
        self.site = site
        # split the url in useful parts and store the parts; only the first
        # four fields of the split result are kept, the fifth (the fragment)
        # is deliberately ignored
        (self.scheme, self.netloc, self.path, self.query) = \
          urlparse.urlsplit(url)[0:4]
        # rebuild and store the url with an empty fragment, so urls that
        # differ only in their fragment end up with the same self.url
        url = urlparse.urlunsplit(
          (self.scheme, self.netloc, self.path, self.query, '') )
        self.url = url
        # ensure that we are not creating something that already exists
        # NOTE: an assert is skipped when Python runs with -O, so this only
        # guards against duplicates in non-optimized runs
        # NOTE(review): has_key() is the Python 2 spelling of
        # "url in linkMap" -- presumably linkMap is a dict; confirm
        assert not self.site.linkMap.has_key(url)
        # store the Link object in the linkMap (keyed by the fragment-less url)
        self.site.linkMap[url] = self
        # determine the kind of url (internal or external)
        # the site helper is handed this Link itself, which is why the url
        # attributes above are set first -- presumably it inspects them;
        # confirm against the site implementation
        self.isinternal = self.site._is_internal(self)
        # check if the url is yanked (same calling convention as above)
        self.isyanked = self.site._is_yanked(self)
        # initialize the remaining properties to empty or unset defaults;
        # none of the values below is derived from the url
        self.parents = []
        self.children = []
        self.pagechildren = None
        self.embedded = []
        self.anchors = []
        self.reqanchors = {}
        self.depth = None
        self.isfetched = False
        self.ispage = False
        self.mtime = None
        self.size = None
        self.mimetype = None
        self.encoding = None
        self.title = None
        self.author = None
        self.status = None
        self.linkproblems = []
        self.pageproblems = []
        self.redirectdepth = 0
        self.redirectlist = None
        self._ischanged = False

    def __checkurl(self, url):


Generated by  Doxygen 1.6.0   Back to index