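Just change your rule's callback to `parse_start_url` and override that method.
`CrawlSpider` calls `parse_start_url` for the start URL responses, so the same code then handles both those and the followed links: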
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
class DownloadSpider(CrawlSpider):
    """Spider that dumps every response it handles into a numbered text file.

    Links matched by the extractor's ``allow='prod'`` pattern are followed,
    and each resulting response is passed to ``parse_start_url``.
    """

    name = "downloader"
    allowed_domains = ['bnt-chemicals.de']
    start_urls = [
        "http://www.bnt-chemicals.de",
    ]

    rules = (
        Rule(
            SgmlLinkExtractor(allow='prod'),
            callback='parse_start_url',
            follow=True,
        ),
    )

    # Running count of responses saved so far; also used as the file name stem.
    fname = 0

    def parse_start_url(self, response):
        """Save ``response`` to ``<n>.txt``, where n is the updated counter.

        The first line of the file holds the response URL and the ``depth``
        value from ``response.meta`` (0 when absent); the body follows.
        """
        self.fname += 1
        out_path = "%s.txt" % self.fname
        with open(out_path, 'w') as out:
            depth = response.meta.get('depth', 0)
            out.write('%s, %s\n' % (response.url, depth))
            out.write('%s\n' % response.body)