# You can download this code by clicking the button below.
# This code is now available for download.
# This function runs a Scrapy spider against a specified URL and extracts the
# text of every <h1> heading (the page's titles).
# Technology Stack: Scrapy, Selector
# Code Type: Scrapy Crawler
# Code Difficulty: Intermediate
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy import Selector
def crawl_website(url):
    """Crawl ``url`` with a one-off Scrapy spider and collect its ``<h1>`` texts.

    A ``CrawlerProcess`` is created with a desktop-browser ``USER_AGENT``, a
    spider whose only start URL is ``url`` is scheduled, and the call blocks
    in ``process.start()`` until the crawl has finished.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list with the text of every ``<h1>`` element found on the page, in
        the order the selector returned them. The list is empty when the
        page has no ``<h1>`` or when ``parse`` was never called (for example
        because the request failed).

    Note:
        ``CrawlerProcess.start()`` runs the Twisted reactor, which cannot be
        restarted, so this function is expected to be called at most once
        per Python process.
    """
    # Filled in by the spider callback; lets the caller receive the result
    # after the blocking crawl has ended.
    collected_titles = []

    class MySpider(scrapy.Spider):
        """Single-page spider that extracts the text of every ``<h1>``."""

        name = 'my_spider'
        start_urls = [url]

        def parse(self, response):
            """Extract the ``<h1>`` texts and emit them as one item."""
            titles = response.css('h1::text').getall()
            collected_titles.extend(titles)
            # Yield (rather than return) the dict so the callback is a
            # generator of items, the form every Scrapy version accepts.
            yield {'titles': titles}

    process = CrawlerProcess(settings={
        'USER_AGENT': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        ),
    })
    process.crawl(MySpider)
    process.start()  # Blocks until the crawl is finished.
    return collected_titles