Django Scrapy TypeError: RepoSpider.start_requests() missing 1 required positional argument: 'url'
I'm trying to build a webapp to fetch data from a repo. It's near completion, but I am facing this error currently.
The codes:
This is the spider code
import scrapy
from App.models import Repo
class RepoSpider(scrapy.Spider):
name = "RepoSpider"
allowed_domains = ["github.com"]
start_urls = []
def start_requests(self, url):
yield scrapy.Request(url)
def parse(self, response):
url = response.url
url_parts = url.split('/')
username = url_parts[-1]
repo = url_parts[-2]
description = response.css('.f4.my-3::text').get(default='').strip()
language = response.css('.color-fg-default.text-bold.mr-1::text').get(default='')
stars = response.css('a.Link.Link--muted strong::text').get(default='0').strip()
yield {
'username': username,
'repo': repo,
'description': description,
'top_language': language,
'stars': stars
}
scraped_repo = Repo(
url=url,
username=username,
description=description,
top_language=language,
stars=stars
)
scraped_repo.save()
django view
from django.shortcuts import render, redirect
from .models import Repo
from scrapy.crawler import CrawlerProcess
from .tester.tester.spiders.repo import RepoSpider
def index(request):
if request.method =='POST':
url = request.POST.get('url')
process = CrawlerProcess()
process.crawl(RepoSpider, url)
process.start()
return render(request, 'index.html')
Tried whatever I could get on, but running out of options now. This is a project I need to get done as soon as possible, it would mean a lot to me to have this working.
you can update the start_requests
like following.
def start_requests(self):
yield scrapy.Request(self.url)