Django & Celery: нет такой таблицы
Я пытаюсь создать пару задач с Celery, которые отправляют данные в модель Django. В целом все настроено и запускается, но по какой-то причине задачи Celery не видят таблицу, хотя она существует в БД, и сам Django может записывать в нее данные.
Это происходит с PostgreSQL, это работает при использовании SQLite3.
Сталкивался ли кто-нибудь с подобной проблемой и как вы смогли ее решить?
tasks.py
--------
# tasks
from __future__ import absolute_import, unicode_literals
from celery import Celery
from celery import app, shared_task
# scraping
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
import lxml
from rss_feed_scraper.models import PressReleases
# logging
from celery.utils.log import get_task_logger
logger = get_task_logger(__name__)
@shared_task(serializer='json')
def save_function(article_list):
    """Persist a list of scraped article dicts to the PressReleases model.

    Each item is expected to provide the keys ``title``, ``description``,
    ``link``, ``image_url``, ``published`` and ``source`` (matching the
    dicts built by ``prnewswire_rss``).

    Articles that fail to insert are logged and skipped: ``title`` is
    unique, so re-scraping the same feed routinely raises integrity
    errors for already-stored rows, and one bad row must not abort the
    rest of the batch (the original ``break`` did exactly that).
    """
    for article in article_list:
        try:
            PressReleases.objects.create(
                title=article['title'],
                description=article['description'],
                link=article['link'],
                image_url=article['image_url'],
                published=article['published'],
                source=article['source'],
            )
        except Exception:
            # Use the configured task logger (with traceback) instead of
            # print(), and continue with the next article.
            logger.exception('failed at inserting article')
@shared_task
def prnewswire_rss():
    """Scrape the PR Newswire 'all news releases' RSS feed.

    Downloads the feed, extracts title, description, link, image URL and
    publication date from every ``<item>``, and hands the resulting list
    of dicts to ``save_function`` for persistence.

    Returns whatever ``save_function`` returns, or ``None`` when the
    scrape fails (the failure is logged, not re-raised).
    """
    article_list = []
    try:
        logger.info('Starting the scraping tool')
        r = requests.get('https://www.prnewswire.com/rss/all-news-releases-from-PR-newswire-news.rss')
        soup = BeautifulSoup(r.content, features='lxml')
        articles = soup.findAll('item')
        for a in articles:
            title = a.find('title').text
            description = a.find('description').text
            # lxml collapses the empty <link/> element, so the URL text
            # ends up between "<link/>" and "<guid" in the raw item
            # markup; slice it out by position (7 == len('<link/>')).
            raw = str(a)
            i = raw.find('<link/>')
            j = raw.find('<guid')
            media_content = a.find('media:content')
            image_url = media_content['url'] if media_content else None
            # lxml lower-cases tag names, hence 'pubdate' not 'pubDate'.
            published = datetime.strptime(
                a.find('pubdate').text, '%a, %d %b %Y %H:%M:%S %z')
            article_list.append({
                'title': title,
                'description': description,
                'link': raw[i + 7:j],
                'image_url': image_url,
                'published': published,
                'source': 'PR News Wire',
            })
        logger.info('Finished scraping %d articles', len(article_list))
        # Save to DB. NOTE(review): this is a synchronous in-task call;
        # use save_function.delay(...) to queue it as a separate task
        # (the datetime in 'published' would then need to be serialized,
        # e.g. via isoformat, for the JSON serializer).
        return save_function(article_list)
    except Exception:
        # Log the full traceback via the task logger instead of print().
        logger.exception('The scraping job failed.')
celery.py
---------
from __future__ import absolute_import
import os
from celery import Celery
from celery.schedules import crontab # scheduler
# Point Celery at the Django settings module BEFORE creating the app, so
# that config_from_object / autodiscover can import the Django project.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
app = Celery('config')
# NOTE(review): this explicit timezone is set before config_from_object
# below; a CELERY_TIMEZONE entry in Django settings would override it.
app.conf.timezone = 'UTC'
# Load all CELERY_* prefixed settings from the Django settings module.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Discover tasks.py modules in every installed Django app.
app.autodiscover_tasks()
# Celery Beat periodic schedule. Task names must match the full dotted
# path of the registered task (app_label.tasks.function_name).
app.conf.beat_schedule = {
    # executes every 1 minute (crontab() with no args == every minute)
    'scraping-task-one-min': {
        'task': 'rss_feed_scraper.tasks.prnewswire_rss',
        'schedule': crontab(),
    },
    # # executes every 15 minutes
    # 'scraping-task-fifteen-min': {
    #     'task': 'tasks.hackernews_rss',
    #     'schedule': crontab(minute='*/15')
    # },
    # # executes daily at midnight
    # 'scraping-task-midnight-daily': {
    #     'task': 'tasks.hackernews_rss',
    #     'schedule': crontab(minute=0, hour=0)
    # }
}
models.py
---------
from django.db import models
# Create your models here.
class PressReleases(models.Model):
    """A scraped press release stored by the RSS scraper Celery tasks."""

    # Headline. unique=True means re-inserting an already-scraped article
    # raises an IntegrityError (handled by the save task).
    title = models.CharField(max_length=200, unique=True)
    description = models.CharField(max_length=500)
    # 2083 is the traditional maximum URL length (legacy IE limit).
    link = models.CharField(max_length=2083, default="")
    # NOTE(review): null=True on a CharField is discouraged by Django
    # (creates two "empty" states, NULL and ""); consider blank=True
    # with the "" default instead — requires a migration to change.
    image_url = models.CharField(max_length=2083, default="", null=True)
    # Publication timestamp parsed from the feed's pubDate field.
    published = models.DateTimeField()
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    # Human-readable feed name, e.g. "PR News Wire".
    source = models.CharField(max_length=30, default="", blank=True, null=True)