How do I convert a complex Postgres SQL query into a Django queryset?
I have the following SQL query working:
-- For every row of markets_rankscurrent, aggregate the names of the tracking
-- groups whose derived token_id equals that row's token_id.
SELECT
    mrc.token_id,
    ARRAY_AGG(mt.name) AS tracking
FROM
    markets_rankscurrent mrc
    -- LEFT JOIN: rank rows without any matching tracking group are kept.
    LEFT JOIN (
        -- Derived table: tracking groups joined to their three possible
        -- membership tables, reduced to a single token_id column.
        SELECT
            mtg.id,
            mtg.index_id,
            mtg.favorites_group_id,
            mtg.name,
            -- The first non-NULL value wins, checked in this order:
            -- favorites entry, tracking entry, index entry.
            COALESCE(ufe.token_id, mte.token_id, mie.token_id) AS token_id
        FROM
            markets_trackinggroup mtg
            LEFT JOIN users_favoritesentry ufe ON mtg.favorites_group_id = ufe.group_id
            LEFT JOIN markets_trackingentry mte ON mtg.id = mte.group_id
            LEFT JOIN markets_indexentry mie ON mtg.index_id = mie.index_id
    ) mt ON mrc.token_id = mt.token_id
-- One output row per token_id.
GROUP BY
    mrc.token_id;
Here are my models:
# Token model; no fields are shown for it in this listing.
class Token(models.Model):
    # (body not shown)


# Rank row that points at a Token.
class RanksCurrent(models.Model):
    # db_index=False: the foreign key is declared without its own index.
    token = models.ForeignKey(Token, on_delete=models.CASCADE, db_index=False)


# Named group of tokens. Both links below are optional (blank=True, null=True).
# NOTE(review): FavoritesGroup is declared further down in this listing;
# presumably it lives in another app (the SQL uses a users_favoritesentry
# table) — confirm how it is imported here.
class TrackingGroup(models.Model):
    name = models.CharField(max_length=60, verbose_name='Name')
    # Reverse accessor on FavoritesGroup: tracking_groups.
    favorites_group = models.ForeignKey(FavoritesGroup, on_delete=models.CASCADE, related_name='tracking_groups', blank=True, null=True)
    # Reverse accessor on Token: tracking_groups.
    index = models.ForeignKey(Token, on_delete=models.CASCADE, related_name='tracking_groups', blank=True, null=True)


# Links a Token to a TrackingGroup.
class TrackingEntry(models.Model):
    # Reverse accessor on TrackingGroup: tokens.
    group = models.ForeignKey(TrackingGroup, on_delete=models.CASCADE, related_name='tokens')
    # Reverse accessor on Token: tracking_entries.
    token = models.ForeignKey(Token, on_delete=models.CASCADE, related_name='tracking_entries')


# Links a Token (token) to another Token acting as an index (index).
class IndexEntry(models.Model):
    # Reverse accessor on the index Token: index_tokens.
    index = models.ForeignKey(Token, on_delete=models.CASCADE, related_name='index_tokens')
    # Reverse accessor on the member Token: indices.
    token = models.ForeignKey(Token, on_delete=models.CASCADE, related_name='indices')


# Group of favorites; declares no fields of its own.
class FavoritesGroup(models.Model):
    pass


# Links a Token to a FavoritesGroup.
class FavoritesEntry(models.Model):
    # Reverse accessor on FavoritesGroup: favorites_entries.
    group = models.ForeignKey(FavoritesGroup, on_delete=models.CASCADE, related_name='favorites_entries')
    # Token is referenced lazily by its 'app_label.Model' string.
    # Reverse accessor on Token: favorites_entries.
    token = models.ForeignKey('markets.Token', on_delete=models.CASCADE, related_name='favorites_entries')
The `TrackingGroup.index` foreign key will only be set to a `Token` object that is also used as a foreign key in the `IndexEntry` table.
My end goal is to be able to query the `RanksCurrent` table and annotate a `tracking_groups` column that contains a list of the names of the `TrackingGroup`s of which the `Token` is a member. My attempts have made use of `Subquery` and `ArrayAgg` to try to do this, but if my subquery returns a list (like I want it to), it fails.
These types of methods have worked for getting the list of TrackingGroup names:
# A group matches when the outer row's token belongs to it through any of the
# three membership routes: index entries, favorites entries or tracking entries.
via_index = Q(index__index_tokens__token=OuterRef('token_id'))
via_favorites = Q(favorites_group__favorites_entries__token=OuterRef('token_id'))
via_entries = Q(tokens__token=OuterRef('token_id'))

# Only the group name is selected.
tracking_subquery = (
    TrackingGroup.objects
    .filter(via_index | via_favorites | via_entries)
    .values('name')
)
# Each inner queryset sits two subquery levels below the row being annotated,
# hence the doubly wrapped OuterRef.
favorites_group_ids = FavoritesEntry.objects.filter(
    token_id=OuterRef(OuterRef('token_id'))
).values('group_id')
tracked_group_ids = TrackingEntry.objects.filter(
    token_id=OuterRef(OuterRef('token_id'))
).values('group_id')
index_ids = IndexEntry.objects.filter(
    token_id=OuterRef(OuterRef('token_id'))
).values('index_id')

# A group matches when it is reachable through any of the three id sets;
# only the group name is selected.
tracking_subquery = (
    TrackingGroup.objects
    .filter(
        Q(favorites_group_id__in=favorites_group_ids)
        | Q(id__in=tracked_group_ids)
        | Q(index_id__in=index_ids)
    )
    .values('name')
)
However, when I attempt to annotate the main query, it fails:
# Attempt 1: wrap the name subquery in ArrayAgg on the outer queryset.
# NOTE(review): tracking_subquery is neither sliced nor aggregated, so it can
# yield several rows per outer row; presumably that is why using it as a
# single expression fails — confirm against the actual database error.
# NOTE(review): django.contrib.postgres.expressions.ArraySubquery (Django 4.0+)
# looks like the intended tool for "subquery rows -> array" — TODO verify it
# is available in the project's Django version.
RanksCurrent.objects.annotate(
    tracking=ArrayAgg(Subquery(tracking_subquery))
)
I thought I would need to move the `ArrayAgg` call inside the subquery, but that did not make a difference:
# Attempt 2: build the array inside the subquery instead of around it.
# NOTE(review): values('name') placed before annotate() presumably makes
# 'name' the GROUP BY key, which would give one array row per distinct group
# name rather than one row per token — verify by printing the generated SQL.
RanksCurrent.objects.annotate(
    tracking=Subquery(
        TrackingGroup.objects.filter(
            Q(index__index_tokens__token=OuterRef('token_id')) |
            Q(favorites_group__favorites_entries__token=OuterRef('token_id')) |
            Q(tokens__token=OuterRef('token_id'))
        ).values('name').annotate(group_names=ArrayAgg('name')).values('group_names')))
As far as I can tell, the reason the SQL works is that the inner `SELECT` statement returns a unique row for each `token_id`, rather than a row for each `TrackingGroup`.
My thought right now is to break this up into three separate queries, each targeting one of the *Entry tables, then either somehow combine the resulting lists into a single one and annotate the queryset with it, or just create three separate annotations. I would have already done this if not for getting the operation to work in SQL, so now I want to try and do the same in Django before giving up fully.
My current solution is to perform three separate queries and merge them in the serializer:
def _group_names_subquery(rows, group_path, alias):
    """Build a subquery that aggregates tracking-group names for the outer row.

    ``rows`` is the queryset to correlate with the outer row by ``id``,
    ``group_path`` is the lookup path from a row to its tracking groups, and
    ``alias`` is the name given to the aggregated array inside the subquery.
    """
    reachable = rows.filter(**{
        group_path + '__isnull': False,
        'id': OuterRef('id'),
    })
    # values('id') before annotate() so the names are aggregated per row id;
    # the slice keeps the subquery to at most one row.
    names = (
        reachable
        .values('id')
        .annotate(**{alias: ArrayAgg(group_path + '__name')})
        .values(alias)
    )
    return Subquery(names[:1])


# Groups reached through the indices the token belongs to.
tracking_index_groups_subquery = _group_names_subquery(
    queryset_filtered,
    'token__indices__index__tracking_groups',
    'index_tracking',
)
# Groups reached through the favorites groups containing the token.
tracking_favorites_groups_subquery = _group_names_subquery(
    queryset_filtered,
    'token__favorites_entries__group__tracking_groups',
    'favorites_tracking',
)
# Groups the token is entered in directly.
tracking_groups_subquery = _group_names_subquery(
    queryset_filtered,
    'token__tracking_entries__group',
    'tracking',
)

# Attach the three arrays as separate annotations.
queryset_filtered = queryset_filtered.annotate(
    index_tracking=tracking_index_groups_subquery,
    favorites_tracking=tracking_favorites_groups_subquery,
    tracking=tracking_groups_subquery,
)
class RanksCurrentSerializer(serializers.ModelSerializer):
    """Serializer exposing the merged tracking-group names as ``tracking``."""

    tracking = serializers.SerializerMethodField('get_tracking')

    def get_tracking(self, obj):
        """Concatenate the three annotated name lists, skipping empty ones.

        Order is index groups, then favorites groups, then direct groups.
        """
        merged = []
        for attr in ('index_tracking', 'favorites_tracking', 'tracking'):
            names = getattr(obj, attr)
            # An annotation is falsy (None or empty) when nothing matched.
            if names:
                merged.extend(names)
        return merged