Aggregate a group by queryset in Ddjango?
I'm working with time series data which are represented using this model:
class Price:
timestamp = models.IntegerField()
price = models.FloatField()
Assuming timestamp has 1 min interval data, this is how I would resample it to 1 hr:
queryset = (
Price.objects.annotate(timestamp_agg=Floor(F('timestamp') / 3600))
.values('timestamp_agg')
.annotate(
timestamp=Min('timestamp'),
high=Max('price'),
)
.values('timestamp', 'high')
.order_by('timestamp')
)
which runs the following SQL under the hood:
select min(timestamp) timestamp, max(price) high
from core_price
group by floor((timestamp / 3600))
order by timestamp
Now I want to calculate a 4 hr moving average, usually calculated in the following way:
select *, avg(high) over (order by timestamp rows between 4 preceding and current row) ma
from (select min(timestamp) timestamp, max(price) high
from core_price
group by floor((timestamp / 3600))
order by timestamp)
or
Window(expression=Avg('price'), frame=RowRange(start=-4, end=0))
How to apply the window aggregation above to the first query? Obviously I can't do something like this since the first query is already an aggregation:
queryset.annotate(ma=Window(expression=Avg('high'), frame=RowRange(start=-4, end=0)))
django.core.exceptions.FieldError: Cannot compute Avg('high'): 'high' is an aggregate
from django.db.models import F, FloatField, IntegerField, Window, Avg
from django.db.models.functions import Floor, Min, Max, Cast, RowNumber
# Step 1: Aggregate hourly data
hourly_qs = (
Price.objects
.annotate(hour=Floor(F('timestamp') / 3600))
.values('hour')
.annotate(
timestamp=Min('timestamp'),
high=Max('price'),
)
.order_by('timestamp')
)
# Step 2: Wrap in a subquery using Window
from django.db.models.expressions import OuterRef, Subquery
# Turn the aggregated queryset into a subquery
from django.db.models import Window, F, Avg
from django.db.models.functions import RowNumber
# Add row number so we can define row frame
hourly_qs = hourly_qs.annotate(
row_number=Window(
expression=RowNumber(),
order_by=F('timestamp').asc()
)
)
# Calculate 4-hour moving average
moving_avg_qs = hourly_qs.annotate(
ma=Window(
expression=Avg('high'),
order_by=F('row_number').asc(),
frame='ROWS BETWEEN 3 PRECEDING AND CURRENT ROW'
)
)
# Now moving_avg_qs will have: timestamp, high, ma
for row in moving_avg_qs:
print(row['timestamp'], row['high'], row['ma'])
from django.db.models import F, FloatField, Window, Avg, Min, Max
from django.db.models.functions import Floor, RowNumber
# Step 1: Aggregate hourly data
# We group prices by hour (timestamp divided by 3600) and get min timestamp and max price per hour
hourly_qs = (
Price.objects
.annotate(hour=Floor(F('timestamp') / 3600)) # convert timestamp to hour
.values('hour')
.annotate(
timestamp=Min('timestamp'), # earliest timestamp in that hour
high=Max('price'), # highest price in that hour
)
.order_by('timestamp')
)
# Step 2: Add row numbers for window function
hourly_qs = hourly_qs.annotate(
row_number=Window(
expression=RowNumber(),
order_by=F('timestamp').asc()
)
)
# Step 3: Compute 4-hour moving average
moving_avg_qs = hourly_qs.annotate(
ma=Window(
expression=Avg('high'),
order_by=F('row_number').asc(),
frame='ROWS BETWEEN 3 PRECEDING AND CURRENT ROW' # include current and previous 3 rows
)
)
# Step 4: Iterate through results
for row in moving_avg_qs:
print(row['timestamp'], row['high'], row['ma'])
Explanation
annotate(hour=Floor(F('timestamp') / 3600))
Converts a timestamp in seconds to hourly buckets. Floor ensures all timestamps in the same hour are grouped together.
values('hour').annotate(timestamp=Min(...), high=Max(...))
Aggregates data per hour:
timestamp: earliest timestamp in the hour
high: maximum price in that hour
Window(RowNumber())
Assigns a row number to each hourly row. Needed for moving average window functions.
Window(Avg('high'), frame='ROWS BETWEEN 3 PRECEDING AND CURRENT ROW')
Calculates a 4-hour moving average of the high price. The window includes the current row and previous 3 rows.
Iterating results
Each row contains:
timestamp: start of the hour
high: max price in the hour
ma: 4-hour moving average