Coverage for jstark / mealkit / order_periods.py: 100%
24 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-23 22:34 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-23 22:34 +0000
1"""OrderPeriods feature"""
3import functools
4import operator
6from jstark.features.feature import DerivedFeature
8from pyspark.sql import Column
9import pyspark.sql.functions as f
11from .order_count import OrderCount
12from jstark.feature_period import FeaturePeriod
class OrderPeriods(DerivedFeature):
    """Count the single periods in which ``OrderCount`` was greater than zero.

    Every offset between the feature period's ``end`` and ``start``
    (inclusive) is evaluated on its own; each contributes 1 when its
    ``OrderCount`` column is positive and 0 otherwise, and the
    contributions are added together.
    """

    def column_expression(self) -> Column:
        """Return the sum of the per-period 0/1 indicator columns."""
        unit = self.feature_period.period_unit_of_measure

        def placed_order_flag(offset: int) -> Column:
            # start == end == offset, so OrderCount is evaluated for that
            # one offset only rather than for the whole feature period.
            order_count = OrderCount(
                as_at=self.as_at,
                feature_period=FeaturePeriod(unit, offset, offset),
                first_day_of_week=self._first_day_of_week,
            )
            return f.when(order_count.column > 0, 1).otherwise(0)

        first_offset = self.feature_period.end
        last_offset = self.feature_period.start
        indicators = [
            placed_order_flag(offset)
            for offset in range(first_offset, last_offset + 1)
        ]
        # Left fold with "+" and no initial value, so the resulting
        # expression is exactly flag_0 + flag_1 + ... with no leading literal.
        return functools.reduce(operator.add, indicators)

    def default_value(self) -> Column:
        """Return a null literal column as this feature's default."""
        return f.lit(None)

    @property
    def description_subject(self) -> str:
        """Short description of what the feature measures."""
        unit_name = self.feature_period.period_unit_of_measure.name.lower()
        return f"Number of {unit_name}s in which at least one order was placed"

    @property
    def commentary(self) -> str:
        """Longer explanation of the feature, including its value range."""
        # NOTE(review): this text talks about "basket"/"Product" while
        # description_subject talks about "order" - confirm the wording is
        # intended for this feature before changing it.
        unit_name = self.feature_period.period_unit_of_measure.name.lower()
        # Number of single offsets covered by the feature period (inclusive).
        period_count = self.feature_period.start - self.feature_period.end + 1
        first_day = self.start_date.strftime("%Y-%m-%d")
        last_day = self.end_date.strftime("%Y-%m-%d")
        return (
            f"The number of {unit_name}s in which at least one basket was "
            f"purchased. The value will be in the range 0 to {period_count} "
            f"because {period_count} is the number of {unit_name}s between "
            f"{first_day} and {last_day}. When grouped by Customer and "
            "Product this feature is a useful indicator of the frequency of "
            "which a Customer purchases a Product."
        )

    @property
    def feature_name(self) -> str:
        """Feature name: ``Order<Unit>s_<period mnemonic>``."""
        unit_title = self.feature_period.period_unit_of_measure.name.title()
        return f"Order{unit_title}s_{self.feature_period.mnemonic}"