Coverage for jstark/features/basket_periods.py: 100%
22 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-02-25 20:09 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-02-25 20:09 +0000
1"""BasketPeriods feature"""
2from .feature import DerivedFeature
4from pyspark.sql import Column
5import pyspark.sql.functions as f
7from jstark.feature_period import FeaturePeriod
8from .basket_count import BasketCount
class BasketPeriods(DerivedFeature):
    """Count of periods in the feature period with at least one basket.

    The value is built from one ``BasketCount`` feature per individual
    period, each contributing 1 when its count is greater than zero and
    0 otherwise.
    """

    def column_expression(self) -> Column:
        """Return the sum of per-period "had a basket" flags.

        One flag is produced for every period index from
        ``feature_period.end`` to ``feature_period.start`` inclusive.
        """
        fp = self.feature_period
        flags = [
            f.when(
                BasketCount(
                    as_at=self.as_at,
                    # A single-period window: start and end are the same index.
                    feature_period=FeaturePeriod(fp.period_unit_of_measure, n, n),
                ).column
                > 0,
                1,
            ).otherwise(0)
            for n in range(fp.end, fp.start + 1)
        ]
        # Built-in sum() starts from integer 0 and relies on the Column
        # operators to fold the flags into a single expression.
        return sum(flags)

    def default_value(self) -> Column:
        """Return a null literal column as the default value."""
        return f.lit(None)

    @property
    def description_subject(self) -> str:
        """Short description naming the period unit being counted."""
        subject = (
            f"Number of {self.feature_period.period_unit_of_measure.name.lower()}s"
        )
        return subject + " in which at least one basket was purchased"

    @property
    def commentary(self) -> str:
        """Long-form explanation of the feature and its value range.

        The text includes the number of periods covered, computed as
        ``start - end + 1``, and the start and end dates formatted as
        ``YYYY-MM-DD``.
        """
        fragments = [
            f"The number of {self.feature_period.period_unit_of_measure.name.lower()}s ",
            "in which at least one basket was purchased. The value will be in the ",
            f"range 0 to {self.feature_period.start - self.feature_period.end + 1} ",
            f"because {self.feature_period.start - self.feature_period.end + 1} is ",
            f"the number of {self.feature_period.period_unit_of_measure.name.lower()}",
            f's between {self.start_date.strftime("%Y-%m-%d")} and',
            f' {self.end_date.strftime("%Y-%m-%d")}. When grouped by Customer and',
            " Product this feature is a useful indicator of the frequency of",
            " which a Customer purchases a Product.",
        ]
        return "".join(fragments)

    @property
    def feature_name(self) -> str:
        """Feature name: ``Basket<Unit>s_<mnemonic>`` for the feature period."""
        unit_part = f"Basket{self.feature_period.period_unit_of_measure.name.title()}s"
        mnemonic_part = f"_{self.feature_period.mnemonic}"
        return unit_part + mnemonic_part