Coverage for jstark / grocery / basket_periods.py: 100%

24 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-23 22:34 +0000

1"""BasketPeriods feature""" 

2 

3import functools 

4import operator 

5 

6from jstark.features.feature import DerivedFeature 

7 

8from pyspark.sql import Column 

9import pyspark.sql.functions as f 

10 

11from jstark.feature_period import FeaturePeriod 

12from .basket_count import BasketCount 

13 

14 

class BasketPeriods(DerivedFeature):
    """Number of periods in which at least one basket was purchased.

    The period granularity is the unit of measure of ``self.feature_period``.
    """

    def column_expression(self) -> Column:
        """Return the sum of per-period "had a basket" flags.

        For every period number from ``feature_period.end`` through
        ``feature_period.start`` (inclusive) a flag is built that is 1 when
        the ``BasketCount`` column for that single period is greater than 0
        and 0 otherwise; the flags are then added together.
        """
        unit = self.feature_period.period_unit_of_measure
        period_numbers = range(
            self.feature_period.end, self.feature_period.start + 1
        )
        flags = [
            f.when(
                BasketCount(
                    as_at=self.as_at,
                    # A FeaturePeriod whose start and end are both the same
                    # period number, i.e. exactly one period.
                    feature_period=FeaturePeriod(unit, number, number),
                    first_day_of_week=self._first_day_of_week,
                ).column
                > 0,
                1,
            ).otherwise(0)
            for number in period_numbers
        ]
        # Fold with "+" so the result is flag_1 + flag_2 + ... with no
        # extra leading literal in the expression.
        return functools.reduce(operator.add, flags)

    def default_value(self) -> Column:
        """Return a null literal column."""
        return f.lit(None)

    @property
    def description_subject(self) -> str:
        """Short description, built from the lower-cased unit name."""
        unit_name = self.feature_period.period_unit_of_measure.name.lower()
        return f"Number of {unit_name}s in which at least one basket was purchased"

    @property
    def commentary(self) -> str:
        """Longer explanation, including the value range and the date span."""
        unit_name = self.feature_period.period_unit_of_measure.name.lower()
        period_total = self.feature_period.start - self.feature_period.end + 1
        first_day = self.start_date.strftime("%Y-%m-%d")
        last_day = self.end_date.strftime("%Y-%m-%d")
        return (
            f"The number of {unit_name}s "
            "in which at least one basket was purchased. The value will be in the "
            f"range 0 to {period_total} "
            f"because {period_total} is "
            f"the number of {unit_name}"
            f"s between {first_day} and"
            f" {last_day}. When grouped by Customer and"
            " Product this feature is a useful indicator of the frequency of"
            " which a Customer purchases a Product."
        )

    @property
    def feature_name(self) -> str:
        """Name of the form ``Basket<Unit>s_<mnemonic>``."""
        unit_title = self.feature_period.period_unit_of_measure.name.title()
        return f"Basket{unit_title}s_{self.feature_period.mnemonic}"

65 + f"_{self.feature_period.mnemonic}" 

66 )