Coverage for jstark/features/basket_periods.py: 100%

22 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-25 20:09 +0000

1"""BasketPeriods feature""" 

2from .feature import DerivedFeature 

3 

4from pyspark.sql import Column 

5import pyspark.sql.functions as f 

6 

7from jstark.feature_period import FeaturePeriod 

8from .basket_count import BasketCount 

9 

10 

class BasketPeriods(DerivedFeature):
    """Count of individual periods in which at least one basket was purchased.

    The feature period is split into single-period windows (one per value
    from ``feature_period.end`` up to and including ``feature_period.start``)
    and each window contributes 1 when its ``BasketCount`` column is
    greater than zero, otherwise 0.
    """

    def column_expression(self) -> Column:
        """Return the sum of the per-period "had a basket" indicators."""
        unit = self.feature_period.period_unit_of_measure
        first_period = self.feature_period.end
        last_period = self.feature_period.start
        # One 0/1 indicator per single-period window; each window starts and
        # ends on the same period value.
        indicators = [
            f.when(
                BasketCount(
                    as_at=self.as_at,
                    feature_period=FeaturePeriod(unit, single_period, single_period),
                ).column
                > 0,
                1,
            ).otherwise(0)
            for single_period in range(first_period, last_period + 1)
        ]
        # The built-in sum() folds the indicator columns together with "+".
        return sum(indicators)

    def default_value(self) -> Column:
        """Return a null literal column as the default value."""
        return f.lit(None)

    @property
    def description_subject(self) -> str:
        """Short phrase naming what this feature counts."""
        unit_name = self.feature_period.period_unit_of_measure.name.lower()
        return f"Number of {unit_name}s in which at least one basket was purchased"

    @property
    def commentary(self) -> str:
        """Longer explanation of the feature, including its value range."""
        unit_name = self.feature_period.period_unit_of_measure.name.lower()
        # Number of single-period windows covered by the feature period,
        # which is also the largest value the feature can take.
        period_count = self.feature_period.start - self.feature_period.end + 1
        first_day = self.start_date.strftime("%Y-%m-%d")
        last_day = self.end_date.strftime("%Y-%m-%d")
        sentences = [
            f"The number of {unit_name}s in which at least one basket was purchased.",
            f" The value will be in the range 0 to {period_count}",
            f" because {period_count} is the number of {unit_name}s",
            f" between {first_day} and {last_day}.",
            " When grouped by Customer and Product this feature is a useful",
            " indicator of the frequency of which a Customer purchases a Product.",
        ]
        return "".join(sentences)

    @property
    def feature_name(self) -> str:
        """Feature name: ``Basket<Unit>s_<period mnemonic>``."""
        unit_title = self.feature_period.period_unit_of_measure.name.title()
        mnemonic = self.feature_period.mnemonic
        return f"Basket{unit_title}s_{mnemonic}"

59 + f"_{self.feature_period.mnemonic}" 

60 )