Coverage for jstark/features/recency_days.py: 100%

17 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-25 20:09 +0000

1"""RecencyDays feature""" 

2from typing import Callable 

3import pyspark.sql.functions as f 

4from pyspark.sql import Column 

5 

6from .feature import BaseFeature 

7 

8 

class RecencyDays(BaseFeature):
    """Feature giving the minimum number of days between an occurrence and ``as_at``.

    For each row the number of days from the ``Timestamp`` column to
    ``self.as_at`` is computed; the smallest of those values (i.e. the most
    recent occurrence) is the feature value.
    """

    def aggregator(self) -> Callable[[Column], Column]:
        """Return the aggregation applied to the per-row day differences.

        The feature is described as the *minimum* number of days since an
        occurrence, so the per-row values must be reduced with ``min``.
        Summing them would not yield a recency.
        """
        return f.min

    def column_expression(self) -> Column:
        """Return the per-row number of days from ``Timestamp`` to ``as_at``."""
        return f.datediff(f.lit(self.as_at), f.col("Timestamp"))

    def default_value(self) -> Column:
        """Return the literal used when the feature has no value.

        NOTE(review): a default of 0 reads the same as an occurrence on
        ``as_at`` itself -- confirm this is the intended fallback.
        """
        return f.lit(0)

    @property
    def description_subject(self) -> str:
        """Short description of what the feature measures."""
        return "Minimum number of days since occurrence"

    @property
    def commentary(self) -> str:
        """Longer, human-readable guidance on when the feature is useful."""
        return (
            "This could be particularly useful (for example) in a grocery retailer "
            + "for determining when a customer most recently bought a product or "
            # No leading space here: the previous fragment already ends with one.
            + "when a product was most recently bought in a store. "
            + "Also note that this is very similar to "
            + f"MostRecentPurchaseDate_{self.feature_period.mnemonic} "
            + "so consider which of these "
            + "features is most useful to you."
        )