Coverage for jstark / sample / mealkit_orders.py: 97%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-23 22:34 +0000

1import random 

2from functools import cached_property 

3import uuid 

4from datetime import date 

5from typing import Any, Iterable 

6from decimal import Decimal 

7from pyspark.sql import SparkSession, DataFrame 

8from pyspark.sql.types import ( 

9 IntegerType, 

10 StringType, 

11 StructField, 

12 StructType, 

13 TimestampType, 

14 DecimalType, 

15) 

16from faker import Faker 

17from faker.providers import DynamicProvider 

18 

19 

class FakeMealkitOrders:
    """Generate fake mealkit orders and expose them as a Spark DataFrame.

    Every generated order carries between two and five distinct recipes.
    :attr:`df` flattens the orders so that the DataFrame holds one row per
    (order, recipe) pair.
    """

    def __init__(self, seed: int | None = None, number_of_orders: int = 1000):
        """Store the generation parameters.

        Args:
            seed: Seed for the random generators. ``None`` leaves them
                unseeded; any integer, including ``0``, is honoured.
            number_of_orders: Number of orders to generate.
        """
        self.seed = seed
        self.number_of_orders = number_of_orders

    @property
    def mealkit_orders_schema(self) -> StructType:
        """Return the Spark schema of the flattened orders (all nullable)."""
        return StructType(
            [
                StructField("Timestamp", TimestampType(), True),
                StructField("Customer", StringType(), True),
                StructField("Product", StringType(), True),
                StructField("Recipe", StringType(), True),
                StructField("Cuisine", StringType(), True),
                StructField("Quantity", IntegerType(), True),
                StructField("Order", StringType(), True),
                StructField("Discount", DecimalType(10, 2), True),
            ]
        )

    @staticmethod
    def flatten_mealkit_orders(mealkit_orders: list[Any]) -> Iterable[dict[str, Any]]:
        """Expand nested orders into one flat record per recipe.

        Args:
            mealkit_orders: Order dicts with the keys ``Customer``,
                ``Product``, ``Cuisine``, ``Order``, ``Timestamp`` and
                ``Recipes`` (a list of dicts), plus an optional ``Discount``.

        Returns:
            A list with one dict per entry of each order's ``Recipes``. The
            order-level values are repeated on every record and the recipe
            dict's own keys are merged in last.
        """
        return [
            {
                "Customer": order["Customer"],
                "Product": order["Product"],
                "Cuisine": order["Cuisine"],
                "Order": order["Order"],
                "Timestamp": order["Timestamp"],
                # Carried through so the schema's Discount column is populated;
                # .get() keeps orders without a discount working (-> None).
                "Discount": order.get("Discount"),
                **recipe,
            }
            for order in mealkit_orders
            for recipe in order["Recipes"]
        ]

    @cached_property
    def df(self) -> DataFrame:
        """Build the fake orders and return them as a DataFrame.

        The result is computed once per instance and cached. When ``seed`` is
        not ``None`` every random source used here is seeded from it.

        Returns:
            A DataFrame following :attr:`mealkit_orders_schema`.
        """
        products_provider = DynamicProvider(
            provider_name="product",
            elements=[
                "classic-plan",
                "preset-box-bc",
                "balanced-living-t1",
                "dinner-box",
                "classic-plan-t11",
                "classic-plan-t12",
                "classic-plan-t13",
                "classic-plan-t14",
                "classic-plan-t15",
            ],
        )
        recipes_provider = DynamicProvider(
            provider_name="recipe",
            elements=[
                "Banging bangers and mash",
                "Fish and chips",
                "Pizza and salad",
                "Chicken curry",
                "Beef stew",
                "Vegetable lasagna",
                "Salad and bread",
                "Soup and bread",
                "Pasta and sauce",
            ],
        )
        cuisines_provider = DynamicProvider(
            provider_name="cuisine",
            elements=[
                "Italian",
                "French",
                "Spanish",
            ],
        )

        # `is not None` rather than truthiness: 0 is a legitimate seed.
        seeded = self.seed is not None

        fake = Faker()
        if seeded:
            Faker.seed(self.seed)

        products_fake = Faker()
        products_fake.add_provider(products_provider)

        recipes_fake = Faker()
        recipes_fake.add_provider(recipes_provider)

        cuisines_fake = Faker()
        cuisines_fake.add_provider(cuisines_provider)

        # Private generator: yields the same sequence as seeding the global
        # `random` module would, without reseeding it for the whole process.
        rng = random.Random(self.seed)
        # Separate stream for order ids so that drawing them does not shift
        # the values produced by `rng`.
        order_id_rng = random.Random(self.seed) if seeded else None

        possible_quantities = [1, 2]
        quantities = rng.choices(
            possible_quantities,
            weights=[100, 1],
            k=self.number_of_orders * len(recipes_provider.elements),
        )
        two_places = Decimal("0.01")

        mealkit_orders = []
        for order in range(self.number_of_orders):
            recipes = []
            for recipe_index in range(rng.randint(2, 5)):
                recipe_name = recipes_fake.unique.recipe()
                # NOTE(review): the stride is len(possible_quantities) (2)
                # while the pool is sized by the number of recipes, so
                # neighbouring orders read overlapping slots. Left unchanged
                # to keep seeded output stable - confirm the intended stride.
                quantity = quantities[(order * len(possible_quantities)) + recipe_index]
                recipes.append(
                    {
                        "Recipe": recipe_name,
                        "Quantity": quantity,
                    }
                )

            if order_id_rng is None:
                order_id = str(uuid.uuid4())
            else:
                # Reproducible version-4 UUID derived from the seed.
                order_id = str(uuid.UUID(int=order_id_rng.getrandbits(128), version=4))

            mealkit_orders.append(
                {
                    "Customer": fake.name(),
                    "Timestamp": fake.date_time_between(
                        start_date=date(2021, 1, 1), end_date=date(2021, 12, 31)
                    ),
                    "Order": order_id,
                    "Product": products_fake.product(),
                    "Cuisine": cuisines_fake.cuisine(),
                    "Recipes": recipes,
                    # Quantized to match DecimalType(10, 2); Decimal(float)
                    # alone keeps the float's full binary expansion.
                    "Discount": Decimal(rng.uniform(0, 5)).quantize(two_places),
                }
            )
            # Recipes must be unique within an order only, not across orders.
            recipes_fake.unique.clear()

        flattened_mealkit_orders = self.flatten_mealkit_orders(mealkit_orders)
        spark = SparkSession.builder.getOrCreate()
        return spark.createDataFrame(
            flattened_mealkit_orders,
            schema=self.mealkit_orders_schema,  # type: ignore
        )