Coverage for jstark / sample / mealkit_orders.py: 100%
54 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-30 09:36 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-30 09:36 +0000
1import random
2from functools import cached_property
3import uuid
4from datetime import date
5from typing import Any, Iterable
6from decimal import Decimal
7from pyspark.sql import SparkSession, DataFrame
8from pyspark.sql.types import (
9 IntegerType,
10 StringType,
11 StructField,
12 StructType,
13 TimestampType,
14 DecimalType,
15)
16from faker import Faker
17from faker.providers import DynamicProvider
class FakeMealkitOrders:
    """Generate a Spark DataFrame of fake meal-kit order lines.

    Every generated order contains between two and five distinct recipes;
    ``df`` flattens the orders to one row per (order, recipe) pair.

    Args:
        seed: Optional seed handed to both ``Faker.seed`` and ``random.seed``.
            ``None`` leaves both generators unseeded. Order ids come from
            ``uuid.uuid4()``, which neither of those seeds controls.
        number_of_orders: Number of orders to generate.
    """

    def __init__(self, seed: int | None = None, number_of_orders: int = 1000):
        self.seed = seed
        self.number_of_orders = number_of_orders

    @property
    def mealkit_orders_schema(self) -> StructType:
        """Return the schema of the flattened order lines (all columns nullable)."""
        return StructType(
            [
                StructField("Timestamp", TimestampType(), True),
                StructField("Customer", StringType(), True),
                StructField("Product", StringType(), True),
                StructField("Recipe", StringType(), True),
                StructField("Cuisine", StringType(), True),
                StructField("Allergen", StringType(), True),
                StructField("Quantity", IntegerType(), True),
                StructField("Order", StringType(), True),
                StructField("Discount", DecimalType(10, 2), True),
            ]
        )

    @staticmethod
    def flatten_mealkit_orders(mealkit_orders: list[Any]) -> Iterable[dict[str, Any]]:
        """Expand nested orders into one flat dict per recipe line.

        Args:
            mealkit_orders: Order dicts carrying the keys ``Customer``,
                ``Product``, ``Cuisine``, ``Allergen``, ``Order``,
                ``Timestamp`` and ``Recipes`` (a list of dicts), plus an
                optional ``Discount``.

        Returns:
            A list with one dict per entry of each order's ``Recipes`` list:
            the order-level values merged with that recipe dict (recipe keys
            win on a name clash).

        Raises:
            KeyError: If an order lacks one of the required keys.
        """
        rows = []
        for order in mealkit_orders:
            header = {
                "Customer": order["Customer"],
                "Product": order["Product"],
                "Cuisine": order["Cuisine"],
                "Allergen": order["Allergen"],
                "Order": order["Order"],
                "Timestamp": order["Timestamp"],
            }
            # Discount is declared in the schema and generated per order, so
            # it must reach the rows; it is copied only when present so that
            # inputs without the key flatten exactly as before.
            if "Discount" in order:
                header["Discount"] = order["Discount"]
            rows.extend({**header, **recipe} for recipe in order["Recipes"])
        return rows

    @cached_property
    def df(self) -> DataFrame:
        """Build the fake order lines as a Spark DataFrame (cached per instance).

        Returns:
            A DataFrame with ``mealkit_orders_schema``, created on the session
            returned by ``SparkSession.builder.getOrCreate()``.
        """
        products_provider = DynamicProvider(
            provider_name="product",
            elements=[
                "classic-plan",
                "preset-box-bc",
                "balanced-living-t1",
                "dinner-box",
                "classic-plan-t11",
                "classic-plan-t12",
                "classic-plan-t13",
                "classic-plan-t14",
                "classic-plan-t15",
            ],
        )
        recipes_provider = DynamicProvider(
            provider_name="recipe",
            elements=[
                "Banging bangers and mash",
                "Fish and chips",
                "Pizza and salad",
                "Chicken curry",
                "Beef stew",
                "Vegetable lasagna",
                "Salad and bread",
                "Soup and bread",
                "Pasta and sauce",
            ],
        )
        cuisines_provider = DynamicProvider(
            provider_name="cuisine",
            elements=[
                "Italian",
                "French",
                "Spanish",
            ],
        )
        allergens_provider = DynamicProvider(
            provider_name="allergen",
            elements=[
                "Gluten",
                "Eggs",
                "Milk",
                "Soy",
            ],
        )

        # Compare against None so that a seed of 0 is honoured too.
        seeded = self.seed is not None

        fake = Faker()
        if seeded:
            Faker.seed(self.seed)

        products_fake = Faker()
        products_fake.add_provider(products_provider)

        recipes_fake = Faker()
        recipes_fake.add_provider(recipes_provider)

        cuisines_fake = Faker()
        cuisines_fake.add_provider(cuisines_provider)

        allergens_fake = Faker()
        allergens_fake.add_provider(allergens_provider)

        possible_quantities = [1, 2]
        if seeded:
            random.seed(self.seed)
        # Quantity 1 is weighted 100:1 over quantity 2.
        quantities = random.choices(
            possible_quantities,
            weights=[100, 1],
            k=self.number_of_orders * len(recipes_provider.elements),
        )

        cent = Decimal("0.01")
        mealkit_orders = []
        for order in range(self.number_of_orders):
            # NOTE(review): the offset advances by len(possible_quantities)
            # (2) per order while an order holds up to 5 recipes, so adjacent
            # orders read overlapping slices of `quantities`. Kept as-is so
            # that seeded output does not change.
            offset = order * len(possible_quantities)
            recipes = []
            for recipe_index in range(random.randint(2, 5)):
                # `.unique` prevents the same recipe appearing twice in one order.
                recipe_name = recipes_fake.unique.recipe()
                recipes.append(
                    {
                        "Recipe": recipe_name,
                        "Quantity": quantities[offset + recipe_index],
                    }
                )
            mealkit_orders.append(
                {
                    "Customer": fake.name(),
                    "Timestamp": fake.date_time_between(
                        start_date=date(2021, 1, 1), end_date=date(2021, 12, 31)
                    ),
                    "Order": str(uuid.uuid4()),
                    "Product": products_fake.product(),
                    "Cuisine": cuisines_fake.cuisine(),
                    "Allergen": allergens_fake.allergen(),
                    "Recipes": recipes,
                    # Round to two places to match the DecimalType(10, 2) column.
                    "Discount": Decimal(random.uniform(0, 5)).quantize(cent),
                }
            )
            # Reset the uniqueness memory so the next order may reuse recipes.
            recipes_fake.unique.clear()

        flattened_mealkit_orders = self.flatten_mealkit_orders(mealkit_orders)
        spark = SparkSession.builder.getOrCreate()
        return spark.createDataFrame(
            flattened_mealkit_orders,
            schema=self.mealkit_orders_schema,  # type: ignore
        )