Coverage for jstark / sample / mealkit_orders.py: 97%
51 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-23 22:34 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-23 22:34 +0000
1import random
2from functools import cached_property
3import uuid
4from datetime import date
5from typing import Any, Iterable
6from decimal import Decimal
7from pyspark.sql import SparkSession, DataFrame
8from pyspark.sql.types import (
9 IntegerType,
10 StringType,
11 StructField,
12 StructType,
13 TimestampType,
14 DecimalType,
15)
16from faker import Faker
17from faker.providers import DynamicProvider
class FakeMealkitOrders:
    """Build a Spark DataFrame of fake mealkit order lines.

    Each generated order has one customer, timestamp, product, cuisine and
    order id, plus between 2 and 5 distinct recipes. The DataFrame exposed
    by :attr:`df` holds one row per (order, recipe) pair.

    Args:
        seed: Seed applied to both Faker and the stdlib ``random`` module.
            ``None`` (the default) leaves both unseeded. Any integer,
            including ``0``, is honoured as a seed.
        number_of_orders: How many orders to generate.

    Note:
        Seeding uses ``Faker.seed`` and ``random.seed``, which both affect
        process-wide state rather than state private to this instance.
    """

    def __init__(self, seed: int | None = None, number_of_orders: int = 1000):
        self.seed = seed
        self.number_of_orders = number_of_orders

    @property
    def mealkit_orders_schema(self) -> StructType:
        """Return the Spark schema used when creating :attr:`df`.

        All eight columns are declared nullable.
        """
        return StructType(
            [
                StructField("Timestamp", TimestampType(), True),
                StructField("Customer", StringType(), True),
                StructField("Product", StringType(), True),
                StructField("Recipe", StringType(), True),
                StructField("Cuisine", StringType(), True),
                StructField("Quantity", IntegerType(), True),
                StructField("Order", StringType(), True),
                StructField("Discount", DecimalType(10, 2), True),
            ]
        )

    @staticmethod
    def flatten_mealkit_orders(mealkit_orders: list[Any]) -> Iterable[dict[str, Any]]:
        """Explode nested orders into one flat dict per recipe line.

        Args:
            mealkit_orders: Order dicts, each with ``Customer``, ``Product``,
                ``Cuisine``, ``Order`` and ``Timestamp`` keys plus a
                ``Recipes`` list of dicts.

        Returns:
            A list with one dict per entry of each order's ``Recipes``,
            combining the order-level keys with that recipe dict's keys.
        """
        # NOTE(review): "Discount" is not copied from the order, so the rows
        # returned here carry no value for the schema's Discount column.
        # Left unchanged to keep the generated data stable - confirm whether
        # this is intended.
        return [
            {
                "Customer": d["Customer"],
                "Product": d["Product"],
                "Cuisine": d["Cuisine"],
                "Order": d["Order"],
                "Timestamp": d["Timestamp"],
                **d2,
            }
            for d in mealkit_orders
            for d2 in d["Recipes"]
        ]

    @cached_property
    def df(self) -> DataFrame:
        """Generate the fake orders and return them as a Spark DataFrame.

        Because this is a ``cached_property``, the data is generated on first
        access and the same DataFrame object is returned afterwards.

        Returns:
            A DataFrame created with :attr:`mealkit_orders_schema`, holding
            one row per (order, recipe) pair.
        """
        products_provider = DynamicProvider(
            provider_name="product",
            elements=[
                "classic-plan",
                "preset-box-bc",
                "balanced-living-t1",
                "dinner-box",
                "classic-plan-t11",
                "classic-plan-t12",
                "classic-plan-t13",
                "classic-plan-t14",
                "classic-plan-t15",
            ],
        )
        recipes_provider = DynamicProvider(
            provider_name="recipe",
            elements=[
                "Banging bangers and mash",
                "Fish and chips",
                "Pizza and salad",
                "Chicken curry",
                "Beef stew",
                "Vegetable lasagna",
                "Salad and bread",
                "Soup and bread",
                "Pasta and sauce",
            ],
        )
        cuisines_provider = DynamicProvider(
            provider_name="cuisine",
            elements=[
                "Italian",
                "French",
                "Spanish",
            ],
        )

        fake = Faker()
        # Compare against None rather than relying on truthiness so that a
        # seed of 0 is honoured instead of being treated as "no seed".
        if self.seed is not None:
            Faker.seed(self.seed)

        products_fake = Faker()
        products_fake.add_provider(products_provider)

        recipes_fake = Faker()
        recipes_fake.add_provider(recipes_provider)

        cuisines_fake = Faker()
        cuisines_fake.add_provider(cuisines_provider)

        mealkit_orders = []

        possible_quantities = [1, 2]
        if self.seed is not None:
            random.seed(self.seed)
        # Pre-draw a pool of quantities, weighted 100:1 in favour of 1.
        quantities = random.choices(
            possible_quantities,
            weights=[100, 1],
            k=self.number_of_orders * len(recipes_provider.elements),
        )
        for order in range(self.number_of_orders):
            recipes = []
            for recipe_index in range(random.randint(2, 5)):
                # `.unique` prevents the same recipe appearing twice within
                # one order; the seen-set is cleared after each order below.
                r = recipes_fake.unique.recipe()
                # NOTE(review): the stride is len(possible_quantities), not
                # the per-order slot count used to size `quantities`, so
                # consecutive orders read overlapping slices of the pool.
                # Kept as-is because changing it would change the data
                # produced for an existing seed.
                quantity = quantities[(order * len(possible_quantities)) + recipe_index]
                recipes.append(
                    {
                        "Recipe": r,
                        "Quantity": quantity,
                    }
                )
            # The order of the values below determines the order in which
            # random draws are consumed, so it is kept fixed.
            mealkit_orders.append(
                {
                    "Customer": fake.name(),
                    "Timestamp": fake.date_time_between(
                        start_date=date(2021, 1, 1), end_date=date(2021, 12, 31)
                    ),
                    # uuid4 is not driven by the seed above, so order ids
                    # differ between runs even when a seed is supplied.
                    "Order": str(uuid.uuid4()),
                    "Product": products_fake.product(),
                    "Cuisine": cuisines_fake.cuisine(),
                    "Recipes": recipes,
                    "Discount": Decimal(random.uniform(0, 5)),
                }
            )
            recipes_fake.unique.clear()
        flattened_mealkit_orders = self.flatten_mealkit_orders(mealkit_orders)
        spark = SparkSession.builder.getOrCreate()
        return spark.createDataFrame(
            flattened_mealkit_orders,
            schema=self.mealkit_orders_schema,  # type: ignore
        )