-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot.py
73 lines (54 loc) · 2.68 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from collections import Counter
from typing import List

from matplotlib import pyplot
import pandas
from scipy.stats import chisquare
def perform_chi_square_test(all_results: List[List[int]], min_num: int, max_num: int):
    """Run a chi-square goodness-of-fit test against a uniform distribution.

    Every number from every inner list of ``all_results`` is pooled into one
    sample. The observed count of each integer in ``[min_num, max_num]`` is
    compared with the count expected if all of those integers were equally
    likely.

    Args:
        all_results: Batches of drawn numbers; the batches are flattened.
        min_num: Smallest value of the tested range (inclusive).
        max_num: Largest value of the tested range (inclusive).

    Returns:
        A tuple ``(chi2, p_value, observed_freq, expected_freq)`` where
        ``chi2`` and ``p_value`` come from ``scipy.stats.chisquare`` and the
        two frequency lists hold one entry per integer in the range, in
        ascending order.

    Raises:
        ValueError: If ``max_num`` is smaller than ``min_num``.
    """
    if max_num < min_num:
        raise ValueError(f'max_num ({max_num}) must be >= min_num ({min_num})')

    # Flatten the list of lists into a single list.
    all_numbers = [number for result in all_results for number in result]

    # Tally once instead of calling list.count() per candidate value, which
    # would rescan the whole sample for every integer in the range.
    counts = Counter(all_numbers)
    candidates = range(min_num, max_num + 1)
    observed_freq = [counts[value] for value in candidates]

    # Expected frequency under a uniform distribution. The total deliberately
    # includes every pooled number, even ones outside the range.
    # NOTE(review): if out-of-range values are present, the observed and
    # expected sums differ and chisquare may reject the input - confirm
    # whether callers can ever pass such values.
    expected_freq = [len(all_numbers) / len(candidates)] * len(candidates)

    chi2, p_value = chisquare(f_obs=observed_freq, f_exp=expected_freq)
    return chi2, p_value, observed_freq, expected_freq
def plot_frequencies(observed_freq: List[int], expected_freq: List[float], min_num: int, max_num: int):
    """Show observed counts as bars with the expected counts overlaid as a line.

    The x axis holds every integer from ``min_num`` to ``max_num`` inclusive;
    ``observed_freq`` and ``expected_freq`` supply one value per integer.
    No new figure is created here, and the function ends by calling
    ``pyplot.show()``.
    """
    x_values = [*range(min_num, max_num + 1)]

    # Observed counts: semi-transparent blue bars.
    pyplot.bar(x_values, observed_freq, color='blue', alpha=0.7, label='Observed')

    # Expected counts: dashed red line with circle markers.
    expected_style = {
        'color': 'red',
        'marker': 'o',
        'linestyle': 'dashed',
        'linewidth': 2,
        'markersize': 8,
        'label': 'Expected',
    }
    pyplot.plot(x_values, expected_freq, **expected_style)

    pyplot.title('Observed vs Expected Frequencies of Random Numbers')
    pyplot.xlabel('Number')
    pyplot.ylabel('Frequency')
    pyplot.legend()
    pyplot.show()
def calculate_probabilities(min_num: int, max_num: int, max_batch_size: int) -> pandas.DataFrame:
    """Tabulate the chance that an entire batch of draws lies above ``k``.

    For each ``k`` in ``[min_num, max_num]`` and each batch size ``b`` in
    ``[1, max_batch_size]``, the cell holds the product over ``i`` in
    ``range(b)`` of ``(g - i) / (N - i)``, where ``g = max_num - k`` is the
    number of values greater than ``k`` and ``N = max_num - min_num + 1`` is
    the size of the range. This is the probability that ``b`` draws without
    replacement are *all* greater than ``k``. The cell is ``0.0`` when fewer
    than ``b`` values are greater than ``k``.

    Args:
        min_num: Smallest value of the range (inclusive).
        max_num: Largest value of the range (inclusive).
        max_batch_size: Largest batch size to tabulate.

    Returns:
        A DataFrame with one row per ``k`` (index labels ``'k = <k>'``) and
        one column per batch size (labels ``'Batch Size <b>'``).
    """
    total_numbers = max_num - min_num + 1
    k_values = range(min_num, max_num + 1)

    data = []
    for k in k_values:
        numbers_greater_than_k = max_num - k
        row = {}
        # Running product: the value for batch size b is the value for b - 1
        # times one more factor, so it is extended rather than rebuilt.
        probability = 1.0
        for batch_size in range(1, max_batch_size + 1):
            if batch_size > numbers_greater_than_k:
                # Not enough values above k to fill the batch. This also
                # keeps the denominator below from ever reaching zero.
                probability = 0.0
            else:
                already_drawn = batch_size - 1
                probability *= (numbers_greater_than_k - already_drawn) / (total_numbers - already_drawn)
            row[f'Batch Size {batch_size}'] = probability
        data.append(row)

    return pandas.DataFrame(data, index=[f'k = {k}' for k in k_values])
def execution_time(df: pandas.DataFrame):
    """Plot execution time against batch percentage, one line per range size.

    ``df`` is read through its ``'range_size'``, ``'batch_percentage'`` and
    ``'execution_time'`` columns. Each distinct ``'range_size'`` value gets
    its own line with circle markers on a new 10x6 figure, which is then
    displayed with ``pyplot.show()``.
    """
    pyplot.figure(figsize=(10, 6))

    range_sizes = df['range_size']
    # One line per distinct range size, in the order unique() yields them.
    for size in range_sizes.unique():
        rows_for_size = df[range_sizes == size]
        pyplot.plot(
            rows_for_size['batch_percentage'],
            rows_for_size['execution_time'],
            marker='o',
            label=f'Range Size: {size}',
        )

    pyplot.title('Execution Time vs. Batch Percentage')
    pyplot.xlabel('Batch Percentage')
    pyplot.ylabel('Execution Time (seconds)')
    pyplot.legend(title='Range Size')
    pyplot.grid(True)
    pyplot.show()