import math
import random
# Upper Confidence Bound (UCB) simulation for a multi-armed bandit of ads.
#
# Same setup as the Thompson Sampling experiment: each ad converts with a
# fixed probability, and on every round we show one ad and observe a 0/1
# reward. UCB shows the ad whose optimistic estimate (observed average
# reward plus a confidence bonus) is currently the highest.

# Conversion probability of each ad; the index is the ad id.
conversion_rates = [0.10, 0.50, 0.15]
# Number of rounds to simulate.
N = 1000
# Number of ads (arms), derived from the rates so the two cannot drift apart.
d = len(conversion_rates)

# Per-round history of which ad was shown.
ads_selected = []
# How many times each ad has been shown so far.
numbers_of_selections = [0] * d
# Sum of the rewards collected by each ad so far.
sums_of_rewards = [0] * d
# Total reward collected over all rounds.
total_reward = 0

for n in range(N):
    ad = 0
    max_upper_bound = 0
    for i in range(d):
        if numbers_of_selections[i] > 0:
            # Observed average reward of ad i.
            average_reward = sums_of_rewards[i] / numbers_of_selections[i]
            # Confidence bonus: shrinks as ad i is shown more often and grows
            # slowly with the round number (n + 1 because n is 0-based).
            delta_i = math.sqrt(
                1.5 * math.log(n + 1) / numbers_of_selections[i]
            )
            upper_bound = average_reward + delta_i
        else:
            # An ad that has never been shown gets an infinite bound so that
            # it is tried before any estimate-based choice is made.
            upper_bound = math.inf
        # Strict comparison: on ties the lowest-indexed ad wins.
        if upper_bound > max_upper_bound:
            max_upper_bound = upper_bound
            ad = i

    # Show the chosen ad and draw its Bernoulli reward.
    ads_selected.append(ad)
    numbers_of_selections[ad] += 1
    reward = 1 if random.random() < conversion_rates[ad] else 0
    sums_of_rewards[ad] += reward
    total_reward += reward

print("Seleksi UCB:", numbers_of_selections)