"""Compare ways of pooling Metaculus community forecasts on resolved binary questions.

For each resolved binary question the script summarizes the community
prediction histogram with several aggregates (mean, geometric mean, median,
geometric mean of odds, extremized geometric mean of odds) and scores every
aggregate, plus the Metaculus prediction, with the Brier and log scoring rules.
"""

import json

import numpy as np
import pandas as pd

RESOLVED_QUESTIONS_URL = (
    "https://www.metaculus.com/api2/questions/?limit=100&status=resolved"
)

# Columns of the per-question frame that are not forecasts and must not be scored.
NON_FORECAST_COLUMNS = ("resolution", "users")

# Suffixes for the two community histograms read from each question.
HISTOGRAM_SUFFIXES = ("_weighted", "_unweighted")


def fetch_results_data(start_url=RESOLVED_QUESTIONS_URL, timeout=30):
    """Download every page of the paginated question listing.

    Args:
        start_url: URL of the first page. Each page is expected to be a JSON
            object with a ``"results"`` list and a ``"next"`` URL (``None`` on
            the last page).
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A flat list with the ``"results"`` entries of all pages, in page order.

    Raises:
        requests.HTTPError: If a page is answered with an error status.
    """
    # Imported here so the scoring helpers stay importable without the HTTP
    # dependency installed.
    import requests

    results = []
    next_url = start_url
    while next_url is not None:
        print(next_url)
        page = requests.get(next_url, timeout=timeout)
        # Fail loudly instead of trying to parse an error page as JSON.
        page.raise_for_status()
        payload = json.loads(page.text)
        # extend() keeps accumulation linear; sum(list_of_lists, []) is quadratic.
        results.extend(payload["results"])
        next_url = payload["next"]
    return results


def select_resolved_binary(questions):
    """Return the binary questions resolved to 0 or 1, ordered by ``resolve_time``."""
    binary = [
        q
        for q in questions
        if q["possibilities"]["type"] == "binary" and q["resolution"] in (0, 1)
    ]
    binary.sort(key=lambda q: q["resolve_time"])
    return binary


def weighted_quantile(values, quantiles, sample_weight=None):
    """Return weighted quantile(s) of ``values``.

    NOTE(review): the original script called ``weighted_quantile`` without
    defining or importing it. This implementation interpolates the midpoint
    weighted CDF, a common recipe -- confirm it matches the intended helper.

    Args:
        values: 1-D array-like of sample values.
        quantiles: Scalar or array-like of quantiles in ``[0, 1]``.
        sample_weight: Optional non-negative weights, same length as
            ``values``. Defaults to equal weights.

    Returns:
        The interpolated quantile value(s), shaped like ``quantiles``.

    Raises:
        ValueError: If any quantile lies outside ``[0, 1]``.
    """
    values = np.asarray(values, dtype=float)
    quantiles = np.asarray(quantiles, dtype=float)
    if np.any(quantiles < 0) or np.any(quantiles > 1):
        raise ValueError("quantiles should be in [0, 1]")
    if sample_weight is None:
        weights = np.ones(len(values))
    else:
        weights = np.asarray(sample_weight, dtype=float)

    order = np.argsort(values, kind="stable")
    values = values[order]
    weights = weights[order]

    # Place each sample at the midpoint of its own share of the total weight.
    cdf = (np.cumsum(weights) - 0.5 * weights) / np.sum(weights)
    return np.interp(quantiles, cdf, values)


def _estimate_names(extremization):
    """Return the labels of the aggregates produced by ``get_estimates``."""
    return [
        "mean",
        "geo_mean",
        "median",
        "geo_mean_odds",
        f"extr_geo_mean_odds_{extremization}",
    ]


def get_estimates(ys, extremization=2.5):
    """Summarize a prediction histogram with several pooling methods.

    Args:
        ys: Array of 99 weights, one per probability on the grid
            0.01, 0.02, ..., 0.99.
        extremization: Exponent applied to the pooled odds for the extremized
            estimate.

    Returns:
        A pair ``(estimates, names)``: the pooled probabilities
        ``[mean, geo_mean, median, geo_mean_odds, extremized geo_mean_odds]``
        and their matching labels.
    """
    xs = np.linspace(0.01, 0.99, 99)
    log_odds = np.log(xs / (1 - xs))
    total = np.sum(ys)

    mean = np.sum(xs * ys) / total
    geo_mean = np.exp(np.sum(np.log(xs) * ys) / total)

    # Pool in log-odds space, then map the pooled odds back to a probability.
    geo_mean_odds = np.exp(np.sum(log_odds * ys) / total)
    geo_mean_odds_p = geo_mean_odds / (1 + geo_mean_odds)

    extremized_odds = np.exp(np.sum(extremization * log_odds * ys) / total)
    extr_geo_mean_odds = extremized_odds / (1 + extremized_odds)

    median = weighted_quantile(xs, 0.5, sample_weight=ys)

    estimates = [mean, geo_mean, median, geo_mean_odds_p, extr_geo_mean_odds]
    return estimates, _estimate_names(extremization)


def brier(p, r):
    """Return the Brier score (squared error) of forecast ``p`` for outcome ``r``."""
    return (p - r) ** 2


def log_s(p, r):
    """Return the negative log score (natural log) of forecast ``p`` for outcome ``r``."""
    return -(r * np.log(p) + (1 - r) * np.log(1 - p))


def build_forecast_frame(questions, extremization=2.5):
    """Build one row per question holding its resolution and every forecast.

    Args:
        questions: Question dicts providing ``resolution``,
            ``community_prediction`` (``full``/``unweighted`` histograms under
            ``"y"`` and a ``history`` whose last entry has ``"nu"``) and
            ``metaculus_prediction["full"]``.
        extremization: Passed through to ``get_estimates``.

    Returns:
        A DataFrame with columns ``resolution``, ``users``, the five aggregates
        for each of the weighted and unweighted histograms, and
        ``metaculus_prediction``. The columns are present even when
        ``questions`` is empty.
    """
    # Derive the column names up front so an empty input still yields a frame.
    columns = list(NON_FORECAST_COLUMNS)
    for suffix in HISTOGRAM_SUFFIXES:
        columns += [name + suffix for name in _estimate_names(extremization)]
    columns.append("metaculus_prediction")

    rows = []
    for q in questions:
        community = q["community_prediction"]
        row = [q["resolution"], community["history"][-1]["nu"]]
        for histogram in (community["full"]["y"], community["unweighted"]["y"]):
            estimates, _ = get_estimates(np.array(histogram), extremization)
            row += estimates
        row.append(q["metaculus_prediction"]["full"])
        rows.append(row)
    return pd.DataFrame(rows, columns=columns)


def score_forecasts(df):
    """Score every forecast column of ``df`` against its ``resolution`` column.

    The ``resolution`` and ``users`` columns are excluded explicitly: they are
    not forecasts, and scoring them only yields NaN/inf rows.

    Returns:
        A DataFrame indexed by forecast name with the mean Brier score
        (``brier``), the mean negative log score (``-log``) and the number of
        non-missing forecasts (``questions``), sorted by ``-log`` and rounded
        to three decimals.
    """
    resolution = df["resolution"]
    forecasts = df.drop(columns=list(NON_FORECAST_COLUMNS))
    table = pd.concat(
        [
            forecasts.apply(brier, args=(resolution,)).mean().to_frame("brier"),
            forecasts.apply(log_s, args=(resolution,)).mean().to_frame("-log"),
            forecasts.count().to_frame("questions"),
        ],
        axis=1,
    )
    return table.sort_values("-log").round(3)


if __name__ == "__main__":
    all_results = fetch_results_data()
    binary_qns = select_resolved_binary(all_results)
    df = build_forecast_frame(binary_qns)
    scores = score_forecasts(df)
    print(scores)
# Source: Simon_M's comment on "When pooling forecasts, use the geometric mean of odds".