Jaime Sevilla comments on When pooling forecasts, use the geometric mean of odds

Jaime Sevilla 4 Sep 2021 7:11 UTC

6 points

0 ∶ 0

META: Do you think you could edit this comment to include...

The number of questions, and aggregated predictions per question?
The information on extremized geometric mean you computed below (I think it is not receiving as much attention due to being buried in the replies)?
Possibly a code snippet to reproduce the results?

Thanks in advance!

Simon_M 8 Sep 2021 9:34 UTC

18 points

0 ∶ 0

Parent

import requests, json
import numpy as np
import pandas as pd

def fetch_results_data():
    """Download every page of resolved questions from the Metaculus API.

    Starts at the first page of the ``api2/questions`` listing (100 items
    per page, ``status=resolved``) and follows each response's ``"next"``
    link until it is ``None``. Each URL is printed before it is requested
    so progress is visible.

    Returns:
        A single flat list holding the items of every page's ``"results"``
        array, in the order the pages were fetched.
    """
    next_url = "https://www.metaculus.com/api2/questions/?limit=100&status=resolved"

    results = []
    while next_url is not None:
        print(next_url)
        page = json.loads(requests.get(next_url).text)
        # extend() keeps the accumulation linear; sum(list_of_lists, [])
        # re-copies the growing list on every page.
        results.extend(page["results"])
        next_url = page["next"]
    return results


# Fetch all resolved questions, keep only those whose type is 'binary' and
# whose resolution is exactly 0 or 1, then order them by 'resolve_time'.
all_results = fetch_results_data()
binary_qns = [
    q
    for q in all_results
    if q['possibilities']['type'] == 'binary' and q['resolution'] in [0, 1]
]
binary_qns.sort(key=lambda q: q['resolve_time'])

def get_estimates(ys, *, extremization=2.5):
    """Pool a weighted distribution over probabilities in several ways.

    The probabilities are the fixed grid 0.01, 0.02, ..., 0.99 (99 points);
    ``ys`` gives the weight attached to each grid point.

    Args:
        ys: NumPy array of weights, one per grid point (must broadcast
            against a length-99 array). Assumed non-negative with a
            positive sum -- TODO confirm against the API payload.
        extremization: Exponent applied to the pooled odds for the
            extremized estimate. Defaults to 2.5, the value the original
            script hard-coded.

    Returns:
        A pair ``(values, names)`` of equal-length lists. ``values`` holds,
        in order: the weighted arithmetic mean of probabilities, the
        weighted geometric mean of probabilities, the weighted median,
        the weighted geometric mean of odds converted back to a
        probability, and the extremized geometric mean of odds converted
        back to a probability. ``names`` holds the matching labels.

    Note:
        ``weighted_quantile`` is not defined in the visible part of this
        file; it must be provided elsewhere before this function is called.
    """
    xs = np.linspace(0.01, 0.99, 99)
    odds = xs / (1 - xs)
    log_odds = np.log(odds)
    total = np.sum(ys)

    mean = np.sum(xs * ys) / total
    geo_mean = np.exp(np.sum(np.log(xs) * ys) / total)

    geo_mean_odds = np.exp(np.sum(log_odds * ys) / total)
    geo_mean_odds_p = geo_mean_odds / (1 + geo_mean_odds)

    # Scaling the log-odds before averaging raises the pooled odds to the
    # power `extremization`, pushing the estimate away from 50%.
    extremized_odds = np.exp(np.sum(extremization * log_odds * ys) / total)
    extr_geo_mean_odds = extremized_odds / (1 + extremized_odds)

    median = weighted_quantile(xs, 0.5, sample_weight=ys)

    values = [mean, geo_mean, median, geo_mean_odds_p, extr_geo_mean_odds]
    names = [
        "mean",
        "geo_mean",
        "median",
        "geo_mean_odds",
        f"extr_geo_mean_odds_{extremization}",
    ]
    return values, names

def brier(p, r):
    """Return the squared difference between forecast ``p`` and outcome ``r``."""
    error = p - r
    return error ** 2
def log_s(p, r):
    """Return the negated log score of forecast ``p`` for outcome ``r``.

    Computes ``-(r*log(p) + (1-r)*log(1-p))``.
    """
    outcome_term = r * np.log(p)
    complement_term = (1 - r) * np.log(1 - p)
    return -(outcome_term + complement_term)

# Build one row per binary question: the resolution, the 'nu' value of the
# last community-prediction history entry (stored under the 'users' column),
# every pooled estimate for the weighted ('full') and unweighted community
# distributions, and finally the Metaculus prediction.
X = []

for q in binary_qns:
    weighted = q['community_prediction']['full']['y']
    unweighted = q['community_prediction']['unweighted']['y']
    t = [q['resolution'], q["community_prediction"]["history"][-1]["nu"]]
    # Column names are rebuilt on every iteration; the list left over from
    # the final question is what labels the DataFrame below.
    all_names = ['resolution', 'users']
    for (e, ys) in [('_weighted', weighted), ('_unweighted', unweighted)]:
        s, names = get_estimates(np.array(ys))
        all_names += [n + e for n in names]
        t += s
    t += [q["metaculus_prediction"]["full"]]
    all_names.append("metaculus_prediction")
    X.append(t)
df = pd.DataFrame(X, columns=all_names)

df_v = df[:]

# Score every column of df_v against the resolution column, average each
# score over all questions, and attach the per-column non-null count.
# Rows are sorted by the "-log" column (ascending) and rounded to 3 places.
# NOTE(review): [:-1] discards the last row after sorting -- presumably a
# non-forecast column whose log score is NaN; confirm this is intended.
# The bare expression is only displayed in a notebook/REPL session.
pd.concat(
    [
        df_v.apply(lambda x: brier(x, df["resolution"]), axis=0).mean().to_frame("brier"),
        df_v.apply(lambda x: log_s(x, df["resolution"]), axis=0).mean().to_frame("-log"),
        df_v.count().to_frame("questions"),
    ],
    axis=1,
).sort_values('-log')[:-1].round(3)

What links here?