Commit ef414ed2 authored by Noric Couderc's avatar Noric Couderc
Browse files

Added function for getting the ratio of improvement

The ratio of improvement is a very useful metric, so we add it to the
way we compute the input data.
parent b25e8989
Loading
Loading
Loading
Loading
+10 −1
Original line number Diff line number Diff line
@@ -136,6 +136,14 @@ def load_jmh_data(filename):

#%%

def compute_sample_ratios(jmh_data):
    """
    Compute the ratio of improvement from benchmark scores.

    Reads the "Lowest score_best" and "Lowest score" entries of
    `jmh_data` and returns "Lowest score_best" divided by "Lowest score".
    """
    best_scores = jmh_data["Lowest score_best"]
    observed_scores = jmh_data["Lowest score"]
    return best_scores / observed_scores


def compute_sample_weights(jmh_data):
    """
    Takes a DataFrame with columns "Lowest score_best" and "Lowest score"
@@ -143,7 +151,7 @@ def compute_sample_weights(jmh_data):
    The weight is computed by taking the ratio of improvement
    and adding its log to 1/N, where N is the number of samples
    """
    ratios = jmh_data["Lowest score_best"] / jmh_data["Lowest score"]
    ratios = compute_sample_ratios(jmh_data)
    sample_weights = (1 / ratios.shape[0]) + numpy.log(ratios)
    return sample_weights

@@ -285,6 +293,7 @@ def load_training_data(jmh_results_filename,

    # Ok here we go
    jmh_with_best = load_jmh_data(jmh_results_filename)
    jmh_with_best["Ratio improvement"] = compute_sample_ratios(jmh_with_best)
    jmh_with_best["Sample weight"] = compute_sample_weights(jmh_with_best)
    software_data = load_software_counters(software_counters_filename)
    software_with_jmh = merge_jmh_software(jmh_with_best, software_data)