Commit 327e2e9f authored by Noric Couderc's avatar Noric Couderc

Merge branch 'discussing-population'

parents 006bbefd 4ba7a712
This diff is collapsed.
......@@ -112,7 +112,7 @@ def load_jmh_data(filename):
# Grouping the applications, to compare similar ones.
selected_jmh_columns = ["Param: seed",
"Param: applicationSize",
"Param: baseStructureSize"
"Param: baseStructureSize",
"Benchmark"]
# Best data structures
jmh_best_structures = jmh_data_filtered\
......@@ -136,6 +136,14 @@ def load_jmh_data(filename):
#%%
def compute_sample_ratios(jmh_data):
    """
    Compute the ratio of improvement for each sample.

    Takes a DataFrame with columns "Lowest score_best" and "Lowest score"
    and returns "Lowest score_best" divided by "Lowest score".
    """
    return jmh_data["Lowest score_best"] / jmh_data["Lowest score"]
def compute_sample_weights(jmh_data):
"""
Takes a DataFrame with columns "Lowest score_best" and "Lowest score"
......@@ -143,7 +151,7 @@ def compute_sample_weights(jmh_data):
The weight is computed by taking the ratio of improvement
and adding its log to 1/N, where N is the number of samples
"""
ratios = jmh_data["Lowest score_best"] / jmh_data["Lowest score"]
ratios = compute_sample_ratios(jmh_data)
sample_weights = (1 / ratios.shape[0]) + numpy.log(ratios)
return sample_weights
......@@ -285,10 +293,11 @@ def load_training_data(jmh_results_filename,
# Ok here we go
jmh_with_best = load_jmh_data(jmh_results_filename)
jmh_with_best["Ratio improvement"] = compute_sample_ratios(jmh_with_best)
jmh_with_best["Sample weight"] = compute_sample_weights(jmh_with_best)
software_data = load_software_counters(software_counters_filename)
software_with_jmh = merge_jmh_software(jmh_with_best, software_data)
software_selected_columns = software_data.columns
software_selected_columns = list(software_data.columns)
software_features = software_with_jmh.get(software_selected_columns)
papi_data = load_hardware_counters(hardware_counters_filename)
papi_data['size'] = jmh_with_best['Param: applicationSize']
......@@ -359,7 +368,7 @@ if __name__ == "__main__":
#%%
print("Training classifier...")
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(features_extended_poly,
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(features_extended,
labels,
sw_hw_cleaned["Sample weight"],
stratify=labels,
......@@ -375,8 +384,7 @@ if __name__ == "__main__":
classifier.fit(X_train, y_train, w_train)
# We just add it to the class
classifier.feature_names = poly_transformer \
.get_feature_names(features_extended.columns.values)
classifier.feature_names = features_extended.columns.values
print("Accuracy: {0}".format(classifier.score(X_test, y_test, w_test)))
print("Classifier trained")
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment