...
 
Commits (12)
This diff is collapsed.
No preview for this file type
Subproject commit f5eea667cfe3d5dad27b8e150454fec1a065544f
Subproject commit bf31fed20566e62e2dcc3b7aba9787b5647cbd8d
......@@ -112,7 +112,7 @@ def load_jmh_data(filename):
# Grouping the applications, to compare similar ones.
selected_jmh_columns = ["Param: seed",
"Param: applicationSize",
"Param: baseStructureSize"
"Param: baseStructureSize",
"Benchmark"]
# Best data structures
jmh_best_structures = jmh_data_filtered\
......@@ -136,6 +136,14 @@ def load_jmh_data(filename):
#%%
def compute_sample_ratios(jmh_data):
    """
    Compute the ratio of improvement of every sample.

    Takes a DataFrame with columns "Lowest score_best" and "Lowest score"
    and returns the element-wise quotient
    "Lowest score_best" / "Lowest score".

    Parameters
    ----------
    jmh_data
        Object indexable by the two column names above (a DataFrame in
        the callers visible in this file).

    Returns
    -------
    The result of dividing the "Lowest score_best" column by the
    "Lowest score" column, one value per row.
    """
    # Name both operands so the direction of the division is explicit.
    best_scores = jmh_data["Lowest score_best"]
    sample_scores = jmh_data["Lowest score"]
    return best_scores / sample_scores
def compute_sample_weights(jmh_data):
"""
Takes a DataFrame with columns "Lowest score_best" and "Lowest score"
......@@ -143,7 +151,7 @@ def compute_sample_weights(jmh_data):
The weight is computed by taking the ratio of improvement
and adding its log to 1/N, where N is the number of samples.
"""
ratios = jmh_data["Lowest score_best"] / jmh_data["Lowest score"]
ratios = compute_sample_ratios(jmh_data)
sample_weights = (1 / ratios.shape[0]) + numpy.log(ratios)
return sample_weights
......@@ -285,10 +293,11 @@ def load_training_data(jmh_results_filename,
# Ok here we go
jmh_with_best = load_jmh_data(jmh_results_filename)
jmh_with_best["Ratio improvement"] = compute_sample_ratios(jmh_with_best)
jmh_with_best["Sample weight"] = compute_sample_weights(jmh_with_best)
software_data = load_software_counters(software_counters_filename)
software_with_jmh = merge_jmh_software(jmh_with_best, software_data)
software_selected_columns = software_data.columns
software_selected_columns = list(software_data.columns)
software_features = software_with_jmh.get(software_selected_columns)
papi_data = load_hardware_counters(hardware_counters_filename)
papi_data['size'] = jmh_with_best['Param: applicationSize']
......@@ -359,7 +368,7 @@ if __name__ == "__main__":
#%%
print("Training classifier...")
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(features_extended_poly,
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(features_extended,
labels,
sw_hw_cleaned["Sample weight"],
stratify=labels,
......@@ -375,8 +384,7 @@ if __name__ == "__main__":
classifier.fit(X_train, y_train, w_train)
# We just add it to the class
classifier.feature_names = poly_transformer \
.get_feature_names(features_extended.columns.values)
classifier.feature_names = features_extended.columns.values
print("Accuracy: {0}".format(classifier.score(X_test, y_test, w_test)))
print("Classifier trained")
......