%% Cell type:markdown id: tags:
 
# Model investigation
 
%% Cell type:markdown id: tags:
 
## Imports and stuff
 
%% Cell type:code id: tags:
 
``` python
%load_ext autoreload
```
 
%% Output
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
%% Cell type:code id: tags:
 
``` python
%autoreload 2
```
 
%% Cell type:code id: tags:
 
``` python
# Add the JBrainy directory to the path
import sys
sys.path.append('../')
```
%% Cell type:code id: tags:
``` python
from train_model import load_training_data, normalize_data
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
```
 
%% Cell type:markdown id: tags:
 
This is a dictionary for printing the benchmark labels in a more readable way.
 
%% Cell type:code id: tags:
 
``` python
labels_to_readable = { "se.lth.cs.jmh.ListApplicationBenchmark.ListApplicationBenchmark" : "List",
                       "se.lth.cs.jmh.MapApplicationBenchmark.MapApplicationBenchmark": "Map",
                       "se.lth.cs.jmh.SetApplicationBenchmark.SetApplicationBenchmark": "Set" }
```
 
%% Cell type:markdown id: tags:
 
## Loading the data
 
%% Cell type:code id: tags:
 
``` python
data = load_training_data("data/jmh-results-9307f70f.csv",
"software-perf.csv",
"data/hardware-perf-data.csv")
data = load_training_data("../data/jmh-results-9307f70f.csv",
"../data/software-counters-21-01-20.csv",
"../data/hardware-perf-data.csv")
 
benchmark_data = data["data"]
```
 
%% Cell type:code id: tags:
 
``` python
benchmark_data.describe()
```
 
%% Output
 
Threads Samples Score Score Error (99.9%) \
count 39643.0 39643.0 39643.000000 39643.000000
mean 1.0 10.0 97446.013333 6353.113808
std 0.0 0.0 178141.235600 13825.193748
min 1.0 10.0 10.153360 0.070377
25% 1.0 10.0 1964.599295 73.185579
50% 1.0 10.0 12473.571612 535.074529
75% 1.0 10.0 96777.965251 4896.221083
max 1.0 10.0 709935.485128 224533.338017
Param: applicationSize Param: baseStructureSize Param: seed \
count 39643.000000 39643.000000 39643.000000
mean 376.808516 3639.709406 249.669046
std 448.570636 4489.781686 144.230804
min 10.000000 0.000000 0.000000
25% 10.000000 0.000000 125.000000
50% 100.000000 1000.000000 250.000000
75% 1000.000000 10000.000000 375.000000
max 1000.000000 10000.000000 499.000000
Lowest score Threads_best Samples_best ... PAPI_REF_CYC \
count 39643.000000 39643.0 39643.0 ... 3.964300e+04
mean 91092.899525 1.0 10.0 ... 3.422986e+05
std 168517.226906 0.0 0.0 ... 1.936199e+06
min 9.975083 1.0 10.0 ... 1.728000e+03
25% 1871.044644 1.0 10.0 ... 3.294000e+03
50% 11750.424375 1.0 10.0 ... 2.260800e+04
75% 85632.226297 1.0 10.0 ... 1.802070e+05
max 695996.818472 1.0 10.0 ... 5.683349e+07
PAPI_SP_OPS PAPI_SR_INS PAPI_STL_ICY PAPI_TLB_DM PAPI_TLB_IM \
count 39643.0 3.964300e+04 3.964300e+04 39643.000000 39643.000000
mean 0.0 8.176693e+04 1.392746e+04 12.370759 6.364364
std 0.0 4.709412e+05 7.647459e+04 17.167082 2.544259
min 0.0 3.170000e+02 8.500000e+01 0.000000 5.000000
25% 0.0 4.840000e+02 2.965000e+02 2.000000 5.000000
50% 0.0 3.817000e+03 1.434500e+03 6.000000 5.000000
75% 0.0 3.391000e+04 8.316000e+03 16.000000 6.000000
max 0.0 1.248077e+07 2.205866e+06 685.000000 27.000000
PAPI_TOT_CYC PAPI_TOT_INS PAPI_VEC_DP PAPI_VEC_SP
count 3.964300e+04 3.964300e+04 39643.0 39643.0
mean 3.625881e+05 1.024529e+06 0.0 0.0
std 2.045093e+06 6.593926e+06 0.0 0.0
min 1.825000e+03 2.297000e+03 0.0 0.0
25% 3.440750e+03 3.583000e+03 0.0 0.0
50% 2.376650e+04 3.187000e+04 0.0 0.0
75% 1.898682e+05 2.906225e+05 0.0 0.0
max 5.999046e+07 1.743255e+08 0.0 0.0
[8 rows x 101 columns]
 
%% Cell type:markdown id: tags:
 
We print the column names.
 
%% Cell type:code id: tags:
 
``` python
benchmark_data.columns.values
```
 
%% Output
 
array(['Benchmark', 'Mode', 'Threads', 'Samples', 'Score',
'Score Error (99.9%)', 'Unit', 'Param: applicationSize',
'Param: baseStructureSize', 'Param: datastructureName',
'Param: seed', 'Lowest score', 'Mode_best', 'Threads_best',
'Samples_best', 'Score_best', 'Score Error (99.9%)_best',
'Unit_best', 'Param: datastructureName_best', 'Lowest score_best',
'Ratio improvement', 'Sample weight', 'seed', 'size',
'base_structure_size', 'data_structure', 'add(Object)',
'add(int, Object)', 'addAll(Collection)',
'addAll(int, Collection)', 'clear()', 'contains(Object)',
'containsAll(Collection)', 'containsKey(Object)',
'containsValue(Object)', 'entrySet()', 'equals(Object)',
'get(Object)', 'hashCode()', 'indexOf(Object)', 'isEmpty()',
'iterator()', 'keySet()', 'lastIndexOf(Object)', 'listIterator()',
'listIterator(int)', 'put(Object, Object)', 'putAll(Map)',
'remove(Object)', 'remove(int)', 'removeAll(Collection)',
'retainAll(Collection)', 'set(int, Object)', 'size()',
'sort(Comparator)', 'subList(int, int)', 'toArray()',
'toArray(Object[])', 'values()', 'PAPI_BR_CN', 'PAPI_BR_INS',
'PAPI_BR_MSP', 'PAPI_BR_NTK', 'PAPI_BR_PRC', 'PAPI_BR_TKN',
'PAPI_BR_UCN', 'PAPI_DP_OPS', 'PAPI_FDV_INS', 'PAPI_FP_INS',
'PAPI_FP_OPS', 'PAPI_L1_DCM', 'PAPI_L1_ICM', 'PAPI_L1_LDM',
'PAPI_L1_STM', 'PAPI_L1_TCM', 'PAPI_L2_DCA', 'PAPI_L2_DCH',
'PAPI_L2_DCM', 'PAPI_L2_DCR', 'PAPI_L2_DCW', 'PAPI_L2_ICA',
'PAPI_L2_ICH', 'PAPI_L2_ICM', 'PAPI_L2_ICR', 'PAPI_L2_STM',
'PAPI_L2_TCA', 'PAPI_L2_TCM', 'PAPI_L2_TCR', 'PAPI_L2_TCW',
'PAPI_L3_DCA', 'PAPI_L3_DCR', 'PAPI_L3_DCW', 'PAPI_L3_ICA',
'PAPI_L3_ICR', 'PAPI_L3_TCA', 'PAPI_L3_TCM', 'PAPI_L3_TCR',
'PAPI_L3_TCW', 'PAPI_LD_INS', 'PAPI_REF_CYC', 'PAPI_SP_OPS',
'PAPI_SR_INS', 'PAPI_STL_ICY', 'PAPI_TLB_DM', 'PAPI_TLB_IM',
'PAPI_TOT_CYC', 'PAPI_TOT_INS', 'PAPI_VEC_DP', 'PAPI_VEC_SP'],
dtype=object)
 
%% Cell type:markdown id: tags:
 
The following is a dictionary mapping method names to a category. This could be useful for comparisons with CollectionsBench, but it is not used for now.
 
%% Cell type:code id: tags:
 
``` python
method_classes = {'add(Object)' : "add",
                  'add(int, Object)' : "add",
                  'addAll(Collection)' : "add",
                  'addAll(int, Collection)' : "add",
                  'clear()' : "remove",
                  'contains(Object)' : "contains",
                  'containsAll(Collection)' : "contains",
                  'containsKey(Object)' : "contains",
                  'containsValue(Object)' : "contains",
                  'entrySet()' : "iterate",
                  'equals(Object)' : "other",
                  'get(Object)' : "get",
                  'hashCode()' : "other",
                  'indexOf(Object)' : "other",
                  'isEmpty()' : "other",
                  'iterator()' : "iterate",
                  'keySet()' : "iterate",
                  'lastIndexOf(Object)' : "other",
                  'listIterator()' : "iterate",
                  'listIterator(int)' : "iterate",
                  'put(Object, Object)' : "add",
                  'putAll(Map)' : "add",
                  'remove(Object)' : "remove",
                  'remove(int)' : "remove",
                  'removeAll(Collection)' : "remove",
                  'retainAll(Collection)' : "remove",
                  'set(int, Object)' : "modify",
                  'size()' : "other",
                  'sort(Comparator)' : "other",
                  'subList(int, int)' : "other",
                  'toArray()' : "other",
                  'toArray(Object[])' : "other",
                  'values()' : "other"
                  }
```
 
%% Cell type:markdown id: tags:
 
## Comparing to a baseline
We want to compare against a baseline: for this analysis, the baseline is the most common collection class for each interface.
 
%% Cell type:code id: tags:
 
``` python
most_popular_class = { "List" : "ArrayList", "Map" : "HashMap", "Set" : "HashSet" }
```
 
%% Cell type:code id: tags:
 
``` python
# This list defines the "key" of the synthetic benchmark.
# TODO: Probably needs to be put into the script directly.
selected_jmh_columns = ["Param: seed", "Param: applicationSize", "Param: baseStructureSize", "Benchmark"]
```
 
%% Cell type:code id: tags:
 
``` python
# We want to compare with the baseline, that is, the most common collection class
# We start by extracting the runs with one of the most common collections.
most_common_collection = ["ArrayList", "HashSet", "HashMap"]
benchmark_baseline = benchmark_data[benchmark_data["Param: datastructureName"].isin(most_common_collection)]
benchmark_baseline.describe()
```
 
%% Output
 
Threads Samples Score Score Error (99.9%) \
count 13213.0 13213.0 13213.000000 13213.000000
mean 1.0 10.0 98093.499962 6336.399757
std 0.0 0.0 180013.864277 14375.832154
min 1.0 10.0 19.000371 0.201609
25% 1.0 10.0 1762.068645 59.924702
50% 1.0 10.0 11262.677993 412.191656
75% 1.0 10.0 95865.757811 4821.471227
max 1.0 10.0 709935.485128 184002.953476
Param: applicationSize Param: baseStructureSize Param: seed \
count 13213.000000 13213.000000 13213.000000
mean 376.845531 3639.067585 249.678196
std 448.589450 4489.666823 144.238569
min 10.000000 0.000000 0.000000
25% 10.000000 0.000000 125.000000
50% 100.000000 1000.000000 250.000000
75% 1000.000000 10000.000000 375.000000
max 1000.000000 10000.000000 499.000000
Lowest score Threads_best Samples_best ... PAPI_REF_CYC \
count 13213.000000 13213.0 13213.0 ... 1.321300e+04
mean 91757.100205 1.0 10.0 ... 2.108043e+05
std 170388.576568 0.0 0.0 ... 8.068594e+05
min 18.464808 1.0 10.0 ... 1.764000e+03
25% 1667.443682 1.0 10.0 ... 3.348000e+03
50% 10817.282360 1.0 10.0 ... 2.264400e+04
75% 88351.754534 1.0 10.0 ... 1.777140e+05
max 695996.818472 1.0 10.0 ... 1.558566e+07
PAPI_SP_OPS PAPI_SR_INS PAPI_STL_ICY PAPI_TLB_DM PAPI_TLB_IM \
count 13213.0 1.321300e+04 13213.000000 13213.000000 13213.000000
mean 0.0 4.506547e+04 7158.992772 12.425641 6.198517
std 0.0 1.830012e+05 12966.289900 19.535496 2.613468
min 0.0 3.170000e+02 86.000000 0.000000 5.000000
25% 0.0 4.900000e+02 307.000000 2.000000 5.000000
50% 0.0 3.916000e+03 1361.000000 5.000000 5.000000
75% 0.0 3.377500e+04 8144.500000 16.000000 6.000000
max 0.0 3.766786e+06 308983.500000 685.000000 27.000000
PAPI_TOT_CYC PAPI_TOT_INS PAPI_VEC_DP PAPI_VEC_SP
count 1.321300e+04 1.321300e+04 13213.0 13213.0
mean 2.220744e+05 5.988283e+05 0.0 0.0
std 8.479012e+05 2.771842e+06 0.0 0.0
min 1.858500e+03 2.297000e+03 0.0 0.0
25% 3.499500e+03 3.518000e+03 0.0 0.0
50% 2.373650e+04 3.289300e+04 0.0 0.0
75% 1.875485e+05 2.945730e+05 0.0 0.0
max 1.695867e+07 5.883289e+07 0.0 0.0
[8 rows x 101 columns]
 
%% Cell type:code id: tags:
 
``` python
# We are only interested in the "key" (selected columns) and the lowest score of the baseline.
# The rest is not interesting.
columns_to_keep = selected_jmh_columns + ["Lowest score"]
benchmark_baseline = benchmark_baseline.get(columns_to_keep)
# We merge that with the benchmark data.
bench_with_baseline = pd.merge(benchmark_data, benchmark_baseline, on=selected_jmh_columns, suffixes=("", "_baseline"))
```
 
%% Cell type:code id: tags:
 
``` python
baseline_speedup = (bench_with_baseline["Lowest score"] / bench_with_baseline["Lowest score_baseline"])
bench_with_baseline["Speedup over baseline"] = baseline_speedup
```
 
%% Cell type:code id: tags:
 
``` python
# We export the data, to be able to process it with other tools:
bench_with_baseline.to_csv("training_data.csv")
```
 
%% Cell type:markdown id: tags:
 
# Plotting the mean improvement
 
%% Cell type:code id: tags:
 
``` python
methods_data = bench_with_baseline.get(data["software_selected_columns"])
to_plot = pd.DataFrame()
# Most common method name.
to_plot["Method name"] = methods_data.transpose().idxmax()
to_plot["Collection class"] = bench_with_baseline["Param: datastructureName"]
to_plot["Speedup"] = bench_with_baseline["Speedup over baseline"]
to_plot["Benchmark"] = bench_with_baseline["Benchmark"]
 
to_plot.to_csv("to_plot.csv")
```
 
%% Cell type:code id: tags:
 
``` python
for (label, group) in to_plot.groupby("Benchmark"):
    # to_plot["Method class"] = to_plot["most common method"].apply(lambda m: method_classes[m])
    fig = plt.figure(figsize=(10,5))
    data_without_baseline = group[ ~ group["Collection class"].isin(most_common_collection)]
    # sns.boxplot(data=data_without_baseline, y="Speedup", x="Method name", hue="Collection class")
    sns.stripplot(data=data_without_baseline, y="Speedup", x="Method name", hue="Collection class",
                  dodge=True, jitter=True, alpha=0.25)
    fig.autofmt_xdate()
    plt.yscale("log")
    fig.savefig("methods_and_improvement_{0}.png".format(labels_to_readable[label]))
```
 
%% Cell type:markdown id: tags:
 
## Grouping the data
 
Since it makes little sense to compare apples with oranges, we group the samples by `Benchmark`, which serves as a proxy for the interface the implementation class was supposed to implement.
 
%% Cell type:code id: tags:
 
``` python
data_grouped = data["data"].groupby("Benchmark")
data_grouped.describe()
```
 
%% Output
 
Threads \
count mean std min
Benchmark
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 13500.0 1.0 0.0 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 12645.0 1.0 0.0 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 13498.0 1.0 0.0 1.0
\
25% 50% 75% max
Benchmark
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 1.0 1.0 1.0 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 1.0 1.0 1.0 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 1.0 1.0 1.0 1.0
Samples ... \
count mean ...
Benchmark ...
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 13500.0 10.0 ...
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 12645.0 10.0 ...
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 13498.0 10.0 ...
PAPI_VEC_DP \
75% max
Benchmark
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 0.0 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 0.0 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 0.0 0.0
PAPI_VEC_SP \
count mean std min
Benchmark
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 13500.0 0.0 0.0 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 12645.0 0.0 0.0 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 13498.0 0.0 0.0 0.0
25% 50% 75% max
Benchmark
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 0.0 0.0 0.0 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 0.0 0.0 0.0 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 0.0 0.0 0.0 0.0
[3 rows x 808 columns]
 
%% Cell type:markdown id: tags:
 
# What does our population look like?

So far, we do not know what the population of applications looks like.
We have a few questions (a quick first look follows below):
 
- Are all methods run?
- Which methods are run together?
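
%% Cell type:markdown id: tags:

A sketch of a first look at both questions, reusing `data["data"]` and `data["software_selected_columns"]` from above: the fraction of applications that call each method at least once, and a co-occurrence matrix of method pairs.

%% Cell type:code id: tags:

``` python
# Sketch: per benchmark, the fraction of applications that call each method at least
# once, and a matrix counting how often two methods appear in the same run.
for (label, group) in data["data"].groupby("Benchmark"):
    called = group.get(data["software_selected_columns"]) > 0
    coverage = called.mean().sort_values(ascending=False)
    print(labels_to_readable[label])
    print(coverage)
    # Entry (i, j): number of applications that call both method i and method j.
    cooccurrence = called.astype(int).transpose().dot(called.astype(int))
```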
 
%% Cell type:markdown id: tags:
 
# Looking at the results
 
Here we look at what the results are for our benchmarks, trying to understand the relationships between collection classes.
 
%% Cell type:code id: tags:
 
``` python
# For each method, what is the data structure which won most times?
 
to_plot = pd.DataFrame()
 
for (label, group) in data_grouped:
    for method_name in data["software_selected_columns"]:
        # We could select the apps that have at least one
        # Call to the method, but that seems a low bar to pass
        selected_apps = group[group[method_name] > 0]
        if (selected_apps.shape[0] == 0):
            continue
        winning_collection = selected_apps["Param: datastructureName_best"].mode()[0]
        mean_ratio = selected_apps["Ratio improvement"].mean()
        to_plot = to_plot.append({ "benchmark" : label,
                                   "method" : method_name,
                                   "best collection" : winning_collection,
                                   "mean ratio" : mean_ratio },
                                  ignore_index=True)
 
#to_plot
plt.figure(figsize=(10, 30))
sns.catplot(data=to_plot, y="method", x="mean ratio", hue="best collection")
```
 
%% Output
 
<seaborn.axisgrid.FacetGrid at 0x7fd2d186fcd0>
 
 
 
%% Cell type:markdown id: tags:
 
The plot above depicts the mean improvement ratio for applications that make at least one call to the method on the y axis, coloured by the data structure that was the fastest in most of those cases.

Selecting an application because it makes at least *one* call to a method is a rather low bar, since most of the application's time might be spent elsewhere (which might explain why `LinkedHashSet` has such a high rate of success).

We will therefore do the same plot for applications where the method on the y axis is the *most common* method in the application (this is actually easier to compute).
 
%% Cell type:markdown id: tags:
 
We want to have three columns in the dataframe we will plot (built in the sketch below):
 
- Most common method in application
- Best collection class
- Ratio of improvement
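
%% Cell type:markdown id: tags:

A sketch of how this table can be built and plotted, one row per application: the most common method (via `idxmax` over the method counters, as before), the best collection class, and the improvement ratio, plus the benchmark so we can group and plot per interface.

%% Cell type:code id: tags:

``` python
# Sketch: one row per application, with its most common method, the best (fastest)
# collection class, and the improvement ratio.
to_plot = pd.DataFrame()
to_plot["Benchmark"] = data["data"]["Benchmark"]
to_plot["Method name"] = data["data"].get(data["software_selected_columns"]).transpose().idxmax()
to_plot["Best collection class"] = data["data"]["Param: datastructureName_best"]
to_plot["Improvement ratio"] = data["data"]["Ratio improvement"]

for (label, group) in to_plot.groupby("Benchmark"):
    fig = plt.figure(figsize=(10, 5))
    sns.stripplot(data=group, x="Method name", y="Improvement ratio",
                  hue="Best collection class", dodge=True, alpha=0.25)
    plt.yscale("log")
    fig.autofmt_xdate()
    plt.title(labels_to_readable[label])
```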
 
%% Cell type:markdown id: tags:
 
The plots above show the ratio of improvement as a function of the most common method in the application for each collection class.
 
We can also summarize this data to get only the mean ratio of improvement.
 
%% Cell type:code id: tags:
 
``` python
import numpy
 
ratios_table = to_plot.groupby(["Benchmark", "Method name", "Best collection class"]).agg([numpy.mean, numpy.var, numpy.count_nonzero]).reset_index()
ratios_table
```
 
%% Output
 
Benchmark \
0 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
1 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
2 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
3 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
4 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
5 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
6 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
7 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
8 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
9 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
10 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
11 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
12 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
13 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
14 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
15 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
16 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
17 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
18 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
19 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
20 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
21 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
22 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
23 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
24 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
25 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
26 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
27 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
28 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
29 se.lth.cs.jmh.ListApplicationBenchmark.ListApp...
.. ...
131 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
132 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
133 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
134 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
135 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
136 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
137 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
138 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
139 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
140 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
141 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
142 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
143 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
144 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
145 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
146 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
147 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
148 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
149 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
150 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
151 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
152 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
153 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
154 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
155 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
156 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
157 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
158 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
159 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
160 se.lth.cs.jmh.SetApplicationBenchmark.SetAppli...
Method name Best collection class Improvement ratio \
mean
0 add(Object) ArrayList 1.440029
1 add(Object) LinkedList 1.028072
2 add(Object) Vector 1.357749
3 add(int, Object) ArrayList 1.463486
4 add(int, Object) LinkedList 1.021634
5 add(int, Object) Vector 1.444071
6 addAll(Collection) ArrayList 1.393847
7 addAll(Collection) Vector 1.367315
8 addAll(int, Collection) ArrayList 1.415510
9 addAll(int, Collection) Vector 1.476533
10 clear() ArrayList 1.504171
11 clear() LinkedList 1.020784
12 clear() Vector 1.705206
13 contains(Object) ArrayList 1.397852
14 contains(Object) LinkedList 1.018017
15 contains(Object) Vector 1.319226
16 containsAll(Collection) ArrayList 1.432509
17 containsAll(Collection) LinkedList 1.019269
18 containsAll(Collection) Vector 1.273349
19 equals(Object) ArrayList 1.419942
20 equals(Object) LinkedList 1.024466
21 equals(Object) Vector 1.462853
22 get(Object) ArrayList 1.432147
23 get(Object) LinkedList 1.014496
24 get(Object) Vector 1.365438
25 hashCode() ArrayList 1.435411
26 hashCode() LinkedList 1.019209
27 hashCode() Vector 1.195675
28 indexOf(Object) ArrayList 1.436558
29 indexOf(Object) LinkedList 1.021378
.. ... ... ...
131 equals(Object) HashSet 1.080062
132 equals(Object) LinkedHashSet 2.056383
133 equals(Object) TreeSet 1.401200
134 hashCode() HashSet 1.098361
135 hashCode() LinkedHashSet 2.176369
136 hashCode() TreeSet 1.251715
137 isEmpty() HashSet 1.119544
138 isEmpty() LinkedHashSet 2.066236
139 isEmpty() TreeSet 1.448485
140 iterator() HashSet 1.131452
141 iterator() LinkedHashSet 1.976893
142 iterator() TreeSet 1.352661
143 remove(Object) HashSet 1.123605
144 remove(Object) LinkedHashSet 1.976829
145 remove(Object) TreeSet 1.534206
146 removeAll(Collection) HashSet 1.150890
147 removeAll(Collection) LinkedHashSet 2.160150
148 removeAll(Collection) TreeSet 1.470462
149 retainAll(Collection) HashSet 1.101031
150 retainAll(Collection) LinkedHashSet 1.906250
151 retainAll(Collection) TreeSet 2.151176
152 size() HashSet 1.068096
153 size() LinkedHashSet 1.909949
154 size() TreeSet 2.314495
155 toArray() HashSet 1.046435
156 toArray() LinkedHashSet 2.370237
157 toArray() TreeSet 1.839251
158 toArray(Object[]) HashSet 1.025195
159 toArray(Object[]) LinkedHashSet 2.381420
160 toArray(Object[]) TreeSet 1.808265
var count_nonzero
0 0.464894 558.0
1 0.000782 18.0
2 0.544729 63.0
3 0.495266 585.0
4 0.000783 12.0
5 0.733571 60.0
6 0.340083 483.0
7 0.704825 48.0
8 0.316189 417.0
9 1.154016 51.0
10 0.602896 573.0
11 0.000673 24.0
12 2.258017 78.0
13 0.345941 486.0
14 0.000780 27.0
15 0.722835 45.0
16 0.352992 594.0
17 0.000960 6.0
18 0.269239 21.0
19 0.359556 417.0
20 0.000691 24.0
21 1.302057 45.0
22 0.554104 651.0
23 0.000276 18.0
24 0.853023 51.0
25 0.341852 636.0
26 0.000335 18.0
27 0.126291 39.0
28 0.346075 474.0
29 0.001302 15.0
.. ... ...
131 0.045753 120.0
132 2.123022 828.0
133 2.061297 69.0
134 0.038900 93.0
135 3.347090 783.0
136 0.879821 78.0
137 0.066320 108.0
138 3.030179 657.0
139 1.563867 81.0
140 0.095389 87.0
141 2.343467 801.0
142 0.956355 48.0
143 0.074098 126.0
144 2.352756 732.0
145 2.187963 104.0
146 0.084349 126.0
147 5.887269 666.0
148 1.654195 63.0
149 0.045716 84.0
150 2.716351 558.0
151 43.965773 51.0
152 0.018389 72.0
153 2.642967 644.0
154 21.872594 84.0
155 0.006587 45.0
156 4.874569 666.0
157 10.986323 63.0
158 0.000881 39.0
159 5.687371 672.0
160 11.289928 54.0
[161 rows x 6 columns]
 
%% Cell type:code id: tags:
 
``` python
sns.set()
for (l, g) in ratios_table.groupby("Benchmark"):
    plot = sns.relplot(data=g, x="Method name", hue="Best collection class",
                       y=("Improvement ratio", "mean"), size=("Improvement ratio", "count_nonzero"))
    plot.fig.set_size_inches(15,5)
    plot.fig.set_dpi(150)
    plot.fig.autofmt_xdate()
    plot.set_axis_labels("Method name", "Mean improvement ratio")
    plot.savefig("methods_and_improvement_{0}.svg".format(labels_to_readable[l]))
```
 
%% Cell type:markdown id: tags:
 
Since we care about the interactions between methods, we want to make a heatmap.

I want this heatmap to show which interactions are the "best" for each data structure.
There are several choices:

- For each pair of methods (m1, m2): the data structure that was the fastest most often. Only one map, but no notion of ratio.
- For each pair of methods (m1, m2): the average improvement ratio (which then does not consider the cases where the data structure didn't win).

The two axes of the heatmap are method names; the value in cell (i, j) is the average improvement ratio over the runs that call both method i and method j.
 
%% Cell type:code id: tags:
 
``` python
import numpy
 
for (label, group) in data["data"].groupby("Param: datastructureName_best"):
    features = group.get(data['software_selected_columns'])
    # We only select columns where there is any non-zero value
    features = features.loc[:, (features != 0).any(axis=0)]
    # We need some way to relate the values with themselves.
    # What we want is just a matrix multiplication with values in {0, 1}:
    # this counts co-occurrences of methods in a run.
    features_bool = (features > 0) * 1
    # We multiply the values in {0, 1} by the improvement ratio of the samples
    features_with_ratio = features_bool.mul(group.get("Ratio improvement"), axis=0)

    # features_bool = features_bool.multiply(group.get("Sample weight"), axis=0)
    heatmap = features_with_ratio.transpose().dot(features_bool)
    heatmap2 = features_bool.transpose().dot(features_bool)
    fig = plt.figure(figsize=(10,10))
    plt.title(label)
    sns.heatmap(heatmap / heatmap2)
    fig.savefig("heatmap_{0}.svg".format(label.lower()), bbox_inches="tight")
```
 
%% Output
 
 
 
 
 
 
 
 
 
 
%% Cell type:markdown id: tags:
 
I was confused by the heat maps, because they seemed to indicate that ArrayList wasn't good at `sort`. It is good at it; it is just not *much better* than the alternatives: it wins in most cases, but only by a small margin.
 
%% Cell type:code id: tags:
 
``` python
# It seems that sort doesn't work that well for arraylists, let's check that.
# Let's select the samples with a sort in them
sorted_apps = data["data"][data["data"]["sort(Comparator)"] > 0]
print(sorted_apps["Ratio improvement"].mean())
sns.catplot(data=sorted_apps, x="Param: datastructureName_best", y="Ratio improvement")
```
 
%% Output
 
1.3643675556916048
 
<seaborn.axisgrid.FacetGrid at 0x7fd2d852fc50>
 
 
%% Cell type:markdown id: tags:
 
I am still confused by the heatmaps, so I will instead list, for each method, how many times each data structure won, together with the average improvement ratio.
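
%% Cell type:markdown id: tags:

A sketch of that table: for each method, the number of wins per collection class among the applications that call the method at least once, together with the mean improvement ratio of those applications.

%% Cell type:code id: tags:

``` python
# Sketch: per benchmark and method, the number of wins per collection class and the
# mean improvement ratio, restricted to applications that call the method at least once.
rows = []
for (label, group) in data_grouped:
    for method_name in data["software_selected_columns"]:
        selected_apps = group[group[method_name] > 0]
        if selected_apps.shape[0] == 0:
            continue
        win_counts = selected_apps["Param: datastructureName_best"].value_counts()
        for (collection, wins) in win_counts.items():
            rows.append({"benchmark": labels_to_readable[label],
                         "method": method_name,
                         "collection": collection,
                         "wins": wins,
                         "mean ratio": selected_apps["Ratio improvement"].mean()})
wins_per_method = pd.DataFrame(rows)
wins_per_method
```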
 
%% Cell type:markdown id: tags:
 
# Wait
 
I never thought about plotting the *most obvious plot* ever. The plot of average times for each method, for each data structure!
 
%% Cell type:code id: tags:
 
``` python
to_plot = pd.DataFrame()
 
for (label, group) in data_grouped:
    for method_name in data["software_selected_columns"]:
        selected_apps = group[group[method_name] > 0]
        if (selected_apps.shape[0] == 0):
            continue
        winning_collection = selected_apps["Param: datastructureName_best"].mode()[0]
        mean_ratio = selected_apps["Ratio improvement"].mean()
        to_plot = to_plot.append({ "benchmark" : label,
                                   "method" : method_name,
                                   "best collection" : winning_collection,
                                   "mean ratio" : mean_ratio },
                                  ignore_index=True)
 
#to_plot
plt.figure(figsize=(10, 30))
sns.catplot(data=to_plot, y="method", x="mean ratio", hue="best collection", col="benchmark")
```
 
%% Cell type:markdown id: tags:
 
# LinkedHashMaps' hashCode() mystery
 
I do not know why LinkedHashMap "loses" a lot when hashCode() is used.
Its implementation of hashCode() seems to be strictly the same as the one in HashMap.

The hashCode implementation uses the hashCodes of all entries, but these are computed the same way as in a HashMap.

What do the map applications that use hashCode look like?
 
- I have looked and they didn't seem biased.
 
%% Cell type:code id: tags:
 
``` python
# We get all the applications that use hashcode
map_apps = data_grouped.get_group("se.lth.cs.jmh.MapApplicationBenchmark.MapApplicationBenchmark")
hashcode_map_apps = map_apps[map_apps["hashCode()"] > 0].get(data["software_selected_columns"])
hashcode_map_apps = hashcode_map_apps.loc[:, (hashcode_map_apps != 0).any(axis=0)]
summed = hashcode_map_apps.sum()
fig = plt.figure()
sns.barplot(x=summed.index, y=summed)
fig.autofmt_xdate()
```
 
%% Output
 
 
%% Cell type:markdown id: tags:
 
## What are the most common winning data structures?
 
We want to know the distribution of "winning" datastructures: the ones that ran the fastest.
To do that, we need to get the counts for each value in the `Param: datastructureName_best` column of the table.
 
%% Cell type:code id: tags:
 
``` python
labels_to_readable = { "se.lth.cs.jmh.ListApplicationBenchmark.ListApplicationBenchmark" : "List",
                       "se.lth.cs.jmh.MapApplicationBenchmark.MapApplicationBenchmark": "Map",
                       "se.lth.cs.jmh.SetApplicationBenchmark.SetApplicationBenchmark": "Set" }
 
fig = plt.figure(dpi=150)
sns.countplot(data=data["data"], x="Param: datastructureName_best")
fig.autofmt_xdate()
fig.savefig("winnings_count.svg")
 
# The plots look ugly, don't know why.
# sns.barplot(data=to_plot, x="index", y="Param: datastructureName_best", orient="h", hue="Benchmark")
```
 
%% Output
 
 
%% Cell type:markdown id: tags:
 
We can see here that `ArrayList`, `TreeMap` and `LinkedHashSet` win fairly often. `LinkedHashMap` also gave a fairly good performance, taking almost as many wins as `TreeMap`.
 
The next step is to try to figure out in which cases one or the other wins. I can think of several ways to look at this:
 
- We can try to predict when *x* wins, and see what features are important when it should win, compare to when it loses
- We can try to compare the vectors for applications in some way (or aggregates).
- Monotonicity constraints?
- ???
 
**Warning**: This graph represents *counts*, not "performance". So it does not mean that `LinkedHashMap` performs on average as well as `TreeMap` on a given sample.
 
%% Cell type:code id: tags:
 
``` python
data_grouped_2 = data["data"].groupby(["Benchmark", "Param: applicationSize"])
data_grouped_2.describe()
```
 
%% Output
 
Threads \
count
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 4500.0
100 4500.0
1000 4500.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 3774.0
100 4401.0
1000 4470.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 4498.0
100 4500.0
1000 4500.0
\
mean
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 1.0
100 1.0
1000 1.0
\
std
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
min
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 1.0
100 1.0
1000 1.0
\
25%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 1.0
100 1.0
1000 1.0
\
50%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 1.0
100 1.0
1000 1.0
\
75%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 1.0
100 1.0
1000 1.0
\
max
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 1.0
100 1.0
1000 1.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 1.0
100 1.0
1000 1.0
Samples \
count
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 4500.0
100 4500.0
1000 4500.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 3774.0
100 4401.0
1000 4470.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 4498.0
100 4500.0
1000 4500.0
\
mean
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 10.0
100 10.0
1000 10.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 10.0
100 10.0
1000 10.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 10.0
100 10.0
1000 10.0
... \
...
Benchmark Param: applicationSize ...
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 ...
100 ...
1000 ...
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 ...
100 ...
1000 ...
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 ...
100 ...
1000 ...
PAPI_VEC_DP \
75%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
max
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
PAPI_VEC_SP \
count
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 4500.0
100 4500.0
1000 4500.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 3774.0
100 4401.0
1000 4470.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 4498.0
100 4500.0
1000 4500.0
\
mean
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
std
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
min
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
25%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
50%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
\
75%
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
max
Benchmark Param: applicationSize
se.lth.cs.jmh.ListApplicationBenchmark.ListAppl... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.MapApplicationBenchmark.MapApplic... 10 0.0
100 0.0
1000 0.0
se.lth.cs.jmh.SetApplicationBenchmark.SetApplic... 10 0.0
100 0.0
1000 0.0
[9 rows x 792 columns]
 
%% Cell type:code id: tags:
 
``` python
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 
lde = LinearDiscriminantAnalysis(n_components=2)
for (label, group) in data_grouped_2:
    features = group.get(data['software_selected_columns'])
    labels = group.get("Param: datastructureName_best")
    group_transformed = lde.fit_transform(features, labels)
    plt.figure()
    sns.scatterplot(x=group_transformed[:,0], y=group_transformed[:,1], hue=labels)
```
 
%% Output
 
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-61-6647e8072f54> in <module>()
7 group_transformed = lde.fit_transform(features, labels)
8 plt.figure()
----> 9 sns.scatterplot(x=group_transformed[:,0], y=group_transformed[:,1], hue=labels)
 
IndexError: index 1 is out of bounds for axis 1 with size 1
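
%% Cell type:markdown id: tags:

The transform returned a single component here: LDA yields at most `n_classes - 1` components, so this likely happened for a group with only two distinct winning classes. A sketch of a guard around that:

%% Cell type:code id: tags:

``` python
# Sketch: cap n_components per group and fall back to a one-dimensional plot when
# only a single discriminant component is available.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

for (label, group) in data_grouped_2:
    features = group.get(data['software_selected_columns'])
    labels = group.get("Param: datastructureName_best")
    n_components = min(2, labels.nunique() - 1)
    lde = LinearDiscriminantAnalysis(n_components=n_components)
    group_transformed = lde.fit_transform(features, labels)
    plt.figure()
    if group_transformed.shape[1] >= 2:
        sns.scatterplot(x=group_transformed[:, 0], y=group_transformed[:, 1], hue=labels)
    else:
        # Only one discriminant axis: show its distribution per winning class instead.
        sns.stripplot(x=labels, y=group_transformed[:, 0])
    plt.title(str(label))
```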
 
 
 
 
%% Cell type:markdown id: tags:
 
### LinkedHashMap vs. TreeMap

We want to know why LinkedHashMap wins and why TreeMap wins.

So first, we make two copies of the labelling: one where the label is `winner == LinkedHashMap` and one where the label is `winner == TreeMap`.
 
%% Cell type:code id: tags:
 
``` python
map_bench_data = data_grouped.get_group("se.lth.cs.jmh.MapApplicationBenchmark.MapApplicationBenchmark")
features = map_bench_data.get(data["software_selected_columns"])
# For each winner, we want:
# 1. To train a classifier to find cases where it wins
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import tree
 
# 2. To get the importance of the features used to differentiate the wins.
for winner in ["LinkedHashMap", "TreeMap"]:
    labels = map_bench_data["Param: datastructureName_best"] == winner
    classifier = RandomForestClassifier()
    classifier.fit(features, labels)
    print(classifier.score(features, labels))

    # We want the most important features
    feature_importances = pd.DataFrame()
    feature_importances["Feature"] = features.columns
    feature_importances["Importance"] = classifier.feature_importances_
    feature_importances = feature_importances.sort_values(by="Importance", ascending=False)
    feature_importances = feature_importances[feature_importances["Importance"] > 0]
    plt.figure(figsize=(5,10))
    sns.barplot(x = feature_importances["Importance"], y = feature_importances["Feature"])
```
 
%% Output
 
0.8132858837485172
0.8310794780545671
 
 
 
%% Cell type:markdown id: tags:
 
... Hum.
 
This result seems a bit hard to analyse. It seems like the presence of a call to `clear()` plays a big role, though I am not sure why. Maybe we could take a look at the number of times `clear()` is called?
 
%% Cell type:code id: tags:
 
``` python
features.sum().sort_values(ascending=False)
```
 
%% Output
 
containsValue(Object) 381585
size() 379881
get(Object) 359811
containsKey(Object) 358788
hashCode() 356790
clear() 355989
values() 354153
entrySet() 352980
equals(Object) 351840
putAll(Map) 348393
keySet() 346110
put(Object, Object) 341682
remove(int) 333330
isEmpty() 326508
remove(Object) 0
sort(Comparator) 0
add(int, Object) 0
addAll(Collection) 0
addAll(int, Collection) 0
toArray() 0
contains(Object) 0
containsAll(Collection) 0
subList(int, int) 0
retainAll(Collection) 0
set(int, Object) 0
removeAll(Collection) 0
indexOf(Object) 0
iterator() 0
toArray(Object[]) 0
lastIndexOf(Object) 0
listIterator() 0
listIterator(int) 0
add(Object) 0
dtype: int64
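
%% Cell type:markdown id: tags:

The totals above do not separate the two winners. A sketch comparing the distribution of `clear()` call counts between the applications where LinkedHashMap wins and those where TreeMap wins:

%% Cell type:code id: tags:

``` python
# Sketch: distribution of clear() call counts, split by the winning collection class.
winners = map_bench_data["Param: datastructureName_best"]
subset = map_bench_data[winners.isin(["LinkedHashMap", "TreeMap"])]
plt.figure(figsize=(8, 4))
sns.boxplot(data=subset, x="Param: datastructureName_best", y="clear()")
```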
 
%% Cell type:markdown id: tags:
 
## What are the most important features
 
Try to visualize the most important features here!
 
%% Cell type:code id: tags:
 
``` python
from pickle import load
# We load the model that we have trained and generated
model = load(open("jbrainy-classifier.pickle", 'rb'))
```
 
%% Cell type:markdown id: tags:
 
Problem: the raw data that I have is not the same as the data the classifier was trained on. There have been several transformations:
 
1. The data has been normalized: the HW performance counters by PAPI_TOT_INS, and the SW performance counters by the application size
2. We added the data structures to the table (one-hot encoding)
3. We used a polynomial transformer to get the polynomial features (good news, this one can give me the names of the features!)
4. The features have been "standard scaled": `(x - u) / s`, where `u` is the mean and `s` is the standard deviation
 
Several ideas: I can save the feature names with the classifier, or I can recover them.
Getting them directly from the classifier seems like the better idea (a sketch of the transformation chain follows below).
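
%% Cell type:markdown id: tags:

Below is a minimal sketch of steps 2-4, mainly to show that the polynomial transformer can name the generated features. The input columns and the setup here are simplified placeholders, not the trained pipeline.

%% Cell type:code id: tags:

``` python
# Sketch of steps 2-4 above (step 1, the normalization, is done by normalize_data
# from train_model). Column selection here is simplified: method counters plus a
# one-hot encoding of the data structure.
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

encoded = pd.get_dummies(benchmark_data["Param: datastructureName"])  # step 2
X = pd.concat([benchmark_data.get(data["software_selected_columns"]), encoded], axis=1)

poly = PolynomialFeatures(degree=2)  # step 3
X_poly = poly.fit_transform(X)
# The names we want to save alongside the classifier
# (get_feature_names_out in newer scikit-learn versions).
feature_names = poly.get_feature_names(X.columns)

X_scaled = StandardScaler().fit_transform(X_poly)  # step 4
```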
 
%% Cell type:code id: tags:
 
``` python
model.feature_names
```
 
%% Output
 
['1',
'PAPI_BR_CN',
'PAPI_BR_INS',
'PAPI_BR_MSP',
'PAPI_BR_NTK',
'PAPI_BR_PRC',
'PAPI_BR_TKN',
'PAPI_BR_UCN',
'PAPI_DP_OPS',
'PAPI_FDV_INS',
'PAPI_FP_INS',
'PAPI_FP_OPS',
'PAPI_L1_DCM',
'PAPI_L1_ICM',
'PAPI_L1_LDM',
'PAPI_L1_STM',
'PAPI_L1_TCM',
'PAPI_L2_DCA',
'PAPI_L2_DCH',
'PAPI_L2_DCM',
'PAPI_L2_DCR',
'PAPI_L2_DCW',
'PAPI_L2_ICA',
'PAPI_L2_ICH',
'PAPI_L2_ICM',
'PAPI_L2_ICR',
'PAPI_L2_STM',
'PAPI_L2_TCA',
'PAPI_L2_TCM',
'PAPI_L2_TCR',
'PAPI_L2_TCW',
'PAPI_L3_DCA',
'PAPI_L3_DCR',
'PAPI_L3_DCW',
'PAPI_L3_ICA',
'PAPI_L3_ICR',
'PAPI_L3_TCA',
'PAPI_L3_TCM',
'PAPI_L3_TCR',
'PAPI_L3_TCW',
'PAPI_LD_INS',
'PAPI_REF_CYC',
'PAPI_SP_OPS',
'PAPI_SR_INS',
'PAPI_STL_ICY',
'PAPI_TLB_DM',
'PAPI_TLB_IM',
'PAPI_TOT_CYC',
'PAPI_TOT_INS',
'PAPI_VEC_DP',
'PAPI_VEC_SP',
'add(Object)',
'add(int, Object)',
'addAll(Collection)',
'addAll(int, Collection)',
'clear()',
'contains(Object)',
'containsAll(Collection)',
'containsKey(Object)',
'containsValue(Object)',
'entrySet()',
'equals(Object)',
'get(Object)',
'hashCode()',
'indexOf(Object)',
'isEmpty()',
'iterator()',
'keySet()',
'lastIndexOf(Object)',
'listIterator()',
'listIterator(int)',
'put(Object, Object)',
'putAll(Map)',
'remove(Object)',
'remove(int)',
'removeAll(Collection)',
'retainAll(Collection)',
'set(int, Object)',
'size()',
'sort(Comparator)',
'subList(int, int)',
'toArray()',
'toArray(Object[])',
'values()',
'ArrayList',
'HashMap',
'HashSet',
'LinkedHashMap',
'LinkedHashSet',
'LinkedList',
'TreeMap',
'TreeSet',
'Vector',
'PAPI_BR_CN^2',
'PAPI_BR_CN PAPI_BR_INS',
'PAPI_BR_CN PAPI_BR_MSP',
'PAPI_BR_CN PAPI_BR_NTK',
'PAPI_BR_CN PAPI_BR_PRC',
'PAPI_BR_CN PAPI_BR_TKN',
'PAPI_BR_CN PAPI_BR_UCN',
'PAPI_BR_CN PAPI_DP_OPS',
'PAPI_BR_CN PAPI_FDV_INS',
'PAPI_BR_CN PAPI_FP_INS',
'PAPI_BR_CN PAPI_FP_OPS',
'PAPI_BR_CN PAPI_L1_DCM',
'PAPI_BR_CN PAPI_L1_ICM',
'PAPI_BR_CN PAPI_L1_LDM',
'PAPI_BR_CN PAPI_L1_STM',
'PAPI_BR_CN PAPI_L1_TCM',
'PAPI_BR_CN PAPI_L2_DCA',
'PAPI_BR_CN PAPI_L2_DCH',
'PAPI_BR_CN PAPI_L2_DCM',
'PAPI_BR_CN PAPI_L2_DCR',
'PAPI_BR_CN PAPI_L2_DCW',
'PAPI_BR_CN PAPI_L2_ICA',
'PAPI_BR_CN PAPI_L2_ICH',
'PAPI_BR_CN PAPI_L2_ICM',
'PAPI_BR_CN PAPI_L2_ICR',
'PAPI_BR_CN PAPI_L2_STM',
'PAPI_BR_CN PAPI_L2_TCA',
'PAPI_BR_CN PAPI_L2_TCM',
'PAPI_BR_CN PAPI_L2_TCR',
'PAPI_BR_CN PAPI_L2_TCW',
'PAPI_BR_CN PAPI_L3_DCA',
'PAPI_BR_CN PAPI_L3_DCR',
'PAPI_BR_CN PAPI_L3_DCW',
'PAPI_BR_CN PAPI_L3_ICA',
'PAPI_BR_CN PAPI_L3_ICR',
'PAPI_BR_CN PAPI_L3_TCA',
'PAPI_BR_CN PAPI_L3_TCM',
'PAPI_BR_CN PAPI_L3_TCR',
'PAPI_BR_CN PAPI_L3_TCW',
'PAPI_BR_CN PAPI_LD_INS',
'PAPI_BR_CN PAPI_REF_CYC',
'PAPI_BR_CN PAPI_SP_OPS',
'PAPI_BR_CN PAPI_SR_INS',
'PAPI_BR_CN PAPI_STL_ICY',
'PAPI_BR_CN PAPI_TLB_DM',
'PAPI_BR_CN PAPI_TLB_IM',
'PAPI_BR_CN PAPI_TOT_CYC',
'PAPI_BR_CN PAPI_TOT_INS',
'PAPI_BR_CN PAPI_VEC_DP',
'PAPI_BR_CN PAPI_VEC_SP',
'PAPI_BR_CN add(Object)',
'PAPI_BR_CN add(int, Object)',
'PAPI_BR_CN addAll(Collection)',
'PAPI_BR_CN addAll(int, Collection)',
'PAPI_BR_CN clear()',
'PAPI_BR_CN contains(Object)',
'PAPI_BR_CN containsAll(Collection)',
'PAPI_BR_CN containsKey(Object)',
'PAPI_BR_CN containsValue(Object)',
'PAPI_BR_CN entrySet()',
'PAPI_BR_CN equals(Object)',
'PAPI_BR_CN get(Object)',
'PAPI_BR_CN hashCode()',
'PAPI_BR_CN indexOf(Object)',
'PAPI_BR_CN isEmpty()',
'PAPI_BR_CN iterator()',
'PAPI_BR_CN keySet()',
'PAPI_BR_CN lastIndexOf(Object)',
'PAPI_BR_CN listIterator()',
'PAPI_BR_CN listIterator(int)',
'PAPI_BR_CN put(Object, Object)',
'PAPI_BR_CN putAll(Map)',
'PAPI_BR_CN remove(Object)',
'PAPI_BR_CN remove(int)',
'PAPI_BR_CN removeAll(Collection)',
'PAPI_BR_CN retainAll(Collection)',
'PAPI_BR_CN set(int, Object)',
'PAPI_BR_CN size()',
'PAPI_BR_CN sort(Comparator)',
'PAPI_BR_CN subList(int, int)',
'PAPI_BR_CN toArray()',
'PAPI_BR_CN toArray(Object[])',
'PAPI_BR_CN values()',
'PAPI_BR_CN ArrayList',
'PAPI_BR_CN HashMap',
'PAPI_BR_CN HashSet',
'PAPI_BR_CN LinkedHashMap',
'PAPI_BR_CN LinkedHashSet',
'PAPI_BR_CN LinkedList',
'PAPI_BR_CN TreeMap',
'PAPI_BR_CN TreeSet',
'PAPI_BR_CN Vector',
'PAPI_BR_INS^2',
'PAPI_BR_INS PAPI_BR_MSP',
'PAPI_BR_INS PAPI_BR_NTK',
'PAPI_BR_INS PAPI_BR_PRC',
'PAPI_BR_INS PAPI_BR_TKN',
'PAPI_BR_INS PAPI_BR_UCN',
'PAPI_BR_INS PAPI_DP_OPS',
'PAPI_BR_INS PAPI_FDV_INS',
'PAPI_BR_INS PAPI_FP_INS',
'PAPI_BR_INS PAPI_FP_OPS',
'PAPI_BR_INS PAPI_L1_DCM',
'PAPI_BR_INS PAPI_L1_ICM',
'PAPI_BR_INS PAPI_L1_LDM',
'PAPI_BR_INS PAPI_L1_STM',
'PAPI_BR_INS PAPI_L1_TCM',
'PAPI_BR_INS PAPI_L2_DCA',
'PAPI_BR_INS PAPI_L2_DCH',
'PAPI_BR_INS PAPI_L2_DCM',
'PAPI_BR_INS PAPI_L2_DCR',
'PAPI_BR_INS PAPI_L2_DCW',
'PAPI_BR_INS PAPI_L2_ICA',
'PAPI_BR_INS PAPI_L2_ICH',
'PAPI_BR_INS PAPI_L2_ICM',
'PAPI_BR_INS PAPI_L2_ICR',
'PAPI_BR_INS PAPI_L2_STM',
'PAPI_BR_INS PAPI_L2_TCA',
'PAPI_BR_INS PAPI_L2_TCM',
'PAPI_BR_INS PAPI_L2_TCR',
'PAPI_BR_INS PAPI_L2_TCW',
'PAPI_BR_INS PAPI_L3_DCA',
'PAPI_BR_INS PAPI_L3_DCR',
'PAPI_BR_INS PAPI_L3_DCW',
'PAPI_BR_INS PAPI_L3_ICA',
'PAPI_BR_INS PAPI_L3_ICR',
'PAPI_BR_INS PAPI_L3_TCA',
'PAPI_BR_INS PAPI_L3_TCM',
'PAPI_BR_INS PAPI_L3_TCR',
'PAPI_BR_INS PAPI_L3_TCW',
'PAPI_BR_INS PAPI_LD_INS',
'PAPI_BR_INS PAPI_REF_CYC',
'PAPI_BR_INS PAPI_SP_OPS',
'PAPI_BR_INS PAPI_SR_INS',
'PAPI_BR_INS PAPI_STL_ICY',
'PAPI_BR_INS PAPI_TLB_DM',
'PAPI_BR_INS PAPI_TLB_IM',
'PAPI_BR_INS PAPI_TOT_CYC',
'PAPI_BR_INS PAPI_TOT_INS',
'PAPI_BR_INS PAPI_VEC_DP',
'PAPI_BR_INS PAPI_VEC_SP',
'PAPI_BR_INS add(Object)',
'PAPI_BR_INS add(int, Object)',
'PAPI_BR_INS addAll(Collection)',
'PAPI_BR_INS addAll(int, Collection)',
'PAPI_BR_INS clear()',
'PAPI_BR_INS contains(Object)',
'PAPI_BR_INS containsAll(Collection)',
'PAPI_BR_INS containsKey(Object)',
'PAPI_BR_INS containsValue(Object)',
'PAPI_BR_INS entrySet()',
'PAPI_BR_INS equals(Object)',
'PAPI_BR_INS get(Object)',
'PAPI_BR_INS hashCode()',
'PAPI_BR_INS indexOf(Object)',
'PAPI_BR_INS isEmpty()',
'PAPI_BR_INS iterator()',
'PAPI_BR_INS keySet()',
'PAPI_BR_INS lastIndexOf(Object)',
'PAPI_BR_INS listIterator()',
'PAPI_BR_INS listIterator(int)',
'PAPI_BR_INS put(Object, Object)',
'PAPI_BR_INS putAll(Map)',
'PAPI_BR_INS remove(Object)',
'PAPI_BR_INS remove(int)',
'PAPI_BR_INS removeAll(Collection)',
'PAPI_BR_INS retainAll(Collection)',
'PAPI_BR_INS set(int, Object)',
'PAPI_BR_INS size()',
'PAPI_BR_INS sort(Comparator)',
'PAPI_BR_INS subList(int, int)',
'PAPI_BR_INS toArray()',
'PAPI_BR_INS toArray(Object[])',
'PAPI_BR_INS values()',
'PAPI_BR_INS ArrayList',
'PAPI_BR_INS HashMap',
'PAPI_BR_INS HashSet',
'PAPI_BR_INS LinkedHashMap',
'PAPI_BR_INS LinkedHashSet',
'PAPI_BR_INS LinkedList',
'PAPI_BR_INS TreeMap',
'PAPI_BR_INS TreeSet',
'PAPI_BR_INS Vector',
'PAPI_BR_MSP^2',
'PAPI_BR_MSP PAPI_BR_NTK',
'PAPI_BR_MSP PAPI_BR_PRC',
'PAPI_BR_MSP PAPI_BR_TKN',
'PAPI_BR_MSP PAPI_BR_UCN',
'PAPI_BR_MSP PAPI_DP_OPS',
'PAPI_BR_MSP PAPI_FDV_INS',
'PAPI_BR_MSP PAPI_FP_INS',
'PAPI_BR_MSP PAPI_FP_OPS',
'PAPI_BR_MSP PAPI_L1_DCM',
'PAPI_BR_MSP PAPI_L1_ICM',
'PAPI_BR_MSP PAPI_L1_LDM',
'PAPI_BR_MSP PAPI_L1_STM',
'PAPI_BR_MSP PAPI_L1_TCM',
'PAPI_BR_MSP PAPI_L2_DCA',
'PAPI_BR_MSP PAPI_L2_DCH',
'PAPI_BR_MSP PAPI_L2_DCM',
'PAPI_BR_MSP PAPI_L2_DCR',
'PAPI_BR_MSP PAPI_L2_DCW',
'PAPI_BR_MSP PAPI_L2_ICA',
'PAPI_BR_MSP PAPI_L2_ICH',
'PAPI_BR_MSP PAPI_L2_ICM',
'PAPI_BR_MSP PAPI_L2_ICR',
'PAPI_BR_MSP PAPI_L2_STM',
'PAPI_BR_MSP PAPI_L2_TCA',
'PAPI_BR_MSP PAPI_L2_TCM',
'PAPI_BR_MSP PAPI_L2_TCR',
'PAPI_BR_MSP PAPI_L2_TCW',
'PAPI_BR_MSP PAPI_L3_DCA',
'PAPI_BR_MSP PAPI_L3_DCR',
'PAPI_BR_MSP PAPI_L3_DCW',
'PAPI_BR_MSP PAPI_L3_ICA',
'PAPI_BR_MSP PAPI_L3_ICR',
'PAPI_BR_MSP PAPI_L3_TCA',
'PAPI_BR_MSP PAPI_L3_TCM',
'PAPI_BR_MSP PAPI_L3_TCR',
'PAPI_BR_MSP PAPI_L3_TCW',
'PAPI_BR_MSP PAPI_LD_INS',
'PAPI_BR_MSP PAPI_REF_CYC',
'PAPI_BR_MSP PAPI_SP_OPS',
'PAPI_BR_MSP PAPI_SR_INS',
'PAPI_BR_MSP PAPI_STL_ICY',
'PAPI_BR_MSP PAPI_TLB_DM',
'PAPI_BR_MSP PAPI_TLB_IM',
'PAPI_BR_MSP PAPI_TOT_CYC',
'PAPI_BR_MSP PAPI_TOT_INS',
'PAPI_BR_MSP PAPI_VEC_DP',
'PAPI_BR_MSP PAPI_VEC_SP',
'PAPI_BR_MSP add(Object)',
'PAPI_BR_MSP add(int, Object)',
'PAPI_BR_MSP addAll(Collection)',
'PAPI_BR_MSP addAll(int, Collection)',
'PAPI_BR_MSP clear()',
'PAPI_BR_MSP contains(Object)',
'PAPI_BR_MSP containsAll(Collection)',
'PAPI_BR_MSP containsKey(Object)',
'PAPI_BR_MSP containsValue(Object)',
'PAPI_BR_MSP entrySet()',
'PAPI_BR_MSP equals(Object)',
'PAPI_BR_MSP get(Object)',
'PAPI_BR_MSP hashCode()',
'PAPI_BR_MSP indexOf(Object)',
'PAPI_BR_MSP isEmpty()',
'PAPI_BR_MSP iterator()',
'PAPI_BR_MSP keySet()',
'PAPI_BR_MSP lastIndexOf(Object)',
'PAPI_BR_MSP listIterator()',
'PAPI_BR_MSP listIterator(int)',
'PAPI_BR_MSP put(Object, Object)',
'PAPI_BR_MSP putAll(Map)',
'PAPI_BR_MSP remove(Object)',
'PAPI_BR_MSP remove(int)',
'PAPI_BR_MSP removeAll(Collection)',
'PAPI_BR_MSP retainAll(Collection)',
'PAPI_BR_MSP set(int, Object)',
'PAPI_BR_MSP size()',
'PAPI_BR_MSP sort(Comparator)',
'PAPI_BR_MSP subList(int, int)',
'PAPI_BR_MSP toArray()',
'PAPI_BR_MSP toArray(Object[])',
'PAPI_BR_MSP values()',
'PAPI_BR_MSP ArrayList',
'PAPI_BR_MSP HashMap',
'PAPI_BR_MSP HashSet',
'PAPI_BR_MSP LinkedHashMap',
'PAPI_BR_MSP LinkedHashSet',
'PAPI_BR_MSP LinkedList',
'PAPI_BR_MSP TreeMap',
'PAPI_BR_MSP TreeSet',
'PAPI_BR_MSP Vector',
'PAPI_BR_NTK^2',
'PAPI_BR_NTK PAPI_BR_PRC',
'PAPI_BR_NTK PAPI_BR_TKN',
'PAPI_BR_NTK PAPI_BR_UCN',
'PAPI_BR_NTK PAPI_DP_OPS',
'PAPI_BR_NTK PAPI_FDV_INS',
'PAPI_BR_NTK PAPI_FP_INS',
'PAPI_BR_NTK PAPI_FP_OPS',
'PAPI_BR_NTK PAPI_L1_DCM',
'PAPI_BR_NTK PAPI_L1_ICM',
'PAPI_BR_NTK PAPI_L1_LDM',
'PAPI_BR_NTK PAPI_L1_STM',
'PAPI_BR_NTK PAPI_L1_TCM',
'PAPI_BR_NTK PAPI_L2_DCA',
'PAPI_BR_NTK PAPI_L2_DCH',
'PAPI_BR_NTK PAPI_L2_DCM',
'PAPI_BR_NTK PAPI_L2_DCR',
'PAPI_BR_NTK PAPI_L2_DCW',
'PAPI_BR_NTK PAPI_L2_ICA',
'PAPI_BR_NTK PAPI_L2_ICH',
'PAPI_BR_NTK PAPI_L2_ICM',
'PAPI_BR_NTK PAPI_L2_ICR',
'PAPI_BR_NTK PAPI_L2_STM',
'PAPI_BR_NTK PAPI_L2_TCA',
'PAPI_BR_NTK PAPI_L2_TCM',
'PAPI_BR_NTK PAPI_L2_TCR',
'PAPI_BR_NTK PAPI_L2_TCW',
'PAPI_BR_NTK PAPI_L3_DCA',
'PAPI_BR_NTK PAPI_L3_DCR',
'PAPI_BR_NTK PAPI_L3_DCW',
'PAPI_BR_NTK PAPI_L3_ICA',
'PAPI_BR_NTK PAPI_L3_ICR',
'PAPI_BR_NTK PAPI_L3_TCA',
'PAPI_BR_NTK PAPI_L3_TCM',
'PAPI_BR_NTK PAPI_L3_TCR',
'PAPI_BR_NTK PAPI_L3_TCW',
'PAPI_BR_NTK PAPI_LD_INS',
'PAPI_BR_NTK PAPI_REF_CYC',
'PAPI_BR_NTK PAPI_SP_OPS',
'PAPI_BR_NTK PAPI_SR_INS',
'PAPI_BR_NTK PAPI_STL_ICY',
'PAPI_BR_NTK PAPI_TLB_DM',
'PAPI_BR_NTK PAPI_TLB_IM',
'PAPI_BR_NTK PAPI_TOT_CYC',
'PAPI_BR_NTK PAPI_TOT_INS',
'PAPI_BR_NTK PAPI_VEC_DP',
'PAPI_BR_NTK PAPI_VEC_SP',
'PAPI_BR_NTK add(Object)',
'PAPI_BR_NTK add(int, Object)',
'PAPI_BR_NTK addAll(Collection)',
'PAPI_BR_NTK addAll(int, Collection)',
'PAPI_BR_NTK clear()',
'PAPI_BR_NTK contains(Object)',
'PAPI_BR_NTK containsAll(Collection)',
'PAPI_BR_NTK containsKey(Object)',
'PAPI_BR_NTK containsValue(Object)',
'PAPI_BR_NTK entrySet()',
'PAPI_BR_NTK equals(Object)',
'PAPI_BR_NTK get(Object)',
'PAPI_BR_NTK hashCode()',
'PAPI_BR_NTK indexOf(Object)',
'PAPI_BR_NTK isEmpty()',
'PAPI_BR_NTK iterator()',
'PAPI_BR_NTK keySet()',
'PAPI_BR_NTK lastIndexOf(Object)',
'PAPI_BR_NTK listIterator()',
'PAPI_BR_NTK listIterator(int)',
'PAPI_BR_NTK put(Object, Object)',
'PAPI_BR_NTK putAll(Map)',
'PAPI_BR_NTK remove(Object)',
'PAPI_BR_NTK remove(int)',
'PAPI_BR_NTK removeAll(Collection)',
'PAPI_BR_NTK retainAll(Collection)',
'PAPI_BR_NTK set(int, Object)',
'PAPI_BR_NTK size()',
'PAPI_BR_NTK sort(Comparator)',
'PAPI_BR_NTK subList(int, int)',
'PAPI_BR_NTK toArray()',
'PAPI_BR_NTK toArray(Object[])',
'PAPI_BR_NTK values()',
'PAPI_BR_NTK ArrayList',
'PAPI_BR_NTK HashMap',
'PAPI_BR_NTK HashSet',
'PAPI_BR_NTK LinkedHashMap',
'PAPI_BR_NTK LinkedHashSet',
'PAPI_BR_NTK LinkedList',
'PAPI_BR_NTK TreeMap',
'PAPI_BR_NTK TreeSet',
'PAPI_BR_NTK Vector',
'PAPI_BR_PRC^2',
'PAPI_BR_PRC PAPI_BR_TKN',
'PAPI_BR_PRC PAPI_BR_UCN',
'PAPI_BR_PRC PAPI_DP_OPS',
'PAPI_BR_PRC PAPI_FDV_INS',
'PAPI_BR_PRC PAPI_FP_INS',
'PAPI_BR_PRC PAPI_FP_OPS',
'PAPI_BR_PRC PAPI_L1_DCM',
'PAPI_BR_PRC PAPI_L1_ICM',
'PAPI_BR_PRC PAPI_L1_LDM',
'PAPI_BR_PRC PAPI_L1_STM',
'PAPI_BR_PRC PAPI_L1_TCM',
'PAPI_BR_PRC PAPI_L2_DCA',
'PAPI_BR_PRC PAPI_L2_DCH',
'PAPI_BR_PRC PAPI_L2_DCM',
'PAPI_BR_PRC PAPI_L2_DCR',
'PAPI_BR_PRC PAPI_L2_DCW',
'PAPI_BR_PRC PAPI_L2_ICA',
'PAPI_BR_PRC PAPI_L2_ICH',
'PAPI_BR_PRC PAPI_L2_ICM',
'PAPI_BR_PRC PAPI_L2_ICR',
'PAPI_BR_PRC PAPI_L2_STM',
'PAPI_BR_PRC PAPI_L2_TCA',
'PAPI_BR_PRC PAPI_L2_TCM',
'PAPI_BR_PRC PAPI_L2_TCR',
'PAPI_BR_PRC PAPI_L2_TCW',
'PAPI_BR_PRC PAPI_L3_DCA',
'PAPI_BR_PRC PAPI_L3_DCR',
'PAPI_BR_PRC PAPI_L3_DCW',
'PAPI_BR_PRC PAPI_L3_ICA',
'PAPI_BR_PRC PAPI_L3_ICR',
'PAPI_BR_PRC PAPI_L3_TCA',
'PAPI_BR_PRC PAPI_L3_TCM',
'PAPI_BR_PRC PAPI_L3_TCR',
'PAPI_BR_PRC PAPI_L3_TCW',
'PAPI_BR_PRC PAPI_LD_INS',
'PAPI_BR_PRC PAPI_REF_CYC',
'PAPI_BR_PRC PAPI_SP_OPS',
'PAPI_BR_PRC PAPI_SR_INS',
'PAPI_BR_PRC PAPI_STL_ICY',
'PAPI_BR_PRC PAPI_TLB_DM',
'PAPI_BR_PRC PAPI_TLB_IM',
'PAPI_BR_PRC PAPI_TOT_CYC',
'PAPI_BR_PRC PAPI_TOT_INS',
'PAPI_BR_PRC PAPI_VEC_DP',
'PAPI_BR_PRC PAPI_VEC_SP',
'PAPI_BR_PRC add(Object)',
'PAPI_BR_PRC add(int, Object)',
'PAPI_BR_PRC addAll(Collection)',
'PAPI_BR_PRC addAll(int, Collection)',
'PAPI_BR_PRC clear()',
'PAPI_BR_PRC contains(Object)',
'PAPI_BR_PRC containsAll(Collection)',
'PAPI_BR_PRC containsKey(Object)',
'PAPI_BR_PRC containsValue(Object)',
'PAPI_BR_PRC entrySet()',
'PAPI_BR_PRC equals(Object)',
'PAPI_BR_PRC get(Object)',
'PAPI_BR_PRC hashCode()',
'PAPI_BR_PRC indexOf(Object)',
'PAPI_BR_PRC isEmpty()',
'PAPI_BR_PRC iterator()',
'PAPI_BR_PRC keySet()',
'PAPI_BR_PRC lastIndexOf(Object)',
'PAPI_BR_PRC listIterator()',
'PAPI_BR_PRC listIterator(int)',
'PAPI_BR_PRC put(Object, Object)',
'PAPI_BR_PRC putAll(Map)',
'PAPI_BR_PRC remove(Object)',
'PAPI_BR_PRC remove(int)',
'PAPI_BR_PRC removeAll(Collection)',
'PAPI_BR_PRC retainAll(Collection)',
'PAPI_BR_PRC set(int, Object)',
'PAPI_BR_PRC size()',
'PAPI_BR_PRC sort(Comparator)',
'PAPI_BR_PRC subList(int, int)',
'PAPI_BR_PRC toArray()',
'PAPI_BR_PRC toArray(Object[])',
'PAPI_BR_PRC values()',
'PAPI_BR_PRC ArrayList',
'PAPI_BR_PRC HashMap',
'PAPI_BR_PRC HashSet',
'PAPI_BR_PRC LinkedHashMap',
'PAPI_BR_PRC LinkedHashSet',
'PAPI_BR_PRC LinkedList',
'PAPI_BR_PRC TreeMap',
'PAPI_BR_PRC TreeSet',
'PAPI_BR_PRC Vector',
'PAPI_BR_TKN^2',
'PAPI_BR_TKN PAPI_BR_UCN',
'PAPI_BR_TKN PAPI_DP_OPS',
'PAPI_BR_TKN PAPI_FDV_INS',
'PAPI_BR_TKN PAPI_FP_INS',
'PAPI_BR_TKN PAPI_FP_OPS',
'PAPI_BR_TKN PAPI_L1_DCM',
'PAPI_BR_TKN PAPI_L1_ICM',
'PAPI_BR_TKN PAPI_L1_LDM',
'PAPI_BR_TKN PAPI_L1_STM',
'PAPI_BR_TKN PAPI_L1_TCM',
'PAPI_BR_TKN PAPI_L2_DCA',
'PAPI_BR_TKN PAPI_L2_DCH',
'PAPI_BR_TKN PAPI_L2_DCM',
'PAPI_BR_TKN PAPI_L2_DCR',
'PAPI_BR_TKN PAPI_L2_DCW',
'PAPI_BR_TKN PAPI_L2_ICA',
'PAPI_BR_TKN PAPI_L2_ICH',
'PAPI_BR_TKN PAPI_L2_ICM',
'PAPI_BR_TKN PAPI_L2_ICR',
'PAPI_BR_TKN PAPI_L2_STM',
'PAPI_BR_TKN PAPI_L2_TCA',
'PAPI_BR_TKN PAPI_L2_TCM',
'PAPI_BR_TKN PAPI_L2_TCR',
'PAPI_BR_TKN PAPI_L2_TCW',
'PAPI_BR_TKN PAPI_L3_DCA',
'PAPI_BR_TKN PAPI_L3_DCR',
'PAPI_BR_TKN PAPI_L3_DCW',
'PAPI_BR_TKN PAPI_L3_ICA',
'PAPI_BR_TKN PAPI_L3_ICR',
'PAPI_BR_TKN PAPI_L3_TCA',
'PAPI_BR_TKN PAPI_L3_TCM',
'PAPI_BR_TKN PAPI_L3_TCR',
'PAPI_BR_TKN PAPI_L3_TCW',
'PAPI_BR_TKN PAPI_LD_INS',
'PAPI_BR_TKN PAPI_REF_CYC',
'PAPI_BR_TKN PAPI_SP_OPS',
'PAPI_BR_TKN PAPI_SR_INS',
'PAPI_BR_TKN PAPI_STL_ICY',
'PAPI_BR_TKN PAPI_TLB_DM',
'PAPI_BR_TKN PAPI_TLB_IM',
'PAPI_BR_TKN PAPI_TOT_CYC',
'PAPI_BR_TKN PAPI_TOT_INS',
'PAPI_BR_TKN PAPI_VEC_DP',
'PAPI_BR_TKN PAPI_VEC_SP',
'PAPI_BR_TKN add(Object)',
'PAPI_BR_TKN add(int, Object)',
'PAPI_BR_TKN addAll(Collection)',
'PAPI_BR_TKN addAll(int, Collection)',
'PAPI_BR_TKN clear()',
'PAPI_BR_TKN contains(Object)',
'PAPI_BR_TKN containsAll(Collection)',
'PAPI_BR_TKN containsKey(Object)',
'PAPI_BR_TKN containsValue(Object)',
'PAPI_BR_TKN entrySet()',
'PAPI_BR_TKN equals(Object)',
'PAPI_BR_TKN get(Object)',
'PAPI_BR_TKN hashCode()',
'PAPI_BR_TKN indexOf(Object)',
'PAPI_BR_TKN isEmpty()',
'PAPI_BR_TKN iterator()',
'PAPI_BR_TKN keySet()',
'PAPI_BR_TKN lastIndexOf(Object)',
'PAPI_BR_TKN listIterator()',
'PAPI_BR_TKN listIterator(int)',
'PAPI_BR_TKN put(Object, Object)',
'PAPI_BR_TKN putAll(Map)',
'PAPI_BR_TKN remove(Object)',
'PAPI_BR_TKN remove(int)',
'PAPI_BR_TKN removeAll(Collection)',
'PAPI_BR_TKN retainAll(Collection)',
'PAPI_BR_TKN set(int, Object)',
'PAPI_BR_TKN size()',
'PAPI_BR_TKN sort(Comparator)',
'PAPI_BR_TKN subList(int, int)',
'PAPI_BR_TKN toArray()',
'PAPI_BR_TKN toArray(Object[])',
'PAPI_BR_TKN values()',
'PAPI_BR_TKN ArrayList',
'PAPI_BR_TKN HashMap',
'PAPI_BR_TKN HashSet',
'PAPI_BR_TKN LinkedHashMap',
'PAPI_BR_TKN LinkedHashSet',
'PAPI_BR_TKN LinkedList',
'PAPI_BR_TKN TreeMap',
'PAPI_BR_TKN TreeSet',
'PAPI_BR_TKN Vector',
'PAPI_BR_UCN^2',
'PAPI_BR_UCN PAPI_DP_OPS',
'PAPI_BR_UCN PAPI_FDV_INS',
'PAPI_BR_UCN PAPI_FP_INS',
'PAPI_BR_UCN PAPI_FP_OPS',
'PAPI_BR_UCN PAPI_L1_DCM',
'PAPI_BR_UCN PAPI_L1_ICM',
'PAPI_BR_UCN PAPI_L1_LDM',
'PAPI_BR_UCN PAPI_L1_STM',
'PAPI_BR_UCN PAPI_L1_TCM',
'PAPI_BR_UCN PAPI_L2_DCA',
'PAPI_BR_UCN PAPI_L2_DCH',
'PAPI_BR_UCN PAPI_L2_DCM',
'PAPI_BR_UCN PAPI_L2_DCR',
'PAPI_BR_UCN PAPI_L2_DCW',
'PAPI_BR_UCN PAPI_L2_ICA',
'PAPI_BR_UCN PAPI_L2_ICH',
'PAPI_BR_UCN PAPI_L2_ICM',
'PAPI_BR_UCN PAPI_L2_ICR',
'PAPI_BR_UCN PAPI_L2_STM',
'PAPI_BR_UCN PAPI_L2_TCA',
'PAPI_BR_UCN PAPI_L2_TCM',
'PAPI_BR_UCN PAPI_L2_TCR',
'PAPI_BR_UCN PAPI_L2_TCW',
'PAPI_BR_UCN PAPI_L3_DCA',
'PAPI_BR_UCN PAPI_L3_DCR',
'PAPI_BR_UCN PAPI_L3_DCW',
'PAPI_BR_UCN PAPI_L3_ICA',
'PAPI_BR_UCN PAPI_L3_ICR',
'PAPI_BR_UCN PAPI_L3_TCA',
'PAPI_BR_UCN PAPI_L3_TCM',
'PAPI_BR_UCN PAPI_L3_TCR',
'PAPI_BR_UCN PAPI_L3_TCW',
'PAPI_BR_UCN PAPI_LD_INS',
'PAPI_BR_UCN PAPI_REF_CYC',
'PAPI_BR_UCN PAPI_SP_OPS',
'PAPI_BR_UCN PAPI_SR_INS',
'PAPI_BR_UCN PAPI_STL_ICY',
'PAPI_BR_UCN PAPI_TLB_DM',
'PAPI_BR_UCN PAPI_TLB_IM',
'PAPI_BR_UCN PAPI_TOT_CYC',
'PAPI_BR_UCN PAPI_TOT_INS',
'PAPI_BR_UCN PAPI_VEC_DP',
'PAPI_BR_UCN PAPI_VEC_SP',
'PAPI_BR_UCN add(Object)',
'PAPI_BR_UCN add(int, Object)',
'PAPI_BR_UCN addAll(Collection)',
'PAPI_BR_UCN addAll(int, Collection)',
'PAPI_BR_UCN clear()',
'PAPI_BR_UCN contains(Object)',
'PAPI_BR_UCN containsAll(Collection)',
'PAPI_BR_UCN containsKey(Object)',
'PAPI_BR_UCN containsValue(Object)',
'PAPI_BR_UCN entrySet()',
'PAPI_BR_UCN equals(Object)',
'PAPI_BR_UCN get(Object)',
'PAPI_BR_UCN hashCode()',
'PAPI_BR_UCN indexOf(Object)',
'PAPI_BR_UCN isEmpty()',
'PAPI_BR_UCN iterator()',
'PAPI_BR_UCN keySet()',
'PAPI_BR_UCN lastIndexOf(Object)',
'PAPI_BR_UCN listIterator()',
'PAPI_BR_UCN listIterator(int)',
'PAPI_BR_UCN put(Object, Object)',
'PAPI_BR_UCN putAll(Map)',
'PAPI_BR_UCN remove(Object)',
'PAPI_BR_UCN remove(int)',
'PAPI_BR_UCN removeAll(Collection)',
'PAPI_BR_UCN retainAll(Collection)',
'PAPI_BR_UCN set(int, Object)',
'PAPI_BR_UCN size()',
'PAPI_BR_UCN sort(Comparator)',
'PAPI_BR_UCN subList(int, int)',
'PAPI_BR_UCN toArray()',
'PAPI_BR_UCN toArray(Object[])',
'PAPI_BR_UCN values()',
'PAPI_BR_UCN ArrayList',
'PAPI_BR_UCN HashMap',
'PAPI_BR_UCN HashSet',
'PAPI_BR_UCN LinkedHashMap',
'PAPI_BR_UCN LinkedHashSet',
'PAPI_BR_UCN LinkedList',
'PAPI_BR_UCN TreeMap',
'PAPI_BR_UCN TreeSet',
'PAPI_BR_UCN Vector',
'PAPI_DP_OPS^2',
'PAPI_DP_OPS PAPI_FDV_INS',
'PAPI_DP_OPS PAPI_FP_INS',
'PAPI_DP_OPS PAPI_FP_OPS',
'PAPI_DP_OPS PAPI_L1_DCM',
'PAPI_DP_OPS PAPI_L1_ICM',
'PAPI_DP_OPS PAPI_L1_LDM',
'PAPI_DP_OPS PAPI_L1_STM',
'PAPI_DP_OPS PAPI_L1_TCM',
'PAPI_DP_OPS PAPI_L2_DCA',
'PAPI_DP_OPS PAPI_L2_DCH',
'PAPI_DP_OPS PAPI_L2_DCM',
'PAPI_DP_OPS PAPI_L2_DCR',
'PAPI_DP_OPS PAPI_L2_DCW',
'PAPI_DP_OPS PAPI_L2_ICA',
'PAPI_DP_OPS PAPI_L2_ICH',
'PAPI_DP_OPS PAPI_L2_ICM',
'PAPI_DP_OPS PAPI_L2_ICR',
'PAPI_DP_OPS PAPI_L2_STM',
'PAPI_DP_OPS PAPI_L2_TCA',
'PAPI_DP_OPS PAPI_L2_TCM',
'PAPI_DP_OPS PAPI_L2_TCR',
'PAPI_DP_OPS PAPI_L2_TCW',
'PAPI_DP_OPS PAPI_L3_DCA',
'PAPI_DP_OPS PAPI_L3_DCR',
'PAPI_DP_OPS PAPI_L3_DCW',
'PAPI_DP_OPS PAPI_L3_ICA',
'PAPI_DP_OPS PAPI_L3_ICR',
'PAPI_DP_OPS PAPI_L3_TCA',
'PAPI_DP_OPS PAPI_L3_TCM',
'PAPI_DP_OPS PAPI_L3_TCR',
'PAPI_DP_OPS PAPI_L3_TCW',
'PAPI_DP_OPS PAPI_LD_INS',
'PAPI_DP_OPS PAPI_REF_CYC',
'PAPI_DP_OPS PAPI_SP_OPS',
'PAPI_DP_OPS PAPI_SR_INS',
'PAPI_DP_OPS PAPI_STL_ICY',
'PAPI_DP_OPS PAPI_TLB_DM',
'PAPI_DP_OPS PAPI_TLB_IM',
'PAPI_DP_OPS PAPI_TOT_CYC',
'PAPI_DP_OPS PAPI_TOT_INS',
'PAPI_DP_OPS PAPI_VEC_DP',
'PAPI_DP_OPS PAPI_VEC_SP',
'PAPI_DP_OPS add(Object)',
'PAPI_DP_OPS add(int, Object)',
'PAPI_DP_OPS addAll(Collection)',
'PAPI_DP_OPS addAll(int, Collection)',
'PAPI_DP_OPS clear()',
'PAPI_DP_OPS contains(Object)',
'PAPI_DP_OPS containsAll(Collection)',
'PAPI_DP_OPS containsKey(Object)',
'PAPI_DP_OPS containsValue(Object)',
'PAPI_DP_OPS entrySet()',
'PAPI_DP_OPS equals(Object)',
'PAPI_DP_OPS get(Object)',
'PAPI_DP_OPS hashCode()',
'PAPI_DP_OPS indexOf(Object)',
'PAPI_DP_OPS isEmpty()',
'PAPI_DP_OPS iterator()',
'PAPI_DP_OPS keySet()',
'PAPI_DP_OPS lastIndexOf(Object)',
'PAPI_DP_OPS listIterator()',
'PAPI_DP_OPS listIterator(int)',
'PAPI_DP_OPS put(Object, Object)',
'PAPI_DP_OPS putAll(Map)',
'PAPI_DP_OPS remove(Object)',
'PAPI_DP_OPS remove(int)',
'PAPI_DP_OPS removeAll(Collection)',
'PAPI_DP_OPS retainAll(Collection)',
'PAPI_DP_OPS set(int, Object)',
'PAPI_DP_OPS size()',
'PAPI_DP_OPS sort(Comparator)',
'PAPI_DP_OPS subList(int, int)',
'PAPI_DP_OPS toArray()',
'PAPI_DP_OPS toArray(Object[])',
'PAPI_DP_OPS values()',
'PAPI_DP_OPS ArrayList',
'PAPI_DP_OPS HashMap',
'PAPI_DP_OPS HashSet',
'PAPI_DP_OPS LinkedHashMap',
'PAPI_DP_OPS LinkedHashSet',
'PAPI_DP_OPS LinkedList',
'PAPI_DP_OPS TreeMap',
'PAPI_DP_OPS TreeSet',
'PAPI_DP_OPS Vector',
'PAPI_FDV_INS^2',
'PAPI_FDV_INS PAPI_FP_INS',
'PAPI_FDV_INS PAPI_FP_OPS',
'PAPI_FDV_INS PAPI_L1_DCM',
'PAPI_FDV_INS PAPI_L1_ICM',
'PAPI_FDV_INS PAPI_L1_LDM',
'PAPI_FDV_INS PAPI_L1_STM',
'PAPI_FDV_INS PAPI_L1_TCM',
'PAPI_FDV_INS PAPI_L2_DCA',
'PAPI_FDV_INS PAPI_L2_DCH',
'PAPI_FDV_INS PAPI_L2_DCM',
'PAPI_FDV_INS PAPI_L2_DCR',
'PAPI_FDV_INS PAPI_L2_DCW',
'PAPI_FDV_INS PAPI_L2_ICA',
'PAPI_FDV_INS PAPI_L2_ICH',
'PAPI_FDV_INS PAPI_L2_ICM',
'PAPI_FDV_INS PAPI_L2_ICR',
'PAPI_FDV_INS PAPI_L2_STM',
'PAPI_FDV_INS PAPI_L2_TCA',
'PAPI_FDV_INS PAPI_L2_TCM',
'PAPI_FDV_INS PAPI_L2_TCR',
'PAPI_FDV_INS PAPI_L2_TCW',
'PAPI_FDV_INS PAPI_L3_DCA',
'PAPI_FDV_INS PAPI_L3_DCR',
'PAPI_FDV_INS PAPI_L3_DCW',
'PAPI_FDV_INS PAPI_L3_ICA',
'PAPI_FDV_INS PAPI_L3_ICR',
'PAPI_FDV_INS PAPI_L3_TCA',
'PAPI_FDV_INS PAPI_L3_TCM',
'PAPI_FDV_INS PAPI_L3_TCR',
'PAPI_FDV_INS PAPI_L3_TCW',
'PAPI_FDV_INS PAPI_LD_INS',
'PAPI_FDV_INS PAPI_REF_CYC',
'PAPI_FDV_INS PAPI_SP_OPS',
'PAPI_FDV_INS PAPI_SR_INS',
'PAPI_FDV_INS PAPI_STL_ICY',
'PAPI_FDV_INS PAPI_TLB_DM',
'PAPI_FDV_INS PAPI_TLB_IM',
'PAPI_FDV_INS PAPI_TOT_CYC',
'PAPI_FDV_INS PAPI_TOT_INS',
'PAPI_FDV_INS PAPI_VEC_DP',
'PAPI_FDV_INS PAPI_VEC_SP',
'PAPI_FDV_INS add(Object)',
'PAPI_FDV_INS add(int, Object)',
'PAPI_FDV_INS addAll(Collection)',
'PAPI_FDV_INS addAll(int, Collection)',
'PAPI_FDV_INS clear()',
'PAPI_FDV_INS contains(Object)',
'PAPI_FDV_INS containsAll(Collection)',
'PAPI_FDV_INS containsKey(Object)',
'PAPI_FDV_INS containsValue(Object)',
'PAPI_FDV_INS entrySet()',
'PAPI_FDV_INS equals(Object)',
'PAPI_FDV_INS get(Object)',
'PAPI_FDV_INS hashCode()',
'PAPI_FDV_INS indexOf(Object)',
'PAPI_FDV_INS isEmpty()',
'PAPI_FDV_INS iterator()',
'PAPI_FDV_INS keySet()',
'PAPI_FDV_INS lastIndexOf(Object)',
'PAPI_FDV_INS listIterator()',
'PAPI_FDV_INS listIterator(int)',
'PAPI_FDV_INS put(Object, Object)',
'PAPI_FDV_INS putAll(Map)',
'PAPI_FDV_INS remove(Object)',
'PAPI_FDV_INS remove(int)',
'PAPI_FDV_INS removeAll(Collection)',
'PAPI_FDV_INS retainAll(Collection)',
'PAPI_FDV_INS set(int, Object)',
'PAPI_FDV_INS size()',
'PAPI_FDV_INS sort(Comparator)',
'PAPI_FDV_INS subList(int, int)',
'PAPI_FDV_INS toArray()',
'PAPI_FDV_INS toArray(Object[])',
'PAPI_FDV_INS values()',
'PAPI_FDV_INS ArrayList',
'PAPI_FDV_INS HashMap',
'PAPI_FDV_INS HashSet',
'PAPI_FDV_INS LinkedHashMap',
'PAPI_FDV_INS LinkedHashSet',
'PAPI_FDV_INS LinkedList',
'PAPI_FDV_INS TreeMap',
'PAPI_FDV_INS TreeSet',
'PAPI_FDV_INS Vector',
'PAPI_FP_INS^2',
'PAPI_FP_INS PAPI_FP_OPS',
'PAPI_FP_INS PAPI_L1_DCM',
'PAPI_FP_INS PAPI_L1_ICM',
'PAPI_FP_INS PAPI_L1_LDM',
'PAPI_FP_INS PAPI_L1_STM',
'PAPI_FP_INS PAPI_L1_TCM',
'PAPI_FP_INS PAPI_L2_DCA',
'PAPI_FP_INS PAPI_L2_DCH',
'PAPI_FP_INS PAPI_L2_DCM',
'PAPI_FP_INS PAPI_L2_DCR',
'PAPI_FP_INS PAPI_L2_DCW',
'PAPI_FP_INS PAPI_L2_ICA',
'PAPI_FP_INS PAPI_L2_ICH',
'PAPI_FP_INS PAPI_L2_ICM',
'PAPI_FP_INS PAPI_L2_ICR',
'PAPI_FP_INS PAPI_L2_STM',
'PAPI_FP_INS PAPI_L2_TCA',
'PAPI_FP_INS PAPI_L2_TCM',
'PAPI_FP_INS PAPI_L2_TCR',
'PAPI_FP_INS PAPI_L2_TCW',
'PAPI_FP_INS PAPI_L3_DCA',
'PAPI_FP_INS PAPI_L3_DCR',
'PAPI_FP_INS PAPI_L3_DCW',
'PAPI_FP_INS PAPI_L3_ICA',
'PAPI_FP_INS PAPI_L3_ICR',
'PAPI_FP_INS PAPI_L3_TCA',
'PAPI_FP_INS PAPI_L3_TCM',
'PAPI_FP_INS PAPI_L3_TCR',
'PAPI_FP_INS PAPI_L3_TCW',
'PAPI_FP_INS PAPI_LD_INS',
'PAPI_FP_INS PAPI_REF_CYC',
'PAPI_FP_INS PAPI_SP_OPS',
'PAPI_FP_INS PAPI_SR_INS',
'PAPI_FP_INS PAPI_STL_ICY',
'PAPI_FP_INS PAPI_TLB_DM',
'PAPI_FP_INS PAPI_TLB_IM',
'PAPI_FP_INS PAPI_TOT_CYC',
'PAPI_FP_INS PAPI_TOT_INS',
'PAPI_FP_INS PAPI_VEC_DP',
'PAPI_FP_INS PAPI_VEC_SP',
'PAPI_FP_INS add(Object)',
'PAPI_FP_INS add(int, Object)',
'PAPI_FP_INS addAll(Collection)',
'PAPI_FP_INS addAll(int, Collection)',
'PAPI_FP_INS clear()',
'PAPI_FP_INS contains(Object)',
'PAPI_FP_INS containsAll(Collection)',
'PAPI_FP_INS containsKey(Object)',
'PAPI_FP_INS containsValue(Object)',
'PAPI_FP_INS entrySet()',
'PAPI_FP_INS equals(Object)',
'PAPI_FP_INS get(Object)',
'PAPI_FP_INS hashCode()',
'PAPI_FP_INS indexOf(Object)',
'PAPI_FP_INS isEmpty()',
'PAPI_FP_INS iterator()',
'PAPI_FP_INS keySet()',
'PAPI_FP_INS lastIndexOf(Object)',
'PAPI_FP_INS listIterator()',
'PAPI_FP_INS listIterator(int)',
'PAPI_FP_INS put(Object, Object)',
'PAPI_FP_INS putAll(Map)',
'PAPI_FP_INS remove(Object)',
'PAPI_FP_INS remove(int)',
'PAPI_FP_INS removeAll(Collection)',
'PAPI_FP_INS retainAll(Collection)',
'PAPI_FP_INS set(int, Object)',
'PAPI_FP_INS size()',
'PAPI_FP_INS sort(Comparator)',
'PAPI_FP_INS subList(int, int)',
'PAPI_FP_INS toArray()',
'PAPI_FP_INS toArray(Object[])',
'PAPI_FP_INS values()',
'PAPI_FP_INS ArrayList',
'PAPI_FP_INS HashMap',
'PAPI_FP_INS HashSet',
'PAPI_FP_INS LinkedHashMap',
'PAPI_FP_INS LinkedHashSet',
'PAPI_FP_INS LinkedList',
'PAPI_FP_INS TreeMap',
'PAPI_FP_INS TreeSet',
'PAPI_FP_INS Vector',
'PAPI_FP_OPS^2',
'PAPI_FP_OPS PAPI_L1_DCM',
'PAPI_FP_OPS PAPI_L1_ICM',
'PAPI_FP_OPS PAPI_L1_LDM',
'PAPI_FP_OPS PAPI_L1_STM',
'PAPI_FP_OPS PAPI_L1_TCM',
'PAPI_FP_OPS PAPI_L2_DCA',
'PAPI_FP_OPS PAPI_L2_DCH',
'PAPI_FP_OPS PAPI_L2_DCM',
'PAPI_FP_OPS PAPI_L2_DCR',
'PAPI_FP_OPS PAPI_L2_DCW',
'PAPI_FP_OPS PAPI_L2_ICA',
'PAPI_FP_OPS PAPI_L2_ICH',
'PAPI_FP_OPS PAPI_L2_ICM',
'PAPI_FP_OPS PAPI_L2_ICR',
'PAPI_FP_OPS PAPI_L2_STM',
'PAPI_FP_OPS PAPI_L2_TCA',
'PAPI_FP_OPS PAPI_L2_TCM',
'PAPI_FP_OPS PAPI_L2_TCR',
'PAPI_FP_OPS PAPI_L2_TCW',
'PAPI_FP_OPS PAPI_L3_DCA',
'PAPI_FP_OPS PAPI_L3_DCR',
'PAPI_FP_OPS PAPI_L3_DCW',
'PAPI_FP_OPS PAPI_L3_ICA',
'PAPI_FP_OPS PAPI_L3_ICR',
'PAPI_FP_OPS PAPI_L3_TCA',
'PAPI_FP_OPS PAPI_L3_TCM',
'PAPI_FP_OPS PAPI_L3_TCR',
'PAPI_FP_OPS PAPI_L3_TCW',
'PAPI_FP_OPS PAPI_LD_INS',
'PAPI_FP_OPS PAPI_REF_CYC',
'PAPI_FP_OPS PAPI_SP_OPS',
...]
 
%% Cell type:code id: tags:
 
``` python
# Prepare the data:
# We build a data frame mapping each feature name to its importance,
# then keep only the ten most important features for the plot.
feature_importances = pd.DataFrame()
feature_importances["name"] = model.feature_names
feature_importances["importance"] = model.feature_importances_
 
data_sorted = feature_importances.sort_values(by="importance", ascending=False).head(10)
sns.barplot(data=data_sorted, x="importance", y="name")
```
 
%% Output
 
<matplotlib.axes._subplots.AxesSubplot at 0x7f062d1ace50>
 
 
%% Cell type:markdown id: tags:
 
The above plot is not that easy to read. `PAPI_REF_CYC * remove(int)`? What am I supposed to do with that?
Maybe it would be better to avoid using polynomial features if we want an explainable model.
 
`PAPI_STL_ICY` is the number of cycles during which no instruction was issued. It might actually be a good indicator of poor performance, but it is hard to see how a user would act on such a feature.
 
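%% Cell type:markdown id: tags:

To make the importances easier to interpret, one option is to drop the polynomial features entirely and retrain a forest on the raw counters. The sketch below is untested and uses hypothetical names: `X_base` for the data frame of raw feature columns and `y` for the best-data-structure labels, built the same way as the training data for `model`.

%% Cell type:code id: tags:

``` python
# Sketch only (not run): retrain on the raw features, without polynomial interaction terms.
# `X_base` (raw feature columns) and `y` (best-data-structure labels) are assumed
# to be built the same way as the training data for `model`.
from sklearn.ensemble import RandomForestClassifier

rf_plain = RandomForestClassifier(n_estimators=100, random_state=0)
rf_plain.fit(X_base, y)

plain_importances = pd.DataFrame({
    "name": X_base.columns,
    "importance": rf_plain.feature_importances_,
}).sort_values(by="importance", ascending=False).head(10)

# Each bar now corresponds to a single counter or method count, which is easier to read.
sns.barplot(data=plain_importances, x="importance", y="name")
```
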
%% Cell type:markdown id: tags:
 
## How does the model compare with the "dumb classifier"?

The "dumb classifier" always predicts, for a given interface, the data structure that wins most often in the experiments. How often does my model disagree with it?!
 
%% Cell type:code id: tags:
 
``` python
# Step one: build the "dumb classifier".
# It is a function from the benchmarked interface (List, Map, Set) to the implementation
# of that interface that wins most often, so we group the rows by benchmark and take
# the most frequent winner in each group.
grouped_interface = data["data"].groupby("Benchmark")

dumb_classification_function = {}
for label, group in grouped_interface:
    # value_counts() is sorted by frequency; idxmax() returns the most common winner
    # (Series.argmax() is deprecated for this use).
    dumb_classification_function[label] = group["Param: datastructureName_best"].value_counts().idxmax()

print(dumb_classification_function)
```
 
%% Output
 
{'se.lth.cs.jmh.MapApplicationBenchmark.MapApplicationBenchmark': 'TreeMap', 'se.lth.cs.jmh.SetApplicationBenchmark.SetApplicationBenchmark': 'LinkedHashSet', 'se.lth.cs.jmh.ListApplicationBenchmark.ListApplicationBenchmark': 'ArrayList'}
 
 
%% Cell type:code id: tags:
 
``` python
# Now that we have the "dumb classification", check how often it picks the measured
# best data structure (the same ground truth the random forest is trained on).

# Apply the dumb classifier to every row.
classify = lambda row: dumb_classification_function[row["Benchmark"]]

classified = data["data"].apply(classify, axis=1)
comparison = (data["data"]["Param: datastructureName_best"] == classified).value_counts()

# Plot the number of rows where the dumb classification matches (True) or not (False).
sns.barplot(x=comparison.index, y=comparison)
```
 
%% Output
 
<matplotlib.axes._subplots.AxesSubplot at 0x7ffa0d97f950>
 
 
%% Cell type:markdown id: tags:
 
We can see that the "dumb classifier" picks the measured best data structure more often than not. Overall, it would be interesting to check whether the extra classification effort is really worth it.

For instance, we may want to know which of the dumb classifier and the random forest is the more accurate (it is pretty likely to be the random forest), and by how much it wins in practice.
 
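%% Cell type:markdown id: tags:

Below is a rough, untested sketch of that accuracy comparison. It assumes a hypothetical `X`: the (normalized) feature matrix `model` was trained on, row-aligned with `data["data"]`; `classified` is the dumb prediction computed above. Evaluating the forest on its own training rows is optimistic, so a held-out split would be fairer.

%% Cell type:code id: tags:

``` python
# Sketch only (not run): accuracy of the dumb classifier vs. the random forest.
# `X` is assumed to be the feature matrix `model` was trained on, aligned with data["data"].
from sklearn.metrics import accuracy_score

truth = data["data"]["Param: datastructureName_best"]

dumb_accuracy = accuracy_score(truth, classified)
forest_accuracy = accuracy_score(truth, model.predict(X))  # optimistic: same rows it was trained on

print("Dumb classifier accuracy:", dumb_accuracy)
print("Random forest accuracy:", forest_accuracy)
```
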
%% Cell type:markdown id: tags:
 
## How does the model compare with using neural networks for the same thing?

Since the authors of Brainy use a neural network, how come I don't use one?

SPOILER: I hope to show that, in fact, you do not need a neural network!
 
%% Cell type:code id: tags:
 
``` python
# TODO: train a neural network on the same features; a rough sketch of what this could look like follows below.
```
 
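%% Cell type:markdown id: tags:

As a starting point, here is a rough, untested sketch using scikit-learn's `MLPClassifier` as a stand-in for Brainy's network (the architecture and hyperparameters are placeholders, not Brainy's). It assumes hypothetical names `X` and `y`: the same feature matrix and best-data-structure labels used for the random forest.

%% Cell type:code id: tags:

``` python
# Sketch only (not run): train a small multi-layer perceptron on the same features
# and compare held-out accuracy with the random forest.
# `X` and `y` are assumed to be the feature matrix and best-data-structure labels
# that were used to train `model`.
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

mlp = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=500, random_state=0)
mlp.fit(X_train, y_train)

print("MLP accuracy:", accuracy_score(y_test, mlp.predict(X_test)))
# Caveat: `model` saw all rows during training, so this comparison flatters the forest.
print("Random forest accuracy:", accuracy_score(y_test, model.predict(X_test)))
```
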
%% Cell type:markdown id: tags:
 
# Appendix
 
Some things for some figures, but not necessarily super important:
 
%% Cell type:code id: tags:
 
``` python
# This plots a "heatmap" of feature co-occurrence for a single application.
features = data["data"].iloc[0].get(data["software_selected_columns"])
features_bool = ((features > 0) * 1).to_frame()
# Outer product: entry (i, j) is 1 iff both feature i and feature j are non-zero
# for this application. (Well, damn, that took me forever!)
product = features_bool.dot(features_bool.transpose())
fig = plt.figure(figsize=(10,10))
sns.heatmap(product)
fig.savefig("heatmap_example.pdf")
```
 
%% Output
 
......@@ -25,7 +25,7 @@ public class ListApplicationBenchmark {
@Param({"LinkedList", "ArrayList", "Vector"})
public String datastructureName;
@Param({"0", "1000", "2000"})
@Param({"0", "1000", "10000", "1000000"})
public int baseStructureSize;
public Application currentApplication;
......
......@@ -21,7 +21,7 @@ public class MapApplicationBenchmark {
@Param({"HashMap", "TreeMap", "IdentityHashMap", "LinkedHashMap", "WeakHashMap"})
public String datastructureName;
@Param({"0", "1000", "2000"})
@Param({"0", "1000", "10000", "1000000"})
public int baseStructureSize;
public Application currentApplication;
......
......@@ -21,7 +21,7 @@ public class SetApplicationBenchmark {
@Param({"HashSet", "TreeSet", "LinkedHashSet"})
public String datastructureName;
@Param({"0", "1000", "2000"})
@Param({"0", "1000", "10000", "1000000"})
public int baseStructureSize;
public Application currentApplication;
......
......@@ -2,11 +2,13 @@ package se.lth.cs.jmh.commandline
import com.github.ajalt.clikt.core.CliktCommand
import com.github.ajalt.clikt.parameters.options.default
import com.github.ajalt.clikt.parameters.options.flag
import com.github.ajalt.clikt.parameters.options.option
import com.github.ajalt.clikt.parameters.types.int
import com.github.ajalt.clikt.parameters.types.long
import org.openjdk.jmh.results.format.ResultFormatType
import org.openjdk.jmh.runner.Runner
import org.openjdk.jmh.runner.options.CommandLineOptions
import org.openjdk.jmh.runner.options.OptionsBuilder
import org.openjdk.jmh.runner.options.TimeValue
import se.lth.cs.jmh.JMHTimedRunner
......@@ -48,6 +50,12 @@ class JMHCommandLine : CliktCommand() {
val infoFileName : String? by option("--info-file", "-p",
help="File name to write data about JMH run")
/**
* A field to activate a smaller run to test things.
*/
val quickBenchmark : Boolean by option("--quick-bench", "-q",
help="Run a smaller benchmark").flag()
override fun run() {
val seedsText = IntRange(0, numberSeeds - 1)
.map { it.toString() }
......@@ -61,12 +69,19 @@ class JMHCommandLine : CliktCommand() {
.measurementIterations(measurementIterations)
.resultFormat(ResultFormatType.CSV)
.result(String.format("jmh-results-%s.csv", getCommit()))
.param("seed", *seedsText)
if (quickBenchmark) {
opts.param("seed", "0", "1", "2")
.include("List")
.param("baseStructureSize", "100000")
.param("applicationSize", "10", "100", "1000")
} else {
opts.param("seed", *seedsText)
.param("baseStructureSize", "0", "1000", "10000")
.param("applicationSize", "10", "100", "1000")
.build()
}
val r = JMHTimedRunner(opts)
val r = JMHTimedRunner(opts.build())
val results = r.runWithTime()
if (!infoFileName.isNullOrBlank()) {
......
......@@ -289,7 +289,7 @@ def load_training_data(jmh_results_filename,
# OK.
continue
else:
raise "The path '{0}' does not exist".format(file)
raise Exception("The path '{0}' does not exist".format(file))
# Ok here we go
jmh_with_best = load_jmh_data(jmh_results_filename)
......