Commit 5ce6df38 authored by Noric Couderc's avatar Noric Couderc

Implemented weighted benchmarks

Some benchmarks have a lot of duplicates, so we compress the data by
aggregating them; the number of duplicates for each benchmark is then
stored.
parent 6a641951
package se.lth.cs;
import kotlin.Pair;
import se.lth.cs.bcgen.BCBenchmark;
import se.lth.cs.bcgen.BCBenchmarkPackage;
import java.util.*;
......@@ -24,25 +23,28 @@ public class TraceBenchmarkRunner extends SyntheticBenchmarkRunner {
* Groups benchmarks by location, runs them, gets the collection
* that minimizes the sum of running time for the location
* and returns a suggestion
* @param l
* @param s
* @return
*/
// Groups benchmarks by allocation site, runs them, and returns one
// suggestion per site: the collection type with the lowest total cost.
// NOTE(review): this span interleaves pre-change and post-change lines of a
// diff; lines marked "pre-change" below are stale remnants superseded by the
// line(s) that follow them, and the span will not compile as-is.
public List<TrainingSetValue.CollectionSuggestion>
getSuggestions(Stream<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>> s) { // pre-change signature (superseded below)
Map<String, List<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>>> // pre-change (superseded)
getSuggestions(Stream<Pair<TraceLoader.AggregatedTraceData, BCBenchmarkPackage<?>>> s) {
Stream<TraceBenchmark> benchmarks = s.map(it -> new TraceBenchmark(it.component1(), it.component2()));
// We group the benchmarks by location (allocation site)
Map<String, List<TraceBenchmark>>
// NOTE(review): raw TreeMap constructor — unchecked; consider new TreeMap<>(...)
groups = new TreeMap(
s.collect(Collectors.groupingBy(it -> it.component1().getLocation()))); // pre-change (superseded)
benchmarks.collect(Collectors.groupingBy(
it -> it.metadata.getLocation()
)));
ArrayList<TrainingSetValue.CollectionSuggestion> suggestions = new ArrayList<>();
for (Map.Entry<String, List<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>>> g : groups.entrySet()) { // pre-change loop header (superseded)
for (Map.Entry<String, List<TraceBenchmark>> g : groups.entrySet()) {
System.out.println("Getting suggestion for: " + g.getKey());
// We want the location
String location = g.getKey();
// We want the current type at that location
String currentType = g.getValue().get(0).component1().getTarget_type(); // pre-change (superseded)
String currentType = g.getValue().get(0).metadata.getTarget_type();
// We want the type that minimizes the sum of runtime for all benchmarks with that location
Stream<BCBenchmarkPackage<?>> benchmarks = g.getValue().stream().map(it -> it.component2()); // pre-change (superseded)
String bestType = getCollectionWithShortestRun(benchmarks.collect(Collectors.toList())); // pre-change (superseded)
String bestType = getCollectionWithShortestRun(g.getValue());
suggestions.add(
new TrainingSetValue.CollectionSuggestion(location, currentType, bestType)
);
......@@ -51,22 +53,43 @@ public class TraceBenchmarkRunner extends SyntheticBenchmarkRunner {
return suggestions;
}
/**
 * Couples one generated benchmark with the aggregated trace it was built
 * from, and prices the benchmark by how often that trace occurred.
 */
class TraceBenchmark {
    BCBenchmarkPackage<?> benchmark;
    TraceLoader.AggregatedTraceData metadata;

    public TraceBenchmark(TraceLoader.AggregatedTraceData metadata, BCBenchmarkPackage<?> benchmark) {
        this.metadata = metadata;
        this.benchmark = benchmark;
    }

    /** Runs the wrapped benchmark application and collects its measurements. */
    private SyntheticBenchmarkRunData evaluate() {
        return evaluateApplication(benchmark);
    }

    /**
     * @return the cost associated with the benchmark: the average time it
     *         takes to run, weighted by the number of duplicate traces
     *         this benchmark represents
     */
    public Double getCost() {
        double duplicates = metadata.getDuplicateCount();
        return duplicates * evaluate().getAverage();
    }
}
/**
 * Gets the collection type which got the shortest total running time among
 * a group of benchmarks that all share the same allocation site.
 * NOTE(review): this span interleaves pre-change and post-change lines of a
 * diff; lines marked "pre-change" below are stale remnants superseded by the
 * line(s) that follow them, and the span will not compile as-is.
 * @param benchmarks : List of benchmarks with the SAME location
 * @return the type that got the lowest sum of running times
 */
private String getCollectionWithShortestRun(List<BCBenchmarkPackage<?>> benchmarks) { // pre-change signature (superseded below)
// First you run them all
List<TrainingSetValue> results = runBenchmarks(benchmarks); // pre-change (superseded)
private String getCollectionWithShortestRun(List<TraceBenchmark> benchmarks) {
// Then you group them by the type that was used
Map<String, List<TrainingSetValue>> groupedByCollection = // pre-change (superseded)
results.stream().collect(Collectors.groupingBy(it -> it.getDataStructure())); // pre-change (superseded)
Map<String, List<TraceBenchmark>> groupedByCollection =
benchmarks.stream().collect(Collectors.groupingBy(it -> it.metadata.getTarget_type()));
// Then, you compute for each type the SUM of the weighted costs
// We store them in a treemap for automatic sorting (smallest sum first)
// NOTE(review): two types with exactly equal sums collide on the Double key
// and one is silently dropped — harmless for finding *a* minimum, but confirm.
TreeMap<Double, String> sumRunningTimePerType = new TreeMap<>();
for (Map.Entry<String, List<TrainingSetValue>> e : groupedByCollection.entrySet()) { // pre-change loop header (superseded)
double sum = e.getValue().stream().map(it -> it.getAverage()).mapToDouble(it -> it).sum(); // pre-change (superseded)
for (Map.Entry<String, List<TraceBenchmark>> e : groupedByCollection.entrySet()) {
double sum = e.getValue().stream().mapToDouble(TraceBenchmark::getCost).sum();
sumRunningTimePerType.put(sum, e.getKey());
// NOTE(review): clearing the grouped list is a surprising side effect on
// data the caller may still hold — confirm no caller reuses it afterwards.
e.getValue().clear(); // We have all the data we need
}
......
......@@ -21,7 +21,8 @@ class NaiveClassifier(reader : Reader, writer : Writer) {
// Reads traces from the class's reader, deduplicates them, generates
// benchmarks and collects collection-type suggestions.
// NOTE(review): this span interleaves pre-change and post-change lines of a
// diff and is truncated below; it will not compile as-is.
fun classifyFromReader(tsv : Boolean) {
val loader = TraceLoader()
System.out.println("Generating benchmarks...")
// NOTE(review): the next line is the pre-change remnant, superseded by the
// deduplicating call that follows it.
val data = loader.benchmarksFromTraces(reader, tsv = tsv)
val data = loader.benchmarksFromTraces(
loader.removeDuplicates(loader.readCsvData(reader, tsv)))
System.out.println("Generating benchmarks: done.")
val results = TraceBenchmarkRunner().getSuggestions(data!!).toList()
......
......@@ -16,11 +16,32 @@ import java.util.stream.Stream
class TraceLoader {
/**
 * A class that represents an individual trace.
 * - location: The allocation site of the traced collection
 * - object_id: The ID of the object that was used
 * - target_type: The target collection type
 * - methods: The sequence of methods called in the trace
 */
data class TraceData(val location : String,
val object_id : Int,
val target_type : String,
val methods : List<String>)
/**
 * A class that represents many similar traces, compressed into one record.
 * - location : The allocation site
 * - target_type: The target collection type
 * - methods : The methods in the trace
 * - duplicateCount : How many traces with this same (location, target_type,
 *   methods) combination were found in our benchmarks
 */
data class AggregatedTraceData(
val location : String,
val target_type: String,
val methods : List<String>,
val duplicateCount : Int
)
// Parses raw trace data into a list of TraceData records; `tsv` selects
// tab-separated parsing, otherwise the default CSV format is used.
// NOTE(review): this span is truncated by a diff hunk marker and interleaves
// pre-change and post-change lines; it will not compile as-is.
fun readCsvData(reader : Reader, tsv : Boolean): List<TraceData> {
val parser = if (tsv) CSVParser(reader, CSVFormat.TDF.withHeader())
else CSVParser(reader, CSVFormat.DEFAULT.withHeader())
......@@ -36,13 +57,30 @@ class TraceLoader {
val methods = e.value.sortedBy { it["step"] }
.map { it["method"]!!}
TraceData(location, object_id, target_type, methods)
// NOTE(review): the next three lines are the pre-change `distinctBy`
// remnant; they were replaced by the plain `.toList()` line below, since
// deduplication now happens in removeDuplicates().
}.toList().map { it.second }.distinctBy {
Triple(it.location, it.target_type, it.methods)
}
}.toList().map { it.second }
return traces
}
/**
 * Compresses a list of traces by merging duplicates.
 *
 * Two traces are duplicates when they share location, target type and
 * method sequence (object_id is ignored). Each group of duplicates is
 * collapsed into a single [AggregatedTraceData] carrying the group size,
 * and the first-seen order of the distinct traces is preserved.
 *
 * @param traceData the raw traces, possibly containing duplicates
 * @return one aggregated entry per distinct trace, with its duplicate count
 */
fun removeDuplicates(traceData : List<TraceData>): List<AggregatedTraceData> {
    // groupBy keeps the first-occurrence order of keys (LinkedHashMap), so
    // this single pass yields the same order as the original two-pass
    // (count map + manual loop) implementation did.
    return traceData
        .groupBy { Triple(it.location, it.target_type, it.methods) }
        .map { (key, group) ->
            AggregatedTraceData(key.first, key.second, key.third, group.size)
        }
}
fun readCsvWithMetadata(reader: Reader, tsv: Boolean) : List<Pair<TraceData, BCBenchmarkPackage<out Any>>> {
return readCsvData(reader, tsv).map {
val traceKey = it.location + ":" + it.object_id
......@@ -94,15 +132,16 @@ class TraceLoader {
return apps.stream()
}
/**
 * Expands one aggregated trace into the list of candidate benchmarks, one
 * per possible implementation of the trace's collection interface.
 *
 * @param td the aggregated trace to expand
 * @return the generated benchmark applications
 * @throws RuntimeException if the target type does not resolve to
 *         java.util.List, java.util.Map or java.util.Set
 */
fun expandBenchmark(td : AggregatedTraceData): List<BCBenchmarkPackage<*>>? {
    // AggregatedTraceData carries no object_id, so we derive an ID we know
    // will be unique for each AggregatedTraceData from the location plus the
    // hash of its method sequence.
    val traceInfo = td.location + ":" + td.methods.hashCode()
    // TODO: There's probably a better way than re-parsing data.
    val interfaceName = getClassForName(td.target_type)?.let { getCandidates(it) }
        .let { it?.first?.first() }
    return when (interfaceName) {
        "java.util.List" -> ListSyntheticBenchmarkGenerator().createApplications(traceInfo, td.methods).toList()
        "java.util.Map" -> MapSyntheticBenchmarkGenerator().createApplications(traceInfo, td.methods).toList()
        "java.util.Set" -> SetSyntheticBenchmarkGenerator().createApplications(traceInfo, td.methods).toList()
        else -> throw RuntimeException("Invalid interface ID: $interfaceName")
    }
}
......@@ -116,10 +155,10 @@ class TraceLoader {
* @param traces the deduplicated, aggregated traces to expand
* @return a stream of benchmarks where each trace has several benchmarks (one for each possible replacement)
*/
fun benchmarksFromTraces(traces : List<AggregatedTraceData>) : Stream<Pair<AggregatedTraceData, BCBenchmarkPackage<*>>>? {
    return traces.stream().flatMap { t ->
        // NOTE(review): `!!` throws if expandBenchmark returns null for a
        // trace — confirm that is the intended failure mode here.
        val benchmarkVariants = expandBenchmark(t)?.stream()!!
        benchmarkVariants.map { Pair(t, it) }
    }
}
......
......@@ -64,8 +64,9 @@ public class SyntheticBenchmarkRunnerTest {
"5,3,loc3,java.util.HashSet,boolean add(E),1\n" +
"6,3,loc3,java.util.HashSet,boolean add(E),2\n";
List<TraceLoader.TraceData> traceData = tl.readCsvData(new StringReader(text), false);
Stream<BCBenchmarkPackage<?>> benchmarks = tl.benchmarksFromTraces(
new StringReader(text), false).map(it -> it.component2());
tl.removeDuplicates(traceData)).map(it -> it.component2());
SyntheticBenchmarkRunner runner = new SyntheticBenchmarkRunner();
List<TrainingSetValue> results = runner.runBenchmarks(benchmarks.collect(Collectors.toList()));
......@@ -104,8 +105,10 @@ public class SyntheticBenchmarkRunnerTest {
"6,3,loc3,java.util.TreeSet,boolean add(E),2\n" +
"7,3,loc3,java.util.TreeSet,boolean add(E),3\n";
Stream<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>> benchmarks = tl.benchmarksFromTraces(
new StringReader(text), false);
Stream<Pair<TraceLoader.AggregatedTraceData, BCBenchmarkPackage<?>>> benchmarks = tl.benchmarksFromTraces(
tl.removeDuplicates(
tl.readCsvData(new StringReader(text), false))
);
TraceBenchmarkRunner runner = new TraceBenchmarkRunner(10, 10000);
// Benchmarks are too unstable to get a stable suggested value :(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment