Commit 7cf74770 authored by Noric Couderc's avatar Noric Couderc

Implemented naive classification of collection for traces

Each trace is loaded and grouped, benchmarks are created to
try each collection for each benchmark, and then suggestions
are created
parent 1a399edf
......@@ -17,8 +17,8 @@ public class SyntheticBenchmarkRunner {
protected Blackhole blackhole = new Blackhole("Today's password is swordfish. I understand instantiating Blackholes directly is dangerous.");
private int numberSamples;
private int numberWarmups;
protected int numberSamples;
protected int numberWarmups;
public SyntheticBenchmarkRunner() {
this.numberSamples = 20;
......
package se.lth.cs;
import kotlin.Pair;
import se.lth.cs.bcgen.BCBenchmark;
import se.lth.cs.bcgen.BCBenchmarkPackage;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * A runner for benchmarks that are loaded from traces.
 * It runs the candidate benchmarks of every traced location and turns the
 * measurements into one collection suggestion per location.
 */
public class TraceBenchmarkRunner extends SyntheticBenchmarkRunner {
    public TraceBenchmarkRunner() {
        super();
    }

    public TraceBenchmarkRunner(int numberWarmups, int numberSamples) {
        super(numberWarmups, numberSamples);
    }

    /**
     * Groups benchmarks by location, runs them, gets the collection
     * that minimizes the sum of running times for the location
     * and returns one suggestion per location.
     *
     * @param l pairs of (trace data, benchmark generated for that trace)
     * @return the suggestions, ordered by location (natural String order);
     *         empty when {@code l} is empty
     * @throws IllegalStateException if running the benchmarks of a location
     *         produces no results at all
     */
    public List<TrainingSetValue.CollectionSuggestion>
    getSuggestions(List<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>> l) {
        // Group straight into a TreeMap so that locations are visited in sorted order.
        Map<String, List<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>>> groups =
                l.stream().collect(Collectors.groupingBy(
                        it -> it.component1().getLocation(),
                        TreeMap::new,
                        Collectors.toList()));

        List<TrainingSetValue.CollectionSuggestion> suggestions = new ArrayList<>();
        for (Map.Entry<String, List<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>>> g : groups.entrySet()) {
            // We want the location
            String location = g.getKey();
            // We want the current type at that location; groupingBy never yields
            // an empty group, so get(0) is safe.
            String currentType = g.getValue().get(0).component1().getTarget_type();
            // We want the type that minimizes the sum of runtimes for all benchmarks with that location
            Stream<BCBenchmarkPackage<?>> benchmarks = g.getValue().stream().map(it -> it.component2());
            String bestType = getCollectionWithShortestRun(benchmarks.collect(Collectors.toList()));
            suggestions.add(
                    new TrainingSetValue.CollectionSuggestion(location, currentType, bestType));
        }
        return suggestions;
    }

    /**
     * Runs the given benchmarks and picks the collection type with the lowest
     * total running time.
     *
     * @param benchmarks list of benchmarks with the SAME location
     * @return the type that got the lowest sum of average running times; when
     *         several types have exactly the same sum, the one whose name
     *         comes first in String order is returned
     * @throws IllegalStateException if running the benchmarks produced no results
     */
    private String getCollectionWithShortestRun(List<BCBenchmarkPackage<?>> benchmarks) {
        // First you run them all
        List<TrainingSetValue> results = runBenchmarks(benchmarks);

        // Then, for each type that was used, you compute the SUM of the average runtimes
        Map<String, Double> totalPerType = results.stream().collect(
                Collectors.groupingBy(
                        it -> it.getDataStructure(),
                        Collectors.summingDouble(it -> it.getAverage())));

        // Finally you pick the type with the LOWEST total. The totals are compared
        // directly instead of being used as map keys, so two types with the same
        // total cannot overwrite each other.
        String bestType = null;
        double bestTotal = 0.0;
        for (Map.Entry<String, Double> e : totalPerType.entrySet()) {
            int cmp = (bestType == null) ? -1 : Double.compare(e.getValue(), bestTotal);
            if (cmp < 0 || (cmp == 0 && e.getKey().compareTo(bestType) < 0)) {
                bestType = e.getKey();
                bestTotal = e.getValue();
            }
        }
        if (bestType == null) {
            throw new IllegalStateException(
                    "No benchmark results available to pick a collection from");
        }
        return bestType;
    }
}
......@@ -2,6 +2,7 @@ package se.lth.cs;
import se.lth.cs.bcgen.*;
import java.util.List;
import java.util.Objects;
public class TrainingSetValue extends SyntheticBenchmarkRunData {
......@@ -15,4 +16,58 @@ public class TrainingSetValue extends SyntheticBenchmarkRunData {
public String getDataStructure() { return syntheticBenchmark.getDataStructureName(); }
public String getBestDataStructure() { return bestDataStructure; }
/**
 * A suggestion for one location: the collection type currently used there
 * and the collection type suggested instead.
 * Two suggestions are equal when all three fields are equal; {@code null}
 * fields are tolerated by {@code equals}, {@code hashCode} and {@code toString}.
 */
public static class CollectionSuggestion {
    String location;
    String currentType;
    String suggestion;

    /**
     * @param location    the location the suggestion applies to
     * @param currentType the collection type currently used at that location
     * @param suggestion  the collection type suggested for that location
     */
    public CollectionSuggestion(String location,
                                String currentType,
                                String suggestion) {
        this.location = location;
        this.currentType = currentType;
        this.suggestion = suggestion;
    }

    public String getLocation() {
        return location;
    }

    public String getCurrentType() {
        return currentType;
    }

    public String getSuggestion() {
        return suggestion;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        CollectionSuggestion that = (CollectionSuggestion) o;
        // Objects.equals keeps equals() consistent with hashCode() below,
        // which also accepts null fields.
        return Objects.equals(location, that.location) &&
                Objects.equals(currentType, that.currentType) &&
                Objects.equals(suggestion, that.suggestion);
    }

    @Override
    public int hashCode() {
        return Objects.hash(location, currentType, suggestion);
    }

    @Override
    public String toString() {
        return "CollectionSuggestion{" +
                "location='" + location + '\'' +
                ", currentType='" + currentType + '\'' +
                ", suggestion='" + suggestion + '\'' +
                '}';
    }
}
/**
 * Placeholder: no suggestion is computed here yet.
 *
 * @return always {@code null}
 */
public CollectionSuggestion getSuggestion() {
return null;
}
}
package se.lth.cs
import org.apache.commons.csv.CSVFormat
import org.apache.commons.csv.CSVPrinter
import java.io.Reader
import java.io.Writer
import java.util.stream.Collectors
/**
 * Naive classifier for suggesting collections.
 * Takes a trace from a reader, runs the benchmarks, and then prints the suggestions
 * to the writer as CSV records (location, current_type, suggestion).
 *
 * @param reader source of the trace data
 * @param writer destination of the CSV output
 */
class NaiveClassifier(private val reader: Reader, writer: Writer) {
    // NOTE(review): withFirstRecordAsHeader() is primarily a parsing option; the header
    // row is written explicitly by printHeader() — confirm it is needed here.
    private val printer = CSVPrinter(writer,
            CSVFormat.DEFAULT.withFirstRecordAsHeader().withRecordSeparator('\n'))

    /**
     * Loads the traces, benchmarks every candidate collection and prints one
     * record per suggestion, preceded by the header row.
     * The printer is closed when this method returns, also when loading,
     * benchmarking or printing fails, so this method can only be used once
     * per instance.
     *
     * @param tsv passed on to [TraceLoader.benchmarksFromTraces] to select the input format
     */
    fun classifyFromReader(tsv : Boolean) {
        // `use` closes the printer even if an exception is thrown part-way through.
        printer.use {
            val loader = TraceLoader()
            val data = loader.benchmarksFromTraces(reader, tsv = tsv)
            val results = TraceBenchmarkRunner().getSuggestions(
                    data!!.collect(Collectors.toList()))
            printHeader()
            for (suggestion in results) {
                printer.printRecord(suggestion.location, suggestion.currentType, suggestion.suggestion)
            }
        }
    }

    /** Prints the header row of the output. */
    fun printHeader() {
        printer.printRecord("location", "current_type", "suggestion")
    }
}
\ No newline at end of file
......@@ -19,6 +19,44 @@ import java.util.stream.Collectors
import java.util.stream.Stream
class TraceLoader {
/**
 * One trace as built by readCsvData: the rows that share a location and an id.
 *
 * @property location value of the "location" column
 * @property object_id value of the "id" column, parsed as an integer
 * @property target_type value of the "target_type" column of the first row of the trace
 * @property methods values of the "method" column, ordered by the "step" column
 */
data class TraceData(val location : String,
val object_id : Int,
val target_type : String,
val methods : List<String>)
/**
 * Reads trace rows from [reader] and builds one [TraceData] per
 * (location, id) pair, in the order in which the pairs first appear.
 *
 * The input needs a header row with at least the columns "location", "id",
 * "target_type", "method" and "step".
 *
 * @param reader source of the rows
 * @param tsv true to parse tab-delimited input (CSVFormat.TDF), false for
 *        comma-separated input (CSVFormat.DEFAULT)
 * @return the traces, each with its methods ordered by numeric step
 * @throws NumberFormatException if an "id" or "step" value is not an integer
 */
fun readCsvData(reader : Reader, tsv : Boolean): List<TraceData> {
    val parser = if (tsv) CSVParser(reader, CSVFormat.TDF.withHeader())
                 else CSVParser(reader, CSVFormat.DEFAULT.withHeader())

    // We group rows by: location (in the code) and object id.
    val grouped =
            parser.groupBy { r -> ImmutablePair(r["location"], r["id"]) }

    return grouped.map { (key, rows) ->
        TraceData(
                location = key.left!!,
                object_id = Integer.parseInt(key.right),
                target_type = rows.first()["target_type"]!!,
                // Steps are compared as numbers: comparing the raw strings
                // would put step "10" before step "2".
                methods = rows.sortedBy { it["step"].trim().toLong() }
                        .map { it["method"]!! })
    }
}
/**
 * Reads the traces from [reader] (see readCsvData) and generates one synthetic
 * benchmark per trace.
 *
 * The benchmark is keyed by "<location>:<object_id>" and is generated from the
 * candidates looked up for the trace's target type and from the trace's methods.
 *
 * @param reader source of the trace rows
 * @param tsv forwarded to readCsvData to select the input format
 * @return each trace paired with the benchmark generated for it
 * @throws NullPointerException if no candidates are available for a trace's
 *         target type, or the candidates carry no class
 */
fun readCsvWithMetadata(reader: Reader, tsv: Boolean) : List<Pair<TraceData, BCBenchmarkPackage<out Any>>> {
    val traces = readCsvData(reader, tsv)
    return traces.map { trace ->
        // Null when the target type cannot be resolved to a class
        val candidates = getClassForName(trace.target_type)?.let { cls -> getCandidates(cls) }
        val benchmark = generateSyntheticBenchmark(
                trace.location + ":" + trace.object_id,
                candidates!!.first,
                trace.methods,
                candidates!!.second!!
        )
        Pair(trace, benchmark)
    }
}
@Throws(ClassNotFoundException::class, IllegalAccessException::class, InstantiationException::class)
fun readCsv(reader: Reader, tsv: Boolean): Stream<BCBenchmarkPackage<*>> {
val parser = if (tsv) CSVParser(reader, CSVFormat.TDF.withHeader())
......@@ -48,7 +86,7 @@ class TraceLoader {
val app = generateSyntheticBenchmark(
traceKey,
interfaceCandidates,
kvp.value,
kvp.value.map { it["method"] },
initClass!!)
apps.add(app)
......@@ -78,9 +116,10 @@ class TraceLoader {
* @param tsv: If the data is in CSV format
* @return a stream of benchmarks were each trace has several benchmarks (one for each possible replacement)
*/
fun benchmarksFromTraces(r : Reader, tsv : Boolean) : Stream<BCBenchmarkPackage<*>> {
return readCsv(r, tsv).flatMap {
expandBenchmark(it)?.stream()
fun benchmarksFromTraces(r : Reader, tsv : Boolean) : Stream<Pair<TraceData, BCBenchmarkPackage<*>>>? {
    val tracesWithBenchmarks = readCsvWithMetadata(r, tsv)
    return tracesWithBenchmarks.stream().flatMap { entry ->
        // Every benchmark produced by expandBenchmark stays attached to the trace it came from
        val trace = entry.first
        val variants = expandBenchmark(entry.second)?.stream()!!
        variants.map { variant -> Pair(trace, variant) }
    }
}
......@@ -111,14 +150,14 @@ class TraceLoader {
private fun generateSyntheticBenchmark(
traceKey : String,
interfaceCandidates: List<String>,
rows : List<CSVRecord>,
rows : List<String>,
initClass: Class<*>): BCBenchmarkPackage<out Any> {
val interfaceName = interfaceCandidates
.first()
.removePrefix("java.util.")
val methods = rows.stream()
.map { r -> r["method"].replace("\"", "") }
.map { r -> r.replace("\"", "") }
//.map { r -> matchFunction(r) }
.filter { m -> m != null }
.collect(Collectors.toList())
......
import kotlin.Pair;
import org.junit.Assert;
import org.junit.Test;
import se.lth.cs.SyntheticBenchmarkGeneration.ListSyntheticBenchmarkGenerator;
import se.lth.cs.SyntheticBenchmarkRunner;
import se.lth.cs.TraceBenchmarkRunner;
import se.lth.cs.TraceLoader;
import se.lth.cs.TrainingSetValue;
import se.lth.cs.bcgen.BCBenchmarkPackage;
......@@ -63,7 +65,7 @@ public class SyntheticBenchmarkRunnerTest {
"6,3,loc3,java.util.HashSet,boolean add(E),2\n";
Stream<BCBenchmarkPackage<?>> benchmarks = tl.benchmarksFromTraces(
new StringReader(text), false);
new StringReader(text), false).map(it -> it.component2());
SyntheticBenchmarkRunner runner = new SyntheticBenchmarkRunner();
List<TrainingSetValue> results = runner.runBenchmarks(benchmarks.collect(Collectors.toList()));
......@@ -87,4 +89,46 @@ public class SyntheticBenchmarkRunnerTest {
Assert.assertFalse(noSamePlan);
}
}
/**
 * Loads a small trace with three locations and checks the suggestion that
 * the naive benchmarking produces for each of them.
 */
@Test
public void testNaiveBenchmarking() {
    // Where we try to load a trace and get suggestions for our traces
    TraceLoader tl = new TraceLoader();
    String text = ",id,location,target_type,method,step\n" +
            // Three traces, each with its own id and location
            "1,1,loc1,java.util.LinkedList,boolean add(E),1\n" +
            "2,1,loc1,java.util.LinkedList,boolean add(E),2\n" +
            "3,2,loc2,java.util.ArrayList,boolean add(E),1\n" +
            "4,2,loc2,java.util.ArrayList,boolean add(E),2\n" +
            "5,3,loc3,java.util.TreeSet,boolean add(E),1\n" +
            "6,3,loc3,java.util.TreeSet,boolean add(E),2\n" +
            "7,3,loc3,java.util.TreeSet,boolean add(E),3\n" +
            "7,3,loc3,java.util.TreeSet,toArray(Object[]),4";

    Stream<Pair<TraceLoader.TraceData, BCBenchmarkPackage<?>>> benchmarks = tl.benchmarksFromTraces(
            new StringReader(text), false);

    TraceBenchmarkRunner runner = new TraceBenchmarkRunner(10, 10000);

    List<TrainingSetValue.CollectionSuggestion> suggestions =
            runner.getSuggestions(benchmarks.collect(Collectors.toList()));

    // One suggestion per location; checking the size first gives a readable
    // failure instead of an IndexOutOfBoundsException below.
    Assert.assertEquals(3, suggestions.size());

    Assert.assertEquals(
            new TrainingSetValue.CollectionSuggestion("loc1", "java.util.LinkedList",
                    "java.util.ArrayList"),
            suggestions.get(0));
    Assert.assertEquals(
            new TrainingSetValue.CollectionSuggestion("loc2", "java.util.ArrayList",
                    "java.util.ArrayList"),
            suggestions.get(1));

    // This benchmarking is a bit unstable...
    // Sometimes HashSet wins, sometimes LinkedHashSet wins...
    // TODO: Would be good to have non-stochastic results when testing!
    List<TrainingSetValue.CollectionSuggestion> acceptedForLoc3 = Arrays.asList(
            new TrainingSetValue.CollectionSuggestion("loc3", "java.util.TreeSet",
                    "java.util.HashSet"),
            new TrainingSetValue.CollectionSuggestion("loc3", "java.util.TreeSet",
                    "java.util.LinkedHashSet")
    );
    Assert.assertTrue("Unexpected suggestion for loc3: " + suggestions.get(2),
            acceptedForLoc3.contains(suggestions.get(2)));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment