Merge pull request #121 from VariantSync/demo
Adaptions for Demo
pmbittner authored Jan 28, 2024
2 parents c68e83f + 608741e commit 6edc5e1
Showing 18 changed files with 370 additions and 347 deletions.
17 changes: 17 additions & 0 deletions src/main/java/org/variantsync/diffdetective/AnalysisRunner.java
@@ -57,6 +57,23 @@ public record Options(
*/
boolean pullRepositoriesBeforeAnalysis
) {
/**
* Creates options with the given parameters and uses default
* values for all other parameters.
* @see Options#Options(Path, Path, Path, Function, Function, boolean, boolean)
* @see Options#DEFAULT(String[])
*/
public Options(Path repositoriesDirectory,
Path outputDirectory,
Path datasetsFile) {
this(
repositoriesDirectory, outputDirectory, datasetsFile,
Repository::getParseOptions,
Repository::getDiffFilter,
true,
false);
}

public static Options DEFAULT(final String[] args) {
final Path datasetsFile;
if (args.length < 1) {
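The new convenience constructor takes only the three paths and falls back to the defaults for the remaining parameters (parse options, diff filter, and the two boolean flags). A minimal, hypothetical call site (the concrete paths are made up, and the nesting of Options inside AnalysisRunner is an assumption):

    // Hypothetical usage of the new convenience constructor; all other options keep their defaults.
    var options = new AnalysisRunner.Options(
            Path.of("repositories"),   // repositoriesDirectory
            Path.of("results"),        // outputDirectory
            Path.of("datasets.md"));   // datasetsFile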
128 changes: 75 additions & 53 deletions src/main/java/org/variantsync/diffdetective/analysis/Analysis.java
@@ -20,13 +20,9 @@
import org.variantsync.diffdetective.diff.git.GitDiffer;
import org.variantsync.diffdetective.diff.git.PatchDiff;
import org.variantsync.diffdetective.diff.result.CommitDiffResult;
import org.variantsync.diffdetective.diff.result.DiffError;
import org.variantsync.diffdetective.metadata.Metadata;
import org.variantsync.diffdetective.parallel.ScheduledTasksIterator;
import org.variantsync.diffdetective.util.Assert;
import org.variantsync.diffdetective.util.Clock;
import org.variantsync.diffdetective.util.Diagnostics;
import org.variantsync.diffdetective.util.InvocationCounter;
import org.variantsync.diffdetective.util.*;
import org.variantsync.diffdetective.variation.DiffLinesLabel;
import org.variantsync.diffdetective.variation.diff.Time;
import org.variantsync.diffdetective.variation.diff.VariationDiff;
@@ -42,8 +38,8 @@
* provides access to the current state of the analysis in one thread. Depending on the current
* {@link Hooks phase} only a subset of the state accessible via getters may be valid.
*
* @see forEachRepository
* @see forEachCommit
* @see #forEachRepository
* @see #forEachCommit
* @author Paul Bittner, Benjamin Moosherr
*/
public class Analysis {
@@ -57,7 +53,7 @@ public class Analysis {
public static final String TOTAL_RESULTS_FILE_NAME = "totalresult" + EXTENSION;
/**
* Default value for <code>commitsToProcessPerThread</code>
* @see forEachCommit(Supplier, int, int)
* @see #forEachCommit(Supplier, int, int)
*/
public static final int COMMITS_TO_PROCESS_PER_THREAD_DEFAULT = 1000;

@@ -73,6 +69,38 @@ public class Analysis {
protected final Path outputDir;
protected Path outputFile;
protected final AnalysisResult result;

/**
* The total number of commits in the observed history of the given repository.
*/
public final static class TotalNumberOfCommitsResult extends SimpleMetadata<Integer, TotalNumberOfCommitsResult> {
public final static ResultKey<TotalNumberOfCommitsResult> KEY = new ResultKey<>(TotalNumberOfCommitsResult.class.getName());

public TotalNumberOfCommitsResult() {
super(
0,
MetadataKeys.TOTAL_COMMITS,
Integer::sum,
Integer::parseInt
);
}
}

/**
* The effective runtime in seconds that we have when using multithreading.
*/
public final static class RuntimeWithMultithreadingResult extends SimpleMetadata<Double, RuntimeWithMultithreadingResult> {
public final static ResultKey<RuntimeWithMultithreadingResult> KEY = new ResultKey<>(RuntimeWithMultithreadingResult.class.getName());

public RuntimeWithMultithreadingResult() {
super(
0.0,
MetadataKeys.RUNTIME_WITH_MULTITHREADING,
Double::sum,
Double::parseDouble
);
}
}
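Both nested classes follow the same SimpleMetadata pattern: a neutral initial value, an export key, a merge function for combining per-thread results, and a parser for reading exported values back in. A hedged sketch of how they are read and updated, mirroring the calls that appear later in this diff (the analysis variable and runtimeInSeconds are assumptions):

    // Increment the commit counter of the current batch.
    analysis.getResult().get(Analysis.TotalNumberOfCommitsResult.KEY).value++;
    // Record the measured wall-clock runtime of the multithreaded run.
    analysis.getResult().get(Analysis.RuntimeWithMultithreadingResult.KEY).value = runtimeInSeconds;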

/**
* The repository this analysis is run on.
@@ -133,23 +161,23 @@ public Path getOutputFile() {

/**
* The results of the analysis. This may be modified by any hook and should be initialized in
* {@link Hooks#initializeResults} (e.g. by using {@link append}).
* {@link Hooks#initializeResults} (e.g. by using {@link #append}).
* Always valid.
*/
public AnalysisResult getResult() {
return result;
}

/**
* Convenience getter for {@link AnalysisResult#get} on {@link getResult}.
* Convenience getter for {@link AnalysisResult#get} on {@link #getResult}.
* Always valid.
*/
public <T extends Metadata<T>> T get(ResultKey<T> resultKey) {
return result.get(resultKey);
}

/**
* Convenience function for {@link AnalysisResult#append} on {@link getResult}.
* Convenience function for {@link AnalysisResult#append} on {@link #getResult}.
* Always valid.
*/
public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
@@ -179,13 +207,13 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
* end hooks).
*
* <p>An analysis implementing {@code Hooks} can perform various actions during each hook. This
* includes the {@link append creation} and {@link get modification} of {@link getResult
* includes the {@link #append creation} and {@link #get modification} of {@link #getResult
* analysis results}, modifying their internal state, performing IO operations and throwing
* exceptions. In contrast, the only analysis state hooks are allowed to modify is the {@link
* getResult result} of an {@link Analysis}. All other state (e.g. {@link getCurrentCommit})
* #getResult result} of an {@link Analysis}. All other state (e.g. {@link #getCurrentCommit})
* must not be modified. Care must be taken to avoid the reliance of the internal state on a
* specific commit batch being processed as only the {@link getResult results} of each commit
* batch are merged and returned by {@link forEachCommit}.
* specific commit batch being processed as only the {@link #getResult results} of each commit
* batch are merged and returned by {@link #forEachCommit}.
*
* <p>Hooks that return a {@code boolean} are called filter hooks and can, in addition to the
* above, skip any further processing in the current phase (including following inner phases) by
@@ -198,8 +226,8 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
*/
public interface Hooks {
/**
* Initialization hook for {@link getResult}. All result types should be appended with a
* neutral value using {@link append}. No other side effects should be performed during this
* Initialization hook for {@link #getResult}. All result types should be appended with a
* neutral value using {@link #append}. No other side effects should be performed during this
* method, as it might be called an arbitrary number of times.
*/
default void initializeResults(Analysis analysis) {}
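For orientation, a minimal, hypothetical hook following this contract could look as follows. The class names and the export key string are made up, the key parameter of SimpleMetadata is assumed to accept a plain string, beginPatch is assumed to be the filter hook shown further below, and Hooks is assumed to be nested in Analysis; only calls that appear elsewhere in this diff (append, get, the public value field) are relied upon:

    // Hypothetical result type following the SimpleMetadata pattern introduced above.
    final class PatchCountResult extends SimpleMetadata<Integer, PatchCountResult> {
        static final ResultKey<PatchCountResult> KEY = new ResultKey<>(PatchCountResult.class.getName());
        PatchCountResult() {
            // neutral value, export key (assumed to be a plain string), merge function, parser
            super(0, "processed patches", Integer::sum, Integer::parseInt);
        }
    }

    // Hypothetical hook that registers the result and counts every patch it sees.
    final class PatchCountHook implements Analysis.Hooks {
        @Override
        public void initializeResults(Analysis analysis) {
            analysis.append(PatchCountResult.KEY, new PatchCountResult());
        }

        @Override
        public boolean beginPatch(Analysis analysis) {
            analysis.get(PatchCountResult.KEY).value++;
            return true; // keep processing this patch
        }
    }

Such a hook would be handed to the Analysis constructor together with the other hooks; its per-batch counts are then merged via Integer::sum when the thread-local results are combined.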
@@ -208,7 +236,7 @@ default void beginBatch(Analysis analysis) throws Exception {}
/**
* Signals a parsing failure of all patches in the current commit.
* Called at most once during the commit phase. If this hook is called, {@link
* onParsedCommit} and the following patch phase invocations are skipped.
* #onParsedCommit} and the following patch phase invocations are skipped.
*/
default void onFailedCommit(Analysis analysis) throws Exception {}
/**
@@ -235,9 +263,9 @@ default void endBatch(Analysis analysis) throws Exception {}
/**
* Runs {@code analyzeRepository} on each repository, skipping repositories where an analysis
* was already run. This skipping mechanism doesn't distinguish between different analyses as it
* only checks for the existence of {@link TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun
* only checks for the existence of {@link #TOTAL_RESULTS_FILE_NAME}. Delete this file to rerun
* the analysis.
*
* <p>
* For each repository a directory in {@code outputDir} is passed to {@code analyzeRepository}
* where the results of the given repository should be written.
*
@@ -289,17 +317,17 @@ public static AnalysisResult forSingleCommit(final String commitHash, final Anal
AnalysisResult result = null;
try {
final RevCommit commit = analysis.differ.getCommit(commitHash);
result = analysis.processCommits(List.of(commit), analysis.differ);
analysis.processCommitBatch(List.of(commit));
result = analysis.getResult();
} catch (Exception e) {
Logger.error("Failed to analyze {}. Exiting.", commitHash);
System.exit(1);
}

final double runtime = clock.getPassedSeconds();
Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime));

result.runtimeWithMultithreadingInSeconds = -1;
result.totalCommits = 1;

result.get(TotalNumberOfCommitsResult.KEY).value++;

exportMetadata(analysis.getOutputDir(), result);
return result;
@@ -339,8 +367,8 @@ public boolean beginPatch(Analysis analysis) {
}

/**
* Same as {@link forEachCommit(Supplier<Analysis>, int, int)}.
* Defaults to {@link COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of
* Same as {@link #forEachCommit(Supplier, int, int)}.
* Defaults to {@link #COMMITS_TO_PROCESS_PER_THREAD_DEFAULT} and a machine dependent number of
* {@link Diagnostics#getNumberOfAvailableProcessors}.
*/
public static AnalysisResult forEachCommit(Supplier<Analysis> analysis) {
@@ -370,6 +398,7 @@ public static AnalysisResult forEachCommit(
) {
var analysis = analysisFactory.get();
analysis.differ = new GitDiffer(analysis.getRepository());
analysis.result.append(RuntimeWithMultithreadingResult.KEY, new RuntimeWithMultithreadingResult());

final Clock clock = new Clock();

@@ -385,7 +414,12 @@ public static AnalysisResult forEachCommit(
),
/// 2.) Create a MiningTask for the list of commits. This task will then be processed by one
/// particular thread.
commitList -> () -> analysisFactory.get().processCommits(commitList, analysis.differ)
commitList -> () -> {
Analysis thisThreadsAnalysis = analysisFactory.get();
thisThreadsAnalysis.differ = analysis.differ;
thisThreadsAnalysis.processCommitBatch(commitList);
return thisThreadsAnalysis.getResult();
}
);
Logger.info("<<< done in {}", clock.printPassedSeconds());

@@ -411,8 +445,8 @@ public static AnalysisResult forEachCommit(
final double runtime = clock.getPassedSeconds();
Logger.info("<<< done in {}", Clock.printPassedSeconds(runtime));

analysis.getResult().runtimeWithMultithreadingInSeconds = runtime;
analysis.getResult().totalCommits = numberOfTotalCommits.invocationCount().get();
analysis.getResult().get(RuntimeWithMultithreadingResult.KEY).value = runtime;
// analysis.getResult().get(TotalNumberOfCommitsResult.KEY).value = numberOfTotalCommits.invocationCount().get();

exportMetadata(analysis.getOutputDir(), analysis.getResult());
return analysis.getResult();
Expand All @@ -435,39 +469,22 @@ public Analysis(
this.hooks = hooks;
this.repository = repository;
this.outputDir = outputDir;
this.result = new AnalysisResult();

this.result.repoName = repository.getRepositoryName();

this.result = new AnalysisResult(repository.getRepositoryName());
this.result.taskName = taskName;
this.result.append(TotalNumberOfCommitsResult.KEY, new TotalNumberOfCommitsResult());

for (var hook : hooks) {
hook.initializeResults(this);
}
}

/**
* Entry point into a sequential analysis of {@code commits} as one batch.
* Same as {@link processCommits(List<RevCommit>, GitDiffer)} with a default {@link GitDiffer}.
* Sequential analysis of all {@code commits} as one batch.
*
* @param commits the commit batch to be processed
* @see forEachCommit
* @see #forEachCommit
*/
public AnalysisResult processCommits(List<RevCommit> commits) throws Exception {
return processCommits(commits, new GitDiffer(getRepository()));
}

/**
* Entry point into a sequential analysis of {@code commits} as one batch.
*
* @param commits the commit batch to be processed
* @param differ the differ to use
* @see forEachCommit
*/
public AnalysisResult processCommits(List<RevCommit> commits, GitDiffer differ) throws Exception {
this.differ = differ;
processCommitBatch(commits);
return getResult();
}

protected void processCommitBatch(List<RevCommit> commits) throws Exception {
outputFile = outputDir.resolve(commits.get(0).getId().getName());

@@ -495,6 +512,9 @@ protected void processCommitBatch(List<RevCommit> commits) throws Exception {
}
} finally {
runReverseHook(batchHook, Hooks::endBatch);

// export the thread's result
getResult().exportTo(FileUtils.addExtension(outputFile, Analysis.EXTENSION));
}
}

@@ -540,6 +560,8 @@ protected void processCommit() throws Exception {
runReverseHook(patchHook, Hooks::endPatch);
}
}

getResult().get(TotalNumberOfCommitsResult.KEY).value++;
}

protected void processPatch() throws Exception {
@@ -590,7 +612,7 @@ protected <Hook> void runReverseHook(ListIterator<Hook> hook, FailableBiConsumer

/**
* Exports the given metadata object to a file named according
* {@link TOTAL_RESULTS_FILE_NAME} in the given directory.
* {@link #TOTAL_RESULTS_FILE_NAME} in the given directory.
* @param outputDir The directory into which the metadata object file should be written.
* @param metadata The metadata to serialize
* @param <T> Type of the metadata.
AnalysisResult.java
@@ -38,14 +38,6 @@ public final class AnalysisResult implements Metadata<AnalysisResult> {
*/
public String repoName = NO_REPO;
public String taskName;
/**
* The effective runtime in seconds that we have when using multithreading.
*/
public double runtimeWithMultithreadingInSeconds = 0;
/**
* The total number of commits in the observed history of the given repository.
*/
public int totalCommits = 0;
public final MergeMap<DiffError, Integer> diffErrors = new MergeMap<>(new HashMap<>(), Integer::sum);

private final Map<String, Metadata<?>> results = new HashMap<>();
@@ -106,8 +98,6 @@ public <T extends Metadata<T>> void append(ResultKey<T> resultKey, T value) {
return ar + "; " + br;
});
a.taskName = Metadata.mergeEqual(a.taskName, b.taskName);
a.runtimeWithMultithreadingInSeconds += b.runtimeWithMultithreadingInSeconds;
a.totalCommits += b.totalCommits;
a.diffErrors.append(b.diffErrors);
b.results.forEach((key, value) -> a.unsafeAppend(key, value));
};
@@ -147,12 +137,11 @@ public LinkedHashMap<String, Object> snapshot() {
LinkedHashMap<String, Object> snap = new LinkedHashMap<>();
snap.put(MetadataKeys.TASKNAME, taskName);
snap.put(MetadataKeys.REPONAME, repoName);
snap.put(MetadataKeys.RUNTIME_WITH_MULTITHREADING, runtimeWithMultithreadingInSeconds);
snap.put(MetadataKeys.TOTAL_COMMITS, totalCommits);

var statistics = get(StatisticsAnalysis.RESULT);
var globals = get(Analysis.TotalNumberOfCommitsResult.KEY);
if (statistics != null) {
snap.put(MetadataKeys.FILTERED_COMMITS, totalCommits - statistics.processedCommits - statistics.emptyCommits - statistics.failedCommits);
snap.put(MetadataKeys.FILTERED_COMMITS, globals.value - statistics.processedCommits - statistics.emptyCommits - statistics.failedCommits);
}
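For illustration with hypothetical numbers: if the observed history contains 1000 commits in total and the statistics report 900 processed, 60 empty, and 30 failed commits, the exported FILTERED_COMMITS value is 1000 - 900 - 60 - 30 = 10.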

for (var result : results.values()) {
Expand All @@ -168,14 +157,6 @@ public void setFromSnapshot(LinkedHashMap<String, String> snap) {
repoName = snap.get(MetadataKeys.REPONAME);
taskName = snap.get(MetadataKeys.TASKNAME);

String runtime = snap.get(MetadataKeys.RUNTIME_WITH_MULTITHREADING);
if (runtime.endsWith("s")) {
runtime = runtime.substring(0, runtime.length() - 1);
}
runtimeWithMultithreadingInSeconds = Double.parseDouble(runtime);

totalCommits = Integer.parseInt(snap.get(MetadataKeys.TOTAL_COMMITS));

for (var entry : snap.entrySet()) {
String key = entry.getKey();
if (entry.getKey().startsWith(ERROR_BEGIN)) {