Skip to content

Commit

Permalink
Fixes #62
Browse files Browse the repository at this point in the history
* Refactored artifact augmentation out

* Added the ability to inject github client in augmenters to have count of github stars

* The addgithubstars augmenter is now able to get stargazers on github, but how can we get them before a given date?

* Seems like I'm starting to get some good github stars history

* Seems like we *can* extract maven history with github stars

* Allow mappings to be added

* This file should NOT be committed

* Changed Date to LocalDate, because it's better?

* Seems like it's working quite well for extracting today's numbers

* Seems like I can also extract today's Python artifacts this way!

* Fixing some small glitches

* Sometimes, the repo doesn't exist

* One more edge case!

* Sometimes, there are no stargazers
  • Loading branch information
Riduidel authored Oct 8, 2024
1 parent 0eec9ec commit 15033b4
Show file tree
Hide file tree
Showing 41 changed files with 2,502 additions and 195,548 deletions.
1 change: 1 addition & 0 deletions .github/workflows/get_mvnrepository_infos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ jobs:
mv artifacts.json target/artifacts.json
env:
VERSION: "${{steps.version.outputs.release}}"
TOKEN: "${{ secrets.GITHUB_TOKEN }}"

- name: Deploy
uses: s0/git-publish-subdir-action@develop
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/get_npmjs_infos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ jobs:
mv artifacts.json target/artifacts.json
env:
VERSION: "${{steps.version.outputs.release}}"
TOKEN: "${{ secrets.GITHUB_TOKEN }}"

- name: Deploy
uses: s0/git-publish-subdir-action@develop
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/get_pypi_infos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ jobs:
mv artifacts.json target/artifacts.json
env:
VERSION: "${{steps.version.outputs.release}}"
TOKEN: "${{ secrets.GITHUB_TOKEN }}"

- name: Deploy
uses: s0/git-publish-subdir-action@develop
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,6 @@ fabric.properties
**/.gitignore
.idea/
.cache/

*/artifacts.json
*/schema.json
9 changes: 9 additions & 0 deletions model/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.datatype</groupId>
<artifactId>jackson-datatype-jdk8</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.module/jackson-module-jsonSchema-jakarta -->
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
Expand Down Expand Up @@ -49,6 +53,11 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.kohsuke</groupId>
<artifactId>github-api</artifactId>
<version>1.308</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-jdk14</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package org.ndx.aadarchi.technology.detector.augmenters;

import java.time.LocalDate;

import org.ndx.aadarchi.technology.detector.loader.ExtractionContext;
import org.ndx.aadarchi.technology.detector.model.ArtifactDetails;
import org.ndx.aadarchi.technology.detector.model.ArtifactDetailsBuilder;

/**
* An augmenter has the capability, given a context, to add informations to an
Expand All @@ -15,6 +18,13 @@ public interface Augmenter {
/**
 * Gives the rank used to position this augmenter relative to the others.
 * Implementations that do not override this method get the value 1000.
 * NOTE(review): presumably lower values run first; the sorting code is not
 * visible from here - confirm.
 * @return the rank of this augmenter, 1000 by default
 */
public default int order() {
return 1000;
}

/**
 * Augment the given artifact, without any reference date.
 * NOTE(review): a date-aware overload also exists; confirm whether this
 * date-less variant is still meant to be part of the interface.
 * @param context augmentation context
 * @param source artifact to augment
 * @return the augmented artifact
 */
public ArtifactDetails augment(ExtractionContext context, ArtifactDetails source);

/**
 * Augment the given artifact with the informations this augmenter provides,
 * as they were at the given date.
 * @param context augmentation context (contains utilities)
 * @param source artifact to augment
 * @param date date at which we want to have our artifact augmented
 * @return an updated artifact, typically created with {@link ArtifactDetailsBuilder#toBuilder(ArtifactDetails)}
 */
public ArtifactDetails augment(ExtractionContext context, ArtifactDetails source, LocalDate date);
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
package org.ndx.aadarchi.technology.detector.augmenters;

import java.time.LocalDate;
import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.ServiceLoader;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import org.ndx.aadarchi.technology.detector.loader.ExtractionContext;
import org.ndx.aadarchi.technology.detector.model.ArtifactDetails;

public class Augmenters {
private static ServiceLoader<Augmenter> augmentersLoader
= ServiceLoader.load(Augmenter.class);
Expand All @@ -20,4 +26,19 @@ public static List<Augmenter> getAugmenters() {
.collect(Collectors.toList()));
return loaded.get();
}

/**
 * Applies the whole augmenter chain to each of the given artifacts.
 * @param context augmentation context handed over to each augmenter
 * @param artifacts artifacts to augment
 * @param date date at which the artifacts should be augmented
 * @return the augmented artifacts, in the iteration order of the input
 */
public static <Context extends ExtractionContext> Collection<ArtifactDetails> augmentArtifacts(Context context,
        Collection<ArtifactDetails> artifacts, LocalDate date) {
    return artifacts.stream()
            .map(artifact -> augmentArtifact(context, artifact, date))
            .collect(Collectors.toList());
}

/**
 * Threads one artifact through every augmenter returned by
 * {@link #getAugmenters()}, feeding the output of each augmenter
 * into the next one.
 * @param context augmentation context handed over to each augmenter
 * @param artifactdetails artifact to augment
 * @param date date at which the artifact should be augmented
 * @return the artifact as returned by the last augmenter
 */
private static <Context extends ExtractionContext> ArtifactDetails augmentArtifact(Context context, ArtifactDetails artifactdetails, LocalDate date) {
    ArtifactDetails current = artifactdetails;
    for (Augmenter augmenter : getAugmenters()) {
        current = augmenter.augment(context, current, date);
    }
    return current;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.ndx.aadarchi.technology.detector.augmenters.github;

import java.time.LocalDate;
import java.util.Date;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Logger;

import org.ndx.aadarchi.technology.detector.augmenters.Augmenter;
import org.ndx.aadarchi.technology.detector.loader.ExtractionContext;
import org.ndx.aadarchi.technology.detector.model.ArtifactDetails;
import org.ndx.aadarchi.technology.detector.model.ArtifactDetailsBuilder;
import org.ndx.aadarchi.technology.detector.model.GitHubDetailsBuilder;

import io.github.emilyydev.asp.ProvidesService;

/**
 * Augmenter attaching a GitHub details object (repository path, no stargazers
 * count yet) to artifacts that don't have one.
 * <p>
 * The repository path is taken from the {@link GitHubProjects} mappings when
 * the artifact is known there, and otherwise from the artifact's
 * {@code "github.com"} url entry. Artifacts for which neither source is
 * available are returned unchanged, and a warning is logged once per
 * artifact identifier.
 */
@ProvidesService(Augmenter.class)
public class AddGitHub implements Augmenter {
    private static final Logger logger = Logger.getLogger(AddGitHub.class.getName());
    /** Identifiers for which the "no github repo" warning was already emitted. */
    private final Set<String> alreadyLoggedProjects = new TreeSet<>();
    /** Order of this augmenter; other GitHub augmenters position themselves relative to it. */
    public static final int ADD_GITHUB_OBJECT = 100;

    @Override
    public int order() {
        return ADD_GITHUB_OBJECT;
    }

    /**
     * Adds GitHub details to the artifact when it has none and a repository
     * path can be found.
     * @param context augmentation context
     * @param source artifact to augment
     * @param date date at which the artifact is augmented
     * @return a copy of the artifact with GitHub details, or {@code source}
     * itself when it already has details or no repository is known
     */
    @Override
    public ArtifactDetails augment(ExtractionContext context, ArtifactDetails source, LocalDate date) {
        if(source.getGithubDetails()==null) {
            if(GitHubProjects.contains(source)) {
                return doAugment(context, source, GitHubProjects.getGitHubPath(source), date);
            } else if(source.getUrls()!=null && source.getUrls().containsKey("github.com")) {
                return doAugment(context, source, GitHubProjects.getGitHubPath(source.getUrls().get("github.com")), date);
            } else {
                // Set.add returns true only the first time an identifier is seen,
                // which gives us the "log once" behaviour in a single call.
                if(alreadyLoggedProjects.add(source.getIdentifier())) {
                    // Plain concatenation: the identifier must NOT go through
                    // String.format as a pattern, since a '%' in it would throw.
                    logger.warning("There doesn't seems to be any github repo for "+source.getIdentifier());
                }
            }
        }
        return source;
    }

    /**
     * Builds a copy of the artifact carrying GitHub details for the given
     * repository path. Stargazers are left empty at this stage.
     */
    private ArtifactDetails doAugment(ExtractionContext context, ArtifactDetails source, String path, LocalDate date) {
        ArtifactDetailsBuilder builder = ArtifactDetailsBuilder.toBuilder(source);
        return builder.githubDetails(GitHubDetailsBuilder.gitHubDetails()
                    .stargazers(Optional.empty())
                    .path(path)
                    .build())
                .build();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package org.ndx.aadarchi.technology.detector.augmenters.github;

import java.io.File;
import java.io.IOException;
import java.time.Duration;
import java.time.LocalDate;
import java.time.Period;
import java.time.ZoneOffset;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import org.kohsuke.github.GHFileNotFoundException;
import org.kohsuke.github.GHRepository;
import org.kohsuke.github.GHStargazer;
import org.kohsuke.github.PagedIterable;
import org.ndx.aadarchi.technology.detector.augmenters.Augmenter;
import org.ndx.aadarchi.technology.detector.helper.FileHelper;
import org.ndx.aadarchi.technology.detector.loader.ExtractionContext;
import org.ndx.aadarchi.technology.detector.model.ArtifactDetails;
import org.ndx.aadarchi.technology.detector.model.GitHubDetails;

import com.fasterxml.jackson.core.type.TypeReference;

import io.github.emilyydev.asp.ProvidesService;

/**
 * Augmenter filling in the stargazers count of artifacts that already carry
 * GitHub details but whose stargazers count is still empty.
 * <p>
 * Its order is {@link AddGitHub#ADD_GITHUB_OBJECT} + 1. Depending on how far
 * in the past the requested date is, the count is either read directly from
 * the repository or rebuilt from the full stargazers list (which is cached
 * on disk).
 */
@ProvidesService(Augmenter.class)
public class AddGitHubStarsAtPeriod implements Augmenter {
    private static final Logger logger = Logger.getLogger(AddGitHubStarsAtPeriod.class.getName());

    @Override
    public int order() {
        return AddGitHub.ADD_GITHUB_OBJECT+1;
    }

    /**
     * Sets the stargazers count on the artifact's GitHub details, when it has
     * GitHub details.
     * @param context augmentation context, giving access to the GitHub client and the cache folder
     * @param source artifact to augment
     * @param date date at which the stargazers count is wanted
     * @return {@code source} (its GitHub details are updated in place)
     */
    @Override
    public ArtifactDetails augment(ExtractionContext context, ArtifactDetails source, LocalDate date) {
        if(source.getGithubDetails()!=null) {
            return doAugment(context, source, date);
        }
        return source;
    }

    private ArtifactDetails doAugment(ExtractionContext context, ArtifactDetails source, LocalDate date) {
        GitHubDetails githubDetails = source.getGithubDetails();
        if(githubDetails.getStargazers().isEmpty()) {
            // We have a special edge case to distinguish between
            // history rebuilding and standard data fetching.
            // When getting this month stargazers, it's way faster to ask
            // GitHub directly instead of getting the precise list of stargazers
            LocalDate now = LocalDate.now();
            Period period = Period.between(date, now);
            if(period.toTotalMonths()>0) {
                extractStargazersHistory(context, source, date, githubDetails);
            } else {
                extractStargazersToday(context, source, githubDetails);
            }
        }
        return source;
    }

    /**
     * Reads the current stargazers count from the repository.
     * Failures are logged and leave the stargazers count untouched.
     */
    private void extractStargazersToday(ExtractionContext context, ArtifactDetails source, GitHubDetails githubDetails) {
        try {
            GHRepository repository = context.getGithub().getRepository(githubDetails.getPath());
            githubDetails.setStargazers(Optional.of(repository.getStargazersCount()));
        } catch (IOException e) {
            logger.log(Level.WARNING, String.format("Can't get stargazers count for artifact %s (supposedly at %s)", source.getCoordinates(), githubDetails.getPath()), e);
        }
    }

    /**
     * Counts the stargazers who starred the repository strictly before the
     * start (UTC) of the given date.
     */
    private void extractStargazersHistory(ExtractionContext context, ArtifactDetails source, LocalDate date,
            GitHubDetails githubDetails) {
        Date old = Date.from(date.atStartOfDay(ZoneOffset.UTC).toInstant());
        List<Stargazer> allStargazers = getAllStargazers(context, source, githubDetails);
        long numberOfStargazersBefore = allStargazers.stream()
                .filter(s -> s.getStarredAt().compareTo(old)<0)
                .count();
        githubDetails.setStargazers(Optional.of((int) numberOfStargazersBefore));
    }

    /**
     * Gives the full stargazers list of the repository, going through a
     * {@code github/<path>/stargazers.json} file in the cache folder.
     * The file is (re)written when it is missing or older than seven days.
     */
    private List<Stargazer> getAllStargazers(ExtractionContext context, ArtifactDetails source, GitHubDetails details) {
        File cache = context.getCache()
                .resolve("github")
                .resolve(details.getPath())
                .resolve("stargazers.json")
                .toFile();
        cache.getParentFile().mkdirs();
        if(!cache.exists() || cache.lastModified()<System.currentTimeMillis()-Duration.ofDays(7).toMillis()) {
            List<Stargazer> stargazers = doGetAllStargazers(context, details.getPath());
            try {
                FileHelper.writeToFile(stargazers, cache);
            } catch (IOException e) {
                throw new RuntimeException("Can't write stargazers to "+cache.getAbsolutePath(), e);
            }
        }
        try {
            return FileHelper.readFromFile(cache, new TypeReference<List<Stargazer>>() {});
        } catch (IOException e) {
            throw new RuntimeException("Can't read stargazers from "+cache.getAbsolutePath(), e);
        }
    }

    /**
     * Fetches the sorted stargazers list from GitHub, 100 per page.
     * @return the stargazers, or an empty list when the repository can't be found
     * @throws RuntimeException when GitHub can't be read for any other reason
     */
    private List<Stargazer> doGetAllStargazers(ExtractionContext context, String githubRepositoryUrl) {
        logger.info("Fetching stargazers history of "+githubRepositoryUrl);
        try {
            GHRepository repository = context.getGithub().getRepository(githubRepositoryUrl);
            int total = repository.getStargazersCount();
            PagedIterable<GHStargazer> stargazers = repository
                    .listStargazers2()
                    .withPageSize(100);
            AtomicInteger atomic = new AtomicInteger(0);
            List<Stargazer> allStargazers = StreamSupport.stream(stargazers.spliterator(), false)
                    // progress report, once every 100 stargazers
                    .peek(consumer -> {
                        int current = atomic.incrementAndGet();
                        if(current%100==0) {
                            logger.info(String.format("Fetched %d/%d stargazers of %s",
                                    current, total, githubRepositoryUrl));
                        }
                    })
                    .map(s -> new Stargazer(s))
                    .sorted()
                    .collect(Collectors.toList());
            // Logged here rather than in a finally block, so that a failed
            // fetch is never reported as a completed one.
            logger.info("Fetched stargazers history of "+githubRepositoryUrl);
            return allStargazers;
        } catch (GHFileNotFoundException e) {
            logger.log(Level.SEVERE, "Weirdly, repository "+githubRepositoryUrl+" doesn't seems to exist");
            return Collections.emptyList();
        } catch (IOException e) {
            throw new RuntimeException("Can't fetch stargazers history of "+githubRepositoryUrl, e);
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.ndx.aadarchi.technology.detector.augmenters.github;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.function.Function;

import org.ndx.aadarchi.technology.detector.model.ArtifactDetails;

/**
 * Mappings from artifacts to GitHub repositories, loaded once from the
 * {@value #GITHUB_REPOSITORIES} classpath resource (when that resource
 * exists), plus helpers to turn a GitHub url into an {@code owner/repository}
 * path.
 */
public class GitHubProjects {
    public static final String GITHUB_REPOSITORIES = "github.repositories.properties";

    /** Marker after which the repository path starts in a GitHub url. */
    private static final String GITHUB = "github.com/";

    private static final Properties githubProjects;

    static {
        githubProjects = new Properties();
        // The mappings file is optional: without it, the mappings stay empty
        if(GitHubProjects.class.getClassLoader().getResource(GITHUB_REPOSITORIES)!=null) {
            try(InputStream input = GitHubProjects.class.getClassLoader().getResourceAsStream(GITHUB_REPOSITORIES)) {
                githubProjects.load(input);
            } catch (IOException e) {
                throw new RuntimeException("Can't read "+GITHUB_REPOSITORIES, e);
            }
        }
    }

    /**
     * @return the loaded mappings (the live instance, not a copy)
     */
    public static Properties get() {
        return githubProjects;
    }

    /**
     * Finds the GitHub path mapped to the given artifact.
     * Each extractor of {@code ArtifactDetails.GITHUB_REPO_EXTRACTORS} is
     * tried in turn, and the first key which effectively has a mapping wins.
     * This is the same lookup rule as {@link #contains(ArtifactDetails)}, so
     * an artifact for which {@code contains} is true gets a path here.
     * @param details artifact to find the repository of
     * @return the {@code owner/repository} path, or null when no key is mapped
     */
    public static String getGitHubPath(ArtifactDetails details) {
        for(Function<ArtifactDetails, String> extractor : ArtifactDetails.GITHUB_REPO_EXTRACTORS) {
            String key = extractor.apply(details);
            if(key!=null) {
                String url = get().getProperty(key);
                // A key without mapping must not stop the search:
                // a later extractor may give a key which is mapped.
                if(url!=null) {
                    return getGitHubPath(url);
                }
            }
        }
        return null;
    }

    /**
     * Reduces a GitHub url to its {@code owner/repository} path: everything
     * up to and including {@code github.com/} is dropped, as well as anything
     * after the repository name (sub-path, fragment, trailing slash).
     * When the url contains no {@code github.com/}, the whole value is
     * processed as if it already was a path.
     * @param githubRepositoryUrl url to reduce, may be null
     * @return the path, or null when the url is null
     */
    public static String getGitHubPath(String githubRepositoryUrl) {
        if(githubRepositoryUrl==null) {
            return null;
        }
        int marker = githubRepositoryUrl.indexOf(GITHUB);
        String returned = marker<0 ?
                githubRepositoryUrl :
                githubRepositoryUrl.substring(marker+GITHUB.length());
        String[] parts = returned.split("[/#]");
        if(parts.length<2) {
            return returned;
        } else {
            // Rebuilding from the two first parts also drops the trailing
            // slash of "owner/repository/"
            return parts[0]+"/"+parts[1];
        }
    }

    /**
     * Tells if a GitHub repository is mapped to the given artifact.
     * @param source artifact to look for
     * @return true when one of the extractors gives a key present in the mappings
     */
    public static boolean contains(ArtifactDetails source) {
        for(Function<ArtifactDetails, String> extractor : ArtifactDetails.GITHUB_REPO_EXTRACTORS) {
            String key = extractor.apply(source);
            if(key!=null && get().containsKey(key))
                return true;
        }
        return false;
    }

}
Loading

0 comments on commit 15033b4

Please sign in to comment.