From ec78606b8705a96cfa31b0583b3a0013be5c4c11 Mon Sep 17 00:00:00 2001 From: Muhammad Saad Shamim Date: Wed, 18 Jan 2023 10:39:10 -0600 Subject: [PATCH] update fixes --- src/cli/Main.java | 4 +-- src/cli/clt/bedpe/AnchorFix.java | 44 +++++------------------- src/cli/clt/loops/MotifAssignment.java | 28 ++++++++++----- src/cli/utils/motifs/IndexedBedFile.java | 25 +++++++++++--- 4 files changed, 51 insertions(+), 50 deletions(-) diff --git a/src/cli/Main.java b/src/cli/Main.java index d9fd7c6..7425500 100644 --- a/src/cli/Main.java +++ b/src/cli/Main.java @@ -12,7 +12,7 @@ public class Main { - public static final String VERSION_NUM = "0.103.0"; + public static final String VERSION_NUM = "0.104.0"; public static boolean printVerboseComments = false; public static void printGeneralUsageAndExit(int exitCode, String cUsage) { @@ -91,7 +91,7 @@ public static void main(String[] argv) throws CmdLineParser.UnknownOptionExcepti APA apa = new APA(args, parser, false); apa.run(); } else if (command.startsWith("assign") && command.contains("motif")) { - MotifAssignment motif = new MotifAssignment(args, parser); + MotifAssignment motif = new MotifAssignment(args, parser, command); motif.run(); } else if (command.startsWith("ata")) { ATA ata = new ATA(args, parser); diff --git a/src/cli/clt/bedpe/AnchorFix.java b/src/cli/clt/bedpe/AnchorFix.java index 3030293..a0c253c 100644 --- a/src/cli/clt/bedpe/AnchorFix.java +++ b/src/cli/clt/bedpe/AnchorFix.java @@ -4,28 +4,26 @@ import cli.clt.CommandLineParser; import cli.utils.clique.Node95; import cli.utils.clique.SimpleClustering; -import cli.utils.peaks.Point1D; import javastraw.feature2D.Feature2D; import javastraw.feature2D.Feature2DList; import javastraw.feature2D.Feature2DParser; import javastraw.reader.basics.Chromosome; import javastraw.reader.basics.ChromosomeHandler; import javastraw.reader.basics.ChromosomeTools; -import org.apache.commons.math3.ml.clustering.Cluster; -import org.apache.commons.math3.ml.clustering.DBSCANClusterer; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; public class AnchorFix { - private static final int widthToConnect = 2; public static String usage = "anchor-fix[-clean] [-r resolution] \n" + "\t\tdefault behavior will fix the hi-res shared anchors for loops\n" + "\t\tclean avoids saving old attributes"; private static int resolution = 100; public static int MAX_DIST = 250; - public static int CLUSTER_DIST = 100; public static void run(String[] args, CommandLineParser parser, String command) { if (args.length != 4) { @@ -48,7 +46,6 @@ private static void fixAnchors(String inputBedpe, String genomeID, String outSte if (Main.printVerboseComments) System.out.println("Processing " + chrom.getName()); List loops = loopList.get(chrom.getIndex(), chrom.getIndex()); if (loops.size() > 0) { - if (true) System.out.println("Processing " + chrom.getName()); List newLoops = recoverLoops(loops); output.addByKey(Feature2DList.getKey(chrom, chrom), newLoops); } @@ -64,9 +61,9 @@ private static List recoverLoops(List loops) { allAnchorBins.addAll(upStreamAnchorBins); allAnchorBins.addAll(downStreamAnchorBins); - List upStreamNodes = getNodes(upStreamAnchorBins); - List downStreamNodes = getNodes(downStreamAnchorBins); - List allNodes = getNodes(allAnchorBins); + List upStreamNodes = getNodes(upStreamAnchorBins, resolution); + List downStreamNodes = getNodes(downStreamAnchorBins, resolution); + List allNodes = getNodes(allAnchorBins, resolution); return fixedList(loops, upStreamNodes, downStreamNodes, allNodes); } @@ -123,8 +120,8 @@ private static List getAllOfFeature(List loops, String attribut } - public static List getNodes(List genomePositions) { - List> clusters = SimpleClustering.cluster(genomePositions, 100); + public static List getNodes(List genomePositions, int resolution) { + List> clusters = SimpleClustering.cluster(genomePositions, resolution); List pointsToReassign = new ArrayList<>(clusters.size() / 2); List> clustersToKeep = new ArrayList<>(clusters.size() / 2); for (List cluster : clusters) { @@ -173,29 +170,6 @@ private static Node95 getNearestNode(long point, List nodes, int maxDist return null; } - private static List> dbscan1D(List points) { - DBSCANClusterer dbscan = new DBSCANClusterer<>(CLUSTER_DIST, 1); - return dbscan.cluster(convert(points)); - } - - private static Collection convert(List points) { - List converted = new ArrayList<>(points.size()); - for (Long point : points) { - converted.add(new Point1D(point)); - } - return converted; - } - - private static int[] getCounts(Map> counts, int maxBin) { - int[] countsTrack = new int[maxBin]; - for (int i = 0; i < maxBin; i++) { - if (counts.containsKey(i)) { - countsTrack[i] = counts.get(i).size(); - } - } - return countsTrack; - } - public static Map buildPositionToNodeMapping(List nodes) { Map mapping = new HashMap<>(); for (Node95 node : nodes) { diff --git a/src/cli/clt/loops/MotifAssignment.java b/src/cli/clt/loops/MotifAssignment.java index 70b1b6e..37b5b3b 100644 --- a/src/cli/clt/loops/MotifAssignment.java +++ b/src/cli/clt/loops/MotifAssignment.java @@ -16,7 +16,7 @@ public class MotifAssignment { - public static String usage = "assign-motifs [--window val] " + + public static String usage = "assign-motifs[-permissive] [--window val] " + " "; protected final int window; private final ChromosomeHandler handler; @@ -25,13 +25,15 @@ public class MotifAssignment { private final String outFile; private Map>> upBed; private Map>> downBed; + private final boolean isPermissive; - public MotifAssignment(String[] args, CommandLineParser parser) { + public MotifAssignment(String[] args, CommandLineParser parser, String command) { if (args.length != 6) { System.out.println(usage); System.exit(6); } + isPermissive = command.contains("permissive"); window = parser.getWindowSizeOption(250); binSize = 3 * window; handler = ChromosomeTools.loadChromosomes(args[1]); @@ -49,6 +51,7 @@ public MotifAssignment(String[] args, CommandLineParser parser) { public void run() { Feature2DList result = new Feature2DList(); + int n = 0; for (Chromosome chromosome : handler.getChromosomeArrayWithoutAllByAll()) { List chrLoops = loopList.get(chromosome.getIndex(), chromosome.getIndex()); List loopsToSave = new ArrayList<>(); @@ -56,17 +59,26 @@ public void run() { Map> downMotifs = downBed.get(chromosome.getIndex()); for (Feature2D loop : chrLoops) { - int[] upMotif = IndexedBedFile.getUniqueMotif(loop.getAttribute("localX"), upMotifs, binSize, window); - int[] downMotif = IndexedBedFile.getUniqueMotif(loop.getAttribute("localY"), downMotifs, binSize, window); - if (upMotif != null && downMotif != null) { - IndexedBedFile.setMotifAttributes(loop, upMotif, true); - IndexedBedFile.setMotifAttributes(loop, downMotif, false); - loopsToSave.add(loop); + try { + long q1 = Long.parseLong(loop.getAttribute("localX")); + long q2 = Long.parseLong(loop.getAttribute("localY")); + if (q1 > 0 && q2 > 0) { + n++; + int[] upMotif = IndexedBedFile.getUniqueMotif(q1, upMotifs, binSize, window, isPermissive); + int[] downMotif = IndexedBedFile.getUniqueMotif(q2, downMotifs, binSize, window, isPermissive); + if (upMotif != null && downMotif != null) { + IndexedBedFile.setMotifAttributes(loop, upMotif, true); + IndexedBedFile.setMotifAttributes(loop, downMotif, false); + loopsToSave.add(loop); + } + } + } catch (Exception ignored) { } } result.addByKey(Feature2DList.getKey(chromosome, chromosome), loopsToSave); } + System.out.println("Number of loops with localization: " + n); result.exportFeatureList(new File(outFile), false, Feature2DList.ListFormat.NA); } } diff --git a/src/cli/utils/motifs/IndexedBedFile.java b/src/cli/utils/motifs/IndexedBedFile.java index c22d559..b41b564 100644 --- a/src/cli/utils/motifs/IndexedBedFile.java +++ b/src/cli/utils/motifs/IndexedBedFile.java @@ -37,17 +37,16 @@ public static Map>> index(String inputBedFile, return bedMap; } - public static int[] getUniqueMotif(String localPos, Map> binnedMotifs, - int binSize, int window) { + public static int[] getUniqueMotif(long position, Map> binnedMotifs, + int binSize, int window, boolean isPermissive) { try { - long x = Long.parseLong(localPos); - int bin = (int) (x / binSize); + int bin = (int) (position / binSize); List motifs = new ArrayList<>(); for (int i = bin - 1; i < bin + 2; i++) { if (binnedMotifs.containsKey(i)) { for (int[] motif : binnedMotifs.get(i)) { - if (Math.abs(motif[MIDPT] - x) < window) { + if (Math.abs(motif[MIDPT] - position) < window) { motifs.add(motif); } } @@ -55,6 +54,8 @@ public static int[] getUniqueMotif(String localPos, Map> bi } if (motifs.size() == 1) { return motifs.get(0); + } else if (motifs.size() > 1 && isPermissive) { + return returnClosest(position, motifs); } } catch (Exception e) { return null; @@ -62,6 +63,20 @@ public static int[] getUniqueMotif(String localPos, Map> bi return null; } + private static int[] returnClosest(long position, List motifs) { + int[] closest = motifs.get(0); + long minDist = Math.abs(closest[MIDPT] - position); + + for (int[] motif : motifs) { + long dist = Math.abs(motif[MIDPT] - position); + if (dist < minDist) { + minDist = dist; + closest = motif; + } + } + return closest; + } + public static void setMotifAttributes(Feature2D loop, int[] motif, boolean isUpStream) { String startKey, endKey, midKey; if (isUpStream) {