From c577cd7a98cd98bed5a8ad7ce400f90e4a2825ab Mon Sep 17 00:00:00 2001 From: Muhammad Saad Shamim Date: Mon, 16 Jan 2023 22:33:24 -0600 Subject: [PATCH] motif assignment --- src/cli/Main.java | 5 +- src/cli/clt/loops/MotifAssignment.java | 72 +++++++++++++++++++++ src/cli/utils/motifs/IndexedBedFile.java | 80 ++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 src/cli/clt/loops/MotifAssignment.java create mode 100644 src/cli/utils/motifs/IndexedBedFile.java diff --git a/src/cli/Main.java b/src/cli/Main.java index 36ac1db..d9fd7c6 100644 --- a/src/cli/Main.java +++ b/src/cli/Main.java @@ -12,7 +12,7 @@ public class Main { - public static final String VERSION_NUM = "0.102.0"; + public static final String VERSION_NUM = "0.103.0"; public static boolean printVerboseComments = false; public static void printGeneralUsageAndExit(int exitCode, String cUsage) { @@ -90,6 +90,9 @@ public static void main(String[] argv) throws CmdLineParser.UnknownOptionExcepti } else if (command.startsWith("apa")) { APA apa = new APA(args, parser, false); apa.run(); + } else if (command.startsWith("assign") && command.contains("motif")) { + MotifAssignment motif = new MotifAssignment(args, parser); + motif.run(); } else if (command.startsWith("ata")) { ATA ata = new ATA(args, parser); ata.run(); diff --git a/src/cli/clt/loops/MotifAssignment.java b/src/cli/clt/loops/MotifAssignment.java new file mode 100644 index 0000000..70b1b6e --- /dev/null +++ b/src/cli/clt/loops/MotifAssignment.java @@ -0,0 +1,72 @@ +package cli.clt.loops; + +import cli.clt.CommandLineParser; +import cli.utils.motifs.IndexedBedFile; +import javastraw.feature2D.Feature2D; +import javastraw.feature2D.Feature2DList; +import javastraw.feature2D.Feature2DParser; +import javastraw.reader.basics.Chromosome; +import javastraw.reader.basics.ChromosomeHandler; +import javastraw.reader.basics.ChromosomeTools; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class MotifAssignment { + + public static String usage = "assign-motifs [--window val] " + + " "; + protected final int window; + private final ChromosomeHandler handler; + private final Feature2DList loopList; + private final int binSize; + private final String outFile; + private Map>> upBed; + private Map>> downBed; + + public MotifAssignment(String[] args, CommandLineParser parser) { + if (args.length != 6) { + System.out.println(usage); + System.exit(6); + } + + window = parser.getWindowSizeOption(250); + binSize = 3 * window; + handler = ChromosomeTools.loadChromosomes(args[1]); + loopList = Feature2DParser.loadFeatures(args[2], handler, + true, null, false); + outFile = args[5]; + try { + upBed = IndexedBedFile.index(args[3], handler, binSize); + downBed = IndexedBedFile.index(args[4], handler, binSize); + } catch (Exception e) { + e.printStackTrace(); + System.exit(7); + } + } + + public void run() { + Feature2DList result = new Feature2DList(); + for (Chromosome chromosome : handler.getChromosomeArrayWithoutAllByAll()) { + List chrLoops = loopList.get(chromosome.getIndex(), chromosome.getIndex()); + List loopsToSave = new ArrayList<>(); + Map> upMotifs = upBed.get(chromosome.getIndex()); + Map> downMotifs = downBed.get(chromosome.getIndex()); + + for (Feature2D loop : chrLoops) { + int[] upMotif = IndexedBedFile.getUniqueMotif(loop.getAttribute("localX"), upMotifs, binSize, window); + int[] downMotif = IndexedBedFile.getUniqueMotif(loop.getAttribute("localY"), downMotifs, binSize, window); + if (upMotif != null && downMotif != null) { + IndexedBedFile.setMotifAttributes(loop, upMotif, true); + IndexedBedFile.setMotifAttributes(loop, downMotif, false); + loopsToSave.add(loop); + } + } + + result.addByKey(Feature2DList.getKey(chromosome, chromosome), loopsToSave); + } + result.exportFeatureList(new File(outFile), false, Feature2DList.ListFormat.NA); + } +} diff --git a/src/cli/utils/motifs/IndexedBedFile.java b/src/cli/utils/motifs/IndexedBedFile.java new file mode 100644 index 0000000..c22d559 --- /dev/null +++ b/src/cli/utils/motifs/IndexedBedFile.java @@ -0,0 +1,80 @@ +package cli.utils.motifs; + +import cli.utils.general.IGVTools; +import javastraw.feature2D.Feature2D; +import javastraw.reader.basics.Chromosome; +import javastraw.reader.basics.ChromosomeHandler; +import org.broad.igv.feature.IGVFeature; +import org.broad.igv.feature.tribble.TribbleIndexNotFoundException; +import org.broad.igv.track.TribbleFeatureSource; + +import java.io.IOException; +import java.util.*; + +public class IndexedBedFile { + + private static final int MIDPT = 0; + private static final int START = 1; + private static final int END = 2; + + public static Map>> index(String inputBedFile, ChromosomeHandler handler, int binSize) throws IOException, TribbleIndexNotFoundException { + TribbleFeatureSource bedFile = IGVTools.loadBed(inputBedFile, handler); + Chromosome[] chromosomes = handler.getAutosomalChromosomesArray(); + Map>> bedMap = new HashMap<>(); + for (Chromosome chromosome : chromosomes) { + Map> binToMotif = new HashMap<>(); + Iterator iterator = bedFile.getFeatures(chromosome.getName(), 0, (int) chromosome.getLength()); + while (iterator.hasNext()) { + IGVFeature feature = (IGVFeature) iterator.next(); + int start = feature.getStart(); + int end = feature.getEnd(); + int mid = (start + end) / 2; + binToMotif.computeIfAbsent(mid / binSize, + k -> new LinkedList<>()).add(new int[]{mid, start, end}); + } + bedMap.put(chromosome.getIndex(), binToMotif); + } + return bedMap; + } + + public static int[] getUniqueMotif(String localPos, Map> binnedMotifs, + int binSize, int window) { + try { + long x = Long.parseLong(localPos); + int bin = (int) (x / binSize); + + List motifs = new ArrayList<>(); + for (int i = bin - 1; i < bin + 2; i++) { + if (binnedMotifs.containsKey(i)) { + for (int[] motif : binnedMotifs.get(i)) { + if (Math.abs(motif[MIDPT] - x) < window) { + motifs.add(motif); + } + } + } + } + if (motifs.size() == 1) { + return motifs.get(0); + } + } catch (Exception e) { + return null; + } + return null; + } + + public static void setMotifAttributes(Feature2D loop, int[] motif, boolean isUpStream) { + String startKey, endKey, midKey; + if (isUpStream) { + startKey = "motif_start_1"; + endKey = "motif_end_1"; + midKey = "motif_mid_1"; + } else { + startKey = "motif_start_2"; + endKey = "motif_end_2"; + midKey = "motif_mid_2"; + } + loop.setAttribute(startKey, "" + motif[START]); + loop.setAttribute(endKey, "" + motif[END]); + loop.setAttribute(midKey, "" + motif[MIDPT]); + } +}