Skip to content

Commit

Permalink
motif assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
sa501428 committed Jan 17, 2023
1 parent 33eada4 commit c577cd7
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/cli/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

public class Main {

public static final String VERSION_NUM = "0.102.0";
public static final String VERSION_NUM = "0.103.0";
public static boolean printVerboseComments = false;

public static void printGeneralUsageAndExit(int exitCode, String cUsage) {
Expand Down Expand Up @@ -90,6 +90,9 @@ public static void main(String[] argv) throws CmdLineParser.UnknownOptionExcepti
} else if (command.startsWith("apa")) {
APA apa = new APA(args, parser, false);
apa.run();
} else if (command.startsWith("assign") && command.contains("motif")) {
MotifAssignment motif = new MotifAssignment(args, parser);
motif.run();
} else if (command.startsWith("ata")) {
ATA ata = new ATA(args, parser);
ata.run();
Expand Down
72 changes: 72 additions & 0 deletions src/cli/clt/loops/MotifAssignment.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package cli.clt.loops;

import cli.clt.CommandLineParser;
import cli.utils.motifs.IndexedBedFile;
import javastraw.feature2D.Feature2D;
import javastraw.feature2D.Feature2DList;
import javastraw.feature2D.Feature2DParser;
import javastraw.reader.basics.Chromosome;
import javastraw.reader.basics.ChromosomeHandler;
import javastraw.reader.basics.ChromosomeTools;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class MotifAssignment {

public static String usage = "assign-motifs [--window val] <genomeID> <loops.bedpe> " +
"<upstream.motifs.bed> <downstream.motifs.bed> <output.bedpe>";
protected final int window;
private final ChromosomeHandler handler;
private final Feature2DList loopList;
private final int binSize;
private final String outFile;
private Map<Integer, Map<Integer, List<int[]>>> upBed;
private Map<Integer, Map<Integer, List<int[]>>> downBed;

public MotifAssignment(String[] args, CommandLineParser parser) {
if (args.length != 6) {
System.out.println(usage);
System.exit(6);
}

window = parser.getWindowSizeOption(250);
binSize = 3 * window;
handler = ChromosomeTools.loadChromosomes(args[1]);
loopList = Feature2DParser.loadFeatures(args[2], handler,
true, null, false);
outFile = args[5];
try {
upBed = IndexedBedFile.index(args[3], handler, binSize);
downBed = IndexedBedFile.index(args[4], handler, binSize);
} catch (Exception e) {
e.printStackTrace();
System.exit(7);
}
}

public void run() {
Feature2DList result = new Feature2DList();
for (Chromosome chromosome : handler.getChromosomeArrayWithoutAllByAll()) {
List<Feature2D> chrLoops = loopList.get(chromosome.getIndex(), chromosome.getIndex());
List<Feature2D> loopsToSave = new ArrayList<>();
Map<Integer, List<int[]>> upMotifs = upBed.get(chromosome.getIndex());
Map<Integer, List<int[]>> downMotifs = downBed.get(chromosome.getIndex());

for (Feature2D loop : chrLoops) {
int[] upMotif = IndexedBedFile.getUniqueMotif(loop.getAttribute("localX"), upMotifs, binSize, window);
int[] downMotif = IndexedBedFile.getUniqueMotif(loop.getAttribute("localY"), downMotifs, binSize, window);
if (upMotif != null && downMotif != null) {
IndexedBedFile.setMotifAttributes(loop, upMotif, true);
IndexedBedFile.setMotifAttributes(loop, downMotif, false);
loopsToSave.add(loop);
}
}

result.addByKey(Feature2DList.getKey(chromosome, chromosome), loopsToSave);
}
result.exportFeatureList(new File(outFile), false, Feature2DList.ListFormat.NA);
}
}
80 changes: 80 additions & 0 deletions src/cli/utils/motifs/IndexedBedFile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package cli.utils.motifs;

import cli.utils.general.IGVTools;
import javastraw.feature2D.Feature2D;
import javastraw.reader.basics.Chromosome;
import javastraw.reader.basics.ChromosomeHandler;
import org.broad.igv.feature.IGVFeature;
import org.broad.igv.feature.tribble.TribbleIndexNotFoundException;
import org.broad.igv.track.TribbleFeatureSource;

import java.io.IOException;
import java.util.*;

public class IndexedBedFile {

private static final int MIDPT = 0;
private static final int START = 1;
private static final int END = 2;

public static Map<Integer, Map<Integer, List<int[]>>> index(String inputBedFile, ChromosomeHandler handler, int binSize) throws IOException, TribbleIndexNotFoundException {
TribbleFeatureSource bedFile = IGVTools.loadBed(inputBedFile, handler);
Chromosome[] chromosomes = handler.getAutosomalChromosomesArray();
Map<Integer, Map<Integer, List<int[]>>> bedMap = new HashMap<>();
for (Chromosome chromosome : chromosomes) {
Map<Integer, List<int[]>> binToMotif = new HashMap<>();
Iterator<?> iterator = bedFile.getFeatures(chromosome.getName(), 0, (int) chromosome.getLength());
while (iterator.hasNext()) {
IGVFeature feature = (IGVFeature) iterator.next();
int start = feature.getStart();
int end = feature.getEnd();
int mid = (start + end) / 2;
binToMotif.computeIfAbsent(mid / binSize,
k -> new LinkedList<>()).add(new int[]{mid, start, end});
}
bedMap.put(chromosome.getIndex(), binToMotif);
}
return bedMap;
}

public static int[] getUniqueMotif(String localPos, Map<Integer, List<int[]>> binnedMotifs,
int binSize, int window) {
try {
long x = Long.parseLong(localPos);
int bin = (int) (x / binSize);

List<int[]> motifs = new ArrayList<>();
for (int i = bin - 1; i < bin + 2; i++) {
if (binnedMotifs.containsKey(i)) {
for (int[] motif : binnedMotifs.get(i)) {
if (Math.abs(motif[MIDPT] - x) < window) {
motifs.add(motif);
}
}
}
}
if (motifs.size() == 1) {
return motifs.get(0);
}
} catch (Exception e) {
return null;
}
return null;
}

public static void setMotifAttributes(Feature2D loop, int[] motif, boolean isUpStream) {
String startKey, endKey, midKey;
if (isUpStream) {
startKey = "motif_start_1";
endKey = "motif_end_1";
midKey = "motif_mid_1";
} else {
startKey = "motif_start_2";
endKey = "motif_end_2";
midKey = "motif_mid_2";
}
loop.setAttribute(startKey, "" + motif[START]);
loop.setAttribute(endKey, "" + motif[END]);
loop.setAttribute(midKey, "" + motif[MIDPT]);
}
}

0 comments on commit c577cd7

Please sign in to comment.