Skip to content

Commit

Permalink
create BEDUtilities for FASTAExtract
Browse files Browse the repository at this point in the history
create a new class for shared methods to be used across script classes, starting with FASTAExtract's loadCoord method.

This static method loads a BED file into an ArrayList of BEDCoord objects.

Other minor changes:
- CLI PrintStream object set to STDERR
  • Loading branch information
owlang committed Nov 26, 2022
1 parent 06bfee1 commit 40d3ef4
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 51 deletions.
2 changes: 1 addition & 1 deletion src/cli/Sequence_Analysis/FASTAExtractCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public Integer call() throws Exception {
System.exit(1);
}

FASTAExtract script_obj = new FASTAExtract(genomeFASTA, bedFile, output, forceStrand, bedHeader, null);
FASTAExtract script_obj = new FASTAExtract(genomeFASTA, bedFile, output, forceStrand, bedHeader, System.err);
script_obj.run();

System.err.println("Extraction Complete.");
Expand Down
54 changes: 4 additions & 50 deletions src/scripts/Sequence_Analysis/FASTAExtract.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Scanner;

import util.FASTAUtilities;
import util.BEDUtilities;

public class FASTAExtract {
private File GENOME = null;
Expand Down Expand Up @@ -41,19 +40,16 @@ public FASTAExtract(File gen, File b, File out, boolean str, boolean head, Print
}

public void run() throws IOException, InterruptedException {

if (PS == null)
PS = System.err;
System.out.println("STRAND:" + STRAND);
System.out.println("COORD:" + HEADER);
PS.println("STRAND:" + STRAND);
PS.println("COORD:" + HEADER);

try {
IndexedFastaSequenceFile QUERY = new IndexedFastaSequenceFile(GENOME);
PS.println("Proccessing File: " + BED.getName());
// Open Output File
OUT = new PrintStream(OUTFILE);

ArrayList<BEDCoord> BED_Coord = loadCoord(BED);
ArrayList<BEDCoord> BED_Coord = BEDUtilities.loadCoord(BED, HEADER);

for (int y = 0; y < BED_Coord.size(); y++) {
try {
Expand All @@ -76,47 +72,5 @@ public void run() throws IOException, InterruptedException {
} catch (SAMException e) {
PS.println(e.getMessage());
}

}

/**
 * Parses a tab-delimited BED file into a list of BEDCoord objects.
 *
 * Lines with fewer than three tab-separated fields are skipped, as are lines
 * whose first field contains "track" or "#". The name given to each BEDCoord
 * depends on the HEADER value read by this method: when it is false the name
 * is built from the coordinate fields, otherwise the fourth column is used
 * when present.
 *
 * NOTE(review): several statements below read temp[5] without first checking
 * that the row has six columns; see the inline notes.
 *
 * @param INPUT the BED file to read
 * @return the coordinates parsed from INPUT, in file order
 * @throws FileNotFoundException if the Scanner cannot open INPUT
 */
public ArrayList<BEDCoord> loadCoord(File INPUT) throws FileNotFoundException {
// NOTE(review): scan is only closed on the normal path; an exception thrown
// while parsing a row leaves it open.
Scanner scan = new Scanner(INPUT);
ArrayList<BEDCoord> COORD = new ArrayList<BEDCoord>();
while (scan.hasNextLine()) {
String[] temp = scan.nextLine().split("\t");
// Need at least chrom, start and stop columns
if (temp.length > 2) {
// Skip track definition lines and comment lines
if (!temp[0].contains("track") && !temp[0].contains("#")) {
String name = "";

if (!HEADER) { // create genomic coordinate name if requested
if (temp.length > 5) {
name = temp[0] + ":" + temp[1] + "-" + temp[2] + "(" + temp[5] + ")";
} else {
// No strand column available: use "." as the strand placeholder
name = temp[0] + ":" + temp[1] + "-" + temp[2] + "(.)";
}
} else { // else create name based on BED file name or create one if non-existent
if (temp.length > 3) {
name = temp[3];
} else {
// NOTE(review): this branch is only reached when temp.length == 3, so
// temp[5] is out of bounds here and throws ArrayIndexOutOfBoundsException.
name = temp[0] + ":" + temp[1] + "-" + temp[2] + "(" + temp[5] + ")";
}
}

// Only non-negative start coordinates are accepted
if (Integer.parseInt(temp[1]) >= 0) {
// NOTE(review): temp[5] is read for every accepted row, so rows with fewer
// than six columns throw ArrayIndexOutOfBoundsException here.
// Any strand value other than "+" (including ".") is stored as "-".
if (temp[5].equals("+")) {
COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), "+",
name));
} else {
COORD.add(new BEDCoord(temp[0], Integer.parseInt(temp[1]), Integer.parseInt(temp[2]), "-",
name));
}
} else {
System.out.println("Invalid Coordinate in File!!!\n" + Arrays.toString(temp));
}
}
}
}
scan.close();
return COORD;
}
}
82 changes: 82 additions & 0 deletions src/util/BEDUtilities.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;

import objects.CoordinateObjects.BEDCoord;

/**
* Class containing a set of shared methods to be used across script classes.
*
* @author Olivia Lang
* @see scripts.Sequence_Analysis.FASTAExtract
*/
public class BEDUtilities {

    /**
     * Load a list of BEDCoord objects from a tab-delimited BED file. The file
     * may be plain text or gzip-compressed (as reported by
     * {@code GZipUtilities.isGZipped}); it is decoded as UTF-8.
     * <p>
     * Lines with fewer than three tab-separated fields are skipped, as are
     * lines whose first field contains "track" or "#". Rows with a negative
     * start coordinate are reported on standard output and skipped.
     * <p>
     * Rows without a strand column (fewer than six fields) are accepted: the
     * generated coordinate name uses "." as the strand and the BEDCoord is
     * stored on the "+" strand. When a strand column is present, "+" is stored
     * as "+" and every other value (including ".") is stored as "-".
     *
     * @param input  the BED-formatted input file to load
     * @param HEADER the style of name to give each coordinate (true = use the
     *               BED name column when present, false = use the genomic
     *               coordinate string {@code chrom:start-stop(strand)})
     * @return the coordinates parsed from the file, in file order
     * @throws IOException                  if the file cannot be opened or read
     * @throws UnsupportedEncodingException declared for backward compatibility
     *                                      with existing callers
     * @throws NumberFormatException        if a start or stop field is not an
     *                                      integer
     */
    public static ArrayList<BEDCoord> loadCoord(File input, boolean HEADER) throws UnsupportedEncodingException, IOException {
        ArrayList<BEDCoord> COORD = new ArrayList<BEDCoord>();
        // Check if file is gzipped before opening it so the right stream is built
        boolean gzipped = GZipUtilities.isGZipped(input);
        // try-with-resources closes the file even when a row fails to parse;
        // raw is declared separately so it is released if the gzip header is invalid
        try (FileInputStream raw = new FileInputStream(input);
                BufferedReader br = new BufferedReader(
                        new InputStreamReader(gzipped ? new GZIPInputStream(raw) : raw, "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] temp = line.split("\t");
                // Need chrom/start/stop; skip track definitions and comment lines
                if (temp.length <= 2 || temp[0].contains("track") || temp[0].contains("#")) {
                    continue;
                }

                // The strand column is optional: guard every access to temp[5]
                boolean hasStrand = temp.length > 5;
                String coordName = temp[0] + ":" + temp[1] + "-" + temp[2] + "(" + (hasStrand ? temp[5] : ".") + ")";
                // Use the BED name column only when requested and present
                String name = (HEADER && temp.length > 3) ? temp[3] : coordName;

                int start = Integer.parseInt(temp[1]);
                if (start >= 0) {
                    // Missing strand defaults to "+"; an explicit non-"+" value stays "-"
                    String dir = (!hasStrand || temp[5].equals("+")) ? "+" : "-";
                    COORD.add(new BEDCoord(temp[0], start, Integer.parseInt(temp[2]), dir, name));
                } else {
                    System.out.println("Invalid Coordinate in File!!!\n" + Arrays.toString(temp));
                }
            }
        }
        return COORD;
    }
}

1 comment on commit 40d3ef4

@owlang
Copy link
Collaborator Author

@owlang owlang commented on 40d3ef4 Nov 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to #65

Please sign in to comment.