Skip to content

Commit

Permalink
add gzip support to in/out of SearchMotif
Browse files Browse the repository at this point in the history
#91 contribution for the SearchMotif tool

CLI
- add new `-z` flag to output options
- update default output to use `.gz` extension
- update script call to include gzip output
script
- update input and output stream setup to detect gzip-compressed input automatically and to gzip-compress the output when requested
Output
- update output window to add `.gz` extension as appropriate
- update script call to include gzip output
Window
- add gzip output checkbox option
- allow files with the `fa.gz` extension to be shown as options in the file selector
- update script call to include gzip output
  • Loading branch information
owlang committed Nov 26, 2022
1 parent a2c9992 commit 06bfee1
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 20 deletions.
10 changes: 7 additions & 3 deletions src/cli/Sequence_Analysis/SearchMotifCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ public class SearchMotifCLI implements Callable<Integer> {

@Option(names = { "-o", "--output" }, description = "Specify output filename (default = <motif>_<num>Mismatch_<fastaFilename>.bed)")
private File output = null;
@Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)")
private boolean gzOutput = false;
@Option(names = { "-m", "--motif" }, required = true, description = "the IUPAC motif to search for")
private String motif;
@Option(names = { "-n", "--mismatches" }, description = "the number of mismatches allowed (default=0)")
Expand All @@ -41,7 +43,7 @@ public Integer call() throws Exception {
System.exit(1);
}

SearchMotif script_obj = new SearchMotif(fastaFile, motif, ALLOWED_MISMATCH, output, System.err);
SearchMotif script_obj = new SearchMotif(fastaFile, motif, ALLOWED_MISMATCH, output, System.err, gzOutput);
script_obj.run();

System.err.println("Search Complete.");
Expand All @@ -63,8 +65,10 @@ private String validateInput() throws IOException {
}
// set default output filename
if (output == null) {
output = new File(motif + "_" + Integer.toString(ALLOWED_MISMATCH) + "Mismatch_"
+ ExtensionFileFilter.stripExtension(fastaFile) + ".bed");
String NAME = motif + "_" + Integer.toString(ALLOWED_MISMATCH) + "Mismatch_"
+ ExtensionFileFilter.stripExtension(fastaFile) + ".bed";
NAME += gzOutput ? ".gz" : "";
output = new File(NAME);
// check output filename is valid
} else {
// check ext
Expand Down
52 changes: 39 additions & 13 deletions src/scripts/Sequence_Analysis/SearchMotif.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package scripts.Sequence_Analysis;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.sql.Timestamp;
Expand All @@ -13,23 +14,29 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import util.GZipUtilities;

public class SearchMotif {

private int ALLOWED_MISMATCH;
private Map<String, String> IUPAC_HASH = new HashMap<>();
private Map<String, String> RC_HASH = new HashMap<>();
private String motif;
private File INPUT = null;
private PrintStream OUT;
private File input;
private File out_filepath;
private PrintStream PS;
private boolean gzOutput;

public SearchMotif(File input, String mot, int num, File output, PrintStream ps) throws IOException {
public SearchMotif(File i, String mot, int num, File output, PrintStream ps, boolean gz) {
ALLOWED_MISMATCH = num;
motif = mot;
INPUT = input;
OUT = new PrintStream(output);
input = i;
out_filepath = output;
PS = ps;
gzOutput = gz;

IUPAC_HASH.put("A", "A");
IUPAC_HASH.put("T", "T");
Expand Down Expand Up @@ -63,7 +70,7 @@ public SearchMotif(File input, String mot, int num, File output, PrintStream ps)
}

public void run() throws IOException, InterruptedException {
PS.println("Searching motif: " + motif + " in " + INPUT.getName());
PS.println("Searching motif: " + motif + " in " + input.getName());
PS.println("Starting: " + getTimeStamp());

char[] ORIG = motif.toUpperCase().toCharArray();
Expand All @@ -90,10 +97,28 @@ public void run() throws IOException, InterruptedException {
int currentEND = 0;
String ID;

InputStream inputStream = new FileInputStream(INPUT);
BufferedReader lines = new BufferedReader(new InputStreamReader(inputStream), 100);
while (lines.ready()) {
String line = lines.readLine().trim();
// Initialize output writer
PrintStream OUT = System.out;
if (out_filepath != null) {
if (gzOutput) {
OUT = new PrintStream(
new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(out_filepath))));
} else {
OUT = new PrintStream(new BufferedOutputStream(new FileOutputStream(out_filepath)));
}
}

// Check if file is gzipped and instantiate appropriate BufferedReader
BufferedReader br;
if (GZipUtilities.isGZipped(input)) {
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(input)), "UTF-8"));
} else {
br = new BufferedReader(new InputStreamReader(new FileInputStream(input), "UTF-8"));
}
// Initialize line variable to loop through
String line = br.readLine();
while (line != null) {
line = line.trim();
if (line.startsWith(">")) {
currentChrom = line.substring(1);
currentLine = "";
Expand Down Expand Up @@ -146,9 +171,10 @@ public void run() throws IOException, InterruptedException {
// System.out.println(tmp);
currentLine = tmp;
}
line = br.readLine();
}
inputStream.close();
lines.close();
br.close();
OUT.close();
PS.println("Completing: " + getTimeStamp());
}

Expand Down
7 changes: 5 additions & 2 deletions src/window_interface/Sequence_Analysis/SearchMotifOutput.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ public class SearchMotifOutput extends JFrame {
private String motif;
private File INPUTFILE = null;
private File OUT_DIR;
private boolean gzOutput = false;

private JTextArea textArea;

public SearchMotifOutput(File input, String mot, int num, File out_dir) throws IOException {
public SearchMotifOutput(File input, String mot, int num, File out_dir, boolean gz) throws IOException {
setTitle("Motif Search Progress");
setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE);
setBounds(150, 150, 600, 800);
Expand All @@ -39,6 +40,7 @@ public SearchMotifOutput(File input, String mot, int num, File out_dir) throws I
motif = mot;
INPUTFILE = input;
OUT_DIR = out_dir;
gzOutput = gz;
}

public void run() throws IOException, InterruptedException {
Expand All @@ -48,8 +50,9 @@ public void run() throws IOException, InterruptedException {
if (OUT_DIR != null) {
BASENAME = OUT_DIR.getCanonicalPath() + File.separator + BASENAME;
}
BASENAME += gzOutput ? ".gz" : "";

SearchMotif script_obj = new SearchMotif(INPUTFILE, motif, ALLOWED_MISMATCH, new File(BASENAME), PS);
SearchMotif script_obj = new SearchMotif(INPUTFILE, motif, ALLOWED_MISMATCH, new File(BASENAME), PS, gzOutput);
script_obj.run();

Thread.sleep(2000);
Expand Down
11 changes: 9 additions & 2 deletions src/window_interface/Sequence_Analysis/SearchMotifWindow.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import javax.swing.DefaultListModel;
import javax.swing.JButton;
import javax.swing.JCheckBox;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JLabel;
Expand Down Expand Up @@ -43,6 +44,7 @@ public class SearchMotifWindow extends JFrame implements ActionListener, Propert
private JPanel contentPane;
private JTextField txtMotif;
private JTextField txtMismatch;
private static JCheckBox chckbxGzipOutput;
private JProgressBar progressBar;

public Task task;
Expand All @@ -64,7 +66,7 @@ public Void doInBackground() throws IOException, InterruptedException {
setProgress(0);
for (int gfile = 0; gfile < GenomeFiles.size(); gfile++) {
SearchMotifOutput search = new SearchMotifOutput(GenomeFiles.get(gfile), txtMotif.getText(),
Integer.parseInt(txtMismatch.getText()), OUT_DIR);
Integer.parseInt(txtMismatch.getText()), OUT_DIR, chckbxGzipOutput.isSelected());
search.setVisible(true);
search.run();
int percentComplete = (int) (((double) (gfile + 1) / (GenomeFiles.size())) * 100);
Expand Down Expand Up @@ -122,6 +124,11 @@ public void actionPerformed(ActionEvent e) {
});
contentPane.add(btnOutputDirectory);

chckbxGzipOutput = new JCheckBox("Output GZIP");
sl_contentPane.putConstraint(SpringLayout.NORTH, chckbxGzipOutput, 0, SpringLayout.NORTH, btnOutputDirectory);
sl_contentPane.putConstraint(SpringLayout.EAST, chckbxGzipOutput, -10, SpringLayout.EAST, contentPane);
contentPane.add(chckbxGzipOutput);

JLabel lblCurrentOutput = new JLabel("Current Output:");
sl_contentPane.putConstraint(SpringLayout.NORTH, lblNewLabel, 5, SpringLayout.SOUTH, lblCurrentOutput);
sl_contentPane.putConstraint(SpringLayout.NORTH, lblCurrentOutput, 6, SpringLayout.SOUTH, btnOutputDirectory);
Expand Down Expand Up @@ -182,7 +189,7 @@ public void actionPerformed(ActionEvent e) {

btnLoadFASTA.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent e) {
File[] newGenomeFiles = FileSelection.getFiles(fc, "fa");
File[] newGenomeFiles = FileSelection.getFiles(fc, "fa", true);
if (newGenomeFiles != null) {
for (int x = 0; x < newGenomeFiles.length; x++) {
GenomeFiles.add(newGenomeFiles[x]);
Expand Down

0 comments on commit 06bfee1

Please sign in to comment.