diff --git a/src/cli/Sequence_Analysis/SearchMotifCLI.java b/src/cli/Sequence_Analysis/SearchMotifCLI.java index 201f624fb..ffad6c4bc 100644 --- a/src/cli/Sequence_Analysis/SearchMotifCLI.java +++ b/src/cli/Sequence_Analysis/SearchMotifCLI.java @@ -26,6 +26,8 @@ public class SearchMotifCLI implements Callable { @Option(names = { "-o", "--output" }, description = "Specify output filename (default = _Mismatch_.bed)") private File output = null; + @Option(names = {"-z", "--gzip"}, description = "gzip output (default=false)") + private boolean gzOutput = false; @Option(names = { "-m", "--motif" }, required = true, description = "the IUPAC motif to search for") private String motif; @Option(names = { "-n", "--mismatches" }, description = "the number of mismatches allowed (default=0)") @@ -41,7 +43,7 @@ public Integer call() throws Exception { System.exit(1); } - SearchMotif script_obj = new SearchMotif(fastaFile, motif, ALLOWED_MISMATCH, output, System.err); + SearchMotif script_obj = new SearchMotif(fastaFile, motif, ALLOWED_MISMATCH, output, System.err, gzOutput); script_obj.run(); System.err.println("Search Complete."); @@ -63,8 +65,10 @@ private String validateInput() throws IOException { } // set default output filename if (output == null) { - output = new File(motif + "_" + Integer.toString(ALLOWED_MISMATCH) + "Mismatch_" - + ExtensionFileFilter.stripExtension(fastaFile) + ".bed"); + String NAME = motif + "_" + Integer.toString(ALLOWED_MISMATCH) + "Mismatch_" + + ExtensionFileFilter.stripExtension(fastaFile) + ".bed"; + NAME += gzOutput ? ".gz" : ""; + output = new File(NAME); // check output filename is valid } else { // check ext diff --git a/src/scripts/Sequence_Analysis/SearchMotif.java b/src/scripts/Sequence_Analysis/SearchMotif.java index 665e881be..b6dfdf8b5 100644 --- a/src/scripts/Sequence_Analysis/SearchMotif.java +++ b/src/scripts/Sequence_Analysis/SearchMotif.java @@ -1,10 +1,11 @@ package scripts.Sequence_Analysis; +import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintStream; import java.sql.Timestamp; @@ -13,6 +14,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import util.GZipUtilities; public class SearchMotif { @@ -20,16 +25,18 @@ public class SearchMotif { private Map IUPAC_HASH = new HashMap<>(); private Map RC_HASH = new HashMap<>(); private String motif; - private File INPUT = null; - private PrintStream OUT; + private File input; + private File out_filepath; private PrintStream PS; + private boolean gzOutput; - public SearchMotif(File input, String mot, int num, File output, PrintStream ps) throws IOException { + public SearchMotif(File i, String mot, int num, File output, PrintStream ps, boolean gz) { ALLOWED_MISMATCH = num; motif = mot; - INPUT = input; - OUT = new PrintStream(output); + input = i; + out_filepath = output; PS = ps; + gzOutput = gz; IUPAC_HASH.put("A", "A"); IUPAC_HASH.put("T", "T"); @@ -63,7 +70,7 @@ public SearchMotif(File input, String mot, int num, File output, PrintStream ps) } public void run() throws IOException, InterruptedException { - PS.println("Searching motif: " + motif + " in " + INPUT.getName()); + PS.println("Searching motif: " + motif + " in " + input.getName()); PS.println("Starting: " + getTimeStamp()); char[] ORIG = motif.toUpperCase().toCharArray(); @@ -90,10 +97,28 @@ public void run() throws IOException, InterruptedException { int currentEND = 0; String ID; - InputStream inputStream = new FileInputStream(INPUT); - BufferedReader lines = new BufferedReader(new InputStreamReader(inputStream), 100); - while (lines.ready()) { - String line = lines.readLine().trim(); + // Initialize output writer + PrintStream OUT = System.out; + if (out_filepath != null) { + if (gzOutput) { + OUT = new PrintStream( + new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(out_filepath)))); + } else { + OUT = new PrintStream(new BufferedOutputStream(new FileOutputStream(out_filepath))); + } + } + + // Check if file is gzipped and instantiate appropriate BufferedReader + BufferedReader br; + if (GZipUtilities.isGZipped(input)) { + br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(input)), "UTF-8")); + } else { + br = new BufferedReader(new InputStreamReader(new FileInputStream(input), "UTF-8")); + } + // Initialize line variable to loop through + String line = br.readLine(); + while (line != null) { + line = line.trim(); if (line.startsWith(">")) { currentChrom = line.substring(1); currentLine = ""; @@ -146,9 +171,10 @@ public void run() throws IOException, InterruptedException { // System.out.println(tmp); currentLine = tmp; } + line = br.readLine(); } - inputStream.close(); - lines.close(); + br.close(); + OUT.close(); PS.println("Completing: " + getTimeStamp()); } diff --git a/src/window_interface/Sequence_Analysis/SearchMotifOutput.java b/src/window_interface/Sequence_Analysis/SearchMotifOutput.java index 75ab167c1..9d3aa1d4a 100644 --- a/src/window_interface/Sequence_Analysis/SearchMotifOutput.java +++ b/src/window_interface/Sequence_Analysis/SearchMotifOutput.java @@ -20,10 +20,11 @@ public class SearchMotifOutput extends JFrame { private String motif; private File INPUTFILE = null; private File OUT_DIR; + private boolean gzOutput = false; private JTextArea textArea; - public SearchMotifOutput(File input, String mot, int num, File out_dir) throws IOException { + public SearchMotifOutput(File input, String mot, int num, File out_dir, boolean gz) throws IOException { setTitle("Motif Search Progress"); setDefaultCloseOperation(JFrame.DISPOSE_ON_CLOSE); setBounds(150, 150, 600, 800); @@ -39,6 +40,7 @@ public SearchMotifOutput(File input, String mot, int num, File out_dir) throws I motif = mot; INPUTFILE = input; OUT_DIR = out_dir; + gzOutput = gz; } public void run() throws IOException, InterruptedException { @@ -48,8 +50,9 @@ public void run() throws IOException, InterruptedException { if (OUT_DIR != null) { BASENAME = OUT_DIR.getCanonicalPath() + File.separator + BASENAME; } + BASENAME += gzOutput ? ".gz" : ""; - SearchMotif script_obj = new SearchMotif(INPUTFILE, motif, ALLOWED_MISMATCH, new File(BASENAME), PS); + SearchMotif script_obj = new SearchMotif(INPUTFILE, motif, ALLOWED_MISMATCH, new File(BASENAME), PS, gzOutput); script_obj.run(); Thread.sleep(2000); diff --git a/src/window_interface/Sequence_Analysis/SearchMotifWindow.java b/src/window_interface/Sequence_Analysis/SearchMotifWindow.java index a06a2ebae..add45e609 100644 --- a/src/window_interface/Sequence_Analysis/SearchMotifWindow.java +++ b/src/window_interface/Sequence_Analysis/SearchMotifWindow.java @@ -14,6 +14,7 @@ import javax.swing.DefaultListModel; import javax.swing.JButton; +import javax.swing.JCheckBox; import javax.swing.JFileChooser; import javax.swing.JFrame; import javax.swing.JLabel; @@ -43,6 +44,7 @@ public class SearchMotifWindow extends JFrame implements ActionListener, Propert private JPanel contentPane; private JTextField txtMotif; private JTextField txtMismatch; + private static JCheckBox chckbxGzipOutput; private JProgressBar progressBar; public Task task; @@ -64,7 +66,7 @@ public Void doInBackground() throws IOException, InterruptedException { setProgress(0); for (int gfile = 0; gfile < GenomeFiles.size(); gfile++) { SearchMotifOutput search = new SearchMotifOutput(GenomeFiles.get(gfile), txtMotif.getText(), - Integer.parseInt(txtMismatch.getText()), OUT_DIR); + Integer.parseInt(txtMismatch.getText()), OUT_DIR, chckbxGzipOutput.isSelected()); search.setVisible(true); search.run(); int percentComplete = (int) (((double) (gfile + 1) / (GenomeFiles.size())) * 100); @@ -122,6 +124,11 @@ public void actionPerformed(ActionEvent e) { }); contentPane.add(btnOutputDirectory); + chckbxGzipOutput = new JCheckBox("Output GZIP"); + sl_contentPane.putConstraint(SpringLayout.NORTH, chckbxGzipOutput, 0, SpringLayout.NORTH, btnOutputDirectory); + sl_contentPane.putConstraint(SpringLayout.EAST, chckbxGzipOutput, -10, SpringLayout.EAST, contentPane); + contentPane.add(chckbxGzipOutput); + JLabel lblCurrentOutput = new JLabel("Current Output:"); sl_contentPane.putConstraint(SpringLayout.NORTH, lblNewLabel, 5, SpringLayout.SOUTH, lblCurrentOutput); sl_contentPane.putConstraint(SpringLayout.NORTH, lblCurrentOutput, 6, SpringLayout.SOUTH, btnOutputDirectory); @@ -182,7 +189,7 @@ public void actionPerformed(ActionEvent e) { btnLoadFASTA.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { - File[] newGenomeFiles = FileSelection.getFiles(fc, "fa"); + File[] newGenomeFiles = FileSelection.getFiles(fc, "fa", true); if (newGenomeFiles != null) { for (int x = 0; x < newGenomeFiles.length; x++) { GenomeFiles.add(newGenomeFiles[x]);