Skip to content

Commit

Permalink
Merge pull request #3 from p2m2/test_gluco
Browse files Browse the repository at this point in the history
Test gluco
  • Loading branch information
ofilangi authored Jan 21, 2025
2 parents ddf106d + 767896d commit 253963d
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 64 deletions.
4 changes: 4 additions & 0 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ java -cp ./assembly/pack.jar MainDistributionMzIons
java -cp ./assembly/pack.jar MainDistributionDiffMzIons -i 50000
```

```bash
java -cp ./assembly/pack.jar MainGlucosinolates src/test/resources/Orbitrap_Exploris_240_precision64.mzXML --output test.out --startRT 0 --endRT 2 --minIntensity 7500 --deltaMp0Mp2 1.9958 --carbonMin 3 --carbonMax 35 --sulfurMin 1.5 --sulfurMax 5 --precisionMz 0.0001
```

## ammonite example

### Precursor Mz search
Expand Down
14 changes: 7 additions & 7 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ ThisBuild / compile / javacOptions ++= Seq(
libraryDependencies ++= Seq(
"com.github.scopt" %% "scopt" % "4.1.0",
"com.lucidchart" %% "xtract" % "2.3.0",
"co.fs2" %% "fs2-core" % "3.9.2",
"co.fs2" %% "fs2-io" % "3.9.2",
"co.fs2" %% "fs2-core" % "3.9.2",
"co.fs2" %% "fs2-core" % "3.11.0",
"co.fs2" %% "fs2-io" % "3.11.0",
"co.fs2" %% "fs2-core" % "3.11.0",
"org.gnieh" %%"fs2-data-xml"%"1.8.0",
"com.github.marklister" %% "base64" % "0.3.0",
"com.github.nscala-time" %% "nscala-time" %"2.32.0",
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4",
"org.typelevel" %% "cats-effect-testing-utest" % "1.5.0" % Test,
"com.lihaoyi" %% "utest" % "0.8.1" % Test,
"com.github.nscala-time" %% "nscala-time" %"3.0.0",
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.2.0",
"org.typelevel" %% "cats-effect-testing-utest" % "1.6.0" % Test,
"com.lihaoyi" %% "utest" % "0.8.5" % Test,
)

testFrameworks += new TestFramework("utest.runner.Framework")
Expand Down
164 changes: 107 additions & 57 deletions src/main/scala/MainGlucosinolates.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import org.joda.time.format.PeriodFormat
import scopt.OParser
import scala.math._
import cats.effect.kernel.Ref
import cats.effect.unsafe.implicits.global


object MainGlucosinolates extends IOApp {

Expand All @@ -27,34 +29,57 @@ object MainGlucosinolates extends IOApp {

private val builder = OParser.builder[Config]
private val parser1 = {
import builder._
OParser.sequence(
programName("MainGlucosinolates"),
head("MainGlucosinolates", "1.0"),
opt[String]('o', "output")
.required()
.action((x, c) => c.copy(outputFile = Some(x)))
.text("Output file"),
opt[Double]('s', "startRT")
.optional()
.action((x, c) => c.copy(startRT = Some(x)))
.text("start RT"),
opt[Double]('e', "endRT")
.optional()
.action((x, c) => c.copy(endRT = Some(x)))
.text("end RT"),
opt[Double]('i', "minIntensity")
.optional()
.action((x, c) => c.copy(minIntensity = x))
.text("Minimum intensity threshold selecting the ions of interest"),
arg[String]("<file>")
.required()
.action((x, c) => c.copy(mzFile = Some(x)))
.text("Input mzXML file"),
help("help").text("prints this usage text")
)
}

import builder._
OParser.sequence(
programName("MainGlucosinolates"),
head("MainGlucosinolates", "1.0"),
opt[String]('o', "output")
.required()
.action((x, c) => c.copy(outputFile = Some(x)))
.text("Output file"),
opt[Double]('s', "startRT")
.optional()
.action((x, c) => c.copy(startRT = Some(x)))
.text("Start retention time"),
opt[Double]('e', "endRT")
.optional()
.action((x, c) => c.copy(endRT = Some(x)))
.text("End retention time"),
opt[Double]('i', "minIntensity")
.optional()
.action((x, c) => c.copy(minIntensity = x))
.text("Minimum intensity threshold"),
opt[Double]("deltaMp0Mp2")
.optional()
.action((x, c) => c.copy(deltaMp0Mp2 = x))
.text("Delta between M0 and M2 peaks"),
opt[Int]("carbonMin")
.optional()
.action((x, c) => c.copy(numberCarbonMin = x))
.text("Minimum number of carbon atoms"),
opt[Int]("carbonMax")
.optional()
.action((x, c) => c.copy(numberCarbonMax = x))
.text("Maximum number of carbon atoms"),
opt[Double]("sulfurMin")
.optional()
.action((x, c) => c.copy(numberSulfurMin = x))
.text("Minimum number of sulfur atoms"),
opt[Double]("sulfurMax")
.optional()
.action((x, c) => c.copy(numberSulfurMax = x))
.text("Maximum number of sulfur atoms"),
opt[Double]("precisionMz")
.optional()
.action((x, c) => c.copy(precisionMz = x))
.text("Precision for m/z matching"),
arg[String]("<file>")
.required()
.action((x, c) => c.copy(mzFile = Some(x)))
.text("Input mzXML file"),
help("help").text("prints this usage text")
)
}
def run(args: List[String]): IO[ExitCode] = {
OParser.parse(parser1, args, Config()) match {
case Some(config) =>
Expand All @@ -63,54 +88,79 @@ object MainGlucosinolates extends IOApp {
IO.raiseError(new IllegalArgumentException("Failed to parse arguments")).as(ExitCode.Error)
}
}

private def processFile(config: Config): IO[Unit] = {
private def processFile(config: Config): IO[Unit] = {
for {
mzXMLFile <- IO.fromOption(config.mzFile)(new IllegalArgumentException("Missing mzXML file"))
startTime = config.startRT.getOrElse(0.0)
endTime = config.endRT.getOrElse(Double.MaxValue)

processStart <- IO.realTime.map(rt => DateTime.now().plus(rt.toMillis))
_ <- IO.println(s"Start analyze: $mzXMLFile")
//resultsRef <-
//Ref.of[IO, List[(Double, ((Double, Double), (Double, Double), (Double, Double)))]](List.empty)
_ <- SpectrumRequest(mzXMLFile)

// Progress tracking
progressRef <- Ref[IO].of(0)
totalSpectraRef <- Ref[IO].of(0)

// First, count total spectra
totalSpectra <- SpectrumRequest(mzXMLFile)
.msLevel(1)
.filter(_.isDefined)
.map(_.get)

.map(s => {
s.retentionTimeInSeconds.foreach(rt => println(f"RT: $rt%.2f"))
s
})
.filter(s => s.retentionTimeInSeconds.exists(rt => rt >= startTime && rt <= endTime))
.evalMap(spectrum => IO.blocking(processSpectrum(config, spectrum)))
// .evalMap(results => resultsRef.update(_ ++ results))
//.chunks
//.mapAsync(4)(chunk => IO.cede *> IO.pure(chunk))
.map(x => x.map(y => y.toString + "\n").mkString)
.through(text.utf8.encode)
.through(Files[IO].writeAll(Path(config.outputFile.getOrElse("output.txt"))))
.filter(s => s.retentionTimeInSeconds.exists(rt => rt >= startTime && rt <= endTime))
.compile
.drain
.count

_ <- totalSpectraRef.set(totalSpectra.toInt)

results <- SpectrumRequest(mzXMLFile)
.msLevel(1)
.filter(_.isDefined)
.map(_.get)
.filter(s => s.retentionTimeInSeconds.exists(rt => rt >= startTime && rt <= endTime))
.evalMap { spectrum =>
for {
_ <- progressRef.update(_ + 1)
current <- progressRef.get
total <- totalSpectraRef.get
_ <- IO.println(s"Progress: ${(current.toDouble / total * 100).round}% ($current/$total)")
result <- IO.blocking(processSpectrum(config, spectrum))
} yield result
}
.compile
.toList

/*
results <- resultsRef.get
sortedResults = results.sortBy { case (_, ((mz0, _), _, _)) => -mz0 }
_ <- IO.println(s"Sorting completed. Number of results: ${sortedResults.length}")
_ <- Stream.emits(sortedResults)
.evalMap(result => IO.println(result.toString))
_ <- IO.println(s"Processing completed. Number of results: ${results.length}")
sortedResults = results.flatten.sortBy { case (_, ((mz0, _), _, _)) => -mz0.toDouble }

_ <- IO.println(s"Sorting completed. Number of results: ${sortedResults.size}")

// CSV Header
csvHeader = "RetentionTime;M0_mz;M0_intensity;M1_mz;M1_intensity;M2_mz;M2_intensity"

_ <- Stream.emits(Seq(csvHeader) ++ sortedResults)
.map {
case header: String => header
case result: Product =>
result match {
case (rt: Double, ((mz0: Double, int0: Double), (mz1: Double, int1: Double), (mz2: Double, int2: Double))) =>
s"$rt;$mz0;$int0;$mz1;$int1;$mz2;$int2"
case _ => ""
}
}
.map(_ + "\n")
.through(text.utf8.encode)
.through(Files[IO].writeAll(Path(config.outputFile.getOrElse("output.csv"))))
.compile
.drain

processEnd <- IO.realTime.map(rt => DateTime.now().plus(rt.toMillis))
duration = new Interval(processStart, processEnd).toPeriod
_ <- IO.println(s"Duration: ${PeriodFormat.getDefault.print(duration)}")*/
_ <- IO.println(s"Duration: ${PeriodFormat.getDefault.print(duration)}")

} yield ()
}



private def processSpectrum(config: Config, spectrum: Spectrum): Seq[(Double, ((Double, Double), (Double, Double), (Double, Double)))] = {
private def processSpectrum(config: Config, spectrum: Spectrum): Seq[(Double, ((Double, Double), (Double, Double), (Double, Double)))] = {
spectrum.peaks
.filter { case (_, int0) => int0 > config.minIntensity }
.flatMap { case (mz0, int0) =>
Expand Down

0 comments on commit 253963d

Please sign in to comment.