diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d5c6e8f..39ade5f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: Test & lint package & deploy documentation on: push: - branches: [main] + branches: [main, dev] pull_request: branches: [main] diff --git a/.vscode/launch.json b/.vscode/launch.json index 7b7e632..05920e8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,5 +1,5 @@ { - "version": "0.2.0", + "version": "0.3.0", "configurations": [ { "name": "Python: Current File", diff --git a/CITATION.cff b/CITATION.cff index 708056f..3fa7c26 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -6,6 +6,6 @@ authors: - family-names: "Puelles" given-names: "Victor" title: "pytximport: Fast gene count estimation from transcript quantification files in Python" -version: 0.2.0 +version: 0.3.0 date-released: 2024-06-01 url: "https://github.com/complextissue/pytximport" diff --git a/README.md b/README.md index c8ef165..89fcca9 100755 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ results = tximport( Please cite both the original publication as well as this Python implementation: - Charlotte Soneson, Michael I. Love, Mark D. Robinson. Differential analyses for RNA-seq: transcript-level estimates improve gene-level inferences, F1000Research, 4:1521, December 2015. doi: 10.12688/f1000research.7563.1 -- Kuehl, M., & Puelles, V. (2024). pytximport: Fast gene count estimation from transcript quantification files in Python (Version 0.2.0) [Computer software]. https://github.com/complextissue/pytximport +- Kuehl, M., & Puelles, V. (2024). pytximport: Fast gene count estimation from transcript quantification files in Python (Version 0.3.0) [Computer software]. https://github.com/complextissue/pytximport ## License diff --git a/docs/source/conf.py b/docs/source/conf.py index e581058..fb5dcd0 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,7 +18,7 @@ author = "Malte Kuehl" # The full version, including alpha/beta/rc tags -release = "0.2.0" +release = "0.3.0" # -- General configuration --------------------------------------------------- @@ -106,7 +106,7 @@ html_theme = "furo" html_theme_options = { - "announcement": "pytximport 0.2.0 has been released!", + "announcement": "pytximport has been released!", } html_title = "pytximport" diff --git a/docs/source/example.ipynb b/docs/source/example.ipynb index b67d870..01b542e 100644 --- a/docs/source/example.ipynb +++ b/docs/source/example.ipynb @@ -361,7 +361,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Reading quantification files: 2it [00:00, 322.27it/s]\n" + "Reading quantification files: 2it [00:00, 289.77it/s]\n" ] }, { @@ -739,9 +739,9 @@ "Data variables:\n", " abundance (gene_id, file) float64 8kB 0.08291 0.0 0.09854 ... 0.4618 0.0\n", " counts (gene_id, file) float64 8kB 1.001 0.0 1.042 ... 2.0 6.184 0.0\n", - " length (gene_id, file) float64 8kB 509.1 509.1 445.8 ... 564.6 564.6
  • " ], "text/plain": [ " Size: 87kB\n", @@ -891,6 +891,7 @@ " [\"../../test/data/salmon/multiple/Sample_1.sf\", \"../../test/data/salmon/multiple/Sample_2.sf\"],\n", " \"salmon\",\n", " transcript_gene_mapping_mouse,\n", + " output_type=\"xarray\", # or \"anndata\"\n", ")\n", "txi" ] @@ -918,7 +919,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Reading quantification files: 1it [00:00, 399.19it/s]\n" + "Reading quantification files: 1it [00:00, 291.80it/s]\n" ] }, { @@ -991,8 +992,8 @@ " counts_from_abundance=\"length_scaled_tpm\",\n", " return_transcript_data=True,\n", ")\n", - "pd.DataFrame(txi[\"counts\"], index=txi.coords[\"transcript_id\"], columns=txi.coords[\"file_path\"]).sort_values(\n", - " by=txi.coords[\"file_path\"].data[0],\n", + "pd.DataFrame(txi.X.T, index=txi.var.index, columns=txi.obs.index).sort_values(\n", + " by=txi.obs.index[0],\n", " ascending=False,\n", ").head(5)" ] @@ -1013,7 +1014,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Reading quantification files: 1it [00:00, 447.01it/s]\n" + "Reading quantification files: 1it [00:00, 534.10it/s]\n" ] }, { @@ -1085,6 +1086,7 @@ " \"salmon\",\n", " transcript_gene_mapping_human,\n", " counts_from_abundance=\"length_scaled_tpm\",\n", + " output_type=\"xarray\",\n", " return_transcript_data=False,\n", ")\n", "pd.DataFrame(txi[\"counts\"], index=txi.coords[\"gene_id\"], columns=txi.coords[\"file_path\"]).sort_values(\n", @@ -1123,7 +1125,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Reading quantification files: 1it [00:00, 372.17it/s]\n" + "Reading quantification files: 1it [00:00, 457.64it/s]\n" ] }, { @@ -1166,17 +1168,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-05-31 09:42:58,831: Starting the import.\n", - "Reading quantification files: 1it [00:00, 277.84it/s]\n", - "2024-05-31 09:42:59,020: Converting transcript-level expression to gene-level expression.\n", - "2024-05-31 09:42:59,194: Matching gene_ids.\n", - "2024-05-31 09:42:59,323: Creating gene abundance.\n", - "2024-05-31 09:42:59,460: Creating gene counts.\n", - "2024-05-31 09:42:59,463: Creating lengths.\n", - "2024-05-31 09:42:59,466: Replacing missing lengths.\n", - "2024-05-31 09:42:59,467: Creating gene expression dataset.\n", - "2024-05-31 09:42:59,470: Saving the gene-level expression to: ../../test/data/salmon/quant.h5ad.\n", - "2024-05-31 09:42:59,474: Finished the import in 0.64 seconds.\n" + "2024-06-04 19:23:31,595: Starting the import.\n", + "Reading quantification files: 1it [00:00, 292.14it/s]\n", + "2024-06-04 19:23:31,761: Converting transcript-level expression to gene-level expression.\n", + "2024-06-04 19:23:31,964: Matching gene_ids.\n", + "2024-06-04 19:23:32,120: Creating gene abundance.\n", + "2024-06-04 19:23:32,257: Creating gene counts.\n", + "2024-06-04 19:23:32,260: Creating lengths.\n", + "2024-06-04 19:23:32,264: Replacing missing lengths.\n", + "2024-06-04 19:23:32,265: Creating gene expression dataset.\n", + "2024-06-04 19:23:32,269: Saving the gene-level expression to: ../../test/data/salmon/quant.h5ad.\n", + "2024-06-04 19:23:32,276: Finished the import in 0.68 seconds.\n" ] } ], @@ -1238,7 +1240,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Reading quantification files: 4it [00:01, 3.28it/s]\n", + "Reading quantification files: 4it [00:01, 3.03it/s]\n", "WARNING:root:Not all transcripts are present in the mapping. 33815 out of 253181 missing.\n" ] }, @@ -1265,7 +1267,6 @@ " \"salmon\",\n", " transcript_gene_mapping,\n", " counts_from_abundance=\"length_scaled_tpm\",\n", - " output_type=\"anndata\",\n", ")\n", "result" ] @@ -1367,21 +1368,21 @@ "output_type": "stream", "text": [ "Fitting size factors...\n", - "... done in 0.00 seconds.\n", + "... done in 0.01 seconds.\n", "\n", "Fitting dispersions...\n", - "... done in 0.59 seconds.\n", + "... done in 1.30 seconds.\n", "\n", "Fitting dispersion trend curve...\n", - "... done in 0.32 seconds.\n", + "... done in 0.63 seconds.\n", "\n", "/Users/au734063/Documents/code/pytximport-publish/pytximport/.venv/lib/python3.12/site-packages/pydeseq2/dds.py:448: UserWarning: As the residual degrees of freedom is less than 3, the distribution of log dispersions is especially asymmetric and likely to be poorly estimated by the MAD.\n", " self.fit_dispersion_prior()\n", "Fitting MAP dispersions...\n", - "... done in 0.77 seconds.\n", + "... done in 0.95 seconds.\n", "\n", "Fitting LFCs...\n", - "... done in 0.53 seconds.\n", + "... done in 1.42 seconds.\n", "\n", "Replacing 0 outlier genes.\n", "\n" @@ -1451,7 +1452,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "... done in 0.32 seconds.\n", + "... done in 0.35 seconds.\n", "\n" ] } diff --git a/docs/source/start.md b/docs/source/start.md index 42a3095..b73b98b 100755 --- a/docs/source/start.md +++ b/docs/source/start.md @@ -61,7 +61,7 @@ The `tximport` package has become a main stay in the bulk RNA sequencing communi Please cite both the original publication as well as this Python implementation: - Charlotte Soneson, Michael I. Love, Mark D. Robinson. Differential analyses for RNA-seq: transcript-level estimates improve gene-level inferences, F1000Research, 4:1521, December 2015. doi: 10.12688/f1000research.7563.1 -- Kuehl, M., & Puelles, V. (2024). pytximport: Fast gene count estimation from transcript quantification files in Python (Version 0.2.0) [Computer software]. https://github.com/complextissue/pytximport +- Kuehl, M., & Puelles, V. (2024). pytximport: Fast gene count estimation from transcript quantification files in Python (Version 0.3.0) [Computer software]. https://github.com/complextissue/pytximport ## Differences diff --git a/pytximport/_cli.py b/pytximport/_cli.py index 34962d8..3102a28 100644 --- a/pytximport/_cli.py +++ b/pytximport/_cli.py @@ -32,6 +32,13 @@ type=click.Path(exists=True), help="The path to the transcript to gene mapping file.", ) +@click.option( + "-c", + "--counts_from_abundance", + "--counts-from-abundance", + type=click.Choice(["scaled_tpm", "length_scaled_tpm"]), + help="The type of counts to convert to.", +) @click.option( "-o", "--save_path", @@ -43,24 +50,17 @@ @click.option( "--ignore_after_bar", "--ignore-after-bar", - is_flag=True, + type=bool, default=True, help="Whether to split the transcript id after the bar character (`|`).", ) @click.option( "--ignore_transcript_version", "--ignore-transcript-version", - is_flag=True, + type=bool, default=True, help="Whether to ignore the transcript version.", ) -@click.option( - "-c", - "--counts_from_abundance", - "--counts-from-abundance", - type=click.Choice(["scaled_tpm", "length_scaled_tpm"]), - help="The type of counts to convert to.", -) @click.option( "--return_transcript_data", "--return-transcript-data", @@ -71,24 +71,28 @@ "-id", "--id_column", "--id-column", + type=str, help="The column name for the transcript id.", ) @click.option( "-counts", "--counts_column", "--counts-column", + type=str, help="The column name for the counts.", ) @click.option( "-length", "--length_column", "--length-column", + type=str, help="The column name for the length.", ) @click.option( "-tpm", "--abundance_column", "--abundance-column", + type=str, help="The column name for the abundance.", ) @click.option( diff --git a/pytximport/_version.py b/pytximport/_version.py index a4ae98e..143d250 100644 --- a/pytximport/_version.py +++ b/pytximport/_version.py @@ -1,4 +1,4 @@ """Version information for the pytximport package.""" # This package will follow Semantic Versioning after version 1.0.0: https://semver.org/ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/pytximport/core/_tximport.py b/pytximport/core/_tximport.py index 7bbd528..8c645e7 100644 --- a/pytximport/core/_tximport.py +++ b/pytximport/core/_tximport.py @@ -37,7 +37,7 @@ def tximport( sparse_threshold: Optional[float] = None, read_length: Optional[int] = None, # arguments exclusive to the pytximport implementation - output_type: Literal["xarray", "anndata"] = "xarray", + output_type: Literal["xarray", "anndata"] = "anndata", output_format: Literal["csv", "h5ad"] = "csv", save_path: Optional[Union[str, Path]] = None, return_data: bool = True, @@ -80,7 +80,7 @@ def tximport( sparse_threshold (Optional[float], optional): The threshold for the sparse matrix. Currently, sparse input is not supported. Defaults to None. read_length (Optional[int], optional): The read length for the stringtie quantification. Defaults to None. - output_type (Literal["xarray", "anndata"], optional): The type of output. Defaults to "xarray". + output_type (Literal["xarray", "anndata"], optional): The type of output. Defaults to "anndata". output_format (Literal["csv", "h5ad"], optional): The type of output file. Defaults to "csv". save_path (Optional[Union[str, Path]], optional): The path to save the gene-level expression. Defaults to None. return_data (bool, optional): Whether to return the gene-level expression. Defaults to True. @@ -270,6 +270,18 @@ def tximport( counts_from_abundance, ) + if output_type == "anndata": + # convert to AnnData + return ad.AnnData( + X=transcript_data["counts"].values.T, + obs=pd.DataFrame(index=transcript_data.coords["file_path"].values), + var=pd.DataFrame(index=transcript_data.coords["transcript_id"].values), + obsm={ + "length": transcript_data["length"].values.T, + "abundance": transcript_data["abundance"].values.T, + }, + ) + return transcript_data # convert to gene-level expression diff --git a/pytximport/utils/_convert_transcripts_to_genes.py b/pytximport/utils/_convert_transcripts_to_genes.py index bf2a9e6..1b067e2 100644 --- a/pytximport/utils/_convert_transcripts_to_genes.py +++ b/pytximport/utils/_convert_transcripts_to_genes.py @@ -57,6 +57,9 @@ def convert_transcripts_to_genes( # check that at least one transcript is protein-coding assert any(transcript_keep_boolean), "No transcripts with the desired biotype are present in the data." + # calculate the total abundance before filtering + total_abundance = transcript_data["abundance"].sum(axis=0) + transcript_data = transcript_data.isel( transcript_id=transcript_keep_boolean, drop=True, @@ -67,6 +70,11 @@ def convert_transcripts_to_genes( ) transcript_ids = transcript_data.coords["transcript_id"].values + # recalculate the abundance for each sample + new_abundance = transcript_data["abundance"].sum(axis=0) + ratio = total_abundance / new_abundance + transcript_data["abundance"] = (transcript_data["abundance"].T * ratio).T + if ignore_after_bar: # ignore the part of the transcript ID after the bar transcript_ids = [transcript_id.split("|")[0] for transcript_id in transcript_ids] diff --git a/pytximport/utils/_replace_missing_average_transcript_length.py b/pytximport/utils/_replace_missing_average_transcript_length.py index 30fbf2e..17f9324 100644 --- a/pytximport/utils/_replace_missing_average_transcript_length.py +++ b/pytximport/utils/_replace_missing_average_transcript_length.py @@ -18,6 +18,9 @@ def replace_missing_average_transcript_length( # get the rows of the DataArray with missing values nan_rows = np.where(length.isnull().any(dim="file") == True)[0] # noqa: E712 + gene_ids = [] + lengths = [] + for nan_idx in nan_rows: row = length.isel({"gene_id": nan_idx}) gene_id = row.coords["gene_id"].data @@ -37,6 +40,11 @@ def replace_missing_average_transcript_length( average_gene_length = np.exp(np.mean(np.log(length.loc[{"gene_id": gene_id}].data[~column_is_nan]))) # replace the missing row with the average gene length - length.loc[{"gene_id": gene_id}] = length.loc[{"gene_id": gene_id}].fillna(average_gene_length) + gene_ids.append(gene_id) + lengths.append(length.loc[{"gene_id": gene_id}].fillna(average_gene_length)) + + # batching updates seems to be faster than updating the DataArray row by row + if len(gene_ids) > 0: + length.loc[{"gene_id": gene_ids}] = lengths return length diff --git a/test/test_comparison.ipynb b/test/test_comparison.ipynb index e34e80c..3be6dcc 100644 --- a/test/test_comparison.ipynb +++ b/test/test_comparison.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -122,53 +122,53 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2024-05-30 21:04:42,063: Starting the import.\n", - "Reading quantification files: 4it [00:01, 3.58it/s]\n", - "2024-05-30 21:04:43,294: Converting transcript-level expression to gene-level expression.\n", - "2024-05-30 21:04:43,671: Not all transcripts are present in the mapping. 31380 out of 253181 missing.\n", - "2024-05-30 21:04:43,952: Matching gene_ids.\n", - "2024-05-30 21:04:44,116: Creating gene abundance.\n", - "2024-05-30 21:04:44,213: Creating gene counts.\n", - "2024-05-30 21:04:44,342: Creating lengths.\n", - "2024-05-30 21:04:44,495: Replacing missing lengths.\n", - "2024-05-30 21:04:50,105: Creating gene expression dataset.\n", - "2024-05-30 21:04:50,136: Saving the gene-level expression to: data/fabry_disease/counts_pytximport_no.csv.\n", - "2024-05-30 21:04:50,207: Finished the import in 8.14 seconds.\n", - "2024-05-30 21:04:51,406: Starting the import.\n", - "Reading quantification files: 4it [00:01, 3.63it/s]\n", - "2024-05-30 21:04:52,631: Converting transcript-level expression to gene-level expression.\n", - "2024-05-30 21:04:53,053: Not all transcripts are present in the mapping. 31380 out of 253181 missing.\n", - "2024-05-30 21:04:53,313: Matching gene_ids.\n", - "2024-05-30 21:04:53,467: Creating gene abundance.\n", - "2024-05-30 21:04:53,561: Creating gene counts.\n", - "2024-05-30 21:04:53,663: Creating lengths.\n", - "2024-05-30 21:04:53,755: Replacing missing lengths.\n", - "2024-05-30 21:04:59,260: Recreating gene counts from abundances.\n", - "2024-05-30 21:04:59,260: Setting the counts to scaled TPM.\n", - "2024-05-30 21:04:59,261: Creating gene expression dataset.\n", - "2024-05-30 21:04:59,290: Saving the gene-level expression to: data/fabry_disease/counts_pytximport_scaledTPM.csv.\n", - "2024-05-30 21:04:59,449: Finished the import in 8.04 seconds.\n", - "2024-05-30 21:05:00,696: Starting the import.\n", - "Reading quantification files: 4it [00:01, 3.45it/s]\n", - "2024-05-30 21:05:01,977: Converting transcript-level expression to gene-level expression.\n", - "2024-05-30 21:05:02,376: Not all transcripts are present in the mapping. 31380 out of 253181 missing.\n", - "2024-05-30 21:05:02,656: Matching gene_ids.\n", - "2024-05-30 21:05:02,816: Creating gene abundance.\n", - "2024-05-30 21:05:02,910: Creating gene counts.\n", - "2024-05-30 21:05:03,002: Creating lengths.\n", - "2024-05-30 21:05:03,096: Replacing missing lengths.\n", - "2024-05-30 21:05:08,628: Recreating gene counts from abundances.\n", - "2024-05-30 21:05:08,628: Setting counts to length scaled TPM.\n", - "2024-05-30 21:05:08,631: Creating gene expression dataset.\n", - "2024-05-30 21:05:08,658: Saving the gene-level expression to: data/fabry_disease/counts_pytximport_lengthScaledTPM.csv.\n", - "2024-05-30 21:05:08,751: Finished the import in 8.06 seconds.\n" + "2024-06-04 19:00:50,027: Starting the import.\n", + "Reading quantification files: 4it [00:01, 2.84it/s]\n", + "2024-06-04 19:00:51,561: Converting transcript-level expression to gene-level expression.\n", + "2024-06-04 19:00:51,981: Not all transcripts are present in the mapping. 31380 out of 253181 missing.\n", + "2024-06-04 19:00:52,335: Matching gene_ids.\n", + "2024-06-04 19:00:52,542: Creating gene abundance.\n", + "2024-06-04 19:00:52,820: Creating gene counts.\n", + "2024-06-04 19:00:52,936: Creating lengths.\n", + "2024-06-04 19:00:53,089: Replacing missing lengths.\n", + "2024-06-04 19:00:58,156: Creating gene expression dataset.\n", + "2024-06-04 19:00:58,193: Saving the gene-level expression to: data/fabry_disease/counts_pytximport_no.csv.\n", + "2024-06-04 19:00:58,273: Finished the import in 8.25 seconds.\n", + "2024-06-04 19:00:59,769: Starting the import.\n", + "Reading quantification files: 4it [00:01, 2.66it/s]\n", + "2024-06-04 19:01:01,409: Converting transcript-level expression to gene-level expression.\n", + "2024-06-04 19:01:01,837: Not all transcripts are present in the mapping. 31380 out of 253181 missing.\n", + "2024-06-04 19:01:02,160: Matching gene_ids.\n", + "2024-06-04 19:01:02,321: Creating gene abundance.\n", + "2024-06-04 19:01:02,599: Creating gene counts.\n", + "2024-06-04 19:01:02,673: Creating lengths.\n", + "2024-06-04 19:01:02,791: Replacing missing lengths.\n", + "2024-06-04 19:01:07,705: Recreating gene counts from abundances.\n", + "2024-06-04 19:01:07,706: Setting the counts to scaled TPM.\n", + "2024-06-04 19:01:07,707: Creating gene expression dataset.\n", + "2024-06-04 19:01:07,736: Saving the gene-level expression to: data/fabry_disease/counts_pytximport_scaledTPM.csv.\n", + "2024-06-04 19:01:07,827: Finished the import in 8.06 seconds.\n", + "2024-06-04 19:01:09,437: Starting the import.\n", + "Reading quantification files: 4it [00:01, 3.27it/s]\n", + "2024-06-04 19:01:10,794: Converting transcript-level expression to gene-level expression.\n", + "2024-06-04 19:01:11,424: Not all transcripts are present in the mapping. 31380 out of 253181 missing.\n", + "2024-06-04 19:01:11,870: Matching gene_ids.\n", + "2024-06-04 19:01:12,045: Creating gene abundance.\n", + "2024-06-04 19:01:12,385: Creating gene counts.\n", + "2024-06-04 19:01:12,462: Creating lengths.\n", + "2024-06-04 19:01:12,586: Replacing missing lengths.\n", + "2024-06-04 19:01:17,438: Recreating gene counts from abundances.\n", + "2024-06-04 19:01:17,439: Setting counts to length scaled TPM.\n", + "2024-06-04 19:01:17,444: Creating gene expression dataset.\n", + "2024-06-04 19:01:17,484: Saving the gene-level expression to: data/fabry_disease/counts_pytximport_lengthScaledTPM.csv.\n", + "2024-06-04 19:01:17,586: Finished the import in 8.15 seconds.\n" ] } ], @@ -180,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -201,6 +201,13 @@ "pd.testing.assert_frame_equal(counts_tximport_scaledTPM, counts_pytximport_scaledTPM)\n", "pd.testing.assert_frame_equal(counts_tximport_lengthScaledTPM, counts_pytximport_lengthScaledTPM)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/test/test_correctness.py b/test/test_correctness.py index 0ec23f6..8b6184e 100644 --- a/test/test_correctness.py +++ b/test/test_correctness.py @@ -27,6 +27,7 @@ def test_correctness( fabry_directory / "transcript_gene_mapping_human.csv", ignore_transcript_version=True, ignore_after_bar=True, + output_type="xarray", counts_from_abundance=counts_from_abundance, # type: ignore ) diff --git a/test/test_kallisto.py b/test/test_kallisto.py index 8b5e836..40b4adf 100644 --- a/test/test_kallisto.py +++ b/test/test_kallisto.py @@ -16,7 +16,7 @@ def test_kallisto( """Test importing a kallisto quantification file. Args: - kallisto_file (Path): [description] + kallisto_file (Path): Path to the kallisto quantification file. """ for counts_from_abundance in [None, "scaled_tpm", "length_scaled_tpm"]: result = tximport( @@ -25,6 +25,7 @@ def test_kallisto( transcript_gene_mapping_human, ignore_transcript_version=True, ignore_after_bar=True, + output_type="xarray", counts_from_abundance=counts_from_abundance, # type: ignore ) @@ -42,7 +43,7 @@ def test_multiple_kallisto( """Test importing kallisto quantification files. Args: - kallisto_multiple_files (Path): [description] + kallisto_multiple_files (Path): List of paths to the kallisto quantification files. """ for counts_from_abundance in [None, "scaled_tpm", "length_scaled_tpm"]: for existence_optional in [True, False]: @@ -63,6 +64,7 @@ def test_multiple_kallisto( abundance_column="tpm", ignore_transcript_version=True, ignore_after_bar=True, + output_type="xarray", counts_from_abundance=counts_from_abundance, # type: ignore existence_optional=existence_optional, ) diff --git a/test/test_salmon.py b/test/test_salmon.py index ca9ed71..16e1bfb 100644 --- a/test/test_salmon.py +++ b/test/test_salmon.py @@ -18,7 +18,7 @@ def test_salmon( """Test importing a salmon quantification file. Args: - salmon_file (Path): [description] + salmon_file (Path): Path to the salmon quantification file. """ for counts_from_abundance in [None, "scaled_tpm", "length_scaled_tpm"]: for output_type in ["xarray", "anndata"]: @@ -51,7 +51,7 @@ def test_multiple_salmon( """Test importing salmon quantification files. Args: - salmon_multiple_files (Path): [description] + salmon_multiple_files (Path): List of paths to the salmon quantification files. """ for counts_from_abundance in [None, "scaled_tpm", "length_scaled_tpm"]: for biotype_filter in [None, biotype_filters.GENCODE_PROTEIN_CODING]: @@ -69,6 +69,7 @@ def test_multiple_salmon( transcript_gene_mapping_mouse, ignore_transcript_version=True, ignore_after_bar=True, + output_type="xarray", counts_from_abundance=counts_from_abundance, # type: ignore biotype_filter=biotype_filter, existence_optional=existence_optional,