Skip to content

Commit

Permalink
added tokenized bed functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Jun 7, 2024
1 parent 0cbb853 commit bea5288
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 29 deletions.
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ include bedboss/bedqc/*
include bedboss/qdrant_index/*
include bedboss/bedbuncher/*
include bedboss/bedbuncher/tools/*
include bedboss/bedclassifier/*
include bedboss/bedclassifier/*
include bedboss/tokens/*
122 changes: 99 additions & 23 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,29 +440,105 @@ def delete_bedset(
print(f"BedSet {identifier} deleted from the bedbase database")


#
# @app.command(help="Tokenize a bed file")
# def tokenize_bed(
# bed_id: str = typer.Option(
# ...,
# help="Path to the bed file",
# exists=True,
# file_okay=True,
# readable=True,
# ),
# universe_id: str = typer.Option(
# None,
# help="Universe ID",
# ),
# bedbase_config: str = typer.Option(
# ...,
# help="Path to the bedbase config file",
# exists=True,
# file_okay=True,
# readable=True,
# ),
#
# )
@app.command(help="Tokenize a bedfile")
def tokenize_bed(
    bed_id: str = typer.Option(
        ...,
        # The value is forwarded as a bed identifier (not a filesystem path),
        # so the help text matches the "Bed ID" wording used by the
        # `delete_tokenized` command.
        help="Bed ID",
    ),
    universe_id: str = typer.Option(
        ...,
        help="Universe ID",
    ),
    cache_folder: str = typer.Option(
        None,
        help="Path to the cache folder",
    ),
    add_to_db: bool = typer.Option(
        False,
        help="Add the tokenized bed file to the bedbase database",
    ),
    bedbase_config: str = typer.Option(
        None,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    overwrite: bool = typer.Option(
        False,
        help="Overwrite the existing tokenized bed file",
    ),
):
    """
    CLI entry point that tokenizes a bed file against a universe.

    All options are forwarded unchanged to
    ``bedboss.tokens.tokens.tokenize_bed_file``.

    :param bed_id: identifier of the bed file to tokenize
    :param universe_id: identifier of the universe used for tokenization
    :param cache_folder: path to the cache folder (None is forwarded as-is)
    :param add_to_db: flag to add the tokenized bed file to the bedbase database
    :param bedbase_config: path to the bedbase config file
    :param overwrite: flag to overwrite the existing tokenized bed file
    :return: None
    """
    # Imported inside the command, so the tokens module is only loaded when
    # this command is actually invoked.
    from bedboss.tokens.tokens import tokenize_bed_file

    tokenize_bed_file(
        universe=universe_id,
        bed=bed_id,
        cache_folder=cache_folder,
        add_to_db=add_to_db,
        config=bedbase_config,
        overwrite=overwrite,
    )


@app.command(help="Delete tokenized bed file")
def delete_tokenized(
    universe_id: str = typer.Option(..., help="Universe ID"),
    bed_id: str = typer.Option(..., help="Bed ID"),
    config: str = typer.Option(
        None,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
):
    """
    CLI entry point that removes a tokenized bed file.

    The work is delegated to ``bedboss.tokens.tokens.delete_tokenized``.

    :param universe_id: identifier of the universe the bed file was tokenized with
    :param bed_id: identifier of the bed file
    :param config: path to the bedbase config file
    :return: None
    """
    # Aliased so the library helper is visibly distinct from this command,
    # which carries the same name.
    from bedboss.tokens.tokens import delete_tokenized as _delete_tokenized

    _delete_tokenized(universe=universe_id, bed=bed_id, config=config)


@app.command(help="Convert bed file to universe")
def convert_universe(
    bed_id: str = typer.Option(
        ...,
        # The value is passed as `bedfile_id` to the database agent, so it is
        # an identifier rather than a filesystem path.
        help="Bed ID",
    ),
    config: str = typer.Option(
        ...,
        help="Path to the bedbase config file",
        exists=True,
        file_okay=True,
        readable=True,
    ),
    method: str = typer.Option(
        None,
        help="Method used to create the universe",
    ),
    bedset: str = typer.Option(
        None,
        help="Bedset used to create the universe",
    ),
):
    """
    CLI entry point that registers a bed file as a universe.

    Builds a ``BedBaseAgent`` from the given config and calls
    ``bed.add_universe`` on it with the supplied identifiers.

    :param bed_id: identifier of the bed file to convert
    :param config: path to the bedbase config file
    :param method: method used to create the universe (forwarded as construct_method)
    :param bedset: bedset used to create the universe (forwarded as bedset_id)
    :return: None
    """
    # Imported inside the command, so bbconf is only loaded when this command
    # is actually invoked.
    from bbconf.bbagent import BedBaseAgent

    bbagent = BedBaseAgent(config)
    bbagent.bed.add_universe(
        bedfile_id=bed_id,
        bedset_id=bedset,
        construct_method=method,
    )


@app.command(help="check installed R packages")
Expand Down
11 changes: 6 additions & 5 deletions bedboss/tokens/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
from bbconf.bbagent import BedBaseAgent
from geniml.bbclient import BBClient
from geniml.bbclient.const import DEFAULT_CACHE_FOLDER
from geniml.io import RegionSet

# from genimtools.tokenizers import RegionSet

from genimtools.tokenizers import TreeTokenizer

Expand All @@ -22,6 +19,7 @@ def tokenize_bed_file(
cache_folder: Union[str, os.PathLike] = DEFAULT_CACHE_FOLDER,
add_to_db: bool = False,
config: str = None,
overwrite: bool = False,
) -> None:
"""
Tokenize a bed file and add it to the local cache
Expand All @@ -31,10 +29,11 @@ def tokenize_bed_file(
:param cache_folder: path to the cache folder
:param add_to_db: flag to add tokenized bed file to the bedbase database [config should be provided if True]
:param config: path to the bedbase config file
:param overwrite: flag to overwrite the existing tokenized bed file
:return: None
"""
bbc = BBClient(cache_folder=cache_folder)
bbc = BBClient(cache_folder=cache_folder or DEFAULT_CACHE_FOLDER)

tokenizer = TreeTokenizer(bbc.seek(universe))
rs = bbc.load_bed(bed)
Expand All @@ -54,7 +53,9 @@ def tokenize_bed_file(
)

bbagent = BedBaseAgent(config=config)
bbagent.bed.add_tokenized(bed_id=bed, universe_id=universe, token_vector=tokens)
bbagent.bed.add_tokenized(
bed_id=bed, universe_id=universe, token_vector=tokens, overwrite=overwrite
)
_LOGGER.info(f"Tokenized bed file '{bed}' added to the database")


Expand Down

0 comments on commit bea5288

Please sign in to comment.