Commit 9b2f593: fixes

andreea-popescu-reef committed Sep 3, 2024
1 parent f3cfcb0 commit 9b2f593
Showing 4 changed files with 39 additions and 43 deletions.
30 changes: 6 additions & 24 deletions README.md
@@ -1,16 +1,17 @@
# Compute Horde Prompt Gen
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

-Script to generate batches of prompts for the Compute Horde project synthetic jobs.
-The prompt that generates prompts is inspired from [Bittensor Subnet 18 (Cortex. t)] (https://github.com/Datura-ai/cortex.t/blob/main/cortext/utils.py#L139)
+Script to generate batches of random unique prompts to be used in the Compute Horde project synthetic jobs.
+The prompt that generates prompts is inspired by [Bittensor Subnet 18 (Cortex.t)](https://github.com/Datura-ai/cortex.t/blob/276cfcf742e8b442500435a1c1862ac4dffa9e20/cortext/utils.py#L193) (licensed under the MIT License).
The generated prompts will be saved in `<output_folder_path>/prompts_<uuid>.txt`, each line of the text file containing a prompt.


### build image


```bash
# download the model data from huggingface
-python3 download_model.py --hugging face_api_key <API_KEY>
+python3 download_model.py --huggingface_token <API_KEY>

cd src/compute_horde_prompt_gen
docker build -t compute-horde-prompt-gen .
@@ -19,34 +20,15 @@ docker build -t compute-horde-prompt-gen .

### run image
```bash
-docker run -v ./output/:/app/output/ compute-horde-prompt-gen --dynamic_number_of_batches_in_a_single_go 3 --dynamic_number_of_prompts_in_a_batch 4 --uui uuid1,uuid2,uuid3
+docker run -v ./output/:/app/output/ compute-horde-prompt-gen --number_of_batches 3 --number_of_prompts_per_batch 4 --uuids uuid1,uuid2,uuid3
```

### testing
```bash
-python3 run.py --mock_model --dynamic_number_of_batches_in_a_single_go 3 --dynamic_number_of_prompts_in_a_batch 4 --uui uuid1,uuid2,uuid3
+python3 run.py --mock_model --number_of_batches 3 --number_of_prompts_per_batch 4 --uuids uuid1,uuid2,uuid3
```

----
-
-## License
-This repository is licensed under the MIT License.
-```text
-# The MIT License (MIT)
-# Copyright © 2023 Yuma Rao
-# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
-# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
-# the Software.
-# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
-# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-```
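After these README changes, either invocation above leaves one `prompts_<uuid>.txt` file per uuid in the output folder. A quick sanity check for the example flags (a minimal sketch; the three uuids and four prompts per batch come from the commands above):

```python
import os

# Values taken from the example commands: 3 batches of 4 prompts each.
uuids = ["uuid1", "uuid2", "uuid3"]
prompts_per_batch = 4

for uuid in uuids:
    path = os.path.join("output", f"prompts_{uuid}.txt")
    with open(path) as f:
        # Each line of a batch file is expected to contain exactly one prompt.
        prompts = [line.strip() for line in f if line.strip()]
    assert len(prompts) == prompts_per_batch, f"{path} holds {len(prompts)} prompts"
    print(f"{path}: {len(prompts)} prompts OK")
```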

10 changes: 5 additions & 5 deletions download_model.py
@@ -7,11 +7,11 @@
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Save huggingface model")
    parser.add_argument(
-"--huggingface_token",
-type=str,
-required=True,
-help="Huggingface token to use",
-)
+        "--huggingface_token",
+        type=str,
+        required=True,
+        help="Huggingface token to use",
+    )
    parser.add_argument(
        "--model_name",
        type=str,
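The diff only touches argument formatting, so the download logic itself is not shown. For orientation, a minimal sketch of what a token-authenticated download can look like, assuming the script uses the `transformers` library; the default model name and the `--save_path` flag are illustrative assumptions, not taken from the repo (only `--huggingface_token` and `--model_name` appear in the diff):

```python
import argparse

from transformers import AutoModelForCausalLM, AutoTokenizer

parser = argparse.ArgumentParser(description="Save huggingface model")
parser.add_argument("--huggingface_token", type=str, required=True, help="Huggingface token to use")
parser.add_argument("--model_name", type=str, default="meta-llama/Meta-Llama-3-8B-Instruct")  # assumed default
parser.add_argument("--save_path", type=str, default="./saved_models/")  # assumed flag
args = parser.parse_args()

# The token authenticates access to gated repositories such as Llama 3.
tokenizer = AutoTokenizer.from_pretrained(args.model_name, token=args.huggingface_token)
model = AutoModelForCausalLM.from_pretrained(args.model_name, token=args.huggingface_token)

# Save locally so run.py can later load the model and tokenizer from a path.
tokenizer.save_pretrained(args.save_path)
model.save_pretrained(args.save_path)
```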
37 changes: 23 additions & 14 deletions src/compute_horde_prompt_gen/run.py
@@ -1,4 +1,5 @@
import datetime
+import os
import logging
import argparse

@@ -10,13 +11,13 @@


def generate_prompts(
-    model: MockModel | GenerativeModel,
+    model,
    total_prompts,
    batch_size: int = 5,
    num_return_sequences: int = 5,
    max_new_tokens: int = 2000,
    temperature: float = 1.0,
-    filename: str = "prompts.txt",
+    filepath: str = "prompts.txt",
):
    prompt_generator = PromptGeneratingPrompt()

@@ -53,7 +54,7 @@ def generate_prompts(
        new_prompts = new_prompts[:total_prompts]

        total_prompts -= len(new_prompts)
-        append_to_file(new_prompts, filename)
+        append_to_file(new_prompts, filepath)

        if total_prompts == 0:
            break
@@ -98,36 +99,44 @@ def generate_prompts(
help="Path to load the model and tokenizer from",
)
parser.add_argument(
"--dynamic_number_of_batches_in_a_single_go",
"--number_of_batches",
type=int,
required=True,
default=None,
help="Number of batches to generate",
)
parser.add_argument(
"--dynamic_number_of_prompts_in_a_batch",
"--number_of_prompts_per_batch",
type=int,
required=True,
help="Number of prompts per batch",
help="Number of prompts per uuid batch",
)
parser.add_argument(
"--uuids",
type=str,
required=True,
help="Comma separated list of uuids to upload batch of prompts for",
help="Comma separated list of uuids, used as file names of output batches, i.e. `output/prompts_{uuid}.txt`",
)
parser.add_argument(
"--mock_model",
action="store_true",
default=False,
help="Mock llama3 model for testing purposes only",
)
parser.add_argument(
"--output_folder_path",
type=str,
default="output/",
help="Folder path to save the generated prompts to",
)

    args = parser.parse_args()

    uuids = args.uuids.split(",")
-    assert (
-        len(uuids) == args.dynamic_number_of_batches_in_a_single_go
-    ), "Number of uuids should be equal to number of batches requested"
+    if args.number_of_batches:
+        assert (
+            len(uuids) == args.number_of_batches
+        ), "Number of uuids should be equal to number of batches requested"

    model = (
        GenerativeModel(model_path=args.model_path, quantize=args.quantize)
@@ -139,14 +148,14 @@ def generate_prompts(
        start_ts = datetime.datetime.now()
        generate_prompts(
            model,
-            total_prompts=args.dynamic_number_of_prompts_in_a_batch,
+            total_prompts=args.number_of_prompts_per_batch,
            batch_size=args.batch_size,
            num_return_sequences=args.num_return_sequences,
            max_new_tokens=args.max_new_tokens,
            temperature=args.temperature,
-            filename=f"output/prompts_{uuid}.txt",
+            filepath=os.path.join(args.output_folder_path, f"prompts_{uuid}.txt"),
        )
        seconds_taken = (datetime.datetime.now() - start_ts).total_seconds()
        log.info(
-            f"Finished generating {uuid} batch with {args.dynamic_number_of_prompts_in_a_batch} prompts in {seconds_taken:.2f} seconds"
+            f"Finished generating {uuid} batch with {args.number_of_prompts_per_batch} prompts in {seconds_taken:.2f} seconds"
        )
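The net effect of the renamed flags: each entry in `--uuids` maps to exactly one output file under `--output_folder_path`. A self-contained sketch of that mapping (the helper name is illustrative, not from the repo):

```python
import os

def batch_filepaths(uuids_csv: str, output_folder_path: str = "output/") -> list[str]:
    """Map the --uuids argument to the per-batch files run.py writes."""
    uuids = uuids_csv.split(",")
    return [os.path.join(output_folder_path, f"prompts_{uuid}.txt") for uuid in uuids]

print(batch_filepaths("uuid1,uuid2,uuid3"))
# ['output/prompts_uuid1.txt', 'output/prompts_uuid2.txt', 'output/prompts_uuid3.txt']
```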
5 changes: 5 additions & 0 deletions src/compute_horde_prompt_gen/utils.py
@@ -1,4 +1,5 @@
import re
+import os
import logging
import collections

@@ -45,6 +46,10 @@ def check_prompts_quality(prompts: list[str]):


def append_to_file(prompts: list[str], filepath: str = "prompts.txt"):
+    folder = os.path.dirname(filepath)
+    if not os.path.exists(folder):
+        os.makedirs(folder)
+
    try:
        with open(filepath, "a") as f:
            for prompt in prompts:
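One caveat about the guard added above: when `filepath` has no directory component (the default `prompts.txt`, for instance), `os.path.dirname` returns an empty string and `os.makedirs("")` raises `FileNotFoundError`. A safer variant (a sketch; the helper name is illustrative, not part of the commit):

```python
import os

def ensure_parent_dir(filepath: str) -> None:
    # os.path.dirname("prompts.txt") == "", so only create when a folder is named;
    # exist_ok=True also avoids a race if two workers create the folder at once.
    folder = os.path.dirname(filepath)
    if folder:
        os.makedirs(folder, exist_ok=True)

ensure_parent_dir("output/prompts_uuid1.txt")  # creates output/ if missing
ensure_parent_dir("prompts.txt")               # no directory part: safe no-op
```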
