
Commit

Merge pull request #22 from sparks-baird/ramseys_branch
fixing packing generation submitit
sgbaird authored Feb 28, 2023
2 parents 7e3095e + 2814265 commit 4e7d329
Showing 8 changed files with 215 additions and 55 deletions.
33 changes: 30 additions & 3 deletions notebooks/particle_packing/1.0-sgb-collect-from-mongodb.ipynb
@@ -24,7 +24,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 688693/688693 [02:41<00:00, 4271.45it/s]\n"
"100%|██████████| 688693/688693 [00:41<00:00, 16617.13it/s]\n"
]
}
],
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -56,7 +56,7 @@
" dtype='object')"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -67,6 +67,33 @@
"df.to_csv(\"../../data/external/particle_packing_sobol.csv\")\n",
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"54.97133601808166"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"runtime\"].mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
24 changes: 22 additions & 2 deletions notebooks/particle_packing/1.4-sgb-model-usage.ipynb
@@ -23,6 +23,26 @@
"fpath = \"../../models/particle_packing/surrogate_models.pkl\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pystow import ensure\n",
"key = \n",
"\n",
"class ParticlePackingSurrogate(object):\n",
" def __init__(self, fpath):\n",
" if fpath is None:\n",
" \n",
" self.models = models\n",
"\n",
"with open()\n",
"\n",
"def predict(mu1, mu2, mu3, std1, std2, std3, comp1, comp2, comp3, num_particles, safety_factor):"
]
},
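The cell above was committed as a work-in-progress stub: `key =` has no value, the `if fpath is None:` branch is empty, and `with open()` is unfinished. A minimal sketch of how the loader could be completed is shown below; the `pystow.ensure` URL, the model-dictionary layout, and the body of `predict` are assumptions for illustration, not part of the commit.

```python
import pickle

from pystow import ensure


class ParticlePackingSurrogate:
    """Load pickled surrogate models and predict packing fraction."""

    def __init__(self, fpath=None):
        if fpath is None:
            # hypothetical: fetch the published pickle on first use and cache it
            # under ~/.data/particle-packing/ (URL is a placeholder)
            fpath = ensure(
                "particle-packing", url="https://example.org/surrogate_models.pkl"
            )
        with open(fpath, "rb") as f:
            self.models = pickle.load(f)

    def predict(
        self, mu1, mu2, mu3, std1, std2, std3,
        comp1, comp2, comp3, num_particles, safety_factor,
    ):
        # hypothetical: the real feature order and model keys depend on how
        # surrogate_models.pkl was trained
        X = [[mu1, mu2, mu3, std1, std2, std3, comp1, comp2, comp3,
              num_particles, safety_factor]]
        return self.models["packing_fraction"].predict(X)
```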
{
"cell_type": "code",
"execution_count": 14,
@@ -63,7 +83,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "matsci-opt-benchmarks",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -82,7 +102,7 @@
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "01883adffc5ff99e80740fdb2688c7d7f1b5220f2274814f600fbe3b3887f376"
"hash": "e19409612ae03435658261429b42da85155baf08f8f23ce7d549f0541569b2a3"
}
}
},
Binary file modified reports/particle_packing/Datainbrief.docx
Binary file not shown.
177 changes: 142 additions & 35 deletions scripts/particle_packing/packing_generation_submitit.py
@@ -6,11 +6,15 @@
from time import time
from uuid import uuid4

import pandas as pd
import pymongo
import requests
import torch
from ax.modelbridge.factory import get_sobol
from ax.service.ax_client import AxClient
from my_secrets import MONGODB_API_KEY
from my_secrets import MONGODB_API_KEY, MONGODB_PASSWORD, MONGODB_USERNAME
from submitit import AutoExecutor
from tqdm import tqdm

from matsci_opt_benchmarks.particle_packing.utils.data import get_parameters
from matsci_opt_benchmarks.particle_packing.utils.packing_generation import evaluate
@@ -44,7 +48,7 @@
std_names_out,
orig_mean_names,
orig_std_names,
) = get_parameters(remove_composition_degeneracy=True, remove_scaling_degeneracy=True)
) = get_parameters(remove_composition_degeneracy=False, remove_scaling_degeneracy=False)

parameters.append({"name": "num_particles", "type": "range", "bounds": [100, 1000]})
parameters.append({"name": "safety_factor", "type": "range", "bounds": [1.0, 2.5]})
@@ -54,28 +58,119 @@
parameters=parameters,
objective_name="packing_fraction",
minimize=False,
parameter_constraints=["std1 <= std2", "comp1 + comp2 <= 1.0"],
parameter_constraints=["std1 <= std2", "std2 <= std3"],
)
search_space = ax_client.experiment.search_space
m = get_sobol(search_space, fallback_to_sample_polytope=True, seed=SEED)
gr = m.gen(n=num_samples)
param_df = gr.param_df.copy()
# param_df["num_particles"] = 1000
# https://github.com/facebook/Ax/issues/740
# https://github.com/facebook/Ax/issues/1439
fallback = False
m = get_sobol(search_space, fallback_to_sample_polytope=fallback, seed=SEED)
# https://github.com/facebook/Ax/issues/1439
torch.manual_seed(SEED)
# increase max_rs_draws if you get an error about too many random draws
# as a last resort, switch fallback to True (above)
max_rs_draws = 1000000
gr = m.gen(n=num_samples, model_gen_options={"max_rs_draws": max_rs_draws})
# gr.param_df not working https://github.com/facebook/Ax/issues/1437
# param_df = gr.param_df.copy()
param_df = pd.DataFrame([arm.parameters for arm in gr.arms])


app_name = "data-oeodi"
url = f"https://us-east-1.aws.data.mongodb-api.com/app/{app_name}/endpoint/data/v1/action/insertOne" # noqa: E501
collection_name = "sobol"
database_name = "particle-packing"
dataSource = "Cluster0"
cluster_uri = "cluster0.n03mvdg"

# to find this string, click connect to your MongoDB cluster on the website
# also needed to go to "Network Access", click "Add IP address", click "Allow access
# from anywhere", and add
client = pymongo.MongoClient(
f"mongodb+srv://{MONGODB_USERNAME}:{MONGODB_PASSWORD}@{cluster_uri}.mongodb.net/?retryWrites=true&w=majority" # noqa: E501
)
db = client[database_name]
collection = db[collection_name]

posts = collection.find({})
results = [post for post in tqdm(posts)]

parameter_names = [
"mu1",
"mu2",
"mu3",
"std1",
"std2",
"std3",
"comp1",
"comp2",
"comp3",
"num_particles",
"safety_factor",
]

if len(results) > 0:
mongo_df = pd.DataFrame(results)
mongo_param_df: pd.DataFrame = mongo_df[parameter_names]

# remove the entries that are already in the database, including repeats
# the repeats are necessary for the variance calculation
# this is sort of a setdiff

mongo_param_df = mongo_param_df.groupby(
mongo_param_df.columns.tolist(), as_index=False
).size() # type: ignore
mongo_param_df["group_id"] = (
mongo_param_df[parameter_names]
.round(6)
.apply(lambda row: "_".join(row.values.astype(str)), axis=1)
)
param_df["group_id"] = (
param_df[parameter_names]
.round(6)
.apply(lambda row: "_".join(row.values.astype(str)), axis=1)
)

# pd.concat((param_df["group_id"], mongo_param_df["group_id"])).drop_duplicates()

param_df = param_df[~param_df["group_id"].isin(mongo_param_df["group_id"])]
param_df = param_df[parameter_names]

mongo_param_df = mongo_param_df[(num_repeats - mongo_param_df["size"]) > 0]

# repeat the rows of mongo_param_df based on the size column
# iterate through rows of mongo_param_df
sub_dfs = []
for index, row in mongo_param_df.iterrows():
sub_dfs.append(
pd.concat([row[parameter_names]] * row["size"], axis=1, ignore_index=True).T
)

if len(sub_dfs) > 0:
mongo_param_df = pd.concat(sub_dfs, axis=0, ignore_index=True)[parameter_names]
else:
mongo_param_df = pd.DataFrame(columns=parameter_names)

# repeat the rows of param_df num_repeats times
param_df = pd.concat([param_df] * num_repeats, ignore_index=True)

if len(results) > 0:
param_df = pd.concat([param_df, mongo_param_df]) # type: ignore

param_df["util_dir"] = path.join(
"src", "matsci_opt_benchmarks", "particle_packing", "utils"
)
param_df["data_dir"] = path.join("data", "interim", "particle_packing")
parameter_sets = param_df.to_dict(orient="records")
parameter_sets = parameter_sets * num_repeats
# parameter_sets = parameter_sets * num_repeats
shuffle(parameter_sets)

if dummy:
parameter_sets = parameter_sets[:10]
# parameter_sets = parameter_sets[:10]
batch_size = 5
else:
batch_size = 700

url = "https://data.mongodb-api.com/app/data-plyju/endpoint/data/v1/action/insertOne" # noqa: E501
batch_size = 1400


def mongodb_evaluate(parameter_set, verbose=False):
Expand All @@ -99,9 +194,9 @@ def mongodb_evaluate(parameter_set, verbose=False):

payload = json.dumps(
{
"collection": "sobol",
"database": "particle-packing",
"dataSource": "matsci-opt-benchmarks",
"collection": collection_name,
"database": database_name,
"dataSource": dataSource,
"document": results,
}
)
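The request that sends this payload sits in the collapsed part of the diff. For orientation, a sketch of how an insertOne payload is typically posted to the Data API endpoint built above, assuming the standard `api-key` header; the error handling is illustrative only.

```python
import requests

# hypothetical continuation of mongodb_evaluate (the actual call is in the
# collapsed portion of the file): POST the document to the Data API endpoint
headers = {
    "Content-Type": "application/json",
    "api-key": MONGODB_API_KEY,
}
response = requests.post(url, headers=headers, data=payload)
response.raise_for_status()  # fail loudly if the insert was rejected
```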
@@ -138,21 +233,49 @@ def chunks(lst, n):
# use `myallocation` command to see available account/partition combos
# account = "sparks"
# partition = "kingspeak"
account = "owner-guest"
partition = "kingspeak-guest"
# account = "owner-guest"
# partition = "kingspeak-guest"
account = "sparks"
partition = "notchpeak-shared-freecycle" # to allow for node sharing
executor = AutoExecutor(folder=log_folder)
executor.update_parameters(
timeout_min=walltime_min,
slurm_nodes=None,
slurm_partition=partition,
# slurm_cpus_per_task=1,
slurm_additional_parameters={"ntasks": 1, "account": account},
)

# mongodb_evaluate(parameter_sets[0], verbose=True)

# sbatch array
jobs = executor.map_array(mongodb_evaluate_batch, parameter_batch_sets)
# jobs = executor.map_array(mongodb_evaluate, parameter_sets)
print("Submitted jobs")

results = [job.result() for job in jobs]

1 + 1

# %% Code Graveyard
# import pymongo
# from urllib.parse import quote_plus
# password needs to be URL encoded
# client = pymongo.MongoClient(
# f"mongodb+srv://{USERNAME}:{quote_plus(PASSWORD)}@matsci-opt-benchmarks.ehu7qrh.mongodb.net/?retryWrites=true&w=majority"# noqa: E501
# )
# collection = client["particle-packing"]["sobol"]
# collection.insert_one(result)

# import cloudpickle as pickle

# param_df = param_df[~param_df.isin(mongo_param_df[parameter_names]).all(1)]
# param_df = param_df[~param_df.isin(mongo_param_df).all(1)]

# setdiff between two dataframes
# https://stackoverflow.com/questions/17071871/select-rows-from-a-dataframe-

# param_df["num_particles"] = 1000


# job_ids = [job.job_id for job in jobs]
# # https://www.hpc2n.umu.se/documentation/batchsystem/job-dependencies
# job_ids_str = ":".join(job_ids) # e.g. "3937257_0:3937257_1:..."
@@ -179,19 +302,3 @@ def chunks(lst, n):
# print( f"Waiting for submission jobs ({job_ids_str}) to complete before running
# collector job ({collector_job.job_id}). Pickled results file saved to
# {slurm_savepath} after all jobs have run." )

results = [job.result() for job in jobs]

1 + 1

# %% Code Graveyard
# import pymongo
# from urllib.parse import quote_plus
# password needs to be URL encoded
# client = pymongo.MongoClient(
# f"mongodb+srv://{USERNAME}:{quote_plus(PASSWORD)}@matsci-opt-benchmarks.ehu7qrh.mongodb.net/?retryWrites=true&w=majority"# noqa: E501
# )
# collection = client["particle-packing"]["sobol"]
# collection.insert_one(result)

# import cloudpickle as pickle
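A central piece of the new script logic is the deduplication block above: parameter sets already in MongoDB (including deliberate repeats, which are kept for the variance estimate) are dropped from the fresh Sobol batch by comparing rounded, string-joined `group_id` keys, the "sort of a setdiff" the comment mentions. A toy illustration of the same idea with made-up two-parameter data:

```python
import pandas as pd


def make_group_id(df, cols, ndigits=6):
    """Join rounded parameter values into one comparable key per row."""
    return df[cols].round(ndigits).apply(
        lambda row: "_".join(row.values.astype(str)), axis=1
    )


cols = ["mu1", "std1"]  # toy parameter names
planned = pd.DataFrame({"mu1": [1.0, 2.0, 3.0], "std1": [0.1, 0.2, 0.3]})
already_run = pd.DataFrame({"mu1": [2.0], "std1": [0.2]})

planned["group_id"] = make_group_id(planned, cols)
already_run["group_id"] = make_group_id(already_run, cols)

# keep only the parameter sets that are not yet in the database
todo = planned[~planned["group_id"].isin(already_run["group_id"])][cols]
print(todo)  # rows (1.0, 0.1) and (3.0, 0.3) remain
```

For the per-row repetition step, `df.loc[df.index.repeat(counts)]` is a common pandas alternative to concatenating each row in a loop.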
1 change: 1 addition & 0 deletions setup.cfg
@@ -58,6 +58,7 @@ install_requires =
matbench
torch
zenodo-client
ray[tune]
kaleido


3 changes: 2 additions & 1 deletion src/matsci_opt_benchmarks/particle_packing/core.py
@@ -4,9 +4,10 @@

import ray
import torch
from boppf.utils.ax import optimize_ppf
from psutil import cpu_count

from matsci_opt_benchmarks.particle_packing.utils.ax import optimize_ppf


class BOPPF:
def __init__(