From bcdb76b85f532be9a4056dfdfb7cadfacf9a37ee Mon Sep 17 00:00:00 2001
From: t-yuxuanlei
Date: Thu, 14 Nov 2024 20:17:06 -0800
Subject: [PATCH 1/2] fix conflicts in folder path

---
 RecExplainer/README.md                    | 13 ++++++++--
 RecExplainer/shell/eval_explan.sh         |  4 ++--
 RecExplainer/shell/infer_alignment.sh     |  9 ++++---
 RecExplainer/shell/infer_explan.sh        |  9 ++++---
 RecExplainer/shell/merge.sh               |  4 ++--
 RecExplainer/shell/preprocess_recmodel.sh |  8 +++----
 .../shell/recexplainer_data_pipeline.sh   | 24 +++++++++----------
 RecExplainer/shell/train.sh               | 18 +++++++-------
 RecExplainer/shell/unirec_prepare_data.sh |  2 +-
 9 files changed, 47 insertions(+), 44 deletions(-)

diff --git a/RecExplainer/README.md b/RecExplainer/README.md
index 6f603fb..d7dcef7 100644
--- a/RecExplainer/README.md
+++ b/RecExplainer/README.md
@@ -76,17 +76,26 @@ bash shell/unirec_mf_train.sh
 You need to copy some files to the UniRec directory in advance.
 ```bash
 cp preprocess/unirec_utils/data4Exp.py $HOME/UniRec/unirec/main
-cp $HOME/RecExplainer/data/unirec_raw_data/amazon_video_games_v3/train_ids.csv $HOME/UniRec/data/amazon_video_games_v3
-cp $HOME/RecExplainer/data/unirec_raw_data/amazon_video_games_v3/test_ids.csv $HOME/UniRec/data/amazon_video_games_v3
+cp $HOME/RecAI/RecExplainer/data/unirec_raw_data/amazon_video_games_v3/train_ids.csv $HOME/UniRec/data/amazon_video_games_v3
+cp $HOME/RecAI/RecExplainer/data/unirec_raw_data/amazon_video_games_v3/test_ids.csv $HOME/UniRec/data/amazon_video_games_v3
 ```
 For SASRec model:
 ```bash
 bash shell/unirec_sasrec_infer.sh
 ```
+After inference, please copy the contents of `$HOME/UniRec/output/$DATASET_NAME/SASRec/RecExplainer/xxx/` to `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3`
+
+Finally, the following files should exist in `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3`: datamaps.json, metadata.json, SASRec.pth, sequential_data.txt, sim_item.txt, test_top.txt, train_top.txt
+
 For MF model:
 ```bash
 bash shell/unirec_mf_infer.sh
 ```
+After inference, please copy the contents of `$HOME/UniRec/output/$DATASET_NAME/MF/RecExplainer/xxx/` to `$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3`
+
+At the same time, copy these files from `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3` to `$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3`: datamaps.json, metadata.json, sequential_data.txt
+
+Finally, the following files should exist in `$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3`: datamaps.json, metadata.json, MF.pth, sequential_data.txt, sim_item.txt, test_top.txt, train_top.txt
 
 ## Dataset Preparation for RecExplainer Model
 ```bash
diff --git a/RecExplainer/shell/eval_explan.sh b/RecExplainer/shell/eval_explan.sh
index 1cdaffe..71c4e27 100644
--- a/RecExplainer/shell/eval_explan.sh
+++ b/RecExplainer/shell/eval_explan.sh
@@ -1,9 +1,9 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-DATA_DIR=$HOME/RecExplainer/output/amazon_video_games_v3/explan
+DATA_DIR=$HOME/RecAI/RecExplainer/output/amazon_video_games_v3/explan
 
-cd $HOME/RecExplainer/preprocess
+cd $HOME/RecAI/RecExplainer/preprocess
 
 python eval_explan.py --model_names "recexplainer-B,recexplainer-I,recexplainer-H,llama3,chatgpt" \
 --model_response_files "$DATA_DIR/recexplainer-B_response.csv,$DATA_DIR/recexplainer-I_response.csv,$DATA_DIR/recexplainer-H_response.csv,$DATA_DIR/llama3_response.csv,$DATA_DIR/chatgpt_response.csv" \
diff --git a/RecExplainer/shell/infer_alignment.sh b/RecExplainer/shell/infer_alignment.sh
index 60571b2..276cfba 100644
--- a/RecExplainer/shell/infer_alignment.sh
+++ b/RecExplainer/shell/infer_alignment.sh
@@ -1,10 +1,9 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-DATA_DIR=$HOME/blob/RecExplainer/amazon_video_games_v3
-UNIREC_DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3"
+DATA_DIR=$HOME/RecAI/RecExplainer/data/amazon_video_games_v3
 
-output_dir=$HOME/RecExplainer/output/amazon_video_games_v3/
+output_dir=$HOME/RecAI/RecExplainer/output/amazon_video_games_v3/
 model_name_or_path="path to your merged model"
 validation_file=$DATA_DIR/both_valid.json
 sequential_file=$DATA_DIR/sequential_data.txt
@@ -15,13 +14,13 @@ task_type="both"
 
 template_name="llama-3"
 metadata_file=$DATA_DIR/metadata.json
-test_top_file=$UNIREC_DATA_DIR/test_top.txt
+test_top_file=$DATA_DIR/test_top.txt
 torch_dtype="bfloat16"
 attn_implementation="flash_attention_2"
 rec_model_type="SASRec"
 
 
-cd $HOME/RecExplainer
+cd $HOME/RecAI/RecExplainer
 
 ## infer for item recovery task
 accelerate launch --config_file ./shell/config/infer_single_node.yaml ./src/inference.py \
diff --git a/RecExplainer/shell/infer_explan.sh b/RecExplainer/shell/infer_explan.sh
index c2e76cd..0284f0a 100644
--- a/RecExplainer/shell/infer_explan.sh
+++ b/RecExplainer/shell/infer_explan.sh
@@ -1,10 +1,9 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3"
-UNIREC_DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3"
+DATA_DIR="$HOME/RecAI/RecExplainer/data/amazon_video_games_v3"
 
-output_dir=$HOME/RecExplainer/output/amazon_video_games_v3/explan_valid.csv
+output_dir=$HOME/RecAI/RecExplainer/output/amazon_video_games_v3/explan/recexplainer-H_response.csv
 model_name_or_path="path to your merged model"
 validation_file=$DATA_DIR/explan_both_valid.json
 sequential_file=$DATA_DIR/sequential_data.txt
@@ -15,12 +14,12 @@ task_type="both"
 
 template_name="llama-3"
 metadata_file=$DATA_DIR/metadata.json
-test_top_file=$UNIREC_DATA_DIR/test_top.txt
+test_top_file=$DATA_DIR/test_top.txt
 torch_dtype="bfloat16"
 attn_implementation="flash_attention_2"
 rec_model_type="SASRec"
 
-cd $HOME/RecExplainer
+cd $HOME/RecAI/RecExplainer
 
 accelerate launch --config_file ./shell/config/infer.yaml ./src/inference.py \
 --preprocessing_num_workers 4 \
diff --git a/RecExplainer/shell/merge.sh b/RecExplainer/shell/merge.sh
index 7615383..2c4ccfd 100644
--- a/RecExplainer/shell/merge.sh
+++ b/RecExplainer/shell/merge.sh
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-cd $HOME/RecExplainer
+cd $HOME/RecAI/RecExplainer
 
 
 ### --model_name_or_path: the path to the original LLM
@@ -10,7 +10,7 @@ python ./src/merge.py \
 --cache_dir $HOME/.cache \
 --peft_model_name path/to/your/training/checkpoint \
 --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
- --rec_model_name_or_path $HOME/blob/RecExplainer/amazon_video_games_v3/SASRec.pth \
+ --rec_model_name_or_path $HOME/RecAI/RecExplainer/data/amazon_video_games_v3/SASRec.pth \
 --task_type both \
 --torch_dtype bfloat16 \
 --attn_implementation flash_attention_2 \
diff --git a/RecExplainer/shell/preprocess_recmodel.sh b/RecExplainer/shell/preprocess_recmodel.sh
index 8e5e68d..c77b3c0 100644
--- a/RecExplainer/shell/preprocess_recmodel.sh
+++ b/RecExplainer/shell/preprocess_recmodel.sh
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-RAW_DATA_DIR="$HOME/RecExplainer/data/amazon_video_games_v3/raw_data"
+RAW_DATA_DIR="$HOME/RecAI/RecExplainer/data/amazon_video_games_v3/raw_data"
 full_data_name="Video_Games"
 meta_file="$RAW_DATA_DIR/meta_Video_Games.json.gz"
 review_file="$RAW_DATA_DIR/Video_Games_5.json.gz"
@@ -9,16 +9,16 @@ raw_save_data_file="$RAW_DATA_DIR/sequential_data.txt"
 raw_save_metadata_file="$RAW_DATA_DIR/metadata.json"
 raw_save_datamaps_file="$RAW_DATA_DIR/datamaps.json"
 
-PROCESS_DATA_DIR="$HOME/RecExplainer/data/amazon_video_games_v3/process_data"
+PROCESS_DATA_DIR="$HOME/RecAI/RecExplainer/data/amazon_video_games_v3"
 process_save_data_file="$PROCESS_DATA_DIR/sequential_data.txt"
 process_save_metadata_file="$PROCESS_DATA_DIR/metadata.json"
 process_save_datamaps_file="$PROCESS_DATA_DIR/datamaps.json"
 item_thred=2000
 user_thred=4000
 
-UNIREC_RAW_DATA_DIR="$HOME/RecExplainer/data/unirec_raw_data/amazon_video_games_v3"
+UNIREC_RAW_DATA_DIR="$HOME/RecAI/RecExplainer/data/unirec_raw_data/amazon_video_games_v3"
 
-EXE_DIR="$HOME/RecExplainer/preprocess"
+EXE_DIR="$HOME/RecAI/RecExplainer/preprocess"
 cd $EXE_DIR
 
 python data_preprocess_amazon.py --full_data_name $full_data_name --meta_file $meta_file --review_file $review_file \
diff --git a/RecExplainer/shell/recexplainer_data_pipeline.sh b/RecExplainer/shell/recexplainer_data_pipeline.sh
index 7464e7d..f2f99c9 100644
--- a/RecExplainer/shell/recexplainer_data_pipeline.sh
+++ b/RecExplainer/shell/recexplainer_data_pipeline.sh
@@ -1,19 +1,17 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-RAW_DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3" -PROCESS_DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3" -UNIREC_DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3" +RAW_DATA_DIR="$HOME/RecAI/RecExplainer/data/amazon_video_games_v3/raw_data" +PROCESS_DATA_DIR="$HOME/RecAI/RecExplainer/data/amazon_video_games_v3" -MF_PROCESS_DATA_DIR="$HOME/blob/RecExplainer/mf_amazon_video_games_v3" -MF_UNIREC_DATA_DIR="$HOME/blob/RecExplainer/mf_amazon_video_games_v3" +MF_PROCESS_DATA_DIR="$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3" gpt_response_file="$PROCESS_DATA_DIR/gpt4_data/test_response.csv" max_seq_len=9 model_name="meta-llama/Meta-Llama-3-8B-Instruct" model_max_length=1024 -EXE_DIR="$HOME/RecExplainer/preprocess" +EXE_DIR="$HOME/RecAI/RecExplainer/preprocess" cd $EXE_DIR @@ -28,7 +26,7 @@ if [[ -e $gpt_response_file ]]; then else echo "generate gpt_response_file" - python preprocess/gpt_api.py --input_file $PROCESS_DATA_DIR/gpt4_data/test_query.csv --output_file $gpt_response_file + python gpt_api.py --input_file $PROCESS_DATA_DIR/gpt4_data/test_query.csv --output_file $gpt_response_file fi @@ -37,18 +35,18 @@ fi ### generate training and testing data for alignment tasks python amazon_generate_v3.py --sharegpt_file $RAW_DATA_DIR/ShareGPT_V3_unfiltered_cleaned_split.json \ --seqdata_file $PROCESS_DATA_DIR/sequential_data.txt --metadata_file $PROCESS_DATA_DIR/metadata.json \ - --sim_item_file $UNIREC_DATA_DIR/sim_item.txt --train_top_file $UNIREC_DATA_DIR/train_top.txt --test_top_file $UNIREC_DATA_DIR/test_top.txt \ + --sim_item_file $PROCESS_DATA_DIR/sim_item.txt --train_top_file $PROCESS_DATA_DIR/train_top.txt --test_top_file $PROCESS_DATA_DIR/test_top.txt \ --gpt_response_file $gpt_response_file \ --save_intention_file $PROCESS_DATA_DIR/intention --save_behavior_file $PROCESS_DATA_DIR/behaviour --save_both_file $PROCESS_DATA_DIR/both \ --max_seq_len $max_seq_len --model_name $model_name --model_max_length $model_max_length ### generate testing data for explanation task python explan_data_gen.py --data_dir $PROCESS_DATA_DIR --seqdata_file $PROCESS_DATA_DIR/sequential_data.txt --metadata_file $PROCESS_DATA_DIR/metadata.json \ - --test_top_file $UNIREC_DATA_DIR/test_top.txt --max_seq_len $max_seq_len --max_samples 500 + --test_top_file $PROCESS_DATA_DIR/test_top.txt --max_seq_len $max_seq_len --max_samples 500 ### generate training data for explanation task, used to train classifier and score predictor python explan_data_gen.py --data_dir $PROCESS_DATA_DIR --seqdata_file $PROCESS_DATA_DIR/sequential_data.txt --metadata_file $PROCESS_DATA_DIR/metadata.json \ - --test_top_file $UNIREC_DATA_DIR/train_top.txt --max_seq_len $max_seq_len --max_samples 2000 --split "train" + --test_top_file $PROCESS_DATA_DIR/train_top.txt --max_seq_len $max_seq_len --max_samples 2000 --split "train" ###################################################### @@ -58,15 +56,15 @@ python explan_data_gen.py --data_dir $PROCESS_DATA_DIR --seqdata_file $PROCESS_D ### generate training and testing data for alignment tasks python mf_amazon_video_games_generate.py --sharegpt_file $RAW_DATA_DIR/ShareGPT_V3_unfiltered_cleaned_split.json \ --seqdata_file $MF_PROCESS_DATA_DIR/sequential_data.txt --metadata_file $MF_PROCESS_DATA_DIR/metadata.json \ - --sim_item_file $MF_UNIREC_DATA_DIR/sim_item.txt --test_top_file $MF_UNIREC_DATA_DIR/test_top.txt \ + --sim_item_file $MF_PROCESS_DATA_DIR/sim_item.txt --test_top_file $MF_PROCESS_DATA_DIR/test_top.txt \ --gpt_response_file $gpt_response_file \ 
 --save_intention_file $MF_PROCESS_DATA_DIR/intention --save_behavior_file $MF_PROCESS_DATA_DIR/behaviour --save_both_file $MF_PROCESS_DATA_DIR/both \
 --max_seq_len $max_seq_len --model_name $model_name --model_max_length $model_max_length
 
 ### generate testing data for explanation task
 python explan_data_gen.py --data_dir $MF_PROCESS_DATA_DIR --seqdata_file $MF_PROCESS_DATA_DIR/sequential_data.txt --metadata_file $MF_PROCESS_DATA_DIR/metadata.json \
- --test_top_file $MF_UNIREC_DATA_DIR/test_top.txt --max_seq_len $max_seq_len --max_samples 500 --rec_model_type "MF"
+ --test_top_file $MF_PROCESS_DATA_DIR/test_top.txt --max_seq_len $max_seq_len --max_samples 500 --rec_model_type "MF"
 
 ### generate training data for explanation task, used to train classifier and score predictor
 python explan_data_gen.py --data_dir $MF_PROCESS_DATA_DIR --seqdata_file $MF_PROCESS_DATA_DIR/sequential_data.txt --metadata_file $MF_PROCESS_DATA_DIR/metadata.json \
- --test_top_file $MF_UNIREC_DATA_DIR/test_top.txt --max_seq_len $max_seq_len --max_samples 2000 --split "train" --rec_model_type "MF"
+ --test_top_file $MF_PROCESS_DATA_DIR/test_top.txt --max_seq_len $max_seq_len --max_samples 2000 --split "train" --rec_model_type "MF"
diff --git a/RecExplainer/shell/train.sh b/RecExplainer/shell/train.sh
index 788ac58..dc2f1f7 100644
--- a/RecExplainer/shell/train.sh
+++ b/RecExplainer/shell/train.sh
@@ -8,12 +8,11 @@ export DISABLE_MLFLOW_INTEGRATION=true;
 export WANDB_DIR=$HOME/.cache/
 export WANDB_PROJECT="RecExplainer"
 
-DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3"
-UNIREC_DATA_DIR="$HOME/blob/RecExplainer/amazon_video_games_v3"
+DATA_DIR="$HOME/RecAI/RecExplainer/data/amazon_video_games_v3"
 
 attn_implementation="flash_attention_2"
 model_name_or_path="meta-llama/Meta-Llama-3-8B-Instruct"
-rec_model_name_or_path=$UNIREC_DATA_DIR/SASRec.pth
+rec_model_name_or_path=$DATA_DIR/SASRec.pth
 rec_model_type="SASRec"
 model_max_length=1024
 torch_dtype="bfloat16"
@@ -27,7 +26,7 @@ template_name="llama-3"
 
 output_dir=$DATA_DIR/output/both_flashattn2_llam3-8b_len1024_bf16_lr1e-4_epoch20_batch4_accu4_warmratio0.1_4gpus
 
-cd $HOME/RecExplainer
+cd $HOME/RecAI/RecExplainer
 
 torchrun --nnodes=1 --nproc_per_node 4 --master_port=29501 ./src/sft_training.py \
 --seed 2024 \
@@ -61,7 +60,7 @@ torchrun --nnodes=1 --nproc_per_node 4 --master_port=29501 ./src/sft_training.py
 --save_strategy epoch \
 --evaluation_strategy epoch \
 --report_to wandb \
- --run_name "amazon_video_games_v3_both_flashattn2_llam3-8b_len1024_bf16_lr1e-4_epoch20_batch4_accu4_warmratio0.1_4gpus" > $HOME/RecExplainer/training.log 2>&1
+ --run_name "amazon_video_games_v3_both_flashattn2_llam3-8b_len1024_bf16_lr1e-4_epoch20_batch4_accu4_warmratio0.1_4gpus" > $HOME/RecAI/RecExplainer/training.log 2>&1
 
 
 ### for training MF model
@@ -70,12 +69,11 @@ export DISABLE_MLFLOW_INTEGRATION=true;
 export WANDB_DIR=$HOME/.cache/
 export WANDB_PROJECT="RecExplainer"
 
-DATA_DIR="$HOME/blob/RecExplainer/mf_amazon_video_games_v3"
-UNIREC_DATA_DIR="$HOME/blob/RecExplainer/mf_amazon_video_games_v3"
+DATA_DIR="$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3"
 
 attn_implementation="flash_attention_2"
 model_name_or_path="meta-llama/Meta-Llama-3-8B-Instruct"
-rec_model_name_or_path=$UNIREC_DATA_DIR/MF.pth
+rec_model_name_or_path=$DATA_DIR/MF.pth
 rec_model_type="MF"
 model_max_length=1024
 torch_dtype="bfloat16"
@@ -89,7 +87,7 @@ template_name="llama-3"
 
 output_dir=$DATA_DIR/output/both_flashattn2_llam3-8b_len1024_bf16_lr1e-4_epoch20_batch4_accu4_warmratio0.1_4gpus
 
-cd $HOME/RecExplainer
+cd $HOME/RecAI/RecExplainer
 
 torchrun --nnodes=1 --nproc_per_node 4 --master_port=29501 ./src/sft_training.py \
 --seed 2024 \
@@ -123,5 +121,5 @@ torchrun --nnodes=1 --nproc_per_node 4 --master_port=29501 ./src/sft_training.py
 --save_strategy epoch \
 --evaluation_strategy epoch \
 --report_to wandb \
- --run_name "mf_amazon_video_games_v3_both_flashattn2_llam3-8b_len1024_bf16_lr1e-4_epoch20_batch4_accu4_warmratio0.1_4gpus" > $HOME/RecExplainer/mf_training.log 2>&1
+ --run_name "mf_amazon_video_games_v3_both_flashattn2_llam3-8b_len1024_bf16_lr1e-4_epoch20_batch4_accu4_warmratio0.1_4gpus" > $HOME/RecAI/RecExplainer/mf_training.log 2>&1
 
\ No newline at end of file
diff --git a/RecExplainer/shell/unirec_prepare_data.sh b/RecExplainer/shell/unirec_prepare_data.sh
index 8d204b4..125901b 100644
--- a/RecExplainer/shell/unirec_prepare_data.sh
+++ b/RecExplainer/shell/unirec_prepare_data.sh
@@ -2,7 +2,7 @@
 # Licensed under the MIT license.
 
 
-RAW_DATA_DIR="$HOME/RecExplainer/data/unirec_raw_data/"
+RAW_DATA_DIR="$HOME/RecAI/RecExplainer/data/unirec_raw_data/"
 
 ROOT_DIR="$HOME/UniRec"
 DATA_ROOT="$ROOT_DIR/data"

From ad14e23559e08a7f46453729c3608a3f79a59fe8 Mon Sep 17 00:00:00 2001
From: t-yuxuanlei
Date: Thu, 14 Nov 2024 20:27:34 -0800
Subject: [PATCH 2/2] update readme

---
 RecExplainer/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/RecExplainer/README.md b/RecExplainer/README.md
index d7dcef7..f04c3ff 100644
--- a/RecExplainer/README.md
+++ b/RecExplainer/README.md
@@ -49,7 +49,7 @@ export MODEL=xxx;
 
 ## Dataset Preparation for Target Recommender Model
 
-For data preparation, you need to download three raw files: Amazon review, Amazon metadata, ShareGPT
+For data preparation, you need to download three raw files (Amazon reviews, Amazon metadata, and ShareGPT) and put them under `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3/raw_data`:
 * Amazon Video Games 5-core reviews: https://jmcauley.ucsd.edu/data/amazon_v2/categoryFilesSmall/Video_Games_5.json.gz
 * Amazon Video Games metadata: https://jmcauley.ucsd.edu/data/amazon_v2/metaFiles2/meta_Video_Games.json.gz
 * ShareGPT: https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/blob/main/ShareGPT_V3_unfiltered_cleaned_split.json
@@ -83,7 +83,7 @@ For SASRec model:
 ```bash
 bash shell/unirec_sasrec_infer.sh
 ```
-After inference, please copy the contents of `$HOME/UniRec/output/$DATASET_NAME/SASRec/RecExplainer/xxx/` to `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3`
+After inference, please copy the contents of `$HOME/UniRec/output/amazon_video_games_v3/SASRec/RecExplainer/xxx/` to `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3`
 
 Finally, the following files should exist in `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3`: datamaps.json, metadata.json, SASRec.pth, sequential_data.txt, sim_item.txt, test_top.txt, train_top.txt
 
@@ -91,7 +91,7 @@ For MF model:
 ```bash
 bash shell/unirec_mf_infer.sh
 ```
-After inference, please copy the contents of `$HOME/UniRec/output/$DATASET_NAME/MF/RecExplainer/xxx/` to `$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3`
+After inference, please copy the contents of `$HOME/UniRec/output/amazon_video_games_v3/MF/RecExplainer/xxx/` to `$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3`
 
 At the same time, copy these files from `$HOME/RecAI/RecExplainer/data/amazon_video_games_v3` to `$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3`: datamaps.json, metadata.json, sequential_data.txt
 
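The patches above only rewrite paths, but the README lines they touch describe a manual copy step after UniRec inference. As a rough bash sketch that is not part of either patch — `RUN_DIR` is a placeholder for the timestamped `xxx` folder that UniRec writes, and all other paths are taken from the README text above — those copies could be scripted like this:

```bash
# Illustrative only: gather UniRec inference outputs into the RecExplainer data
# folders described in the README. Replace RUN_DIR with the actual run folder.
RUN_DIR=xxx

# SASRec artifacts -> data/amazon_video_games_v3
cp -r "$HOME/UniRec/output/amazon_video_games_v3/SASRec/RecExplainer/$RUN_DIR/." \
      "$HOME/RecAI/RecExplainer/data/amazon_video_games_v3/"

# MF artifacts -> data/mf_amazon_video_games_v3
cp -r "$HOME/UniRec/output/amazon_video_games_v3/MF/RecExplainer/$RUN_DIR/." \
      "$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3/"

# The MF folder also needs these shared files from the SASRec folder
cp "$HOME/RecAI/RecExplainer/data/amazon_video_games_v3/"{datamaps.json,metadata.json,sequential_data.txt} \
   "$HOME/RecAI/RecExplainer/data/mf_amazon_video_games_v3/"
```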