# Hugging Face model ID or local checkpoint path for Meta-Llama-3-8B.
PRETRAINED_MODEL_PATH="PATH_TO_Meta-Llama-3-8B"
# Run the single-GPU generation example; quote the path so spaces survive.
cd ColossalAI/examples/inference/ || exit 1
colossalai run --nproc_per_node 1 llama_generation.py -m "$PRETRAINED_MODEL_PATH" --max_length 80
# Hugging Face model ID or local checkpoint path for Meta-Llama-3-8B.
PRETRAINED_MODEL_PATH="PATH_TO_Meta-Llama-3-8B"
git pull # update example benchmark from branch feature/colossal-infer
cd ColossalAI/examples/inference/ || exit 1
# batch 32, input len 128, output len 256; quote the model path expansion.
python benchmark_llama3.py -m llama3-8b -b 32 -s 128 -o 256 -p "$PRETRAINED_MODEL_PATH"
git clone git@github.com:Dao-AILab/flash-attention.git
cd flash-attention || exit 1
# Build the optional fused kernels from the flash-attention root folder.
# Each extension lives in its own csrc/ subdirectory; run each install in a
# subshell so we return to the root between builds (the original snippet's
# consecutive `cd csrc/...` calls resolved relative to the previous
# subdirectory and therefore failed).
(cd csrc/xentropy && pip install .)
(cd csrc/layer_norm && pip install .)
(cd csrc/rotary && pip install .)
# At the local root folder, same level as flash-attention.
# The original snippet was missing the `git` command word.
git clone git@github.com:hpcaitech/ColossalAI.git
# At the local root folder, same level as flash-attention and ColossalAI.
# -p creates the parent directory as needed and is a no-op if it already exists.
mkdir -p training_outputs/checkpoints \
         training_outputs/configs \
         training_outputs/tensorboards
cd ColossalAI/applications/Colossal-LLaMA/ || exit 1
cp hostfile.example hostfile
# Edit the copied hostfile with the IPs/hostnames of your training nodes.
{"source": "", "target": "Lionel Andrés Messi (Spanish pronunciation: [ljoˈnel anˈdɾes ˈmesi]; born 24 June 1987), also known as Leo Messi, is an Argentine professional footballer who plays as a forward for and captains both Major League Soccer club Inter Miami and the Argentina national team.", "category": "sports"}
{"source": "猜谜语:一身卷卷细毛,吃的青青野草,过了数九寒冬,无私献出白毛。(打一动物)", "target": "白羊", "category": "riddle"}
# Tokenize and splice raw JSONL corpora into arrow shards for pre-training.
# The original placeholder string contained a stray '>' character.
python prepare_pretrain_dataset.py \
    --data_input_dirs "Directory that contains one or more JSONL file(s)" \
    --tokenizer_dir "Tokenizer directory" \
    --data_output_dirs "Spliced tokenized output" \
    --max_length 8192 \
    --num_spliced_dataset_bins 10
{"messages": [{"from": "human", "content": "What are the three primary colors?"}, {"from": "assistant", "content": "The three primary colors are red, blue, and yellow."}]}
{"messages": [{"from": "human", "content": "解释个人电脑和服务器之间的区别。"}, {"from": "assistant", "content": "个人电脑和服务器是两种不同类型的计算机系统,它们的主要区别在于用途、硬件配置和性能。 个人电脑,顾名思义,是为个人使用而设计的计算机。它们通常用于日常的工作、娱乐和学习,可以运行各种各样的应用程序和游戏。个人电脑的硬件配置一般是按照标准配置来设计的,不过也可以根据个人需求进行定制。 而服务器是为了满足大量用户的需求而设计的计算机系统,它们通常用于为用户提供各种网络服务,如网站、电子邮件和文件传输等。服务器通常需要高性能的硬件配置,并且可以承受高负载和长时间的运行。由于服务器需要支持大量用户的访问,它们通常配备多核处理器、大容量内存和大容量硬盘驱动器,以提高系统的运行速度和稳定性。 总之,个人电脑和服务器之间的主要区别在于它们的用途、硬件配置和性能。个人电脑用于个人使用,而服务器用于支持大量用户的访问。服务器的硬件配置通常比个人电脑更高,以保证系统的性能和稳定性。"}]}
# Tokenize conversation JSONL data for supervised fine-tuning.
# Fixes the doubled extension in the original (`prepare_sft_dataset.py.py`)
# and the stray '>' in the placeholder string.
python prepare_sft_dataset.py \
    --data_input_dirs "Directory that contains one or more JSONL file(s)" \
    --tokenizer_dir "Tokenizer directory" \
    --data_output_dirs "Spliced tokenized output" \
    --max_length 8192 \
    --num_spliced_dataset_bins 10 \
    --llama_version 3
After preprocessing, several subfolders are created under the `data_output_dirs`
directory. These subfolders contain data suitable for direct training, particularly
the contents of the `arrow` folder.
/root/ColossalAI/ColossalAI/applications/Colossal-LLaMA/
cp train.example.sh train.sh
# Update the copied training script with your local paths before launching.
# xxx indicates your local path to the directory that contains ColossalAI and training_outputs
PROJECT_NAME="LLaMA-3-8B-cpt"
PARENT_SAVE_DIR="xxx/training_outputs/checkpoints" # Path to a folder to save checkpoints
PARENT_TENSORBOARD_DIR="xxx/training_outputs/tensorboards" # Path to a folder to save logs
PARENT_CONFIG_FILE="xxx/training_outputs/configs" # Path to a folder to save training config logs
PRETRAINED_MODEL_PATH="" # huggingface or local model path
# Taking the pre-set processed dataset as an example
# xxx indicates your local path to the directory that contains processed arrow folders
declare -a dataset=(
    /xxx/arrow/part-00000
    /xxx/arrow/part-00001
    /xxx/arrow/part-00002
)
TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
FULL_PROJECT_NAME="${PROJECT_NAME}-${TIMESTAMP}"
# Join parent dir and run name with '/' — the original concatenated them
# directly, yielding paths like ".../checkpointsLLaMA-3-8B-cpt-...".
SAVE_DIR="${PARENT_SAVE_DIR}/${FULL_PROJECT_NAME}"
# TENSORBOARD_DIR is referenced by the train.py launch command but was never
# defined in the original snippet (only PARENT_TENSORBOARD_DIR was).
TENSORBOARD_DIR="${PARENT_TENSORBOARD_DIR}/${FULL_PROJECT_NAME}"
CONFIG_FILE="${PARENT_CONFIG_FILE}/${FULL_PROJECT_NAME}.json"
# Launch continued pre-training on 8 GPUs per node across the hosts in `hostfile`.
# All variable expansions are quoted, and the stray trailing '\' after the last
# flag (which in the original swallowed the following command line) is removed.
colossalai run --nproc_per_node 8 --hostfile hostfile --master_port 31312 train.py \
    --pretrained "$PRETRAINED_MODEL_PATH" \
    --dataset "${dataset[@]}" \
    --plugin "zero2" \
    --save_interval 400 \
    --save_dir "$SAVE_DIR" \
    --tensorboard_dir "$TENSORBOARD_DIR" \
    --config_file "$CONFIG_FILE" \
    --num_epochs 1 \
    --micro_batch_size 2 \
    --lr 1e-4 \
    --mixed_precision "bf16" \
    --grad_clip 1.0 \
    --weight_decay 0.01 \
    --warmup_steps 100 \
    --use_grad_checkpoint \
    --use_flash_attn
git clone https://github.com/hpcaitech/ColossalAI
cd ColossalAI/examples/language/llama || exit 1
# Install ColossalAI from source with its kernel extensions built (BUILD_EXT=1).
BUILD_EXT=1 pip install -U git+https://github.com/hpcaitech/ColossalAI
pip install -r requirements.txt
# Make the sibling example packages importable; quote the command substitution
# so a path containing spaces survives word splitting.
export PYTHONPATH="$(realpath ..)"
# 3D parallel benchmark: TP=4, PP=4, ZeRO stage 1, seq len 8192, global batch 128.
colossalai run --nproc_per_node 8 --hostfile HOSTFILE benchmark.py -c Meta-Llama-3-70B -x -g -p 3d --tp 4 --pp 4 --zero 1 -l 8192 --mbs 2 -b 128 --custom-ckpt