あれ
2025/4/13 14:23:00
ちょっと寒いしGPU回して暖を取るか
『A custom autocomplete model in 30 minutes using Unsloth (Community post)』を試す。
Colaboratoryでポチポチしたらなんか学習が動かせたっぽい。
なお部屋は温まっていない模様。
ちょっと寒いしGPU回して暖を取るか
『A custom autocomplete model in 30 minutes using Unsloth (Community post)』を試す。
Colaboratoryでポチポチしたらなんか学習が動かせたっぽい。
なお部屋は温まっていない模様。
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
"unsloth/Qwen2.5-Coder-1.5B-bnb-4bit",
"unsloth/mistral-7b-v0.3-bnb-4bit", # New Mistral v3 2x faster!
"unsloth/llama-3-8b-bnb-4bit", # Llama-3 15 trillion tokens model 2x faster!
"unsloth/llama-3-8b-Instruct-bnb-4bit",
"unsloth/llama-3-70b-bnb-4bit",
"unsloth/Phi-3-mini-4k-instruct", # Phi-3 2x faster!
"unsloth/Phi-3-medium-4k-instruct",
"unsloth/Qwen2-0.5b-bnb-4bit", # Qwen2 2x faster!
"unsloth/Qwen2-1.5b-bnb-4bit",
"unsloth/Qwen2-7b-bnb-4bit",
"unsloth/Qwen2-72b-bnb-4bit",
"unsloth/gemma-7b-bnb-4bit", # Gemma 2.2x faster!
] # Try more models at https://huggingface.co/unsloth!
if __name__ == '__main__':
model, tokenizer = FastLanguageModel.from_pretrained(
model_name = "unsloth/Qwen2.5-Coder-0.5B-bnb-4bit", # Reminder we support ANY Hugging Face model!
max_seq_length = max_seq_length,
dtype = dtype,
load_in_4bit = load_in_4bit,
# token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
model = FastLanguageModel.get_peft_model(
model,
r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 16,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
instructions = examples["instruction"]
inputs = examples["input"]
outputs = examples["output"]
texts = []
for instruction, input, output in zip(instructions, inputs, outputs):
# Must add EOS_TOKEN, otherwise your generation will go on forever!
text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
texts.append(text)
return { "text" : texts, }
pass
from datasets import load_dataset
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
trainer = SFTTrainer(
model = model,
tokenizer = tokenizer,
train_dataset = dataset,
dataset_text_field = "text",
max_seq_length = max_seq_length,
dataset_num_proc = 2,
args = TrainingArguments(
per_device_train_batch_size = 2,
gradient_accumulation_steps = 4,
# Use num_train_epochs = 1, warmup_ratio for full training runs!
# warmup_steps = 5,
# max_steps = 2500,
max_steps = 100,
# num_train_epochs = 1,
learning_rate = 2e-4,
fp16 = not is_bfloat16_supported(),
bf16 = is_bfloat16_supported(),
logging_steps = 1,
optim = "adamw_8bit",
weight_decay = 0.01,
lr_scheduler_type = "linear",
seed = 3407,
output_dir = "outputs",
report_to = "none", # Use this for WandB etc
),
)
trainer_stats = trainer.train()
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Unsloth has 2x faster inference!
inputs = tokenizer(
[
alpaca_prompt.format(
"Continue the fibonnaci sequence.", # instruction
"1, 1, 2, 3, 5, 8", # input
"", # output - leave this blank for generation!
)
], return_tensors = "pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving