instruct model setup
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""
|
||||
2025.8.4
|
||||
2025.8.5
|
||||
4.55.1
|
||||
2025.8.9
|
||||
2025.8.10
|
||||
4.55.4
|
||||
0.21.0
|
||||
__UNSLOTH_VERSIONING__
|
||||
"""
|
||||
@@ -727,6 +727,7 @@ class UnslothGRPOConfig(GRPOConfig):
|
||||
default = -1,
|
||||
metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_dir = None,
|
||||
@@ -904,6 +905,7 @@ class UnslothGRPOConfig(GRPOConfig):
|
||||
wandb_log_unique_prompts = False,
|
||||
vllm_sampling_params = None,
|
||||
unsloth_num_chunks = -1,
|
||||
|
||||
**kwargs,
|
||||
):
|
||||
if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
|
||||
@@ -1116,6 +1118,7 @@ class UnslothGRPOConfig(GRPOConfig):
|
||||
wandb_log_unique_prompts = wandb_log_unique_prompts,**kwargs)
|
||||
self.vllm_sampling_params = vllm_sampling_params
|
||||
self.unsloth_num_chunks = unsloth_num_chunks
|
||||
|
||||
pass
|
||||
|
||||
class _UnslothGRPOTrainer(Trainer):
|
||||
@@ -2814,7 +2817,7 @@ class UnslothGRPOTrainer(_UnslothGRPOTrainer):
|
||||
print('Unsloth: Switching to float32 training since model cannot work with float16')
|
||||
force_float32 = True
|
||||
mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
|
||||
dtype = getattr(model.config, 'torch_dtype', None)
|
||||
dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
|
||||
if dtype is None: dtype = model.get_input_embeddings().dtype
|
||||
from unsloth_zoo.utils import _get_dtype
|
||||
dtype = _get_dtype(dtype)
|
||||
|
||||
Reference in New Issue
Block a user