instruct model setup

2025-08-28 17:57:59 +00:00
parent 77c563f358
commit d49b4ff2d5
55 changed files with 27760 additions and 326 deletions
@@ -1,7 +1,7 @@
 """
-2025.8.4
-2025.8.5
-4.55.1
+2025.8.9
+2025.8.10
+4.55.4
 0.21.0
 __UNSLOTH_VERSIONING__
 """
@@ -727,6 +727,7 @@ class UnslothGRPOConfig(GRPOConfig):
        default = -1,
        metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
    )
+    
    def __init__(
        self,
        output_dir = None,
@@ -904,6 +905,7 @@ class UnslothGRPOConfig(GRPOConfig):
        wandb_log_unique_prompts = False,
        vllm_sampling_params = None,
        unsloth_num_chunks = -1,
+        
        **kwargs,
    ):
        if learning_rate < 1e-7: raise FloatingPointError(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
@@ -1116,6 +1118,7 @@ class UnslothGRPOConfig(GRPOConfig):
            wandb_log_unique_prompts = wandb_log_unique_prompts,**kwargs)
        self.vllm_sampling_params = vllm_sampling_params
        self.unsloth_num_chunks = unsloth_num_chunks
+        
 pass

 class _UnslothGRPOTrainer(Trainer):
@@ -2814,7 +2817,7 @@ class UnslothGRPOTrainer(_UnslothGRPOTrainer):
            print('Unsloth: Switching to float32 training since model cannot work with float16')
            force_float32 = True
        mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
-        dtype = getattr(model.config, 'torch_dtype', None)
+        dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
        if dtype is None: dtype = model.get_input_embeddings().dtype
        from unsloth_zoo.utils import _get_dtype
        dtype = _get_dtype(dtype)