
Init

Open Sharat Patil requested to merge init into main
1 file changed  +1  −1
@@ -6,7 +6,7 @@ from api_writer.utils import load_config,load_templates
 import time
 model_id = os.environ['BASE_MODEL'] #"unsloth/tinyllama-bnb-4bit"#
-llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, quantization="bitsandbytes",load_format="bitsandbytes",enable_lora=True,enable_prefix_caching=True,max_lora_rank=64)
+llm = LLM(model=model_id, dtype=torch.bfloat16, trust_remote_code=True, quantization="bitsandbytes",load_format="bitsandbytes",enable_lora=True,enable_prefix_caching=True,max_lora_rank=32,max_loras=1)
 gpu_stats = torch.cuda.get_device_properties(0)
 start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
 max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
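
The change lowers max_lora_rank from 64 to 32 and caps the engine at a single resident adapter (max_loras=1), so any LoRA served through this engine must have rank 32 or less. A minimal sketch of how the LoRA-enabled engine would then be exercised at generation time (the adapter name, path, and prompt below are hypothetical, not taken from this repository):

    from vllm import SamplingParams
    from vllm.lora.request import LoRARequest

    # Hypothetical adapter; its rank must not exceed max_lora_rank=32.
    lora = LoRARequest("api_writer_adapter", 1, "/path/to/adapter")

    params = SamplingParams(temperature=0.0, max_tokens=256)
    # With max_loras=1, only one adapter can be active in a batch at a time.
    outputs = llm.generate(["Write a Python client for the example API."], params, lora_request=lora)
    print(outputs[0].outputs[0].text)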