# Hugging Face Space app — DeepSeek Coder 1.3B (GGUF) served via llama.cpp + Gradio
import multiprocessing
import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

import config
# ----------------------------
# Model download
# ----------------------------
# Token is read from the environment; only required for gated/private repos.
HF_TOKEN = os.environ.get("HF_TOKEN")

print("Downloading model from Hugging Face Hub...")
model_path = hf_hub_download(
    repo_id=config.MODEL_REPO,
    filename=config.MODEL_FILE,
    token=HF_TOKEN,
    cache_dir="/tmp/hf_cache",  # /tmp is writable on Spaces hardware
)
print("Model downloaded successfully:", model_path)
# ----------------------------
# Model load
# ----------------------------
# Use every available core for CPU inference.
CPU_THREADS = multiprocessing.cpu_count()
print("CPU Threads available:", CPU_THREADS)

print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=config.CTX_SIZE,
    n_threads=CPU_THREADS,
    n_batch=512,     # prompt-processing batch size
    use_mmap=True,   # memory-map weights rather than copying them into RAM
    verbose=False,
)
print("Model loaded successfully.")
# ----------------------------
# Prompt Builder
# ----------------------------
SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
Write clean and efficient code.
Only explain when asked.
"""


def build_prompt(message, history):
    """Build a plain-text completion prompt from the chat transcript.

    Args:
        message: The latest user message.
        history: Prior turns, either as (user, assistant) pairs (Gradio
            "tuples" format) or as OpenAI-style {"role": ..., "content": ...}
            dicts (Gradio "messages" format). Both are accepted so this
            keeps working if ChatInterface is switched to type="messages".
            May be None or empty on the first turn.

    Returns:
        The full prompt string, ending in "Assistant:" so the model
        continues with its own reply.
    """
    history = history or []

    if history and isinstance(history[0], dict):
        # Messages format: fold consecutive user/assistant turns into pairs.
        pairs = []
        pending_user = None
        for turn in history:
            role = turn.get("role")
            if role == "user":
                pending_user = turn.get("content", "")
            elif role == "assistant" and pending_user is not None:
                pairs.append((pending_user, turn.get("content", "")))
                pending_user = None
    else:
        pairs = list(history)

    prompt = SYSTEM_PROMPT + "\n\n"
    for user_msg, assistant_msg in pairs:
        prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant:"
    return prompt
# ----------------------------
# Response generation
# ----------------------------
def chat(message, history):
    """Stream a model reply for the Gradio ChatInterface.

    Args:
        message: Latest user message.
        history: Prior chat turns (may be None on the first call).

    Yields:
        The partial assistant reply, growing as tokens arrive, which
        ChatInterface renders as a live-updating message.
    """
    history = history or []
    prompt = build_prompt(message, history)

    output = ""
    for chunk in llm(
        prompt,
        max_tokens=config.MAX_TOKENS,
        temperature=config.TEMPERATURE,
        top_p=0.95,
        # Without stop sequences, a raw-completion model will often keep
        # writing the transcript and invent the next "User:" turn itself.
        stop=["User:", "\nUser:"],
        stream=True,
    ):
        output += chunk["choices"][0]["text"]
        yield output
# ----------------------------
# Gradio app
# ----------------------------
demo = gr.ChatInterface(
    fn=chat,
    title="DeepSeek Coder 1.3B",
    description="Production GGUF model running on llama.cpp",
)

# Bind to all interfaces on the standard Spaces port.
demo.launch(server_name="0.0.0.0", server_port=7860)