# Coder / app.py
# (Hugging Face Space file; commit 0a7b900 "Create app.py" by Anonymous0045)
import os
import multiprocessing
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import config
# ----------------------------------------------------------------------
# Download the GGUF model file from the Hugging Face Hub.
# ----------------------------------------------------------------------
# Optional access token; None is acceptable for public repositories.
HF_TOKEN = os.environ.get("HF_TOKEN")

print("Downloading model from Hugging Face Hub...")
model_path = hf_hub_download(
    repo_id=config.MODEL_REPO,
    filename=config.MODEL_FILE,
    token=HF_TOKEN,
    cache_dir="/tmp/hf_cache",  # writable location on HF Spaces
)
print("Model downloaded successfully:", model_path)
# ----------------------------------------------------------------------
# Load the model into memory with llama.cpp.
# ----------------------------------------------------------------------
# Use every available core for CPU-only inference.
CPU_THREADS = multiprocessing.cpu_count()
print("CPU Threads available:", CPU_THREADS)

print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=config.CTX_SIZE,     # context window size from project config
    n_threads=CPU_THREADS,
    n_batch=512,               # prompt-processing batch size
    use_mmap=True,             # memory-map the GGUF file rather than copying it
    verbose=False,
)
print("Model loaded successfully.")
# ----------------------------------------------------------------------
# Prompt construction
# ----------------------------------------------------------------------
SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
Write clean and efficient code.
Only explain when asked.
"""


def build_prompt(message, history):
    """Assemble the full text prompt sent to the model.

    Starts with the system prompt, replays each past (user, assistant)
    exchange, and ends with the new user message followed by an open
    "Assistant:" cue for the model to complete.
    """
    lines = [SYSTEM_PROMPT, "\n\n"]
    lines.extend(
        f"User: {user_turn}\nAssistant: {bot_turn}\n"
        for user_turn, bot_turn in history
    )
    lines.append(f"User: {message}\nAssistant:")
    return "".join(lines)
# ----------------------------------------------------------------------
# Streaming chat handler
# ----------------------------------------------------------------------
def chat(message, history):
    """Generate a streamed reply to *message* given prior chat *history*.

    Yields the cumulative response text after each token so the Gradio
    UI updates progressively.
    """
    prompt = build_prompt(message, history or [])
    pieces = []
    stream = llm(
        prompt,
        max_tokens=config.MAX_TOKENS,
        temperature=config.TEMPERATURE,
        top_p=0.95,
        stream=True,
    )
    for chunk in stream:
        pieces.append(chunk["choices"][0]["text"])
        yield "".join(pieces)
# ----------------------------------------------------------------------
# Gradio chat UI
# ----------------------------------------------------------------------
demo = gr.ChatInterface(
    fn=chat,
    title="DeepSeek Coder 1.3B",
    description="Production GGUF model running on llama.cpp",
)

# Bind to all interfaces on the standard Hugging Face Spaces port.
demo.launch(server_name="0.0.0.0", server_port=7860)