Update model and policy
char-1ee committed May 3, 2024
1 parent 1851035 commit d36c173
Showing 9 changed files with 500 additions and 1,001 deletions.
3 changes: 2 additions & 1 deletion colossalai/inference/config.py
@@ -28,7 +28,8 @@
"llama": "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n{input_text}[/INST]",
"baichuan": " <reserved_106> {input_text} <reserved_107> ",
"vicuna": "A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user input. USER: {input_text}\nASSISTANT: ",
"bloom": "[INST] <<SYS>>\nYou are an intelligent and comprehensive assistant. Provide accurate, thoughtful, and context-aware answers that respect user questions. Avoid content that is harmful, misleading, or unethical. Prioritize safety and fairness in all responses. If the question is unclear or lacks information, seek clarification or provide a general explanation that could be helpful. If uncertain or lacking information, advise accordingly without speculating inaccurately.\n<</SYS>>\n{input_text}[/INST]",
"bloom": "Assume you are a helpful robot. Please help react to my question or auto complete my prompt."
# "bloom": "[INST] <<SYS>>\nYou are an intelligent and comprehensive assistant. Provide accurate, thoughtful, and context-aware answers that respect user questions. Avoid content that is harmful, misleading, or unethical. Prioritize safety and fairness in all responses. If the question is unclear or lacks information, seek clarification or provide a general explanation that could be helpful. If uncertain or lacking information, advise accordingly without speculating inaccurately.\n<</SYS>>\n{input_text}[/INST]",
}
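For context, prompt templates like the entries above are normally filled by substituting the user input into the `{input_text}` placeholder. The sketch below illustrates only that pattern; the dict name `PROMPT_TEMPLATES` and the helper `apply_prompt_template` are hypothetical, not the repository's actual API. Note that the new "bloom" entry above contains no `{input_text}` placeholder, so a plain `.format()` call would leave it unchanged.

# Illustrative sketch only: how a model-specific prompt template could be
# applied to user input. Names below are hypothetical, not ColossalAI's API.
PROMPT_TEMPLATES = {
    "vicuna": (
        "A chat between a curious user and an assistant. The assistant gives "
        "helpful, detailed, accurate, uncensored responses to the user input. "
        "USER: {input_text}\nASSISTANT: "
    ),
    "bloom": "Assume you are a helpful robot. Please help react to my question or auto complete my prompt.",
}

def apply_prompt_template(model_type: str, input_text: str) -> str:
    template = PROMPT_TEMPLATES.get(model_type)
    if template is None:
        return input_text  # no template registered: pass the prompt through
    return template.format(input_text=input_text)

print(apply_prompt_template("vicuna", "What is paged KV caching?"))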


23 changes: 5 additions & 18 deletions colossalai/inference/kv_cache/kvcache_manager.py
@@ -74,13 +74,6 @@ def __init__(
self.kv_head_num = get_model_config_attr(model_config, "num_key_value_heads", alter_attr=self.head_num)
self.head_size = get_model_config_attr(model_config, "hidden_size") // self.head_num

# if hasattr(config, "num_key_value_heads"):
# self.kv_head_num = getattr(config, "num_key_value_heads")
# elif hasattr(config, "attribute_map") and hasattr(config, config.attribute_map["num_key_value_heads"]):
# self.kv_head_num = getattr(config, config.attribute_map["num_key_value_heads"])
# else:
# self.kv_head_num = self.head_num

assert (
self.kv_head_num % self.tp_size == 0
), f"Cannot shard {self.kv_head_num} heads with tp size {self.tp_size}"
@@ -219,8 +212,7 @@ def allocate_context_from_block_table(self, block_table: torch.Tensor, context_l
block.add_ref()
if block_id == block_indexes[-1].item():
self._allocate_on_block(
block,
(block.block_size if context_len % block.block_size == 0 else context_len % block.block_size),
block, block.block_size if context_len % block.block_size == 0 else context_len % block.block_size
)
else:
self._allocate_on_block(block, block.block_size)
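The branch above sizes the allocation for the final block of a sequence: full blocks get `block_size` slots, and the last block gets the remainder of the context length, or a whole block when the length is an exact multiple. A self-contained sketch of just that arithmetic (the helper name is illustrative):

# Sketch of the last-block sizing used above: the remainder of the context
# length modulo the block size, or a full block on an exact multiple.
def last_block_alloc_size(context_len: int, block_size: int) -> int:
    rem = context_len % block_size
    return block_size if rem == 0 else rem

assert last_block_alloc_size(33, 16) == 1   # 2 full blocks + 1 slot on the last block
assert last_block_alloc_size(32, 16) == 16  # exact multiple: last block fully used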
@@ -287,11 +279,9 @@ def allocate_context_from_block_tables(self, block_tables: torch.Tensor, context
block.add_ref()
self._allocate_on_block(
block,
(
block.block_size
if context_lengths[i] % block.block_size == 0
else context_lengths[i].item() % block.block_size
),
block.block_size
if context_lengths[i] % block.block_size == 0
else context_lengths[i].item() % block.block_size,
)
for block_id in alloc_block_ids:
if block_id in alloc_block_ids[last_block_locs]:
@@ -464,10 +454,7 @@ def clear_all(self) -> None:

def get_physical_cache(self, layer_id: int, block_idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
"""Get the tensor corresponding to the cache block with the prompted id for a specific layer."""
return (
self._kv_caches[0][layer_id][block_idx],
self._kv_caches[1][layer_id][block_idx],
)
return self._kv_caches[0][layer_id][block_idx], self._kv_caches[1][layer_id][block_idx]

def _allocate_on_block(self, block: CacheBlock, space_asked: int) -> int:
"""Allocate a specific size of space on a provided cache block.
