[1/N] Remove CacheConfig import in all model files (#1658)
ByronHsu authored Oct 14, 2024
1 parent 02bc957 commit 56503d9
Showing 30 changed files with 64 additions and 91 deletions.
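Every rendered diff below applies the same mechanical edit: delete the module-level "from vllm.config import CacheConfig" import (or trim CacheConfig out of a combined import), and loosen the constructor parameter from "cache_config: Optional[CacheConfig] = None" to a plain "cache_config=None". A minimal sketch of the pattern follows; ExampleModel is a hypothetical class used only for illustration and is not part of this commit.

# Minimal sketch of the per-file pattern (ExampleModel is hypothetical and is
# not one of the classes touched by this commit).
#
# Before, each model file had at module level:
#     from vllm.config import CacheConfig
# and in the constructor signature:
#     cache_config: Optional[CacheConfig] = None,
#
# After: the keyword stays, with the same default, but is left untyped, so the
# vllm.config import can be removed entirely.
class ExampleModel:
    def __init__(self, config, cache_config=None, quant_config=None) -> None:
        self.config = config
        self.quant_config = quant_config
        # cache_config is still accepted for interface compatibility; this
        # sketch simply ignores it, mirroring the relaxed annotation above.

Because the parameter and its default are unchanged, call sites that pass (or omit) a cache config presumably keep working; only the annotation, and with it the dependency on vllm.config, goes away.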
5 changes: 2 additions & 3 deletions python/sglang/srt/models/baichuan.py
@@ -24,7 +24,6 @@
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -330,7 +329,7 @@ def __init__(
         self,
         config: PretrainedConfig,
         position_embedding: str,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -404,7 +403,7 @@ class BaichuanForCausalLM(BaiChuanBaseForCausalLM):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         if config.hidden_size == 4096:  # baichuan2 7b
11 changes: 5 additions & 6 deletions python/sglang/srt/models/chatglm.py
@@ -22,7 +22,6 @@
 import torch
 from torch import nn
 from torch.nn import LayerNorm
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -52,7 +51,7 @@ def __init__(
         self,
         config,
         layer_id: int = 0,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -188,7 +187,7 @@ def __init__(
         self,
         config,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -260,7 +259,7 @@ class GLMTransformer(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -308,7 +307,7 @@ class ChatGLMModel(nn.Module):
     def __init__(
         self,
         config,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -359,7 +358,7 @@ class ChatGLMForCausalLM(nn.Module):
     def __init__(
         self,
         config: ChatGLMConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoraConfig] = None,
     ):
3 changes: 1 addition & 2 deletions python/sglang/srt/models/commandr.py
@@ -45,7 +45,6 @@
 from torch import nn
 from torch.nn.parameter import Parameter
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -320,7 +319,7 @@ def __init__(
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
3 changes: 1 addition & 2 deletions python/sglang/srt/models/dbrx.py
@@ -20,7 +20,6 @@

 import torch
 import torch.nn as nn
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -368,7 +367,7 @@ def __init__(
         self,
         config: DbrxConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ):
         super().__init__()
         self.config = config
9 changes: 4 additions & 5 deletions python/sglang/srt/models/deepseek.py
@@ -21,7 +21,6 @@
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -185,7 +184,7 @@ def __init__(
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -262,7 +261,7 @@ def __init__(
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -331,7 +330,7 @@ class DeepseekModel(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -374,7 +373,7 @@ class DeepseekForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
11 changes: 5 additions & 6 deletions python/sglang/srt/models/deepseek_v2.py
@@ -21,7 +21,6 @@
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
@@ -188,7 +187,7 @@ def __init__(
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -336,7 +335,7 @@ def __init__(
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         layer_id=None,
     ) -> None:
@@ -498,7 +497,7 @@ def __init__(
         self,
         config: PretrainedConfig,
         layer_id: int,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -594,7 +593,7 @@ class DeepseekV2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -640,7 +639,7 @@ class DeepseekV2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
3 changes: 1 addition & 2 deletions python/sglang/srt/models/exaone.py
@@ -21,7 +21,6 @@

 import torch
 from torch import nn
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ def __init__(
         self,
         config,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
4 changes: 2 additions & 2 deletions python/sglang/srt/models/gemma.py
@@ -21,7 +21,7 @@
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
@@ -279,7 +279,7 @@ def __init__(
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         del lora_config  # Unused.
         super().__init__()
10 changes: 5 additions & 5 deletions python/sglang/srt/models/gemma2.py
@@ -20,7 +20,7 @@
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size

 # from vllm.model_executor.layers.rotary_embedding import GemmaRotaryEmbedding
@@ -105,7 +105,7 @@ def __init__(
         head_dim: int,
         max_position_embeddings: int,
         rope_theta: float,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -190,7 +190,7 @@ def __init__(
         self,
         layer_idx: int,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -257,7 +257,7 @@ class Gemma2Model(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ) -> None:
         super().__init__()
@@ -336,7 +336,7 @@ class Gemma2ForCausalLM(nn.Module):
     def __init__(
         self,
         config: PretrainedConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ) -> None:
10 changes: 5 additions & 5 deletions python/sglang/srt/models/gpt_bigcode.py
@@ -21,7 +21,7 @@
 import torch
 from torch import nn
 from transformers import GPTBigCodeConfig
-from vllm.config import CacheConfig, LoRAConfig
+from vllm.config import LoRAConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
@@ -44,7 +44,7 @@ def __init__(
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -145,7 +145,7 @@ def __init__(
         self,
         layer_id: int,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
     ):
         super().__init__()
@@ -183,7 +183,7 @@ class GPTBigCodeModel(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
@@ -243,7 +243,7 @@ class GPTBigCodeForCausalLM(nn.Module):
     def __init__(
         self,
         config: GPTBigCodeConfig,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
         quant_config: Optional[QuantizationConfig] = None,
         lora_config: Optional[LoRAConfig] = None,
     ):
3 changes: 1 addition & 2 deletions python/sglang/srt/models/grok.py
@@ -23,7 +23,6 @@
 import torch.nn.functional as F
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
@@ -289,7 +288,7 @@ def __init__(
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
3 changes: 1 addition & 2 deletions python/sglang/srt/models/internlm2.py
@@ -21,7 +21,6 @@
 import torch
 from torch import nn
 from transformers import PretrainedConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -254,7 +253,7 @@ def __init__(
         self,
         config: PretrainedConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
3 changes: 1 addition & 2 deletions python/sglang/srt/models/llama.py
@@ -22,7 +22,6 @@
 import torch
 from torch import nn
 from transformers import LlamaConfig
-from vllm.config import CacheConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
@@ -295,7 +294,7 @@ def __init__(
         self,
         config: LlamaConfig,
         quant_config: Optional[QuantizationConfig] = None,
-        cache_config: Optional[CacheConfig] = None,
+        cache_config=None,
     ) -> None:
         super().__init__()
         self.config = config
(Diffs for the remaining 17 changed files are not rendered here.)
