@@ -198,7 +198,7 @@ class Attention(nn.Module):
         Args:
             dim (int): Number of input channels.
             num_heads (int): Number of attention heads.
-            qkv_bias (bool: If True, add a learnable bias to query, key, value.
+            qkv_bias (bool): If True, add a learnable bias to query, key, value.
             rel_pos (bool): If True, add relative positional embeddings to the attention map.
             rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
             input_size (int or None): Input resolution for calculating the relative positional
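The hunk above only fixes the missing closing parenthesis on the `qkv_bias` line of the `Attention` docstring. As a rough illustration of what those documented arguments configure, here is a minimal, self-contained multi-head attention sketch. It is not the class touched by this diff: the relative positional branch (`rel_pos`, `rel_pos_zero_init`, `input_size`) is omitted, and the `(B, H, W, C)` input layout is an assumption taken from the windowing utilities documented below.

```python
import torch
import torch.nn as nn


class MiniAttention(nn.Module):
    """Toy multi-head attention driven by the documented args; the relative
    positional embedding branch (rel_pos, rel_pos_zero_init, input_size) is omitted."""

    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = True) -> None:
        super().__init__()
        self.num_heads = num_heads
        self.scale = (dim // num_heads) ** -0.5
        # qkv_bias toggles the learnable bias on the fused query/key/value projection.
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Assumed (B, H, W, C) layout; flatten spatial dims into a token axis.
        B, H, W, C = x.shape
        qkv = self.qkv(x.reshape(B, H * W, C))          # (B, N, 3C)
        qkv = qkv.reshape(B, H * W, 3, self.num_heads, -1)
        q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(0)  # each (B, heads, N, head_dim)
        attn = (q * self.scale) @ k.transpose(-2, -1)   # (B, heads, N, N)
        out = (attn.softmax(dim=-1) @ v).transpose(1, 2).reshape(B, H * W, C)
        return self.proj(out).reshape(B, H, W, C)


# Example: dim=768, num_heads=12 on a 14x14 feature map.
attn = MiniAttention(dim=768, num_heads=12, qkv_bias=True)
print(attn(torch.randn(2, 14, 14, 768)).shape)  # torch.Size([2, 14, 14, 768])
```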
@@ -270,7 +270,7 @@ def window_unpartition(
     """
     Window unpartition into original sequences and removing padding.
     Args:
-        x (tensor): input tokens with [B * num_windows, window_size, window_size, C].
+        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
         window_size (int): window size.
         pad_hw (Tuple): padded height and width (Hp, Wp).
         hw (Tuple): original height and width (H, W) before padding.
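The second hunk renames the documented argument from `x` to `windows` so the docstring matches the function signature. For context, here is a sketch of what a window-unpartition step with these arguments typically looks like; the function body is not part of this diff, so the reshape-and-crop logic below is an assumed reference implementation rather than the repository's code.

```python
from typing import Tuple

import torch


def window_unpartition_sketch(
    windows: torch.Tensor, window_size: int, pad_hw: Tuple[int, int], hw: Tuple[int, int]
) -> torch.Tensor:
    """Stitch windows of shape [B * num_windows, window_size, window_size, C]
    back into a [B, H, W, C] map and crop away the padding used to make the
    padded size (Hp, Wp) divisible by window_size."""
    Hp, Wp = pad_hw
    H, W = hw
    B = windows.shape[0] // (Hp * Wp // window_size // window_size)
    x = windows.view(B, Hp // window_size, Wp // window_size, window_size, window_size, -1)
    x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, Hp, Wp, -1)
    if Hp > H or Wp > W:
        x = x[:, :H, :W, :].contiguous()  # remove padding to recover the original (H, W)
    return x


# Round-trip check: a padded 16x16 map in 8x8 windows, cropped back to 14x14.
win = torch.randn(2 * 4, 8, 8, 32)  # B=2, num_windows=4, window_size=8, C=32
print(window_unpartition_sketch(win, 8, (16, 16), (14, 14)).shape)  # torch.Size([2, 14, 14, 32])
```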