Visualizing Sinusoidal Positional Embeddings and Rotary Positional Embeddings

Rotary Positional Embeddings (RoPE) encode relative positional information among the tokens in a context. This article provides code to visualize Sinusoidal Positional Embeddings and Rotary Positional Embeddings without going into the underlying math. If you are curious about the mathematical details, see the EleutherAI blog post [2] and the DeepLearningHero video [4] listed in the References.

Visualizing the chaos of Sinusoidal Positional Embeddings

  • In Sinusoidal Positional Embeddings, angles and magnitudes undergo dramatic changes as the position of the token changes.
  • It becomes challenging to capture any discernible pattern.
  • Perplexity tends to increase due to the chaotic nature of the Embeddings as the sequence length increases.
from google.colab import output
output.enable_custom_widget_manager()
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import numpy as np


def plot_embeddings(embeddings, title):
    """Draw embedding vectors as coloured arrows starting at the origin.

    Only the first two components of each vector are plotted (as X and Y).
    Each vector gets a line trace and an arrowhead annotation in the same
    colour, and the figure carries one animation frame per vector together
    with a "Play" button that advances one frame every 500 ms.

    Args:
        embeddings: Sequence of vectors, each with at least two components.
            May be empty, in which case a figure without traces is returned.
        title: Title shown above the plot.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    colours = [
        "#1f77b4",  # muted blue
        "#ff7f0e",  # safety orange
        "#2ca02c",  # cooked asparagus green
        "#d62728",  # brick red
        "#9467bd",  # muted purple
        "#8c564b",  # chestnut brown
        "#e377c2",  # raspberry yogurt pink
        "#7f7f7f",  # middle gray
        "#bcbd22",  # curry yellow-green
        "#17becf",  # blue-teal
    ]

    def colour_for(index):
        # Wrap around so more than len(colours) vectors never index past
        # the end of the palette.
        return colours[index % len(colours)]

    def vector_trace(index, vector):
        # Line from the origin to the vector tip; the colour is set
        # explicitly so the line always matches its arrowhead.
        return go.Scatter(
            x=[0, vector[0]],
            y=[0, vector[1]],
            mode="lines",
            name=f"Vector {index}",
            line=dict(color=colour_for(index)),
        )

    # Static view: one trace per vector
    data = [vector_trace(i, vec) for i, vec in enumerate(embeddings)]

    layout = go.Layout(
        title=title, xaxis=dict(title="X"), yaxis=dict(title="Y"), showlegend=True
    )
    fig = go.Figure(data=data, layout=layout)

    # Arrowheads: annotations anchored at the origin (ax, ay) and pointing at
    # the vector tip (x, y), all expressed in data coordinates.
    for i, vec in enumerate(embeddings):
        fig.add_annotation(
            x=vec[0],
            y=vec[1],
            ax=0,
            ay=0,
            axref="x",
            ayref="y",
            xref="x",
            yref="y",
            text="",
            showarrow=True,
            arrowwidth=2,
            arrowhead=2,
            arrowsize=1,
            arrowcolor=colour_for(i),
        )

    # Animation: one frame per vector, each holding that single vector
    fig.frames = [
        go.Frame(data=[vector_trace(i, vec)]) for i, vec in enumerate(embeddings)
    ]

    # "Play" button that runs through the frames
    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                buttons=[
                    dict(
                        label="Play",
                        method="animate",
                        args=[
                            None,
                            dict(
                                frame=dict(duration=500, redraw=True), fromcurrent=True
                            ),
                        ],
                    )
                ],
            )
        ]
    )
    return fig
def sinusoid_positional_encoding(length, dimensions):
    """Return the sinusoidal positional-encoding table.

    Entry ``[pos, j]`` is ``sin(pos / 10000 ** (2 * (j // 2) / dimensions))``
    for even ``j`` and the cosine of the same angle for odd ``j``.

    Args:
        length: Number of positions (rows); ``0`` yields an empty table.
        dimensions: Size of each encoding vector (columns).

    Returns:
        A float ``numpy.ndarray`` of shape ``(length, dimensions)``.
    """
    # Column vector of positions so it broadcasts against the per-column
    # frequencies; building the table this way keeps it 2-D even when
    # length == 0 (the list-of-lists form collapsed to 1-D and then failed
    # on the column slicing below).
    positions = np.arange(length, dtype=float)[:, np.newaxis]
    # Columns 2i and 2i+1 share the exponent 2i / dimensions.
    exponents = 2 * (np.arange(dimensions) // 2) / dimensions
    PE = positions / np.power(10000, exponents)
    PE[:, 0::2] = np.sin(PE[:, 0::2])  # dim 2i
    PE[:, 1::2] = np.cos(PE[:, 1::2])  # dim 2i+1
    return PE

# Demo setup: ten positions in a 2-D space, so every embedding can be drawn
# as an arrow in the plane.
num_positions = 10
dimensions = 2
# Toy word embedding that is reused at every position
emb = np.array([1, 1])

# Sinusoidal scheme: the positional encoding is *added* to the word embedding.
# Broadcasting adds `emb` to each row of the (num_positions, dimensions) table.
positional_encodings = sinusoid_positional_encoding(num_positions, dimensions)
positional_embeddings = positional_encodings + emb
fig = plot_embeddings(positional_embeddings, 'Sinusoidal Positional Embeddings')
fig.show(renderer="colab")

Visualizing Rotary Positional Embeddings

  • Rotary Positional Embeddings rotate the token embedding counterclockwise by an angle proportional to its position, leaving its magnitude unchanged.
  • They effectively capture patterns.
  • This leads to reduced perplexity levels.
def rotary_positional_encoding(length, dimension):
    """Build the rotary (RoPE) rotation matrix for each position.

    The matrix for position ``m`` is block-diagonal: the ``k``-th 2x2 block
    rotates coordinates ``(2k, 2k + 1)`` counterclockwise by ``m * theta_k``
    with ``theta_k = 10000 ** (-2 * k / dimension)``.  For ``dimension == 2``
    this is the single rotation ``[[cos m, -sin m], [sin m, cos m]]``.

    Args:
        length: Number of positions; one matrix is produced per position.
        dimension: Embedding size; must be a positive even integer because
            coordinates are rotated in pairs.

    Returns:
        A list of ``length`` arrays, each of shape ``(dimension, dimension)``.

    Raises:
        ValueError: If ``dimension`` is not a positive even integer.
    """
    if dimension <= 0 or dimension % 2 != 0:
        raise ValueError(
            f"dimension must be a positive even integer, got {dimension}"
        )

    # One rotation frequency per coordinate pair.  Float arithmetic is used
    # throughout: np.power rejects integer bases with negative integer
    # exponents.
    thetas = np.power(10000.0, -2.0 * np.arange(dimension // 2) / dimension)
    even = np.arange(0, dimension, 2)
    odd = even + 1

    rotation_matrix = []
    for m in range(length):
        cos, sine = np.cos(m * thetas), np.sin(m * thetas)
        r = np.zeros((dimension, dimension))
        # Fill the 2x2 blocks [[cos, -sin], [sin, cos]] along the diagonal.
        r[even, even] = cos
        r[even, odd] = -sine
        r[odd, even] = sine
        r[odd, odd] = cos
        rotation_matrix.append(r)
    return rotation_matrix

# Rotary scheme: the word embedding is *rotated* by a position-dependent
# matrix rather than having a positional vector added to it.
rotation_matrix = rotary_positional_encoding(num_positions, dimensions)
# One rotated copy of `emb` per position
rope = np.array([rotation @ emb for rotation in rotation_matrix])
fig = plot_embeddings(rope, 'Rotary Positional Embeddings')
fig.show(renderer="colab")

You can experiment with the notebook by accessing it here: Google Colab

References

[1] https://arxiv.org/pdf/2104.09864.pdf

[2] https://blog.eleuther.ai/rotary-embeddings/

[3] https://www.inovex.de/de/blog/positional-encoding-everything-you-need-to-know/

[4] https://www.youtube.com/watch?v=GQPOtyITy54&ab_channel=DeepLearningHero