vLLM PagedAttention kernel 详细解析

一、Prologue:身份确认与分区范围

const int seq_idx = blockIdx.y;
const int partition_idx = blockIdx.z;
const int max_num_partitions = gridDim.z;
constexpr bool USE_PARTITIONING = PARTITION_SIZE > 0;
const int seq_len = seq_lens[seq_idx];
if (USE_PARTITIONING && partition_idx * PARTITION_SIZE >= seq_len) {
  // No work to do. Terminate the thread block.
  return;
}

const int num_seq_blocks = DIVIDE_ROUND_UP(seq_len, BLOCK_SIZE);
const int num_blocks_per_partition =
    USE_PARTITIONING ? PARTITION_SIZE / BLOCK_SIZE : num_seq_blocks;

// [start_block_idx, end_block_idx) is the range of blocks to process.
const int start_block_idx