欢迎光临千百叶网
详情描述
SQL Server 窗口函数详细指南

1. 窗口函数概述

1.1 什么是窗口函数

窗口函数允许在不分组的情况下对行进行计算,同时保留原始行的详细信息。与聚合函数不同,窗口函数不会将多行合并为一行。

1.2 基本语法

<窗口函数> OVER (
    [PARTITION BY <分区列>]
    [ORDER BY <排序列> [ASC|DESC]]
    [ROWS/RANGE <窗口框架>]
)

2. 窗口函数分类

2.1 排名函数

ROW_NUMBER()
-- 为每一行分配唯一的序号(分区内)
SELECT 
    EmployeeID,
    Name,
    Department,
    Salary,
    ROW_NUMBER() OVER (PARTITION BY Department ORDER BY Salary DESC) AS RankInDept
FROM Employees;
RANK()
-- 相同值获得相同排名,但会跳过后续序号
SELECT 
    StudentID,
    Score,
    RANK() OVER (ORDER BY Score DESC) AS Rank
FROM ExamResults;
-- 成绩:100, 95, 95, 90 → 排名:1, 2, 2, 4
DENSE_RANK()
-- 相同值获得相同排名,但不会跳过序号
SELECT 
    ProductID,
    Sales,
    DENSE_RANK() OVER (ORDER BY Sales DESC) AS DenseRank
FROM Products;
-- 销售额:100, 100, 80, 70 → 排名:1, 1, 2, 3
NTILE(n)
-- 将结果集分成n个桶
SELECT 
    CustomerID,
    TotalPurchase,
    NTILE(4) OVER (ORDER BY TotalPurchase DESC) AS Quartile
FROM Customers;

2.2 聚合窗口函数

基础用法
-- 计算累计和
SELECT 
    OrderDate,
    Amount,
    SUM(Amount) OVER (ORDER BY OrderDate) AS RunningTotal,
    -- 按月份分区计算累计和
    SUM(Amount) OVER (PARTITION BY YEAR(OrderDate), MONTH(OrderDate) ORDER BY OrderDate) AS MonthlyRunningTotal
FROM Orders;
多聚合组合
SELECT 
    ProductID,
    SaleDate,
    DailySales,
    SUM(DailySales) OVER (PARTITION BY ProductID ORDER BY SaleDate) AS CumulativeSales,
    AVG(DailySales) OVER (PARTITION BY ProductID ORDER BY SaleDate ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS WeeklyMovingAvg,
    MAX(DailySales) OVER (PARTITION BY ProductID ORDER BY SaleDate ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS MonthlyMax
FROM Sales;

2.3 偏移函数

LAG() / LEAD()
-- 与前一行/后一行比较
SELECT 
    Date,
    Temperature,
    Temperature - LAG(Temperature) OVER (ORDER BY Date) AS TempChange,
    LEAD(Temperature, 2) OVER (ORDER BY Date) AS TempInTwoDays,
    -- 提供默认值
    LAG(Temperature, 1, 0) OVER (ORDER BY Date) AS PreviousTemp
FROM WeatherData;
FIRST_VALUE() / LAST_VALUE()
SELECT 
    EmployeeID,
    Name,
    Department,
    Salary,
    FIRST_VALUE(Salary) OVER (PARTITION BY Department ORDER BY Salary DESC) AS HighestSalaryInDept,
    LAST_VALUE(Salary) OVER (PARTITION BY Department ORDER BY Salary DESC 
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS LowestSalaryInDept
FROM Employees;

2.4 分布函数

PERCENT_RANK()
-- 计算百分比排名 (0-1)
SELECT 
    StudentID,
    Score,
    PERCENT_RANK() OVER (ORDER BY Score) AS PercentRank
FROM Scores;
CUME_DIST()
-- 计算累积分布
SELECT 
    ProductID,
    Price,
    CUME_DIST() OVER (ORDER BY Price) AS CumulativeDistribution
FROM Products;
PERCENTILE_CONT() / PERCENTILE_DISC()
-- 计算百分位数
SELECT 
    Department,
    Salary,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY Salary) OVER (PARTITION BY Department) AS MedianSalaryCont,
    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY Salary) OVER (PARTITION BY Department) AS MedianSalaryDisc
FROM Employees;

3. 窗口框架详解

3.1 框架子句类型

-- ROWS (物理行)
SELECT 
    Date,
    Sales,
    SUM(Sales) OVER (
        ORDER BY Date 
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS Last3DaysSales
FROM DailySales;

-- RANGE (逻辑值范围)
SELECT 
    Date,
    Amount,
    SUM(Amount) OVER (
        ORDER BY Date 
        RANGE BETWEEN INTERVAL 2 DAY PRECEDING AND CURRENT ROW
    ) AS Last3DaysAmount
FROM Transactions;

3.2 常用框架模式

-- 从开始到当前行
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW

-- 当前行到结束
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING

-- 滑动窗口(最近3行)
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW

-- 对称窗口(前后各1行)
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING

4. 实战应用场景

4.1 销售分析

-- 计算销售排名和百分比
WITH SalesAnalysis AS (
    SELECT 
        Salesperson,
        Region,
        SalesAmount,
        RANK() OVER (PARTITION BY Region ORDER BY SalesAmount DESC) AS RegionRank,
        RANK() OVER (ORDER BY SalesAmount DESC) AS GlobalRank,
        PERCENT_RANK() OVER (ORDER BY SalesAmount) AS SalesPercentile,
        SUM(SalesAmount) OVER (PARTITION BY Region) AS RegionTotal,
        SalesAmount * 1.0 / SUM(SalesAmount) OVER (PARTITION BY Region) AS RegionShare
    FROM Sales
)
SELECT * FROM SalesAnalysis
WHERE RegionRank <= 3;

4.2 员工工资分析

-- 工资比较和统计
SELECT 
    EmployeeID,
    Name,
    Department,
    Salary,
    -- 与部门平均比较
    Salary - AVG(Salary) OVER (PARTITION BY Department) AS DiffFromDeptAvg,
    -- 部门内排名
    ROW_NUMBER() OVER (PARTITION BY Department ORDER BY Salary DESC) AS DeptSalaryRank,
    -- 全公司百分位
    CUME_DIST() OVER (ORDER BY Salary) * 100 AS CompanyPercentile,
    -- 工资增长
    Salary - LAG(Salary, 1) OVER (PARTITION BY EmployeeID ORDER BY ReviewDate) AS SalaryIncrease
FROM Employees
WHERE Active = 1;

4.3 时序数据分析

-- 计算移动平均和趋势
SELECT 
    Date,
    Revenue,
    -- 7日移动平均
    AVG(Revenue) OVER (ORDER BY Date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS Revenue7DayMA,
    -- 30日移动平均
    AVG(Revenue) OVER (ORDER BY Date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW) AS Revenue30DayMA,
    -- 月增长率
    Revenue * 1.0 / LAG(Revenue, 30) OVER (ORDER BY Date) - 1 AS MoMGrowthRate,
    -- 累计年度收入
    SUM(Revenue) OVER (PARTITION BY YEAR(Date) ORDER BY Date) AS YTDCumulative
FROM DailyRevenue;

4.4 数据质量检查

-- 检测异常值
WITH DataWithStats AS (
    SELECT 
        SensorID,
        ReadingTime,
        Value,
        AVG(Value) OVER (PARTITION BY SensorID) AS AvgValue,
        STDEV(Value) OVER (PARTITION BY SensorID) AS StdDevValue,
        LAG(Value) OVER (PARTITION BY SensorID ORDER BY ReadingTime) AS PrevValue
    FROM SensorReadings
)
SELECT 
    SensorID,
    ReadingTime,
    Value,
    CASE 
        WHEN ABS(Value - AvgValue) > 3 * StdDevValue THEN 'Outlier'
        WHEN ABS(Value - PrevValue) > AvgValue * 0.5 THEN 'Sudden Spike/Drop'
        ELSE 'Normal'
    END AS DataQualityFlag
FROM DataWithStats;

5. 性能优化技巧

5.1 索引策略

-- 为窗口函数创建合适的索引
CREATE INDEX IX_Orders_Date_Amount 
ON Orders (OrderDate, Amount) 
INCLUDE (CustomerID);

-- 复合索引支持分区和排序
CREATE INDEX IX_Employees_Dept_Salary 
ON Employees (Department, Salary DESC) 
INCLUDE (EmployeeID, Name);

5.2 分区优化

-- 避免过多小分区
-- ❌ 不好:每个客户一个分区(如果客户很多)
SELECT 
    SUM(Amount) OVER (PARTITION BY CustomerID ORDER BY Date)
FROM Orders;

-- ✅ 更好:使用合适的粒度
SELECT 
    SUM(Amount) OVER (PARTITION BY CustomerSegment ORDER BY Date)
FROM Orders;

5.3 CTE与窗口函数结合

WITH RankedData AS (
    SELECT 
        *,
        ROW_NUMBER() OVER (PARTITION BY Category ORDER BY Price DESC) AS PriceRank,
        AVG(Price) OVER (PARTITION BY Category) AS AvgCategoryPrice
    FROM Products
),
FilteredData AS (
    SELECT *
    FROM RankedData
    WHERE PriceRank <= 10  -- 每个类别前10名
)
SELECT 
    Category,
    ProductName,
    Price,
    Price - AvgCategoryPrice AS PriceVsAvg,
    RANK() OVER (ORDER BY (Price - AvgCategoryPrice) DESC) AS ValueRank
FROM FilteredData;

6. 高级应用示例

6.1 会话分析

-- 识别用户会话
WITH UserEvents AS (
    SELECT 
        UserID,
        EventTime,
        LAG(EventTime) OVER (PARTITION BY UserID ORDER BY EventTime) AS PrevEventTime
    FROM UserActivity
),
SessionMarked AS (
    SELECT 
        UserID,
        EventTime,
        CASE 
            WHEN DATEDIFF(MINUTE, PrevEventTime, EventTime) > 30 
                 OR PrevEventTime IS NULL 
            THEN 1 
            ELSE 0 
        END AS IsNewSession
    FROM UserEvents
),
SessionNumbered AS (
    SELECT 
        UserID,
        EventTime,
        SUM(IsNewSession) OVER (PARTITION BY UserID ORDER BY EventTime) AS SessionID
    FROM SessionMarked
)
SELECT 
    UserID,
    SessionID,
    MIN(EventTime) AS SessionStart,
    MAX(EventTime) AS SessionEnd,
    COUNT(*) AS EventsInSession
FROM SessionNumbered
GROUP BY UserID, SessionID;

6.2 股票分析

-- 计算技术指标
SELECT 
    StockSymbol,
    TradeDate,
    ClosePrice,
    -- 移动平均线
    AVG(ClosePrice) OVER (
        PARTITION BY StockSymbol 
        ORDER BY TradeDate 
        ROWS BETWEEN 19 PRECEDING AND CURRENT ROW
    ) AS MA20,
    -- 布林带
    AVG(ClosePrice) OVER (
        PARTITION BY StockSymbol 
        ORDER BY TradeDate 
        ROWS BETWEEN 19 PRECEDING AND CURRENT ROW
    ) + 2 * STDEV(ClosePrice) OVER (
        PARTITION BY StockSymbol 
        ORDER BY TradeDate 
        ROWS BETWEEN 19 PRECEDING AND CURRENT ROW
    ) AS BollingerUpper,
    -- RSI计算
    100 - (100 / (1 + 
        AVG(CASE WHEN PriceChange > 0 THEN PriceChange ELSE 0 END) OVER (
            PARTITION BY StockSymbol 
            ORDER BY TradeDate 
            ROWS BETWEEN 13 PRECEDING AND CURRENT ROW
        ) / 
        NULLIF(AVG(CASE WHEN PriceChange < 0 THEN ABS(PriceChange) ELSE 0 END) OVER (
            PARTITION BY StockSymbol 
            ORDER BY TradeDate 
            ROWS BETWEEN 13 PRECEDING AND CURRENT ROW
        ), 0)
    )) AS RSI14
FROM StockPrices;

6.3 库存管理

-- 先进先出(FIFO)库存计算
WITH InventoryTransactions AS (
    SELECT 
        ProductID,
        TransactionDate,
        TransactionType,
        Quantity,
        Cost,
        SUM(CASE WHEN TransactionType = 'IN' THEN Quantity ELSE -Quantity END) 
            OVER (PARTITION BY ProductID ORDER BY TransactionDate) AS RunningBalance
    FROM Inventory
),
FIFOCost AS (
    SELECT 
        ProductID,
        TransactionDate,
        TransactionType,
        Quantity,
        Cost,
        RunningBalance,
        MIN(TransactionDate) OVER (
            PARTITION BY ProductID 
            ORDER BY TransactionDate 
            ROWS UNBOUNDED PRECEDING
        ) AS FirstInDate
    FROM InventoryTransactions
    WHERE TransactionType = 'IN'
)
SELECT * FROM FIFOCost;

7. 常见错误与解决方案

7.1 性能问题

-- ❌ 错误:窗口过大导致性能问题
SELECT 
    SUM(Amount) OVER (ORDER BY Date ROWS UNBOUNDED PRECEDING)
FROM VeryLargeTable;

-- ✅ 优化:添加限制条件或使用索引
SELECT 
    SUM(Amount) OVER (ORDER BY Date ROWS BETWEEN 30 PRECEDING AND CURRENT ROW)
FROM VeryLargeTable
WHERE Date >= DATEADD(DAY, -60, GETDATE());

7.2 排序问题

-- ❌ 错误:缺少ORDER BY导致非确定性结果
SELECT 
    Name,
    FIRST_VALUE(Salary) OVER (PARTITION BY Department) AS FirstSalary
FROM Employees;

-- ✅ 正确:明确指定排序
SELECT 
    Name,
    FIRST_VALUE(Salary) OVER (
        PARTITION BY Department 
        ORDER BY HireDate
    ) AS FirstSalaryByHireDate
FROM Employees;

8. 最佳实践总结

明确排序:始终为窗口函数指定ORDER BY以确保确定性结果 合理分区:根据数据特性和业务需求选择分区键 框架优化:使用适当的窗口框架限制计算范围 索引支持:为窗口函数的PARTITION BY和ORDER BY列创建索引 避免嵌套:避免过度嵌套窗口函数,考虑使用CTE分步处理 测试性能:在大数据量下测试窗口函数的性能表现

9. SQL Server版本特性

  • SQL Server 2005+:支持ROW_NUMBER, RANK, DENSE_RANK, NTILE
  • SQL Server 2012+:支持LAG, LEAD, FIRST_VALUE, LAST_VALUE
  • SQL Server 2012+:支持ROWS和RANGE窗口框架
  • SQL Server 2012+:支持PERCENT_RANK, CUME_DIST
  • SQL Server 2012+:支持PERCENTILE_CONT, PERCENTILE_DISC

窗口函数是SQL Server中强大的分析工具,合理使用可以显著简化复杂查询并提高性能。在实际应用中,应根据具体业务需求选择最合适的窗口函数和参数配置。