更快的distinct


Warning: Undefined array key "HTTP_REFERER" in /www/wwwroot/prod/www.enjoyasp.net/wp-content/plugins/google-highlight/google-hilite.php on line 58
USE     tempdb;
GO
DROP    TABLE dbo.Test;
GO
CREATE  TABLE 
        dbo.Test 
        (
        data            INTEGER NOT NULL,
        );
GO
CREATE  CLUSTERED INDEX c ON dbo.Test (data);
GO
-- Lots of duplicated values
INSERT  dbo.Test WITH (TABLOCK)
        (data)
SELECT  TOP (5000000)
        ROW_NUMBER() OVER (ORDER BY (SELECT 0)) / 117329
FROM    master.sys.columns C1,
        master.sys.columns C2,
        master.sys.columns C3;
GO

SET     STATISTICS TIME ON;

-- 1591ms CPU
SELECT  DISTINCT 
        data
FROM    dbo.Test;

SQL Server 分析和编译时间: 
   CPU 时间 = 859 毫秒,占用时间 = 2702 毫秒。

SQL Server 执行时间:
   CPU 时间 = 0 毫秒,占用时间 = 0 毫秒。

(43 行受影响)

SQL Server 执行时间:
   CPU 时间 = 967 毫秒,占用时间 = 5881 毫秒。
   
-- 15ms CPU
WITH    RecursiveCTE
AS      (
        SELECT  data = MIN(T.data)
        FROM    dbo.Test T
        UNION   ALL
        SELECT  R.data
        FROM    (
                -- A cunning way to use TOP in the recursive part of a CTE :)
                SELECT  T.data,
                        rn = ROW_NUMBER() OVER (ORDER BY T.data)
                FROM    dbo.Test T
                JOIN    RecursiveCTE R
                        ON  R.data < T.data
                ) R
        WHERE   R.rn = 1
        )
SELECT  *
FROM    RecursiveCTE

--OPTION  (MAXRECURSION 0);

在有索引且排好序的情况下,后者速度更快!

SET     STATISTICS TIME OFF;
GO
DROP    TABLE dbo.Test;