T-SQL: Median und Ausreißer berechnen

Unter http://www.insidesql.org/blogs/frankkalis/2004/07/13/median-berechnen und http://quantmeditate.blogspot.com/2005/03/computing-interquartile-range.html habe ich Informationen zur Berechnung von Median und Quartilen in T-SQL gefunden.

DROP TABLE #data
CREATE TABLE #data(NUMBER FLOAT)
 
INSERT INTO #data VALUES(1.0);INSERT INTO #data VALUES(2.0);
INSERT INTO #data VALUES(3.0);INSERT INTO #data VALUES(4.0);
INSERT INTO #data VALUES(5.0);INSERT INTO #data VALUES(6.0);
INSERT INTO #data VALUES(7.0);INSERT INTO #data VALUES(8.0);
INSERT INTO #data VALUES(9.0);INSERT INTO #data VALUES(10.0);
INSERT INTO #data VALUES(11.0);INSERT INTO #data VALUES(12.0);
INSERT INTO #data VALUES(13.0);INSERT INTO #data VALUES(14.0);
INSERT INTO #data VALUES(15.0);INSERT INTO #data VALUES(16.0);
INSERT INTO #data VALUES(17.0);INSERT INTO #data VALUES(18.0);
INSERT INTO #data VALUES(19.0);INSERT INTO #data VALUES(20.0);
INSERT INTO #data VALUES(21.0);INSERT INTO #data VALUES(22.0);
INSERT INTO #data VALUES(23.0);INSERT INTO #data VALUES(24.0);
INSERT INTO #data VALUES(25.0);INSERT INTO #data VALUES(26.0);
INSERT INTO #data VALUES(27.0);INSERT INTO #data VALUES(28.0);
INSERT INTO #data VALUES(29.0);INSERT INTO #data VALUES(30.0);
INSERT INTO #data VALUES(31.0);INSERT INTO #data VALUES(32.0);
INSERT INTO #data VALUES(33.0);INSERT INTO #data VALUES(34.0);
INSERT INTO #data VALUES(35.0);INSERT INTO #data VALUES(36.0);
INSERT INTO #data VALUES(37.0);INSERT INTO #data VALUES(38.0);
INSERT INTO #data VALUES(39.0);INSERT INTO #data VALUES(40.0);
INSERT INTO #data VALUES(-100.0);INSERT INTO #data VALUES(100.0);
 
DECLARE @median FLOAT
DECLARE @perc25 FLOAT
DECLARE @perc75 FLOAT
DECLARE @iqr FLOAT
 
SET @median = (SELECT   MAX(NUMBER) AS Median FROM (SELECT TOP 50 PERCENT NUMBER FROM #data ORDER BY NUMBER) a)
SET @perc25 = (SELECT   MAX(NUMBER) AS Median FROM (SELECT TOP 25 PERCENT NUMBER FROM #data ORDER BY    NUMBER) a)
SET @perc75 = (SELECT   MAX(NUMBER) AS Median FROM (SELECT TOP 75 PERCENT NUMBER FROM #data ORDER BY    NUMBER) a)
 
print @median
print @perc25
print @perc75
 
SET @iqr = @perc75-@perc25
print 'IQR'
print @iqr
 
SELECT * FROM #data WHERE NUMBER <  (@median + 1.5* @iqr) AND 
NUMBER >  (@median - 1.5* @iqr)
 
print 'number of outliers'
SELECT COUNT(*) FROM #data WHERE NUMBER <  (@median - 1.5* @iqr)
OR NUMBER >  (@median + 1.5* @iqr)

Uwe

Uwe Ziegenhagen likes LaTeX and Python, sometimes even combined. Do you like my content and would like to thank me for it? Consider making a small donation to my local fablab, the Dingfabrik Köln. Details on how to donate can be found here Spenden für die Dingfabrik.

More Posts - Website