T-SQL: Median und Ausreißer berechnen
Unter http://www.insidesql.org/blogs/frankkalis/2004/07/13/median-berechnen und http://quantmeditate.blogspot.com/2005/03/computing-interquartile-range.html habe ich Informationen zur Berechnung von Median und Quartilen in T-SQL gefunden.
DROP TABLE #data CREATE TABLE #data(NUMBER FLOAT) INSERT INTO #data VALUES(1.0);INSERT INTO #data VALUES(2.0); INSERT INTO #data VALUES(3.0);INSERT INTO #data VALUES(4.0); INSERT INTO #data VALUES(5.0);INSERT INTO #data VALUES(6.0); INSERT INTO #data VALUES(7.0);INSERT INTO #data VALUES(8.0); INSERT INTO #data VALUES(9.0);INSERT INTO #data VALUES(10.0); INSERT INTO #data VALUES(11.0);INSERT INTO #data VALUES(12.0); INSERT INTO #data VALUES(13.0);INSERT INTO #data VALUES(14.0); INSERT INTO #data VALUES(15.0);INSERT INTO #data VALUES(16.0); INSERT INTO #data VALUES(17.0);INSERT INTO #data VALUES(18.0); INSERT INTO #data VALUES(19.0);INSERT INTO #data VALUES(20.0); INSERT INTO #data VALUES(21.0);INSERT INTO #data VALUES(22.0); INSERT INTO #data VALUES(23.0);INSERT INTO #data VALUES(24.0); INSERT INTO #data VALUES(25.0);INSERT INTO #data VALUES(26.0); INSERT INTO #data VALUES(27.0);INSERT INTO #data VALUES(28.0); INSERT INTO #data VALUES(29.0);INSERT INTO #data VALUES(30.0); INSERT INTO #data VALUES(31.0);INSERT INTO #data VALUES(32.0); INSERT INTO #data VALUES(33.0);INSERT INTO #data VALUES(34.0); INSERT INTO #data VALUES(35.0);INSERT INTO #data VALUES(36.0); INSERT INTO #data VALUES(37.0);INSERT INTO #data VALUES(38.0); INSERT INTO #data VALUES(39.0);INSERT INTO #data VALUES(40.0); INSERT INTO #data VALUES(-100.0);INSERT INTO #data VALUES(100.0); DECLARE @median FLOAT DECLARE @perc25 FLOAT DECLARE @perc75 FLOAT DECLARE @iqr FLOAT SET @median = (SELECT MAX(NUMBER) AS Median FROM (SELECT TOP 50 PERCENT NUMBER FROM #data ORDER BY NUMBER) a) SET @perc25 = (SELECT MAX(NUMBER) AS Median FROM (SELECT TOP 25 PERCENT NUMBER FROM #data ORDER BY NUMBER) a) SET @perc75 = (SELECT MAX(NUMBER) AS Median FROM (SELECT TOP 75 PERCENT NUMBER FROM #data ORDER BY NUMBER) a) print @median print @perc25 print @perc75 SET @iqr = @perc75-@perc25 print 'IQR' print @iqr SELECT * FROM #data WHERE NUMBER < (@median + 1.5* @iqr) AND NUMBER > (@median - 1.5* @iqr) print 'number of outliers' SELECT COUNT(*) FROM #data WHERE NUMBER < (@median - 1.5* @iqr) OR NUMBER > (@median + 1.5* @iqr) |