Wednesday, August 11, 2010

Delete Duplicate Data From a Table

USE SAMPLE
GO
CREATE TABLE #TAB1
(ID INT,
NAME VARCHAR(10)
)

INSERT INTO #TAB1
SELECT 1, 'JOHN'

INSERT INTO #TAB1
SELECT 1, 'JOHN'


INSERT INTO #TAB1
SELECT 2, 'Stephen'

INSERT INTO #TAB1
SELECT 3, 'Sonia'

INSERT INTO #TAB1
SELECT 4, 'RIYA'

INSERT INTO #TAB1
SELECT 4, 'RIYA'

INSERT INTO #TAB1
SELECT 4, 'RIYA'

--SELECT * FROM #TAB1

--To Identify Duplicates
SELECT * FROM #TAB1
GROUP BY ID, NAME HAVING COUNT(*) > 1


--To Delete Duplicates from the Table

DECLARE @DupCount INT
DECLARE @NonDupCount INT
DECLARE @TotalCount INT
DECLARE @NAME VARCHAR(10)
DECLARE @ID INT

SELECT @TotalCount = COUNT(*) FROM #TAB1

SELECT * FROM #TAB1
GROUP BY ID, NAME HAVING COUNT(*) = 1
SELECT distinct ID, name FROM #TAB1

SET @NonDupCount = @@ROWCOUNT

SELECT @DupCount = @TotalCount - @NonDupCount
--Select distinct * from #TAB1

Select @DupCount as dup, @TotalCount as total, @NonDupCount as nondup

set @DupCount=@DupCount+4
SET ROWCOUNT 1

WHILE @DupCount > 0
BEGIN
--SELECT TOP 1 NAME , ID FROM #TAB1 GROUP BY ID, NAME HAVING COUNT(*) > 1
SELECT TOP 1 @NAME = NAME , @ID = ID FROM #TAB1 GROUP BY ID, NAME HAVING COUNT(*) > 1
--Select @name as Name , @id as ID

DELETE FROM #TAB1 WHERE NAME = @NAME AND ID = @ID
Select @name =null, @id =0
SET @DupCount = @DupCount - 1
END
SET ROWCOUNT 0
Select * from #TAB1


DROP TABLE #TAB1

1 comment:

  1. This is quite informative but I did in some different way,


    --Create a table

    create table t1(col1 int, col2 int, col3 char(50))



    --Insert some duplicate rows

    insert into t1 values (1, 1, 'Manish')
    insert into t1 values (1, 1, 'Manish')
    insert into t1 values (1, 1, 'Manish')
    insert into t1 values (1, 1, 'Manish')
    insert into t1 values (1, 2, 'Sql server')

    select * from t1

    --Identify and save dup data into temp table

    select * into t2 from t1 group by col1,col2,col3 having COUNT(*)>1



    --Delete dup data from original table

    Delete from t1 from t1,t2 where

    t1.col1=t2.col1 and t1.col2=t2.col2 and t1.col3=t2.col3



    --Insert the delete data back

    insert into t1 select * from t2



    --Verify

    select * from t1



    --Cleanup

    drop table t1

    drop table t2

    ReplyDelete