Create table and insert some duplicate rows.
CREATE TABLE DUP_TEST
(
COMPANY_ID VARCHAR(8),
COMPANY_NAME VARCHAR(80),
ADDRESS VARCHAR(80)
);
-- Create test data, dup by company id
INSERT INTO DUP_TEST VALUES ('1', 'Company One', 'Address1');
INSERT INTO DUP_TEST VALUES ('1', 'Company One', 'Address1');
INSERT INTO DUP_TEST VALUES ('2', 'Company Two', 'Address2');
INSERT INTO DUP_TEST VALUES ('2', 'Company Two', 'Address');
INSERT INTO DUP_TEST VALUES ('3', 'Company Three', 'Address3');
The duplicates are with company id 1 and 2. Let delete the duplicates.
set serveroutput on
declare
rows_deleted integer := 0;
begin
for cur in ( select company_id, count(*) cnt from DUP_TEST
group by company_id having count(*) > 1 )
loop
delete from DUP_TEST
where company_id = cur.company_id and rownum < cur.cnt;
rows_deleted := rows_deleted + SQL%ROWCOUNT;
end loop;
dbms_output.put_line( 'records deleted: ' rows_deleted );
end;
/
Now show the result after deleting.
SQL> select * from dup_test;
COMPANY_ COMPANY_NAME ADDRESS
-------- -------------------- --------------------
1 Company One Address1
2 Company Two Address
3 Company Three Address3
It's a little bit diffcult to delete duplicate rows in SQL Server. I'll show one solution using a cursor. Consider the performance when you have large amount of data to search or delete.declare cur cursor for select COMPANY_ID from DUP_TESTIt goes through the full table and search duplicates for each row, if found, delete the current of cursor.
declare @CompanyId int
open cur
fetch cur into @CompanyId
while @@fetch_status = 0
begin
if ( select count(*) from DUP_TEST where COMPANY_ID = @CompanyId ) > 1
begin
delete from DUP_TEST where current of cur
end
fetch cur into @CompanyId
end
close cur
deallocate cur
go
1 comment:
In Oracle, there are ways to de-dupe a table using straight SQL as well. Here's one way -
SQL>
SQL> --
SQL> -- Before delete...
SQL> --
SQL> select * from dup_test;
COMPANY_ID COMPANY_NAME ADDRESS
---------- --------------- --------------------
1 Company One Address1
1 Company One Address1
2 Company Two Address2
2 Company Two Address2
3 Company Three Address3
SQL>
SQL> --
SQL> -- The actual delete...
SQL> --
SQL> delete dup_test x
2 where rowid > (
3 select min (rowid)
4 from dup_test y
5 where y.company_id = x.company_id
6 and y.company_name = x.company_name
7 and y.address = x.address
8 )
9 /
2 rows deleted.
SQL>
SQL> --
SQL> -- After delete...
SQL> --
SQL> select * from dup_test;
COMPANY_ID COMPANY_NAME ADDRESS
---------- --------------- --------------------
1 Company One Address1
2 Company Two Address2
3 Company Three Address3
SQL>
And another way -
SQL>
SQL> --
SQL> -- Before delete...
SQL> --
SQL> select * from dup_test;
COMPANY_ID COMPANY_NAME ADDRESS
---------- --------------- --------------------
1 Company One Address1
1 Company One Address1
2 Company Two Address2
2 Company Two Address2
3 Company Three Address3
SQL>
SQL> --
SQL> -- The actual delete...
SQL> --
SQL> delete dup_test y
2 where rowid in
3 (
4 select t.rid
5 from
6 (
7 select rowid as rid,
8 x.company_id,
9 x.company_name,
10 x.address,
11 row_number() over (partition by x.company_id, x.company_name, x.address order by 1) as rnum
12 from dup_test x
13 ) t
14 where t.rnum > 1
15 )
16 /
2 rows deleted.
SQL>
SQL> --
SQL> -- After delete...
SQL> --
SQL> select * from dup_test;
COMPANY_ID COMPANY_NAME ADDRESS
---------- --------------- --------------------
1 Company One Address1
2 Company Two Address2
3 Company Three Address3
SQL>
SQL>
Post a Comment