星期二, 10月 31, 2023

2000-04-21 如何於找出重複資料並將之刪除 ?(delete duplicated record)


 如何於找出重複資料並將之刪除 ?(delete duplicated record)

-- Usage : upload following code to source member
-- modify the library, file, key fields to your own, 
-- the use RUNSQLSTM command 
-- and specify the uploaded member

-- The initial DROP VIEWs will fail the
--   first time this runs because the views
--   do not exist yet.

DROP VIEW MYLIB/DUPV

;
DROP VIEW MYLIB/DUPV2

;
DROP VIEW MYLIB/DUPV3

;
DROP VIEW MYLIB/DUPV4

;

-- This creates a view that groups records and
--   then only includes keys that are duplicates
--   (or triplicates or more).
-- The CNT field reports the count of each set of
--   keys that is duplicated (HAVING count(*)>1).

CREATE VIEW MYLIB/DUPV (
    OrgKFld1,
    OrgKFld2,
    OrgKFld3,
    CNT
    ) AS SELECT
        OrgKFld1,
        OrgKFld2,
        OrgKFld3,
        count(*)
            FROM MYLIB/MyFile GROUP BY
                OrgKFld1,
                OrgKFld2,
                OrgKFld3
                    HAVING count(*)>1
;

-- This view is based on the view above which only
--   contains duplicated keys. That view is joined
--   to the original file to make the relative
--   record numbers available.

CREATE VIEW MYLIB/DUPV2 (
    KFld1,
    KFld2,
    KFld3,
    DRRN
    ) AS SELECT
        a.OrgKFld1,
        a.OrgKFld2,
        a.OrgKFld3,
        rrn(a)
            FROM MYLIB/MyFile a inner join MYLIB/DUPV B
              on a.OrgKFld1=b.OrgKFld1 and
                 a.OrgKFld2=b.OrgKFld2 and
                 a.OrgKFld3=b.OrgKFld3
;

-- This view is based on the view above. The purpose
--   is to find a single record number from a group
--   of duplicated (or triplicated or more) records.
--   By using the MAX() function, we get the highest
--   RRN() from a group. (We could use MIN() to get
--   the lowest.)

create view MYLIB/dupv3 (
    KFld1,
    KFld2,
    KFld3,
    mrrn
    ) as SELECT
        KFld1,
        KFld2,
        KFld3,
        max( DRRN )
            FROM MYLIB/dupv2 GROUP BY
                KFld1,
                KFld2,
                KFld3
;

-- This view provides direct access to the relative
--   record number of every record in the original
--   file. We cannot use the RRN() function to select
--   records in a delete unless we supply an actual
--   number such as WHERE RRN(MyFile)=1. So, we use
--   this view to turn RRN() into a column. Once we
--   have it as a column, we can reference it directly
--   in a DELETE statement.

CREATE VIEW MYLIB/DUPV4
    AS SELECT
        OrgKFld1,
        OrgKFld2,
        OrgKFld3,
        rrn(MyFile) AS DUPRRN
            FROM MYLIB/MyFile
;


-- This is where the actual deletes are done.
-- DO NOT run this statement unless FINDDUPES
--   has completed successfully.
--
-- The DUPV4 view references the original file
--   and exposes the record numbers. We can now
--   use a sub-SELECT to delete only records that
--   appear in our "duplicates" views.

DELETE FROM MYLIB/DUPV4
    WHERE DUPRRN in (
        SELECT MRRN FROM MYLIB/DUPV3
        )
;




沒有留言: