Tags

, ,

Another question from the ORACLE SQL & PL/SQL Forum.

Problem:

The original poster (OP) needed to find the first “missing” entry in a series of entries.

Entries are like:

  • Set 1: 0001, 0004, 0006  First missing entry: 0002
  • Set 2: 0001, 0002, 0006  First missing entry: 0003
  • Set 3: 0001, 0002, 0003   First missing entry: 0004
  • Set 4: 0002, 0003, 0004  First missing entry: 0001

Test Data:

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, entry
  FROM data
/

        ID ENTRY
---------- -----
         1 0001 
         1 0004 
         1 0006 
         2 0001 
         2 0002 
         2 0006 
         3 0001 
         3 0002 
         3 0003 
         4 0002 
         4 0003 
         4 0004 

 12 rows selected

Solution:

So first step would be to calculate the effective entry and the entry – “value an entry had if everything was gapless” (the column is called distance).

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, entry
     , TO_NUMBER(entry) - ROW_NUMBER() OVER (PARTITION BY id 
                                             ORDER BY entry) AS distance
  FROM data
/

        ID ENTRY   DISTANCE
---------- ----- ----------
         1 0001           0
         1 0004           2
         1 0006           3
         2 0001           0
         2 0002           0
         2 0006           3
         3 0001           0
         3 0002           0
         3 0003           0
         4 0002           1
         4 0003           1
         4 0004           1

12 rows selected

As long as the distance is 0 the entry belongs to a gapless series. To find the first missing entry, we are looking for the last entry with a distance of 0.

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, MAX(DECODE(distance,0,id,NULL)) AS last_in_serie
  FROM (SELECT id, entry
             , TO_NUMBER(entry) - ROW_NUMBER() OVER (PARTITION BY id
                                                         ORDER BY entry) AS distance
          FROM data)
  GROUP BY id
/

        ID LAST_IN_SERIE
---------- -------------
         1             1
         2             2
         3             3
         4     

By increasing this last_in_series value by 1 we get the first missing entry.

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, TO_CHAR(NVL(MAX(DECODE(distance,0,id,NULL)),0) + 1,'FM0000') as next_entry
  FROM (SELECT id, entry
             , TO_NUMBER(entry) - ROW_NUMBER() OVER (PARTITION BY id
                                                         ORDER BY entry) AS distance
          FROM data)
  GROUP BY id
/

        ID NEXT_ENTRY
---------- ----------
         1 0002      
         2 0003      
         3 0004      
         4 0001