Site icon Just another ORACLE Development blog

Gap Detection – Tabibitosan Method

Another question from the ORACLE SQL & PL/SQL Forum.

Problem:

The original poster (OP) needed to find the first “missing” entry in a series of entries.

Entries are like:


Test Data:

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, entry
  FROM data
/

        ID ENTRY
---------- -----
         1 0001 
         1 0004 
         1 0006 
         2 0001 
         2 0002 
         2 0006 
         3 0001 
         3 0002 
         3 0003 
         4 0002 
         4 0003 
         4 0004 

 12 rows selected

Solution:

So first step would be to calculate the effective entry and the entry – “value an entry had if everything was gapless” (the column is called distance).

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, entry
     , TO_NUMBER(entry) - ROW_NUMBER() OVER (PARTITION BY id 
                                             ORDER BY entry) AS distance
  FROM data
/

        ID ENTRY   DISTANCE
---------- ----- ----------
         1 0001           0
         1 0004           2
         1 0006           3
         2 0001           0
         2 0002           0
         2 0006           3
         3 0001           0
         3 0002           0
         3 0003           0
         4 0002           1
         4 0003           1
         4 0004           1

12 rows selected

As long as the distance is 0 the entry belongs to a gapless series. To find the first missing entry, we are looking for the last entry with a distance of 0.

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, MAX(DECODE(distance,0,id,NULL)) AS last_in_serie
  FROM (SELECT id, entry
             , TO_NUMBER(entry) - ROW_NUMBER() OVER (PARTITION BY id
                                                         ORDER BY entry) AS distance
          FROM data)
  GROUP BY id
/

        ID LAST_IN_SERIE
---------- -------------
         1             1
         2             2
         3             3
         4     

By increasing this last_in_series value by 1 we get the first missing entry.

WITH data (id,entry) AS (SELECT 1, '0001' FROM dual UNION ALL
                         SELECT 1, '0004' FROM dual UNION ALL
                         SELECT 1, '0006' FROM dual UNION ALL
                         SELECT 2, '0001' FROM dual UNION ALL
                         SELECT 2, '0002' FROM dual UNION ALL
                         SELECT 2, '0006' FROM dual UNION ALL
                         SELECT 3, '0001' FROM dual UNION ALL
                         SELECT 3, '0002' FROM dual UNION ALL
                         SELECT 3, '0003' FROM dual UNION ALL
                         SELECT 4, '0002' FROM dual UNION ALL
                         SELECT 4, '0003' FROM dual UNION ALL
                         SELECT 4, '0004' FROM dual)
SELECT id, TO_CHAR(NVL(MAX(DECODE(distance,0,id,NULL)),0) + 1,'FM0000') as next_entry
  FROM (SELECT id, entry
             , TO_NUMBER(entry) - ROW_NUMBER() OVER (PARTITION BY id
                                                         ORDER BY entry) AS distance
          FROM data)
  GROUP BY id
/

        ID NEXT_ENTRY
---------- ----------
         1 0002      
         2 0003      
         3 0004      
         4 0001 
Exit mobile version