Oracle SQL | Category Archives: query optimizing

Deterministic function vs scalar subquery caching. Part 2

Posted on February 11, 2013 by Sayan Malakshinov Posted in deterministic functions, oracle, query optimizing, scalar subquery caching 3 Comments

In the previous part I already pointed out that:

Both mechanisms are based on hash functions.
Deterministic caching depends on the fetch size (arraysize) – results are cached only within one fetch call; SSC has no such limitation.
Hash collisions depend on the single parameter “_query_execution_cache_max_size” for both mechanisms, but they are more frequent in SSC.

Today’s topic:
4. Deterministic functions do not keep the last result the way scalar subquery caching does
5. Caching of deterministic function results is turned off after a certain number of attempts to get the value from the cache, whereas SSC always returns results from the cache if the values are already cached.
Continue reading→

Materialization in subquery factoring without the “materialize” hint can be considered only when at least one predicate exists

Posted on February 9, 2013 by Sayan Malakshinov Posted in CBO, oracle, query optimizing, undocumented 6 Comments

I have just found that the optimizer does not consider materialization if there are no predicates in the subquery factoring clause. Of course, I mean cases where materialization is not forced through the “materialize” hint.
Simple example:

Spoiler


SQL> create table tt1 as select 1 id from dual;

Table created.

SQL> exec dbms_stats.gather_table_stats('','TT1');

PL/SQL procedure successfully completed.

SQL> explain plan for
  2  with gen as (select * from tt1)
  3  select * from gen,gen g2;

Explained.

SQL> @xplan

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------
Plan hash value: 486748850

-----------------------------------------------------------------------------
| Id  | Operation            | Name | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------
|   0 | SELECT STATEMENT     |      |     1 |     6 |     6   (0)| 00:00:01 |
|   1 |  MERGE JOIN CARTESIAN|      |     1 |     6 |     6   (0)| 00:00:01 |
|   2 |   TABLE ACCESS FULL  | TT1  |     1 |     3 |     3   (0)| 00:00:01 |
|   3 |   BUFFER SORT        |      |     1 |     3 |     3   (0)| 00:00:01 |
|   4 |    TABLE ACCESS FULL | TT1  |     1 |     3 |     3   (0)| 00:00:01 |
-----------------------------------------------------------------------------

SQL> explain plan for
  2  with gen as (select * from tt1 where 1=1)
  3  select * from gen,gen g2;

Explained.

SQL> @xplan

PLAN_TABLE_OUTPUT
----------------------------------------------------------------------------------------------------------
Plan hash value: 2673059801

---------------------------------------------------------------------------------------------------------
| Id  | Operation                  | Name                       | Rows  | Bytes | Cost (%CPU)| Time  |
---------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT           |                            |     1 |    26 |     7   (0)| 00:00:01 |
|   1 |  TEMP TABLE TRANSFORMATION |                            |       |       |            |       |
|   2 |   LOAD AS SELECT           | SYS_TEMP_0FD9D6610_6641830 |       |       |            |       |
|   3 |    TABLE ACCESS FULL       | TT1                        |     1 |     3 |     3   (0)| 00:00:01 |
|   4 |   MERGE JOIN CARTESIAN     |                            |     1 |    26 |     4   (0)| 00:00:01 |
|   5 |    VIEW                    |                            |     1 |    13 |     2   (0)| 00:00:01 |
|   6 |     TABLE ACCESS FULL      | SYS_TEMP_0FD9D6610_6641830 |     1 |     3 |     2   (0)| 00:00:01 |
|   7 |    BUFFER SORT             |                            |     1 |    13 |     4   (0)| 00:00:01 |
|   8 |     VIEW                   |                            |     1 |    13 |     2   (0)| 00:00:01 |
|   9 |      TABLE ACCESS FULL     | SYS_TEMP_0FD9D6610_6641830 |     1 |     3 |     2   (0)| 00:00:01 |
---------------------------------------------------------------------------------------------------------

[collapse]

Update: I did some additional tests and found:

with “table()” but without “xmltable”, materialization always occurs, regardless of whether there are predicates or other tables in the subquery factoring clause
with “xmltable” the behavior is very strange – the decision about materialization depends on the ‘columns …’ clause: when it exists, materialization occurs; when it does not, it doesn’t.
when the subquery “selects a subset of table columns”, as David Aldridge said, the decision still depends on the existence of predicates

Tests with table() were like this:

-- Case 1: the factored subquery reads only from table(); the predicate is deliberately left commented out.
with t as (select * from table(cast(:a as sys.ku$_vcnt)) /*where 1=0*/ )
select count(*) from t,t t2;
-- Case 2: table() combined with another row source (t10; its DDL is not shown here), still without an active predicate.
with t as (select * from t10,table(cast(:a as sys.ku$_vcnt)) /*where 1=0*/ )
select count(*) from t,t t2;

[collapse]

Tests with xmltable() were like this:

-- Case 1: xmltable() WITHOUT a 'columns' clause, with a predicate present.
with t as (select * from t10,xmltable(:a ) ttt where 1=1)
select count(*)
from t, t t1;
-- Case 2: xmltable() WITH a 'columns' clause, with a predicate present.
with t as (select * from t10,xmltable(:a columns n int) ttt where 1=0)
select count(*)
from t, t t1;
-- Case 3: table() and xmltable() together (no 'columns' clause), with a no_merge hint and a predicate.
with t as (select/*+ no_merge */ * from table(cast(:a as sys.ku$_vcnt)),xmltable(:a) where 1=0 )
select count(*) from t,t t2;

[collapse]

Test with 2 from 1000 columns

-- Builds T_1000_COLS: 1000 columns (c1..c1000), each lpad(1,4000,1), 100 rows,
-- then gathers statistics and switches on a 10053 (optimizer) trace for the session.
declare
  l_cols varchar2(32767);  -- comma-separated select list: lpad(1,4000,1) c1,...,lpad(1,4000,1) c1000
begin
  for n in 1..1000 loop
    -- put a separator in front of every item except the first one
    l_cols := l_cols || case when n > 1 then ',' end || 'lpad(1,4000,1) c' || n;
  end loop;
  execute immediate
    'create table t_1000_cols as select ' || l_cols || ' from dual connect by level<=100';
end;
/
begin
  dbms_stats.gather_table_stats('','T_1000_COLS');
end;
/
-- tag the trace file so it is easy to find, then enable the optimizer trace
alter session set tracefile_identifier = mat1000;
alter session set events='10053 trace name context forever, level 1';

-- Case 1: subquery selects 2 of the 1000 columns, no predicate.
with t as (select c1,c2 from t_1000_cols)
select count(*)
from t, t t2;

-- Case 2: same subquery with a trivially true predicate added.
with t as (select c1,c2 from t_1000_cols where 1=1)
select count(*)
from t, t t2;

[collapse]

Amazing optimization of getting distinct values from the index, and TopN for each of them

Posted on September 21, 2012 by Sayan Malakshinov Posted in CBO, Index bouncy scan, oracle, query optimizing 5 Comments

A couple of days ago someone posted a question on the forum which at the first glance seemed old, boring, beaten up and down:

There is a news feed. All news are divided into 10 categories (Politics, sport, auto, real estate, etc).
I need to get top 4 news sorted by time descending for each category with 1 query.
If you sort the results – you get 4 politics news, then 4 sport news etc.

But the task was to make it optimal, and the standard solution with usual TopN using row_number can not be called optimal in any way, especially in case of big tables, relatively small number of categories and uneven distribution or just overall low selectivity.

So my idea was to start from min() and get the next values using “Index range scan (min/max)” recursively. I couldn’t find a good name for this technique, so let’s call it, as Jonathan Lewis did, an “Index bouncy scan”:

1. Getting distinct values from the index

Suppose we have a table with an index on the “a” column:

-- Test table: a = length of the object name, b = object id (0 when null), c = object name.
create table xt_test (a not null, b not null, c)
as
select length(o.object_name)
      ,nvl(o.object_id, 0)
      ,o.object_name
from   dba_objects o;

-- single-column index on a
create index ix_test_a on xt_test (a);
SQL> select i.index_name
  2        ,i.distinct_keys,i.num_rows
  3        ,i.blevel,i.leaf_blocks
  4        ,i.avg_leaf_blocks_per_key,i.avg_data_blocks_per_key
  5  from user_indexes i where i.table_name='XT_TEST';

INDEX_NAME  DISTINCT_KEYS  NUM_ROWS   BLEVEL LEAF_BLOCKS AVG_LEAF_BLOCKS_PER_KEY AVG_DATA_BLOCKS_PER_KEY
----------- ------------- --------- -------- ----------- ----------------------- -----------------------
IX_TEST_A              30     69230        1         135                       4                     191

1 row selected.

DDL for this test case:

Spoiler

-- Rebuilds the XT_TEST test case from scratch.
-- (On the very first run the DROP fails because the table does not exist yet; that error can be ignored.)
drop table xt_test purge;

-- a = length of the object name, b = object id (0 when null), c = object name
create table xt_test(a not null,b not null,c)
as
select
    length(object_name)
   ,nvl(object_id,0)
   ,o.OBJECT_NAME
from dba_objects o
;
create index ix_test_a on xt_test(a);

-- Exact statistics (100% sample) for the table, its indexes and the indexed columns.
-- An empty owner name means the current schema.
begin
  dbms_stats.gather_table_stats(
     ''
    ,'XT_TEST'
    ,estimate_percent=>100
    ,cascade=>true
    ,method_opt => 'for all indexed columns size auto'
   );
end;
/

-- Index statistics quoted in the article (taken while IX_TEST_A is the only index).
select i.index_name
      ,i.distinct_keys,i.num_rows
      ,i.blevel,i.leaf_blocks
      ,i.avg_leaf_blocks_per_key,i.avg_data_blocks_per_key
from user_indexes i 
where i.table_name='XT_TEST';

-- Composite index required by the TopN queries (hint index_desc(tt ix_xt_test_ab)).
-- It is created after the statistics query so that query still returns the single row shown in the article.
-- NOTE(review): the column list (a, b) is inferred from those queries (access on A, ordering by B desc) - confirm.
create index ix_xt_test_ab on xt_test(a, b);

[collapse]

This column has a very skewed distribution of values:

distribution

A	COUNT(*)
1	11
2	20
3	59
4	92
5	178
6	251
7	521
9	570
10	636
8	640
11	962
12	970
13	1151
15	1363
14	1544
16	1692
18	2021
17	2023
19	2550
20	2606
21	3050
22	3171
23	3395
24	3472
29	3527
27	3596
26	3698
28	4130
25	4268
30	17063
ALL	69230

[collapse]

A standard query using distinct is very inefficient – there are only 30 distinct keys in the index, yet there are 135 leaf blocks to read!
With IFS:

DB11G/XTENDER> select/*+ INDEX(xt_test) */ distinct a from xt_test;

30 rows selected.

Elapsed: 00:00:00.02

Execution Plan
----------------------------------------------------------
Plan hash value: 3405466263

--------------------------------------------------------------------------------
| Id  | Operation          | Name      | Rows  | Bytes | Cost (%CPU)| Time     |
--------------------------------------------------------------------------------
|   0 | SELECT STATEMENT   |           |    30 |    90 |   140   (3)| 00:00:02 |
|   1 |  SORT UNIQUE NOSORT|           |    30 |    90 |   140   (3)| 00:00:02 |
|   2 |   INDEX FULL SCAN  | IX_TEST_A | 69230 |   202K|   137   (1)| 00:00:02 |
--------------------------------------------------------------------------------

Statistics
----------------------------------------------------------
          1  recursive calls
          0  db block gets
        138  consistent gets
          0  physical reads
          0  redo size
        751  bytes sent via SQL*Net to client
        431  bytes received via SQL*Net from client
          3  SQL*Net roundtrips to/from client
          0  sorts (memory)
          0  sorts (disk)
         30  rows processed

With IFFS:

DB11G/XTENDER> select distinct a from xt_test;

30 rows selected.

Elapsed: 00:00:00.05

Execution Plan
----------------------------------------------------------
Plan hash value: 4206828362

-----------------------------------------------------------------------------------
| Id  | Operation             | Name      | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |           |    30 |    90 |    42  (10)| 00:00:01 |
|   1 |  HASH UNIQUE          |           |    30 |    90 |    42  (10)| 00:00:01 |
|   2 |   INDEX FAST FULL SCAN| IX_TEST_A | 69230 |   202K|    38   (0)| 00:00:01 |
-----------------------------------------------------------------------------------

Statistics
----------------------------------------------------------
          1  recursive calls
          0  db block gets
        143  consistent gets
          0  physical reads
          0  redo size
        751  bytes sent via SQL*Net to client
        431  bytes received via SQL*Net from client
          3  SQL*Net roundtrips to/from client
          0  sorts (memory)
          0  sorts (disk)
         30  rows processed

[collapse]

We also could go along the tree visiting only the required blocks, but not all leaf blocks! However, Oracle can’t manage this on its own so we have to make a certain twist: aside from IFS(min/max) Oracle also has IRS(min/max) which works well with ranges and boundaries. We can use recursive query to make it read only what we need!

DB11G/XTENDER> with t_unique( a ) as (
  2                select min(t1.a)
  3                from xt_test t1
  4                union all
  5                select (select min(t1.a) from xt_test t1 where t1.a>t.a)
  6                from t_unique t
  7                where a is not null
  8  )
  9  select * from t_unique where a is not null;

30 rows selected.

Elapsed: 00:00:00.00

Execution Plan
----------------------------------------------------------
Plan hash value: 2791305641

-------------------------------------------------------------------------------------------------------
| Id  | Operation                                 | Name      | Rows  | Bytes | Cost (%CPU)| Time     |
-------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                          |           |     2 |    26 |     4   (0)| 00:00:01 |
|*  1 |  VIEW                                     |           |     2 |    26 |     4   (0)| 00:00:01 |
|   2 |   UNION ALL (RECURSIVE WITH) BREADTH FIRST|           |       |       |            |          |
|   3 |    SORT AGGREGATE                         |           |     1 |     3 |            |          |
|   4 |     INDEX FULL SCAN (MIN/MAX)             | IX_TEST_A |     1 |     3 |     2   (0)| 00:00:01 |
|   5 |    SORT AGGREGATE                         |           |     1 |     3 |            |          |
|   6 |     FIRST ROW                             |           |     1 |     3 |     2   (0)| 00:00:01 |
|*  7 |      INDEX RANGE SCAN (MIN/MAX)           | IX_TEST_A |     1 |     3 |     2   (0)| 00:00:01 |
|*  8 |    RECURSIVE WITH PUMP                    |           |       |       |            |          |
-------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   1 - filter("A" IS NOT NULL)
   7 - access("T1"."A">:B1)
   8 - filter("A" IS NOT NULL)

Statistics
----------------------------------------------------------
          1  recursive calls
          0  db block gets
         36  consistent gets
          0  physical reads
          0  redo size
        751  bytes sent via SQL*Net to client
        431  bytes received via SQL*Net from client
          3  SQL*Net roundtrips to/from client
         32  sorts (memory)
          0  sorts (disk)
         30  rows processed

The difference is obvious: 36 consistent gets for 30 values, instead of 138 (index full scan) or 143 (index fast full scan). Note that this is a very small table, and the difference will be much more noticeable with millions and billions of entries!
Here is the explanation of the algorithm:

In the first part of the union all (lines 3-4 of the plan) we specify where to start the recursion: more specifically, we choose the minimal (first) value from the index.
After that we choose the first value that is bigger than the one chosen in the previous step, using IRS(min/max) (lines 7-6-5 of the plan).
We repeat the recursion for as long as we find anything.

Proceed to the next:

2. TopN entries for every key value
Now that we are armed with an easy tool to get every initial value, we can easily get the Top N for each of them. The only problem that remains is that we cannot use an inline view with row_number/rownum, as the predicate from the higher level won’t be pushed there, and we will have to use a simple restriction by count stopkey (by rownum) with the required access by IRS descending (order by is generally unnecessary there, but it further reduces reading costs of IRS descending, which is necessary for implicit sorting) with the index_desc hint, to nail it down, otherwise the sorting may break. So to make this happen we either have to use the undocumented Lateral() with the corresponding event turned on, or use the simpler and standard table(multiset(…)), or the slightly harder xmltable() – but the latter is not so dangerous. Yet another variant is to use cursor() with pushed predicates:

With cursor()

-- TopN per key with cursor(): for every distinct value of a, return a nested cursor
-- with the rowids of up to 5 rows having the highest b.
with t_unique( a ) as (
              -- anchor member: the smallest key in the index
              select min(t1.a)
              from xt_test t1
              union all
              -- recursive member: the next key strictly greater than the previous one;
              -- yields NULL after the last key, which ends the recursion
              select (select min(t1.a) from xt_test t1 where t1.a>t.a)
              from t_unique t
              where a is not null
)
select cursor(
              select rid from(
                 select/*+ index_desc(tt ix_xt_test_ab) */
                    tt.a
                   ,tt.rowid rid
                   ,row_number()over(partition by a order by b desc) rn
                 from xt_test tt
                 order by tt.b desc
              )
              where a=v.a and rn<=5
       )
from t_unique v;


[collapse]

With table() and multiset()

DB11G/XTENDER> with t_unique( a ) as (
  2                select min(t1.a)
  3                from xt_test t1
  4                union all
  5                select (select min(t1.a) from xt_test t1 where t1.a>t.a)
  6                from t_unique t
  7                where a is not null
  8  )
  9  select/*+ use_nl(rids tt) */ *
 10  from t_unique v
 11      ,table(
 12            cast(
 13                 multiset(
 14                          select/*+ index_desc(tt ix_xt_test_ab) */ tt.rowid rid
 15                          from xt_test tt
 16                          where tt.a=v.a
 17                            and rownum<=5
 18                          order by tt.b desc
 19                         )
 20                 as sys.odcivarchar2list
 21                )
 22            ) rids
 23      ,xt_test tt
 24  where tt.rowid=rids.column_value
 25  order by tt.a,tt.b desc;

150 rows selected.

Elapsed: 00:00:00.01

Execution Plan
----------------------------------------------------------
Plan hash value: 4085270117

----------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                    | Name          | Rows  | Bytes |TempSpc| Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                             |               |    11M|   506M|       |   149K  (1)| 00:29:54 |
|   1 |  SORT ORDER BY                               |               |    11M|   506M|   649M|   149K  (1)| 00:29:54 |
|   2 |   NESTED LOOPS                               |               |    11M|   506M|       | 16402   (1)| 00:03:17 |
|   3 |    NESTED LOOPS                              |               | 16336 |   239K|       |    60   (0)| 00:00:01 |
|   4 |     VIEW                                     |               |     2 |    26 |       |     4   (0)| 00:00:01 |
|   5 |      UNION ALL (RECURSIVE WITH) BREADTH FIRST|               |       |       |       |         |             |
|   6 |       SORT AGGREGATE                         |               |     1 |     3 |       |         |             |
|   7 |        INDEX FULL SCAN (MIN/MAX)             | IX_TEST_A     |     1 |     3 |       |     2   (0)| 00:00:01 |
|   8 |       SORT AGGREGATE                         |               |     1 |     3 |       |         |             |
|   9 |        FIRST ROW                             |               |     1 |     3 |       |     2   (0)| 00:00:01 |
|* 10 |         INDEX RANGE SCAN (MIN/MAX)           | IX_TEST_A     |     1 |     3 |       |     2   (0)| 00:00:01 |
|* 11 |       RECURSIVE WITH PUMP                    |               |       |       |       |         |             |
|  12 |     COLLECTION ITERATOR SUBQUERY FETCH       |               |  8168 | 16336 |       |    28   (0)| 00:00:01 |
|* 13 |      COUNT STOPKEY                           |               |       |       |       |         |             |
|* 14 |       INDEX RANGE SCAN DESCENDING            | IX_XT_TEST_AB |  2308 | 64624 |       |     8   (0)| 00:00:01 |
|* 15 |    TABLE ACCESS BY USER ROWID                | XT_TEST       |   692 | 22144 |       |     1   (0)| 00:00:01 |
----------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

  10 - access("T1"."A">:B1)
  11 - filter("A" IS NOT NULL)
  13 - filter(ROWNUM<=5)
  14 - access("TT"."A"=:B1)
  15 - access(CHARTOROWID(VALUE(KOKBF$)))

Statistics
----------------------------------------------------------
          1  recursive calls
          0  db block gets
        166  consistent gets
          0  physical reads
          0  redo size
       7523  bytes sent via SQL*Net to client
        519  bytes received via SQL*Net from client
         11  SQL*Net roundtrips to/from client
         33  sorts (memory)
          0  sorts (disk)
        150  rows processed


[collapse]

It is similarly possible through “lateral”:

code

-- TopN per key with lateral(): event 22829 switches on the undocumented lateral() syntax
-- for this session.
alter session set events '22829 trace name context forever';
with t_unique( a ) as (
              -- anchor member: the smallest key in the index
              select min(t1.a)
              from xt_test t1
              union all
              -- recursive member: the next key strictly greater than the previous one
              select (select min(t1.a) from xt_test t1 where t1.a>t.a)
              from t_unique t
              where a is not null
)
-- NOTE(review): the aliases in use_nl(rids tt) do not exist in this query block
-- (the row sources here are v and r) - confirm whether use_nl(r) was intended.
select/*+ use_nl(rids tt) */ *
from t_unique v
    ,lateral(
              -- rownum is applied before "order by", so the 5 rows are the top ones only
              -- because index_desc makes the rows arrive in descending b order
              select/*+ index_desc(tt ix_xt_test_ab) */ tt.*
              from xt_test tt
              where tt.a=v.a
                and rownum<=5
              order by tt.a, b desc
     ) r
order by r.a,r.b desc;


[collapse]

In general, we could do without the dangerous sorting by using “xmltable” and dbms_xmlgen instead of “table”, passing the parameter directly into the inner subquery, but this is a bit harder than the regular “table”:

With xmltable()

-- TopN per key with xmltable(): for every distinct owner, the 5 oldest rows of ttt
-- are fetched by a dynamically built query and returned as XML rows.
with t_unique( owner ) as (
              -- anchor member: the smallest owner in the index
              select min(owner)
              from ttt
              union all
              -- recursive member: the next owner strictly greater than the previous one
              select (select min(t1.owner) from ttt t1 where t1.owner>t.owner)
              from t_unique t
              where owner is not null
)
select r.*
from t_unique v
    ,xmltable('/ROWSET/ROW'
              passing(
                dbms_xmlgen.getxmltype(
                  -- the owner value is spliced into the query text as a literal,
                  -- so embedded single quotes are doubled to keep the generated SQL valid
                  q'[select *
                     from (
                       select/*+ index_asc(tt ix_ttt) */ owner, to_char(created,'yyyy-mm-dd hh24:mi:ss') created
                       from ttt tt
                       where tt.owner=']'||replace(v.owner,'''','''''')||q'['
                       order by tt.created asc
                     )
                     where rownum<=5
                  ]'
                )
              )
              columns
                owner   varchar2(30) path 'OWNER'
               ,created varchar2(30) path 'CREATED'
               ,x xmltype path '.'
             ) r
where
  v.owner is not null
order by r.owner,r.created asc;

-----------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                   | Name                   | Starts | E-Rows | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
-----------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                            |                        |      1 |        |    148 |00:00:00.28 |     365 |       |       |          |
|   1 |  SORT ORDER BY                              |                        |      1 |  16336 |    148 |00:00:00.28 |     365 | 20480 | 20480 |18432  (0)|
|   2 |   NESTED LOOPS                              |                        |      1 |  16336 |    148 |00:00:00.10 |     365 |       |       |          |
|*  3 |    VIEW                                     |                        |      1 |      2 |     30 |00:00:00.01 |      66 |       |       |          |
|   4 |     UNION ALL (RECURSIVE WITH) BREADTH FIRST|                        |      1 |        |     31 |00:00:00.01 |      66 |       |       |          |
|   5 |      SORT AGGREGATE                         |                        |      1 |      1 |      1 |00:00:00.01 |       3 |       |       |          |
|   6 |       INDEX FULL SCAN (MIN/MAX)             | IX_TTT                 |      1 |      1 |      1 |00:00:00.01 |       3 |       |       |          |
|   7 |      SORT AGGREGATE                         |                        |     30 |      1 |     30 |00:00:00.01 |      63 |       |       |          |
|   8 |       FIRST ROW                             |                        |     30 |      1 |     29 |00:00:00.01 |      63 |       |       |          |
|*  9 |        INDEX RANGE SCAN (MIN/MAX)           | IX_TTT                 |     30 |      1 |     29 |00:00:00.01 |      63 |       |       |          |
|  10 |      RECURSIVE WITH PUMP                    |                        |     31 |        |     30 |00:00:00.01 |       0 |       |       |          |
|  11 |    COLLECTION ITERATOR PICKLER FETCH        | XMLSEQUENCEFROMXMLTYPE |     30 |   8168 |    148 |00:00:00.10 |     299 |       |       |          |
-----------------------------------------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   3 - filter("V"."OWNER" IS NOT NULL)
   9 - access("T1"."OWNER">:B1)


[collapse]

Update: Since Oracle 12c it would be much better to use Laterals

About the performance of exception handling

Posted on May 18, 2012 by Sayan Malakshinov Posted in oracle, query optimizing 1 Comment

This article is about a well-known fact about the poor performance of exception handling.

Yes, the exception handling is rather slow, however, it is not necessary to try to avoid exceptions whenever possible, and by any means. For example, I often see that people are trying to avoid them even in cases of search by primary key where probability of receiving “no_data_found” is minimal.
In general, we should analyze the possible frequency of exceptions and “overhead”, which is added by the chosen way with exception handlers.

Let me explain this with an example, which I mentioned earlier: suppose we have a code that returns a field from the table by “pk” and it returns “null” in case there is no such entry.
Test table:

-- Lookup table for the tests: 100,000 rows with a = b = 1..100000; a is the primary key.
create table t_test (
  a primary key,
  b
)
as
select level
      ,level
from   dual
connect by level <= 1e5;

Lets create a standard function for tests:

-- f1: baseline lookup of t_test.b by primary key.
--   p       - key value to look up (t_test.a)
--   returns - the matching b, or NULL when no row exists
-- "No row" is handled through the NO_DATA_FOUND exception handler.
-- The /*+ F1 */ comment looks like a tag for identifying the statement rather than a real hint.
create or replace function f1(p in number) return number
as
  res number;
begin
  select/*+ F1 */ b into res
  from t_test t
  where t.a=p;
  return res;
exception when no_data_found then
  return null;  -- a missing key is reported as NULL, not as an error
end;
/

The most common options to avoid the exception mechanism in these cases are the following:

Variant 1

-- f2: lookup of t_test.b by primary key without an exception handler.
--   p       - key value to look up (t_test.a)
--   returns - the matching b, or NULL when no row exists
-- A cursor FOR loop is used only to fetch the first row: the function returns from
-- inside the loop, and falls through to "return null" when the cursor is empty.
create or replace function f2(p in number) return number
as
begin
  for rec in (select/*+ F2 */ b from t_test t where t.a=p) loop
    return rec.b;  -- leave on the first row
  end loop;
  return null;     -- the loop body never ran: no such key
end;
/

[collapse]

By the way, don’t use the following form when your cursor cannot return more than one row:

Spoiler

-- f2 (counter-example, shown as a form NOT to use): same result as the early-return version,
-- but the loop is not left after the first row, so a second iteration is attempted
-- even though the primary-key lookup can return at most one row.
--   p       - key value to look up (t_test.a)
--   returns - the matching b, or NULL when no row exists
create or replace function f2(p in number) return number
as
  res number;
begin
  for rec in (select/*+ F2 */ b from t_test t where t.a=p) loop
    res:=rec.b;
  end loop;
  return res;  -- still NULL when the loop body never ran
end;
/

[collapse]

Otherwise, there will be an attempt at a second iteration, which you can observe in the profiler.

Variant 2

-- f3: lookup of t_test.b by primary key without an exception handler.
--   p       - key value to look up (t_test.a)
--   returns - the matching b, or NULL when no row exists
-- The aggregate min() without GROUP BY always produces exactly one row
-- (NULL for an empty input), so SELECT INTO never raises NO_DATA_FOUND.
create or replace function f3(p in number) return number
as
  res number;
begin
  select/*+ F3 */ min(b) into res
  from t_test t
  where t.a=p;
  return res;
end;
/

[collapse]

Here I would like to propose my own option for this:

Spoiler

-- f4: lookup of t_test.b by primary key without an exception handler.
--   p       - key value to look up (t_test.a)
--   returns - the matching b, or NULL when no row exists
-- The lookup is wrapped in a scalar subquery selected from DUAL: the outer query always
-- returns one row, and an empty scalar subquery evaluates to NULL.
create or replace function f4(p in number) return number
as
  res number;
begin
  select/*+ F4 */ 
    (select b from t_test t where t.a=p)
    into res
  from dual;
  return res;
end;
/

[collapse]

And now let’s carry out a basic test by executing these functions for the test table:

-- Benchmark: calls f1..f4 once for every key in v_start..v_end and prints the elapsed
-- time of each series. With 1..100000 every key exists, so no exception is raised.
-- Run with "set serveroutput on" to see the dbms_output lines.
declare
  v       integer;                              -- receives each function result (value is not used)
  v_start integer:= 1;                          -- first key passed to the functions
  v_end   integer:= 100000;                     -- last key passed to the functions
    l_timer integer := dbms_utility.get_time;   -- start of the current lap, in hundredths of a second

    -- Prints the seconds elapsed since the previous call followed by msg, then restarts the lap timer.
    procedure print(msg varchar2) is
    begin
      dbms_output.put_line(to_char((dbms_utility.get_time-l_timer)/100,'9990.00')||' '||msg);
      l_timer:=dbms_utility.get_time;
    end;
     
begin
  print('start');
  for i in v_start..v_end loop
    v:=f1(i);
  end loop;
  print('1');
  for i in v_start..v_end loop
    v:=f2(i);
  end loop;
  print('2');
  for i in v_start..v_end loop
    v:=f3(i);
  end loop;
  print('3');
  for i in v_start..v_end loop
    v:=f4(i);
  end loop;
  print('4');
end;
/

As a result, we get the following ratio:

Variant	Time(sec)
Variant 1(with exception)	3.03
Variant 2(with cycle)	3.62
Variant 3(with min)	3.34
Variant 4(scalar subquery)	3.10

As you can see, the original query is the fastest when no exceptions are raised! Let’s now check it with different percentages of exceptions: exceptions will be raised for calls with i<=0, the total number of calls will be 100001, and I will change v_start and v_end in pairs: (-5000, 95000), (-10000, 90000), (-50000, 50000), (-90000, 10000):

-- Benchmark: calls f1..f4 once for every key in v_start..v_end and prints the elapsed
-- time of each series. Keys <= 0 do not exist in t_test, so the share of "empty" lookups
-- is controlled by v_start / v_end.
declare
  v_start integer := -50000;                  -- first key passed to the functions
  v_end   integer :=  50000;                  -- last key passed to the functions
  l_res   integer;                            -- receives each function result (value is not used)
  l_mark  integer := dbms_utility.get_time;   -- start of the current lap, in hundredths of a second

  -- Prints the seconds elapsed since the previous lap followed by the label, then starts a new lap.
  procedure lap(p_label varchar2) is
  begin
    dbms_output.put_line(
      to_char((dbms_utility.get_time - l_mark) / 100, '9990.00') || ' ' || p_label
    );
    l_mark := dbms_utility.get_time;
  end lap;
begin
  lap('start');

  for k in v_start .. v_end loop
    l_res := f1(k);
  end loop;
  lap('1');

  for k in v_start .. v_end loop
    l_res := f2(k);
  end loop;
  lap('2');

  for k in v_start .. v_end loop
    l_res := f3(k);
  end loop;
  lap('3');

  for k in v_start .. v_end loop
    l_res := f4(k);
  end loop;
  lap('4');
end;
/

Summary table of multiple comparisons:

Variant	0%	~5%	~10%	~50%	~90%
Variant 1(with exception)	3.04	3.12	3.16	3.82	4.51
Variant 2(with cycle)	3.18	3.21	3.20	3.51	3.85
Variant 3(with min)	3.37	3.34	3.29	3.25	3.18
Variant 4(scalar subquery)	3.12	3.06	3.03	2.98	2.94

What conclusions can be drawn from this:

As you can see, 5% of exceptions is a kind of turning point for this table, when the standard option with exception becomes less effective than the option with subquery (by about ~4.5% to be precise), and about ~10% than the remaining two.
The options with “min” and with a cycle are, on the whole, worse than the option with a subquery.
The options with a subquery and with “min” become faster as the number of “empty” queries increases.

A funny fact about collect

Posted on April 28, 2012 by Sayan Malakshinov Posted in collect, oracle, parallel, query optimizing 1 Comment

Many people know that oracle creates domain types on its own when necessary, for example when using a type declared in a package (before 11g they could be observed in dba_objects with the name like ‘PLSQL%’).

Fact #1

It acts in the same way when calling an aggregate function “collect”.

-- Firstly we check if there are such types
DB11G/XTENDER> select t.type_name,t.type_name,t.typecode 
 2 from dba_types t 
 3 where t.type_name like 'SYSTP%';
 
no rows selected
 
 
-- Executing a query with collect
DB11G/XTENDER> select collect(level) from dual connect by level<=10;
 
COLLECT(LEVEL)
-------------------------------------------------------------------------
 
SYSTPZvGjVQTySRSjYVlHXyEE2Q==(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
 
1 row selected.
 
 
-- We check it again and observe that a new type SYSTP% has appeared
DB11G/XTENDER> select t.type_name,t.type_name,t.typecode 
 2 from dba_types t 
 3 where t.type_name like 'SYSTP%';
 
TYPE_NAME                      TYPE_NAME                      TYPECODE
------------------------------ ------------------------------ ------------
SYSTPZvGjVQTySRSjYVlHXyEE2Q==  SYSTPZvGjVQTySRSjYVlHXyEE2Q==  COLLECTION

And also we select data on it from sys.obj$. We will need it later:

DB11G/XTENDER> select obj#,type#,ctime,mtime,stime,status
  2  from sys.obj$ o$
  3  where o$.name = 'SYSTPZvGjVQTySRSjYVlHXyEE2Q==';
 
  OBJ#  TYPE# CTIME               MTIME               STIME              
------ ------ ------------------- ------------------- -------------------
103600     13 28.04.2012 01:02:35 28.04.2012 01:02:35 28.04.2012 01:02:35

This is a natural result as you can not return information to the client without having described it.

Fact #2

Now that we know that the type is created, it is interesting to know what will happen to it: will it be removed after the fetch or after the client disconnects? For example, the domain type was automatically dropped when the package type was removed. Are we going to have a similar automatic removal here?

According to Bug 4033868: COLLECT FUNCTION LEAVES TEMPORARY SYS TYPES BEHIND this bug is fixed in “11.0”, but I am testing it on 11.2.0.1 and a basic check after disconnection showed, that this type exists until instance restart. In fact, it stays there even after that, but it is not displayed in dba_objects.

I will drop it manually, so I won’t have to restart the instance once again. This is absolutely similar to how oracle “removes” this type in 11.2:

DB11G/XTENDER> drop type "SYSTPZvGjVQTySRSjYVlHXyEE2Q==";
 
Type dropped.

And now lets check it:

DB11G/XTENDER> select * from dba_types
   where type_name='SYSTPZvGjVQTySRSjYVlHXyEE2Q==';
 
no rows selected

It seems like we have removed it, but what if we look in the sys.obj$:

DB11G/XTENDER> select obj#,type#,ctime,mtime,stime,status
  2  from sys.obj$ o$
  3  where o$.name = 'SYSTPZvGjVQTySRSjYVlHXyEE2Q==';
 
  OBJ#  TYPE# CTIME               MTIME               STIME              
------ ------ ------------------- ------------------- -------------------
103600     10 28.04.2012 01:02:35 28.04.2012 01:40:37 31.12.4712 23:59:59

As you can see, the object is still there, but with type#=10 and with “stime” equal to the last date of year 4712, and before that it was type#=13 and stime=mtime=ctime, and in 10.2 after manual drop of this type no entries were left. I will explain the correspondence of the fields from “sys.obj$” and “dba_objects” to clarify this: obj# – object_id, type# ~ type code, ctime,mtime,stime – created, last_ddl_time, timestamp respectively. By the “dba_objects” view code we will see that type# = 10 is supposedly “NON-EXISTENT” and displaying it is not necessary.

and (o.type# not in (1  /* INDEX - handled below */,
                      10 /* NON-EXISTENT */)

And the date being set to 31.12.4712 23:59:59 indicates its irrelevance – this future is too distant 🙂

Fact #3

“Collect” is a pretty buggy thing in general, as I have repeatedly noticed, and Metalink has information about lots of bugs related to “collect” (for example, Bug 11906197 “Parallel query with COLLECT function fails with ORA-7445/ORA-600.”,”Bug 8912282: COLLECT+UNIQUE+ORDER DOES NOT REMOVE DUPLICATES”, “Bug 6145841: ORA-600[KOLOGSF2] ON CAST(COLLECT(..)) CALL”,”Bug 11802848: CAST/COLLECT DOES NOT WORK IN VERSION 11.2.0.2 WITH TYPE SYS.DBMS_DEBUG_VC2COLL”, “Bug 6996176: SELECT COLLECT DISTINCT GROUP BY STATEMENT RETURNS DUPLICATE VALUES”)

Test table

-- 1000 rows: a cycles through 0..7, b = 1..1000; the table is created with parallel degree 8.
create table test_parallel
parallel 8
as
select mod(level, 8) a
      ,level         b
from   dual
connect by level <= 1000;

create index ix_test_parallel on test_parallel (a);

[collapse]

Errors

DB11G/XTENDER> select/*+ PARALLEL(2)*/ cast(collect(a) as number_table) from test_parallel ;
select/*+ PARALLEL(2)*/ cast(collect(a) as number_table) from test_parallel
*
ERROR at line 1:
ORA-12801: error signaled in parallel query server P000
ORA-21710: argument is expecting a valid memory address of an object
 
Elapsed: 00:00:00.12
DB11G/XTENDER> select cast(collect(b) as number_table) from test_parallel group by a;
select cast(collect(b) as number_table) from test_parallel group by a
*
ERROR at line 1:
ORA-12805: parallel query server died unexpectedly
 
Elapsed: 00:00:17.57

[collapse]

I have not run into such errors with my own, slower aggregate. So this is the usual dilemma: use the fast but unstable “collect”, or a slower aggregate of my own…

An example of an aggregate

create or replace type ncollect_type as object
(
  -- Aggregation context: the numbers gathered so far.
  data sys.ku$_objnumset,

  -- Builds a fresh context in sctx.
  static function ODCIAggregateInitialize(
    sctx in out ncollect_type
  ) return number,

  -- Adds one input value to the context.
  member function ODCIAggregateIterate(
    self in out ncollect_type,
    val  in     number
  ) return number,

  -- Takes one input value back out of the context.
  member function ODCIAggregateDelete(
    self in out ncollect_type,
    val  in     number
  ) return number,

  -- Hands the accumulated collection back through returnval.
  member function ODCIAggregateTerminate(
    self      in  ncollect_type,
    returnval out sys.ku$_objnumset,
    flags     in  number
  ) return number,

  -- Folds a second context (ctx2) into this one.
  member function ODCIAggregateMerge(
    self in out ncollect_type,
    ctx2 in     ncollect_type
  ) return number
)
/
create or replace type body ncollect_type is

  -- Implementation of the ODCIAggregate routines for ncollect_type.
  -- The context holds a single attribute, data (sys.ku$_objnumset), into
  -- which input values are accumulated. Every routine returns
  -- ODCIConst.Success.

  -- Creates the aggregation context with an empty, initialized collection.
  static function ODCIAggregateInitialize
  ( sctx in out ncollect_type )
  return number
  is
  begin
    sctx := ncollect_type( sys.ku$_objnumset()) ;
    return ODCIConst.Success ;
  end;

  -- Appends val to the accumulated collection.
  member function ODCIAggregateIterate
  ( self  in out ncollect_type ,
    val   in     number
  ) return number
  is
  begin
    -- Grow the collection in place instead of rebuilding it with
    -- "multiset union" for every row, which copied all previously
    -- gathered elements on each call. data is constructed by
    -- ODCIAggregateInitialize, so extend is safe here.
    self.data.extend;
    self.data(self.data.last) := val;
    return ODCIConst.Success;
  end;

  -- Removes val from the accumulated collection via "multiset except".
  member function ODCIAggregateDelete
  (  self in out  ncollect_type,
     val  in      number
  ) return number
  is
  begin
    self.data:=self.data multiset except sys.ku$_objnumset(val);
    return ODCIConst.Success;
  end;

  -- Returns the accumulated collection through returnval; flags is not used.
  member function ODCIAggregateTerminate
  ( self        in  ncollect_type ,
    returnval   out sys.ku$_objnumset ,
    flags       in  number
  ) return number
  is
  begin
    returnval:=self.data;
    return ODCIConst.Success;
  end;

  -- Combines the elements gathered in ctx2 with those of this context.
  member function ODCIAggregateMerge
  ( self in out ncollect_type ,
    ctx2 in     ncollect_type
  ) return number
  is
  begin
    self.data := self.data multiset union ctx2.data;
    return ODCIConst.Success;
  end;
end;
/

[collapse]

And the results:

Variant	Time(sec)
select/*+ NO_PARALLEL*/ cast(collect(b) as number_table) from test_parallel group by a;	0.03
select/*+ NO_PARALLEL*/ ncollect(b) from test_parallel group by a	0.08
select ncollect(b) from test_parallel group by a;	0.07
select/*+ NO_PARALLEL*/ collect(a) from test_parallel;	0.02
select/*+ NO_PARALLEL*/ ncollect(a) from test_parallel	0.18
select ncollect(a) from test_parallel;	0.19
select/*+ NO_PARALLEL*/ collect(b) from test_parallel;	0.02
select/*+ NO_PARALLEL*/ ncollect(b) from test_parallel	0.18
select ncollect(b) from test_parallel;	0.06