SQL Apply vs UDF vs inline vs join

Matching a small text value for a data migration

All queries below return 18,000,101 records

Have a data migration exercise, with a fair number of old columns to be matched into new to find the replacement new ID for a reference table

 

Looking for an indicative mechanism for best performance (best is at the bottom)


 

SQL 2008 + MGMT Studio 2008

This uses a scalar UDF, takes ~7 mins

select
       pmIT.OID
      ,pmIT.RECORDTYPE
      ,dbo.getNewItemTypeIDFromOldAsText(pmIT.RECORDTYPE) as pmITMigratedItemTypeID
      into TC_5_2_1ITEMS_PMTEST
      from ArchwayStaffProductionBaseline.gladis.ITEM pmIT
      join ArchwayStaffProductionBaseline.gladis.LISTCONTROL pmLC on pmLC.OID = pmIT.LISTCONTROLOID
      where 1= 1
      and (pmLC.OID in              -- Migrated Job
                  (     select pmJI.ENTITYOID from ArchwayStaffProductionBaseline.gladis.JOBITEMS pmJI
                        where pmJI.ENTITYTYPE = 'List'
                  )
            OR pmLC.REVIEWSTATE = 'APPROVED'
      )

 

UDF:

-- ==================================================
-- Author: Lindsay
-- Create date: 28/02/2013
-- Description: Convert Old Textual Item Type to new Item Type ID
-- ==================================================
CREATE FUNCTION [dbo].getNewItemTypeIDFromOldAsText(@Input nvarchar(50)) 
       RETURNS int 
AS 
BEGIN; 
  DECLARE @Result int; 
  select @Result = ID from archway.dbo.RecordType rt
where rt.name =
case @Input
when 'ARTWORK' then 'Art Work'
when 'MAP_PLAN' then 'Map/Plan'
when 'MOVING_IMAGE' then 'Moving image'
when 'NOT_DETERMINED' then 'Not Determined'
when 'OBJECT' then 'Object'
when 'PHOTOGRAPH' then 'Photograph'
when 'SOUND_RECORDING' then 'Sound recording'
when 'TEXT' then 'Text'
when '' then 'Text'
when null then 'Text'
else @Input end
  RETURN @Result; 
END
GO
-- ==================================================
-- Author: Lindsay
-- Create date: 28/02/2013
-- Description: Convert Old Textual Item Type to new Item Type ID
-- ==================================================
CREATE FUNCTION [dbo].getNewItemTypeIDFromOldAsText(@Input nvarchar(50)) 
       RETURNS int 
AS 
BEGIN; 
  DECLARE @Result int; 
  select @Result = ID from archway.dbo.RecordType rt
where rt.name =
case @Input
when 'ARTWORK' then 'Art Work'
when 'MAP_PLAN' then 'Map/Plan'
when 'MOVING_IMAGE' then 'Moving image'
when 'NOT_DETERMINED' then 'Not Determined'
when 'OBJECT' then 'Object'
when 'PHOTOGRAPH' then 'Photograph'
when 'SOUND_RECORDING' then 'Sound recording'
when 'TEXT' then 'Text'
when '' then 'Text'
when null then 'Text'
else @Input end
  RETURN @Result; 
END
GO

 

 


 

The above uses the a scalar UDF  which have known issues causing low performance, a common workaround is to convert to a table udf to return a single row/value which is what I did next:

http://connect.microsoft.com/SQLServer/feedback/details/273443/the-scalar-expression-function-would-speed-performance-while-keeping-the-benefits-of-functions

Took 02:05 mins

select
       pmIT.OID
      ,pmIT.RECORDTYPE
      ,(select ID from dbo.getNewItemTypeIDFromOldAsTextTblUDF(pmIT.RECORDTYPE)) as pmITMigratedItemTypeID
      into TC_5_2_1ITEMS_PMTEST20
      from ArchwayStaffProductionBaseline.gladis.ITEM pmIT
      join ArchwayStaffProductionBaseline.gladis.LISTCONTROL pmLC on pmLC.OID = pmIT.LISTCONTROLOID
      where 1= 1
      and (pmLC.OID in              -- Migrated Job
                  (     select pmJI.ENTITYOID from ArchwayStaffProductionBaseline.gladis.JOBITEMS pmJI
                        where pmJI.ENTITYTYPE = 'List'
                  )
            OR pmLC.REVIEWSTATE = 'APPROVED'
      )

UDF

-- ==================================================
-- Author: Lindsay
-- Create date: 28/02/2013
-- Description: Convert Old Textual Item Type to new Item Type ID
-- ==================================================
CREATE function [dbo].getNewItemTypeIDFromOldAsTextTblUDF(@Input nvarchar(50)) 
       RETURNS table 
AS 
return (
  select ID from archway.dbo.RecordType rt
where rt.name =
case @Input
when 'ARTWORK' then 'Art Work'
when 'MAP_PLAN' then 'Map/Plan'
when 'MOVING_IMAGE' then 'Moving image'
when 'NOT_DETERMINED' then 'Not Determined'
when 'OBJECT' then 'Object'
when 'PHOTOGRAPH' then 'Photograph'
when 'SOUND_RECORDING' then 'Sound recording'
when 'TEXT' then 'Text'
when '' then 'Text'
when null then 'Text'
else @Input end
)
GO

 


 

Of course I like UDF's for encapsulation… but I'm looking for more performance..

Moving UDF getNewItemTypeIDFromOldAsText functionality inline

Took between 0:58 & 1:06 min over a couple of runs

select
       pmIT.OID
      ,pmIT.RECORDTYPE
 
      into TC_5_2_1ITEMS_PMTEST4
      from ArchwayStaffProductionBaseline.gladis.ITEM pmIT
      join ArchwayStaffProductionBaseline.gladis.LISTCONTROL pmLC on pmLC.OID = pmIT.LISTCONTROLOID
      outer apply (
            select ID as pmITMigratedItemTypeID
            from archway.dbo.RecordType rt
            where rt.name =
                  case pmIT.RECORDTYPE
                  when 'ARTWORK' then 'Art Work'
                  when 'MAP_PLAN' then 'Map/Plan'
                  when 'MOVING_IMAGE' then 'Moving image'
                  when 'NOT_DETERMINED' then 'Not Determined'
                  when 'OBJECT' then 'Object'
                  when 'PHOTOGRAPH' then 'Photograph'
                  when 'SOUND_RECORDING' then 'Sound recording'
                  when 'TEXT' then 'Text'
                  when '' then 'Text'
                  when null then 'Text'
                  ELSE NULL END    
      ) naRT
      where 1= 1
      and (pmLC.OID in              -- Migrated Job
                  (     select pmJI.ENTITYOID from ArchwayStaffProductionBaseline.gladis.JOBITEMS pmJI
                        where pmJI.ENTITYTYPE = 'List'
                  )
            OR pmLC.REVIEWSTATE = 'APPROVED'
      )

 


 

 

Next I created a table with an index on the text to be compared..

Took 0:54-0:55 min

select
       pmIT.OID
      ,pmIT.RECORDTYPE
     
      ,naRT.id as pmITMigratedItemTypeID
 
      into TC_5_2_1ITEMS_PMTEST7
      from ArchwayStaffProductionBaseline.gladis.ITEM pmIT
      join ArchwayStaffProductionBaseline.gladis.LISTCONTROL pmLC on pmLC.OID = pmIT.LISTCONTROLOID
      join MatchOldRecordTypeTextToNewID naRT on naRT.OldTypeAsText = ISNULL(pmIT.RECORDTYPE, '')
      where 1= 1
      and (pmLC.OID in              -- Migrated Job
                  (     select pmJI.ENTITYOID from ArchwayStaffProductionBaseline.gladis.JOBITEMS pmJI
                        where pmJI.ENTITYTYPE = 'List'
                  )
            OR pmLC.REVIEWSTATE = 'APPROVED'
      )

Creating the table

-- Start lookup table for migrating item
RECORDTYPE to na.RecordType -----------------------------------
 
IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[MatchOldRecordTypeTextToNewID]') AND type in (N'U'))
      DROP TABLE [dbo].MatchOldRecordTypeTextToNewID
     
select *
      into MatchOldRecordTypeTextToNewID
from
      (
      select
             cast(rt.id as int) as id           -- drop identity properties
            ,name             ,CASE rt.name
                  when 'Art Work' then 'ARTWORK'
                  when 'Map/Plan' then 'MAP_PLAN'
                  when 'Moving image' then 'MOVING_IMAGE'
                  when 'Not Determined' then 'NOT_DETERMINED'
                  when 'Object' then 'OBJECT'
                  when 'Photograph' then 'PHOTOGRAPH'
                  when 'Sound recording' then 'SOUND_RECORDING'
                  when 'Text' then 'TEXT'
                  end as OldTypeAsText
      from archway.dbo.RecordType rt
      ) t
where OldTypeAsText is not NULL                 -- filter out those not relevant to conversion
     
insert into dbo.MatchOldRecordTypeTextToNewID
select
       (select ID from archway.dbo.RecordType rt where name = 'Text') as ID
      ,'Text' as name
      ,'' as OldTypeAsText
     
create unique index idx_MatchOldRecordTypeTextToNewID on MatchOldRecordTypeTextToNewID(OldTypeAsText);
-- End lookup table for migrating item RECORDTYPE to na.RecordType---