T SQL Table Valued Function to Split a Column on commas
I wrote a Table Valued Function in Microsoft SQL Server 2008 to take a comma delimited column in a database to spit out separate rows for each value.
Ex: "one,two,three,four" would return a new table with only one column containing the following values:
one
two
three
four
Does this code look error prone to you guys? When I test it with
SELECT * FROM utvf_Split('one,two,three,four',',') 
it just runs forever and never returns anything. This is getting really disheartening especially since there are no built in split functions on MSSQL server (WHY WHY WHY?!) and all the similar functions I've found on the web are absolute trash or simply irrelevant to what I'm trying to do.
Here is the function:
USE *myDBname*
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER FUNCTION [dbo].[utvf_SPlit] (@String VARCHAR(MAX), @delimiter CHAR)
RETURNS @SplitValues TABLE
(
    Asset_ID VARCHAR(MAX) NOT NULL
)
AS
BEGIN
            DECLARE @FoundIndex INT
            DECLARE @ReturnValue VARCHAR(MAX)
            SET @FoundIndex = CHARINDEX(@delimiter, @String)
            WHILE (@FoundIndex <> 0)
            BEGIN
                  DECLARE @NextFoundIndex INT
                  SET @NextFoundIndex = CHARINDEX(@delimiter, @String, @FoundIndex+1)
                  SET @ReturnValue = SUBSTRING(@String, @FoundIndex,@NextFoundIndex-@FoundIndex)
                  SET @FoundIndex = CHARINDEX(@delimiter, @String)
                  INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
            END
            RETURN
END
sql-server sql-server-2008 t-sql functions
add a comment |
I wrote a Table Valued Function in Microsoft SQL Server 2008 to take a comma delimited column in a database to spit out separate rows for each value.
Ex: "one,two,three,four" would return a new table with only one column containing the following values:
one
two
three
four
Does this code look error prone to you guys? When I test it with
SELECT * FROM utvf_Split('one,two,three,four',',') 
it just runs forever and never returns anything. This is getting really disheartening especially since there are no built in split functions on MSSQL server (WHY WHY WHY?!) and all the similar functions I've found on the web are absolute trash or simply irrelevant to what I'm trying to do.
Here is the function:
USE *myDBname*
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER FUNCTION [dbo].[utvf_SPlit] (@String VARCHAR(MAX), @delimiter CHAR)
RETURNS @SplitValues TABLE
(
    Asset_ID VARCHAR(MAX) NOT NULL
)
AS
BEGIN
            DECLARE @FoundIndex INT
            DECLARE @ReturnValue VARCHAR(MAX)
            SET @FoundIndex = CHARINDEX(@delimiter, @String)
            WHILE (@FoundIndex <> 0)
            BEGIN
                  DECLARE @NextFoundIndex INT
                  SET @NextFoundIndex = CHARINDEX(@delimiter, @String, @FoundIndex+1)
                  SET @ReturnValue = SUBSTRING(@String, @FoundIndex,@NextFoundIndex-@FoundIndex)
                  SET @FoundIndex = CHARINDEX(@delimiter, @String)
                  INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
            END
            RETURN
END
sql-server sql-server-2008 t-sql functions
add a comment |
I wrote a Table Valued Function in Microsoft SQL Server 2008 to take a comma delimited column in a database to spit out separate rows for each value.
Ex: "one,two,three,four" would return a new table with only one column containing the following values:
one
two
three
four
Does this code look error prone to you guys? When I test it with
SELECT * FROM utvf_Split('one,two,three,four',',') 
it just runs forever and never returns anything. This is getting really disheartening especially since there are no built in split functions on MSSQL server (WHY WHY WHY?!) and all the similar functions I've found on the web are absolute trash or simply irrelevant to what I'm trying to do.
Here is the function:
USE *myDBname*
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER FUNCTION [dbo].[utvf_SPlit] (@String VARCHAR(MAX), @delimiter CHAR)
RETURNS @SplitValues TABLE
(
    Asset_ID VARCHAR(MAX) NOT NULL
)
AS
BEGIN
            DECLARE @FoundIndex INT
            DECLARE @ReturnValue VARCHAR(MAX)
            SET @FoundIndex = CHARINDEX(@delimiter, @String)
            WHILE (@FoundIndex <> 0)
            BEGIN
                  DECLARE @NextFoundIndex INT
                  SET @NextFoundIndex = CHARINDEX(@delimiter, @String, @FoundIndex+1)
                  SET @ReturnValue = SUBSTRING(@String, @FoundIndex,@NextFoundIndex-@FoundIndex)
                  SET @FoundIndex = CHARINDEX(@delimiter, @String)
                  INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
            END
            RETURN
END
sql-server sql-server-2008 t-sql functions
I wrote a Table Valued Function in Microsoft SQL Server 2008 to take a comma delimited column in a database to spit out separate rows for each value.
Ex: "one,two,three,four" would return a new table with only one column containing the following values:
one
two
three
four
Does this code look error prone to you guys? When I test it with
SELECT * FROM utvf_Split('one,two,three,four',',') 
it just runs forever and never returns anything. This is getting really disheartening especially since there are no built in split functions on MSSQL server (WHY WHY WHY?!) and all the similar functions I've found on the web are absolute trash or simply irrelevant to what I'm trying to do.
Here is the function:
USE *myDBname*
GO
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER FUNCTION [dbo].[utvf_SPlit] (@String VARCHAR(MAX), @delimiter CHAR)
RETURNS @SplitValues TABLE
(
    Asset_ID VARCHAR(MAX) NOT NULL
)
AS
BEGIN
            DECLARE @FoundIndex INT
            DECLARE @ReturnValue VARCHAR(MAX)
            SET @FoundIndex = CHARINDEX(@delimiter, @String)
            WHILE (@FoundIndex <> 0)
            BEGIN
                  DECLARE @NextFoundIndex INT
                  SET @NextFoundIndex = CHARINDEX(@delimiter, @String, @FoundIndex+1)
                  SET @ReturnValue = SUBSTRING(@String, @FoundIndex,@NextFoundIndex-@FoundIndex)
                  SET @FoundIndex = CHARINDEX(@delimiter, @String)
                  INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
            END
            RETURN
END
sql-server sql-server-2008 t-sql functions
sql-server sql-server-2008 t-sql functions
edited Jan 9 '18 at 15:00
Aaron Bertrand♦
150k18286484
150k18286484
asked Jul 18 '12 at 15:42
OvetSOvetS
53114
53114
add a comment |
add a comment |
                                5 Answers
                            5
                        
active
oldest
votes
Re-worked it slightly...
DECLARE @FoundIndex INT
DECLARE @ReturnValue VARCHAR(MAX)
SET @FoundIndex = CHARINDEX(@delimiter, @String)
WHILE (@FoundIndex <> 0)
BEGIN
      SET @ReturnValue = SUBSTRING(@String, 0, @FoundIndex)
      INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
      SET @String = SUBSTRING(@String, @FoundIndex + 1, len(@String) - @FoundIndex)
      SET @FoundIndex = CHARINDEX(@delimiter, @String)
END
INSERT @SplitValues (Asset_ID) VALUES (@String)
RETURN
add a comment |
I wouldn't do this with a loop; there are much better alternatives. By far the best, when you have to split, is CLR, and Adam Machanic's approach is the fastest I've tested.
Next best approach IMHO, if you can't implement CLR, is a numbers table:
SET NOCOUNT ON;
DECLARE @UpperLimit INT = 1000000;
WITH n AS
(
    SELECT
        x = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
    FROM       sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
    CROSS JOIN sys.all_objects AS s3
)
SELECT Number = x
  INTO dbo.Numbers
  FROM n
  WHERE x BETWEEN 1 AND @UpperLimit
  OPTION (MAXDOP 1); -- protecting from Paul White's observation
GO
CREATE UNIQUE CLUSTERED INDEX n ON dbo.Numbers(Number) 
    --WITH (DATA_COMPRESSION = PAGE);
GO
... which allows this function:
CREATE FUNCTION dbo.SplitStrings_Numbers
(
   @List       NVARCHAR(MAX),
   @Delimiter  NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
   RETURN
   (
       SELECT Item = SUBSTRING(@List, Number, 
         CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number)
       FROM dbo.Numbers
       WHERE Number <= CONVERT(INT, LEN(@List))
         AND SUBSTRING(@Delimiter + @List, Number, 1) = @Delimiter
   );
GO
I believe all of these will perform better than the function you have, when you get it working, especially since they are inline instead of multi-statement. I haven't investigated why yours isn't working, because I don't think it's worth it to get that function working.
But that all said...
Since you are using SQL Server 2008, is there a reason you need to split in the first place? I would rather use a TVP for this:
CREATE TYPE dbo.strings AS TABLE
(
  string NVARCHAR(4000)
);
Now you can accept this as a parameter to your stored procedures, and use the contents just like you would use a TVF:
CREATE PROCEDURE dbo.foo
  @strings dbo.strings READONLY
AS
BEGIN
  SET NOCOUNT ON;
  SELECT Asset_ID = string FROM @strings;
  -- SELECT Asset_ID FROM dbo.utvf_split(@other_param, ',');
END
And you can pass a TVP directly from C# etc. as a DataTable. This will almost certainly outperform any of the solutions above, especially if you are building a comma-separated string in your app specifically so that your stored procedure can call a TVP to split it apart again. For a lot more info on TVPs see Erland Sommarskog's great article.
More recently, I've written a series on splitting strings:
- http://sqlperformance.com/2012/07/t-sql-queries/split-strings
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-follow-up
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-now-with-less-t-sql
And if you are using SQL Server 2016 or newer (or Azure SQL Database), there is a new STRING_SPLIT function, which I blogged about here:
- Performance Surprises and Assumptions : STRING_SPLIT()
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #1
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #2
add a comment |
SQL Server 2016 introduced the STRING_SPLIT() function. It has two parameters - the string to be chopped up and the separator. The output is one row per value returned.
For the given example
SELECT * FROM string_split('one,two,three,four', ',');
will return
value
------------------
one
two
three
four
add a comment |
I have been using and loving Jeff Moden's string splitter just about since it came out.
Tally OH! An Improved SQL 8K “CSV Splitter” Function
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
add a comment |
CREATE FUNCTION [dbo].[fnSplit]
(
    @sInputList VARCHAR(8000),         -- List of delimited items
    @sDelimiter VARCHAR(8000) = ','    -- delimiter that separates items
)
RETURNS @List TABLE (colData VARCHAR(8000))
BEGIN
DECLARE @sItem VARCHAR(8000)
    WHILE CHARINDEX(@sDelimiter,@sInputList,0) <> 0
    BEGIN
        SELECT @sItem=RTRIM(LTRIM(SUBSTRING(@sInputList,1,CHARINDEX
(@sDelimiter,@sInputList,0)-1))),
        @sInputList=RTRIM(LTRIM(SUBSTRING(@sInputList,CHARINDEX(@sDelimiter,@sInputList,0)
+LEN(@sDelimiter),LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem
        END
        IF LEN(@sInputList) > 0
            INSERT INTO @List SELECT @sInputList -- Put the last item in
        RETURN
    END
--TEST
--Example 1: select * from fnSplit('1,22,333,444,,5555,666', ',')
--Example 2: select * from fnSplit('1##22#333##444','##')  --note second colData has embedded #
--Example 3: select * from fnSplit('1 22 333 444  5555 666', ' ')

add a comment |
                                5 Answers
                            5
                        
active
oldest
votes
                                5 Answers
                            5
                        
active
oldest
votes
active
oldest
votes
active
oldest
votes
Re-worked it slightly...
DECLARE @FoundIndex INT
DECLARE @ReturnValue VARCHAR(MAX)
SET @FoundIndex = CHARINDEX(@delimiter, @String)
WHILE (@FoundIndex <> 0)
BEGIN
      SET @ReturnValue = SUBSTRING(@String, 0, @FoundIndex)
      INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
      SET @String = SUBSTRING(@String, @FoundIndex + 1, len(@String) - @FoundIndex)
      SET @FoundIndex = CHARINDEX(@delimiter, @String)
END
INSERT @SplitValues (Asset_ID) VALUES (@String)
RETURN
add a comment |
Re-worked it slightly...
DECLARE @FoundIndex INT
DECLARE @ReturnValue VARCHAR(MAX)
SET @FoundIndex = CHARINDEX(@delimiter, @String)
WHILE (@FoundIndex <> 0)
BEGIN
      SET @ReturnValue = SUBSTRING(@String, 0, @FoundIndex)
      INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
      SET @String = SUBSTRING(@String, @FoundIndex + 1, len(@String) - @FoundIndex)
      SET @FoundIndex = CHARINDEX(@delimiter, @String)
END
INSERT @SplitValues (Asset_ID) VALUES (@String)
RETURN
add a comment |
Re-worked it slightly...
DECLARE @FoundIndex INT
DECLARE @ReturnValue VARCHAR(MAX)
SET @FoundIndex = CHARINDEX(@delimiter, @String)
WHILE (@FoundIndex <> 0)
BEGIN
      SET @ReturnValue = SUBSTRING(@String, 0, @FoundIndex)
      INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
      SET @String = SUBSTRING(@String, @FoundIndex + 1, len(@String) - @FoundIndex)
      SET @FoundIndex = CHARINDEX(@delimiter, @String)
END
INSERT @SplitValues (Asset_ID) VALUES (@String)
RETURN
Re-worked it slightly...
DECLARE @FoundIndex INT
DECLARE @ReturnValue VARCHAR(MAX)
SET @FoundIndex = CHARINDEX(@delimiter, @String)
WHILE (@FoundIndex <> 0)
BEGIN
      SET @ReturnValue = SUBSTRING(@String, 0, @FoundIndex)
      INSERT @SplitValues (Asset_ID) VALUES (@ReturnValue)
      SET @String = SUBSTRING(@String, @FoundIndex + 1, len(@String) - @FoundIndex)
      SET @FoundIndex = CHARINDEX(@delimiter, @String)
END
INSERT @SplitValues (Asset_ID) VALUES (@String)
RETURN
answered Jul 18 '12 at 15:52
Derek KrommDerek Kromm
1,3711016
1,3711016
add a comment |
add a comment |
I wouldn't do this with a loop; there are much better alternatives. By far the best, when you have to split, is CLR, and Adam Machanic's approach is the fastest I've tested.
Next best approach IMHO, if you can't implement CLR, is a numbers table:
SET NOCOUNT ON;
DECLARE @UpperLimit INT = 1000000;
WITH n AS
(
    SELECT
        x = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
    FROM       sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
    CROSS JOIN sys.all_objects AS s3
)
SELECT Number = x
  INTO dbo.Numbers
  FROM n
  WHERE x BETWEEN 1 AND @UpperLimit
  OPTION (MAXDOP 1); -- protecting from Paul White's observation
GO
CREATE UNIQUE CLUSTERED INDEX n ON dbo.Numbers(Number) 
    --WITH (DATA_COMPRESSION = PAGE);
GO
... which allows this function:
CREATE FUNCTION dbo.SplitStrings_Numbers
(
   @List       NVARCHAR(MAX),
   @Delimiter  NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
   RETURN
   (
       SELECT Item = SUBSTRING(@List, Number, 
         CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number)
       FROM dbo.Numbers
       WHERE Number <= CONVERT(INT, LEN(@List))
         AND SUBSTRING(@Delimiter + @List, Number, 1) = @Delimiter
   );
GO
I believe all of these will perform better than the function you have, when you get it working, especially since they are inline instead of multi-statement. I haven't investigated why yours isn't working, because I don't think it's worth it to get that function working.
But that all said...
Since you are using SQL Server 2008, is there a reason you need to split in the first place? I would rather use a TVP for this:
CREATE TYPE dbo.strings AS TABLE
(
  string NVARCHAR(4000)
);
Now you can accept this as a parameter to your stored procedures, and use the contents just like you would use a TVF:
CREATE PROCEDURE dbo.foo
  @strings dbo.strings READONLY
AS
BEGIN
  SET NOCOUNT ON;
  SELECT Asset_ID = string FROM @strings;
  -- SELECT Asset_ID FROM dbo.utvf_split(@other_param, ',');
END
And you can pass a TVP directly from C# etc. as a DataTable. This will almost certainly outperform any of the solutions above, especially if you are building a comma-separated string in your app specifically so that your stored procedure can call a TVP to split it apart again. For a lot more info on TVPs see Erland Sommarskog's great article.
More recently, I've written a series on splitting strings:
- http://sqlperformance.com/2012/07/t-sql-queries/split-strings
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-follow-up
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-now-with-less-t-sql
And if you are using SQL Server 2016 or newer (or Azure SQL Database), there is a new STRING_SPLIT function, which I blogged about here:
- Performance Surprises and Assumptions : STRING_SPLIT()
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #1
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #2
add a comment |
I wouldn't do this with a loop; there are much better alternatives. By far the best, when you have to split, is CLR, and Adam Machanic's approach is the fastest I've tested.
Next best approach IMHO, if you can't implement CLR, is a numbers table:
SET NOCOUNT ON;
DECLARE @UpperLimit INT = 1000000;
WITH n AS
(
    SELECT
        x = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
    FROM       sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
    CROSS JOIN sys.all_objects AS s3
)
SELECT Number = x
  INTO dbo.Numbers
  FROM n
  WHERE x BETWEEN 1 AND @UpperLimit
  OPTION (MAXDOP 1); -- protecting from Paul White's observation
GO
CREATE UNIQUE CLUSTERED INDEX n ON dbo.Numbers(Number) 
    --WITH (DATA_COMPRESSION = PAGE);
GO
... which allows this function:
CREATE FUNCTION dbo.SplitStrings_Numbers
(
   @List       NVARCHAR(MAX),
   @Delimiter  NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
   RETURN
   (
       SELECT Item = SUBSTRING(@List, Number, 
         CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number)
       FROM dbo.Numbers
       WHERE Number <= CONVERT(INT, LEN(@List))
         AND SUBSTRING(@Delimiter + @List, Number, 1) = @Delimiter
   );
GO
I believe all of these will perform better than the function you have, when you get it working, especially since they are inline instead of multi-statement. I haven't investigated why yours isn't working, because I don't think it's worth it to get that function working.
But that all said...
Since you are using SQL Server 2008, is there a reason you need to split in the first place? I would rather use a TVP for this:
CREATE TYPE dbo.strings AS TABLE
(
  string NVARCHAR(4000)
);
Now you can accept this as a parameter to your stored procedures, and use the contents just like you would use a TVF:
CREATE PROCEDURE dbo.foo
  @strings dbo.strings READONLY
AS
BEGIN
  SET NOCOUNT ON;
  SELECT Asset_ID = string FROM @strings;
  -- SELECT Asset_ID FROM dbo.utvf_split(@other_param, ',');
END
And you can pass a TVP directly from C# etc. as a DataTable. This will almost certainly outperform any of the solutions above, especially if you are building a comma-separated string in your app specifically so that your stored procedure can call a TVP to split it apart again. For a lot more info on TVPs see Erland Sommarskog's great article.
More recently, I've written a series on splitting strings:
- http://sqlperformance.com/2012/07/t-sql-queries/split-strings
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-follow-up
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-now-with-less-t-sql
And if you are using SQL Server 2016 or newer (or Azure SQL Database), there is a new STRING_SPLIT function, which I blogged about here:
- Performance Surprises and Assumptions : STRING_SPLIT()
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #1
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #2
add a comment |
I wouldn't do this with a loop; there are much better alternatives. By far the best, when you have to split, is CLR, and Adam Machanic's approach is the fastest I've tested.
Next best approach IMHO, if you can't implement CLR, is a numbers table:
SET NOCOUNT ON;
DECLARE @UpperLimit INT = 1000000;
WITH n AS
(
    SELECT
        x = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
    FROM       sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
    CROSS JOIN sys.all_objects AS s3
)
SELECT Number = x
  INTO dbo.Numbers
  FROM n
  WHERE x BETWEEN 1 AND @UpperLimit
  OPTION (MAXDOP 1); -- protecting from Paul White's observation
GO
CREATE UNIQUE CLUSTERED INDEX n ON dbo.Numbers(Number) 
    --WITH (DATA_COMPRESSION = PAGE);
GO
... which allows this function:
CREATE FUNCTION dbo.SplitStrings_Numbers
(
   @List       NVARCHAR(MAX),
   @Delimiter  NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
   RETURN
   (
       SELECT Item = SUBSTRING(@List, Number, 
         CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number)
       FROM dbo.Numbers
       WHERE Number <= CONVERT(INT, LEN(@List))
         AND SUBSTRING(@Delimiter + @List, Number, 1) = @Delimiter
   );
GO
I believe all of these will perform better than the function you have, when you get it working, especially since they are inline instead of multi-statement. I haven't investigated why yours isn't working, because I don't think it's worth it to get that function working.
But that all said...
Since you are using SQL Server 2008, is there a reason you need to split in the first place? I would rather use a TVP for this:
CREATE TYPE dbo.strings AS TABLE
(
  string NVARCHAR(4000)
);
Now you can accept this as a parameter to your stored procedures, and use the contents just like you would use a TVF:
CREATE PROCEDURE dbo.foo
  @strings dbo.strings READONLY
AS
BEGIN
  SET NOCOUNT ON;
  SELECT Asset_ID = string FROM @strings;
  -- SELECT Asset_ID FROM dbo.utvf_split(@other_param, ',');
END
And you can pass a TVP directly from C# etc. as a DataTable. This will almost certainly outperform any of the solutions above, especially if you are building a comma-separated string in your app specifically so that your stored procedure can call a TVP to split it apart again. For a lot more info on TVPs see Erland Sommarskog's great article.
More recently, I've written a series on splitting strings:
- http://sqlperformance.com/2012/07/t-sql-queries/split-strings
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-follow-up
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-now-with-less-t-sql
And if you are using SQL Server 2016 or newer (or Azure SQL Database), there is a new STRING_SPLIT function, which I blogged about here:
- Performance Surprises and Assumptions : STRING_SPLIT()
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #1
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #2
I wouldn't do this with a loop; there are much better alternatives. By far the best, when you have to split, is CLR, and Adam Machanic's approach is the fastest I've tested.
Next best approach IMHO, if you can't implement CLR, is a numbers table:
SET NOCOUNT ON;
DECLARE @UpperLimit INT = 1000000;
WITH n AS
(
    SELECT
        x = ROW_NUMBER() OVER (ORDER BY s1.[object_id])
    FROM       sys.all_objects AS s1
    CROSS JOIN sys.all_objects AS s2
    CROSS JOIN sys.all_objects AS s3
)
SELECT Number = x
  INTO dbo.Numbers
  FROM n
  WHERE x BETWEEN 1 AND @UpperLimit
  OPTION (MAXDOP 1); -- protecting from Paul White's observation
GO
CREATE UNIQUE CLUSTERED INDEX n ON dbo.Numbers(Number) 
    --WITH (DATA_COMPRESSION = PAGE);
GO
... which allows this function:
CREATE FUNCTION dbo.SplitStrings_Numbers
(
   @List       NVARCHAR(MAX),
   @Delimiter  NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
   RETURN
   (
       SELECT Item = SUBSTRING(@List, Number, 
         CHARINDEX(@Delimiter, @List + @Delimiter, Number) - Number)
       FROM dbo.Numbers
       WHERE Number <= CONVERT(INT, LEN(@List))
         AND SUBSTRING(@Delimiter + @List, Number, 1) = @Delimiter
   );
GO
I believe all of these will perform better than the function you have, when you get it working, especially since they are inline instead of multi-statement. I haven't investigated why yours isn't working, because I don't think it's worth it to get that function working.
But that all said...
Since you are using SQL Server 2008, is there a reason you need to split in the first place? I would rather use a TVP for this:
CREATE TYPE dbo.strings AS TABLE
(
  string NVARCHAR(4000)
);
Now you can accept this as a parameter to your stored procedures, and use the contents just like you would use a TVF:
CREATE PROCEDURE dbo.foo
  @strings dbo.strings READONLY
AS
BEGIN
  SET NOCOUNT ON;
  SELECT Asset_ID = string FROM @strings;
  -- SELECT Asset_ID FROM dbo.utvf_split(@other_param, ',');
END
And you can pass a TVP directly from C# etc. as a DataTable. This will almost certainly outperform any of the solutions above, especially if you are building a comma-separated string in your app specifically so that your stored procedure can call a TVP to split it apart again. For a lot more info on TVPs see Erland Sommarskog's great article.
More recently, I've written a series on splitting strings:
- http://sqlperformance.com/2012/07/t-sql-queries/split-strings
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-follow-up
- http://sqlperformance.com/2012/08/t-sql-queries/splitting-strings-now-with-less-t-sql
And if you are using SQL Server 2016 or newer (or Azure SQL Database), there is a new STRING_SPLIT function, which I blogged about here:
- Performance Surprises and Assumptions : STRING_SPLIT()
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #1
- STRING_SPLIT() in SQL Server 2016 : Follow-Up #2
edited Nov 13 '18 at 22:55
Community♦
1
1
answered Jul 18 '12 at 15:56
Aaron Bertrand♦Aaron Bertrand
150k18286484
150k18286484
add a comment |
add a comment |
SQL Server 2016 introduced the STRING_SPLIT() function. It has two parameters - the string to be chopped up and the separator. The output is one row per value returned.
For the given example
SELECT * FROM string_split('one,two,three,four', ',');
will return
value
------------------
one
two
three
four
add a comment |
SQL Server 2016 introduced the STRING_SPLIT() function. It has two parameters - the string to be chopped up and the separator. The output is one row per value returned.
For the given example
SELECT * FROM string_split('one,two,three,four', ',');
will return
value
------------------
one
two
three
four
add a comment |
SQL Server 2016 introduced the STRING_SPLIT() function. It has two parameters - the string to be chopped up and the separator. The output is one row per value returned.
For the given example
SELECT * FROM string_split('one,two,three,four', ',');
will return
value
------------------
one
two
three
four
SQL Server 2016 introduced the STRING_SPLIT() function. It has two parameters - the string to be chopped up and the separator. The output is one row per value returned.
For the given example
SELECT * FROM string_split('one,two,three,four', ',');
will return
value
------------------
one
two
three
four
answered Nov 13 '17 at 9:02


Michael GreenMichael Green
14.4k83060
14.4k83060
add a comment |
add a comment |
I have been using and loving Jeff Moden's string splitter just about since it came out.
Tally OH! An Improved SQL 8K “CSV Splitter” Function
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
add a comment |
I have been using and loving Jeff Moden's string splitter just about since it came out.
Tally OH! An Improved SQL 8K “CSV Splitter” Function
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
add a comment |
I have been using and loving Jeff Moden's string splitter just about since it came out.
Tally OH! An Improved SQL 8K “CSV Splitter” Function
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
I have been using and loving Jeff Moden's string splitter just about since it came out.
Tally OH! An Improved SQL 8K “CSV Splitter” Function
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
edited Nov 30 '17 at 13:01
Paul White♦
50.2k14267431
50.2k14267431
answered Feb 2 '15 at 18:44


Erik DarlingErik Darling
21.3k1264104
21.3k1264104
add a comment |
add a comment |
CREATE FUNCTION [dbo].[fnSplit]
(
    @sInputList VARCHAR(8000),         -- List of delimited items
    @sDelimiter VARCHAR(8000) = ','    -- delimiter that separates items
)
RETURNS @List TABLE (colData VARCHAR(8000))
BEGIN
DECLARE @sItem VARCHAR(8000)
    WHILE CHARINDEX(@sDelimiter,@sInputList,0) <> 0
    BEGIN
        SELECT @sItem=RTRIM(LTRIM(SUBSTRING(@sInputList,1,CHARINDEX
(@sDelimiter,@sInputList,0)-1))),
        @sInputList=RTRIM(LTRIM(SUBSTRING(@sInputList,CHARINDEX(@sDelimiter,@sInputList,0)
+LEN(@sDelimiter),LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem
        END
        IF LEN(@sInputList) > 0
            INSERT INTO @List SELECT @sInputList -- Put the last item in
        RETURN
    END
--TEST
--Example 1: select * from fnSplit('1,22,333,444,,5555,666', ',')
--Example 2: select * from fnSplit('1##22#333##444','##')  --note second colData has embedded #
--Example 3: select * from fnSplit('1 22 333 444  5555 666', ' ')

add a comment |
CREATE FUNCTION [dbo].[fnSplit]
(
    @sInputList VARCHAR(8000),         -- List of delimited items
    @sDelimiter VARCHAR(8000) = ','    -- delimiter that separates items
)
RETURNS @List TABLE (colData VARCHAR(8000))
BEGIN
DECLARE @sItem VARCHAR(8000)
    WHILE CHARINDEX(@sDelimiter,@sInputList,0) <> 0
    BEGIN
        SELECT @sItem=RTRIM(LTRIM(SUBSTRING(@sInputList,1,CHARINDEX
(@sDelimiter,@sInputList,0)-1))),
        @sInputList=RTRIM(LTRIM(SUBSTRING(@sInputList,CHARINDEX(@sDelimiter,@sInputList,0)
+LEN(@sDelimiter),LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem
        END
        IF LEN(@sInputList) > 0
            INSERT INTO @List SELECT @sInputList -- Put the last item in
        RETURN
    END
--TEST
--Example 1: select * from fnSplit('1,22,333,444,,5555,666', ',')
--Example 2: select * from fnSplit('1##22#333##444','##')  --note second colData has embedded #
--Example 3: select * from fnSplit('1 22 333 444  5555 666', ' ')

add a comment |
CREATE FUNCTION [dbo].[fnSplit]
(
    @sInputList VARCHAR(8000),         -- List of delimited items
    @sDelimiter VARCHAR(8000) = ','    -- delimiter that separates items
)
RETURNS @List TABLE (colData VARCHAR(8000))
BEGIN
DECLARE @sItem VARCHAR(8000)
    WHILE CHARINDEX(@sDelimiter,@sInputList,0) <> 0
    BEGIN
        SELECT @sItem=RTRIM(LTRIM(SUBSTRING(@sInputList,1,CHARINDEX
(@sDelimiter,@sInputList,0)-1))),
        @sInputList=RTRIM(LTRIM(SUBSTRING(@sInputList,CHARINDEX(@sDelimiter,@sInputList,0)
+LEN(@sDelimiter),LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem
        END
        IF LEN(@sInputList) > 0
            INSERT INTO @List SELECT @sInputList -- Put the last item in
        RETURN
    END
--TEST
--Example 1: select * from fnSplit('1,22,333,444,,5555,666', ',')
--Example 2: select * from fnSplit('1##22#333##444','##')  --note second colData has embedded #
--Example 3: select * from fnSplit('1 22 333 444  5555 666', ' ')

CREATE FUNCTION [dbo].[fnSplit]
(
    @sInputList VARCHAR(8000),         -- List of delimited items
    @sDelimiter VARCHAR(8000) = ','    -- delimiter that separates items
)
RETURNS @List TABLE (colData VARCHAR(8000))
BEGIN
DECLARE @sItem VARCHAR(8000)
    WHILE CHARINDEX(@sDelimiter,@sInputList,0) <> 0
    BEGIN
        SELECT @sItem=RTRIM(LTRIM(SUBSTRING(@sInputList,1,CHARINDEX
(@sDelimiter,@sInputList,0)-1))),
        @sInputList=RTRIM(LTRIM(SUBSTRING(@sInputList,CHARINDEX(@sDelimiter,@sInputList,0)
+LEN(@sDelimiter),LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem
        END
        IF LEN(@sInputList) > 0
            INSERT INTO @List SELECT @sInputList -- Put the last item in
        RETURN
    END
--TEST
--Example 1: select * from fnSplit('1,22,333,444,,5555,666', ',')
--Example 2: select * from fnSplit('1##22#333##444','##')  --note second colData has embedded #
--Example 3: select * from fnSplit('1 22 333 444  5555 666', ' ')

edited Aug 27 '14 at 8:38
Mark Sinkinson
7,72432747
7,72432747
answered Aug 27 '14 at 8:13
MudassirMudassir
1
1
add a comment |
add a comment |