Skip to content

Instantly share code, notes, and snippets.

@theSuiGenerisAakash
Last active January 23, 2019 06:14
Show Gist options
  • Save theSuiGenerisAakash/709fa432440290f6c9de2c3a868d3c8d to your computer and use it in GitHub Desktop.
Save theSuiGenerisAakash/709fa432440290f6c9de2c3a868d3c8d to your computer and use it in GitHub Desktop.
A UDF to split strings based on a delimiter in MySQL
-- Select your database
USE sample_database;
/* You might need to run this if MySQL says that the below function is non-determininstic (although I've added DETERMINSTIC keyword in it) */
-- SET GLOBAL log_bin_trust_function_creators = 1;
DELIMITER $$
DROP FUNCTION IF EXISTS splitString $$
-- Function splitString that returns a substring based on a certain delimiter's position
CREATE FUNCTION splitString (
x VARCHAR(255),
delim VARCHAR(12),
pos INT
)
RETURNS VARCHAR(255) DETERMINISTIC
RETURN REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos),
LENGTH(SUBSTRING_INDEX(x, delim, pos -1)) + 1),
delim, '') $$
DROP PROCEDURE IF EXISTS getAllStrings $$
-- Procedure to get all the words from the input string passed to it as argument
CREATE PROCEDURE getAllStrings(IN idString varchar(256))
BEGIN
CREATE TEMPORARY TABLE IF NOT EXISTS tbl_temp_sep_strings( SPLIT_VALUES VARCHAR(256));
SELECT LENGTH(idString) - LENGTH(REPLACE(idString, ', ', '')) INTO @noOfCommas;
IF @noOfCommas = 0
THEN
INSERT INTO tbl_temp_sep_strings(SPLIT_VALUES) values(idString);
ELSE
SET @y = 1;
WHILE @y < @noOfCommas DO
SELECT splitString(idString, ', ', @y) INTO @tempSeparate;
SET @y = @y + 1;
INSERT INTO tbl_temp_sep_strings(SPLIT_VALUES) values(@tempSeparate);
END WHILE;
END IF;
END $$
CALL getAllStrings('ab, ac'); -- Call the function like this
/* Now, you have the result, i.e., all the split words in the column SPLIT_VALUES of the tbl_temp_sep_strings
which you can use by doing a SELECT action. This is helpful as it can be used in SQL IN clauses and other
similar places where you might be looking for a tuple of values.
Please note that the table is temporary which is good because then it will get deallocated when the connection
is closed or the session ends as we wouldn't want a table in our db to persist. You can remove the TEMPORARY keyword
otherwise.
*/
-- SELECT * FROM sample_database.tbl_temp_sep_strings
-- BONUS: If a field in some table has mixed values as in single as well as mixed, comma-separated values then you can use the below procedure
DROP PROCEDURE IF EXISTS getIndividualSkills $$
CREATE PROCEDURE cleanMixedUpColumn()
BEGIN
SELECT COUNT(DISTINCT infected_column) INTO @noOfSkills FROM sample_database.sample_table WHERE Skill_id IS NOT NULL;
SET @y1 = 0;
WHILE @y1 < @noOfSkills DO
SET @sql1 = CONCAT('SELECT DISTINCT infected_column INTO @currentValue FROM sample_database.sample_table WHERE infected_column IS NOT NULL ORDER BY infected_column LIMIT 1 OFFSET ' ,@y1,';');
PREPARE stmt1 FROM @sql1;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1;
CALL getAllStrings(@currentValue);
SET @y1 = @y1 + 1;
END WHILE;
END $$
DELIMITER ;
CALL cleanMixedUpColumn();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment