Description:
Procedure to implement an SQL UDF that will test the values of columns via regular expressions. Returns '0' if no match; '1' if match, or NULL if parameters not acceptable. Ideally, will be used in WHERE clause, such as in:
SELECT FIRST_NAME FROM NAME_TABLE WHERE REGEXP_LIKE(FIRST_NAME, '^JOH{0,1}N', 'i') = '1'
(above will match any name starting with JOHN or JON, ignoring case differences.)
Another example: to find a string of digits at least 6 wide:
SELECT SOMETHING FROM SOME_TABLE WHERE REGEXP_LIKE(SOME_COLUMN, '[0-9]{6}') = '1'
The first parameter is the string to be compared.
Second parameter is the regular expression.
Third parameter may contain the following characters:
i = Ignore case differences
n = Period may match a new-line character
m = ^ and $ do not match Beginning-of-line and End-of-line, respectively
This is a reasonable emulation of the Oracle REGEXP_LIKE function. For more documentation on that, see
http://docs.oracle.com/cd/B14117_01/server.101/b10759/conditions018.htm
|
Code:
- H/TITLE SQL function provides REGEXP_LIKE capability
- H NOMAIN
- H OPTION(*NOSHOWCPY:*NOEXPDDS:*NODEBUGIO:*SRCSTMT) DEBUG
- H BNDDIR('QC2LE')
-
- // Copyright (C) 2012 Dennis Lovelady, All rights reserved.
- //
- // Released to public domain 18-Sep-2012
-
- // *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
- //
- // SQL function to simulate Oracle's REGEXP_LIKE capability
- //
- // ---Log---------------------------------------Author-------Date---
- // Original version Lovelady 18Sep2010
- // ---End of log----------------------------------------------------
- // *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
- //
- // CRTRPGMOD <lib>/regex_like SRCFILE(<lib>/QRPGLESRC)
- // CRTSRVPGM <lib>/regex_like
- // MODULE(<lib>/regex_like) EXPORT(*ALL)
- //
- //
- // CREATE FUNCTION <lib>/regexp_like(varchar(512)
- // , varchar(512))
- // RETURNS char(1)
- // RETURNS NULL ON NULL INPUT
- // LANGUAGE RPGLE
- // EXTERNAL NAME '<lib>/REGEXPLIKE(regexp_like)'
- // IS DETERMINISTIC
- // NO SQL
- // NO EXTERNAL ACTION
- // PARAMETER STYLE GENERAL
- // ALLOW PARALLEL
- // NOT FENCED
- //
- //
- // Note that the function that handles three input parameters
- // (string, pattern, options)
- // does NOT use "RETURNS NULL ON NULL INPUT" option, and
- // instead uses "CALLED ON NULL INPUT". This is because
- // we actually treat a NULL third parameter as though it
- // was not passed, processing exactly like it wasn't there.
- //
- // CREATE FUNCTION <lib>/REGEXPLIKE(varchar(512)
- // , varchar(512)
- // , varchar(8))
- // RETURNS char(1)
- // CALLED ON NULL INPUT
- // LANGUAGE RPGLE
- // EXTERNAL NAME '<lib>/regex_like(regexp_like_opts)'
- // IS DETERMINISTIC
- // NO SQL
- // NO EXTERNAL ACTION
- // PARAMETER STYLE GENERAL
- // ALLOW PARALLEL
- // NOT FENCED
-
-
- // Scott Klement's REGEX.H module included here
- // If you have or can find that module, then (great!) plug it
- // in here by replacing the following asterisk with a slash,
- // and delete following lines until <<<END OF REGEX_H>>>
-
- *Include REGEX_H
- //
- // ** Header file for calling the "Regular Expression" functions
- // ** provided by the ILE C Runtime Library from an RPG IV
- // ** program. Scott Klement, 2001-05-04
- // ** Converted to qualified DS 2003-11-29
- // **
-
- //******************************************************************
- // Regular Expressions (REGEXP) stuff
- //******************************************************************
-
- // For information about how regular expressions work, and the
- // difference between BASIC and EXTENDED, please see the following
- // link: http://en.wikipedia.org/wiki/Regular_expression
-
- **------------------------------------------------------------
- * cflags for regcomp()
- **------------------------------------------------------------
- D REG_BASIC C 0
- D REG_EXTENDED C 1
- D REG_ICASE C 2
- D REG_NEWLINE C 4
- D REG_NOSUB C 8
-
-
- **------------------------------------------------------------
- * eflags for regexec()
- **------------------------------------------------------------
- D REG_NOTBOL C 256
- D REG_NOTEOL C 512
-
-
- **------------------------------------------------------------
- * errors returned
- **------------------------------------------------------------
- D REG_NOMATCH C 1
- D REG_BADPAT C 2
- D REG_ECOLLATE C 3
- D REG_ECTYPE C 4
- D REG_EESCAPE C 5
- D REG_ESUBREG C 6
- D REG_EBRACK C 7
- D REG_EPAREN C 8
- D REG_EBRACE C 9
- D REG_BADBR C 10
- D REG_ERANGE C 11
- D REG_ESPACE C 12
- D REG_BADRPT C 13
- D REG_ECHAR C 14
- D REG_EBOL C 15
- D REG_EEOL C 16
- D REG_ECOMP C 17
- D REG_EEXEC C 18
-
-
- **------------------------------------------------------------
- * Structure of a compiled regular expression:
- *
- * #define __REG_SUBEXP_MAX 9
- * typedef struct {
- * size_t re_nsub;
- * void *re_comp;
- * int re_cflags;
- * size_t re_erroff;
- * size_t re_len;
- * _LC_colval_t re_ucoll[2];
- * void *re_lsub[__REG_SUBEXP_MAX+1];
- * void *re_esub[__REG_SUBEXP_MAX+1];
- * unsigned char re_map[256];
- * mbstate_t re_shift;
- * short re_dbcs;
- * } regex_t;
- **------------------------------------------------------------
- D REG_SUBEXP_MAX C 10
- D regex_t DS qualified
- D align based(prototype_only)
- D re_nsub 10I 0
- D re_comp *
- D re_cflags 10I 0
- D re_erroff 10I 0
- D re_len 10I 0
- D re_ucoll 10I 0 dim(2)
- D re_lsub * DIM(REG_SUBEXP_MAX)
- D re_esub * DIM(REG_SUBEXP_MAX)
- D re_map 256A
- D re_shift 5I 0
- D re_dbcs 5I 0
-
-
- **------------------------------------------------------------
- * structure used to report matches found by regexec()
- *
- * typedef struct {
- * _off_t rm_so; /* offset of substring */
- * mbstate_t rm_ss; /* shift state at start of subst */
- * _off_t rm_eo; /* offset of next char after subst */
- * mbstate_t rm_es; /* shift state at end of subst */
- * } regmatch_t;
- *
- * NOTE: It's important to remember that C starts numbering
- * string positions with '0' and RPG starts with '1'.
- * Thus, rm_so+1 is the first char in substring, rm_eo is
- * the last char in the substring in RPG.
- **------------------------------------------------------------
- D regmatch_t DS qualified
- D align based(prototype_only)
- D rm_so 10I 0
- D rm_ss 5I 0
- D rm_eo 10I 0
- D rm_es 5I 0
-
-
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * regcomp() -- Compile a Regular Expression ("RE")
- *
- * int regcomp(regex_t *preg, const char *pattern,
- * int cflags);
- *
- * where:
- * preg (output) = the compiled regular expression.
- * pattern (input) = the RE to be compiled.
- * cflags (input) = the sum of the cflag constants
- * (listed above) for this RE.
- *
- * Returns 0 = success, otherwise an error number.
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- D regcomp PR 10I 0 extproc('regcomp')
- D preg like(regex_t)
- D pattern * value options(*string)
- D cflags 10I 0 value
-
-
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * regexec() -- Execute a compiled Regular Expression ("RE")
- *
- * int regexec(const regex_t *preg, const char *string,
- * size_t nmatch, regmatch_t *pmatch, int eflags);
- *
- * where:
- * preg (input) = the compiled regular expression
- * (the output of regcomp())
- * string (input) = string to run the RE upon
- * nmatch (input) = the number of matches to return.
- * pmatch (output) = array of regmatch_t DS's
- * showing what matches were found.
- * eflags (input) = the sum of the flags (constants
- * provided above) modifying the RE
- *
- * Returns 0 = success, otherwise an error number.
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- D regexec PR 10I 0 extproc('regexec')
- D preg like(regex_t) const
- D string * value options(*string)
- D nmatch 10U 0 value
- D pmatch like(regmatch_t) dim(100)
- D options(*varsize)
- D eflags 10I 0 value
-
-
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * regerror() -- return error information from regcomp/regexec
- *
- * size_t regerror(int errcode, const regex_t *preg,
- * char *errbuf, size_t errbuf_size);
- *
- * where:
- * errcode (input) = the error code to return info on
- * (obtained as the return value from
- * either regcomp() or regexec())
- * preg (input) = the (compiled) RE to return the
- * error for.
- * errbuf (output) = buffer containing human-readable
- * error message.
- * errbuf_size (input) = size of errbuf (max length of msg
- * that will be returned)
- *
- * returns: length of buffer needed to get entire error msg
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- D regerror PR 10U 0 extproc('regerror')
- D errcode 10I 0 value
- D preg like(regex_t) const
- D errbuf * value
- D errbuf_size 10I 0 value
-
-
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- * regfree() -- free memory locked by Regular Expression
- *
- * void regfree(regex_t *preg);
- *
- * where:
- * preg (input) = regular expression to free mem for.
- *
- * NOTE: regcomp() will always allocate extra memory
- * to be pointed to by the various pointers in
- * the regex_t structure. If you don't call this,
- * that memory will never be returned to the system!
- *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
- D regfree PR extproc('regfree')
- D preg like(regex_t)
-
- //******************************************************************
- // End of Regular Expressions (REGEXP) stuff
- //******************************************************************
-
- // <<<END OF REGEX_H>>>
-
- D IS_NULL C -1
- D NOT_NULL C *Zero
-
- // Prototype for SQL function:
- // REGEXP_LIKE(varchar(512), varchar(512))
- // SQL-style parameters are used so that NULL may be
- // returned as a result.
-
- D regexp_like PR ExtProc('regexp_like')
- D parmValIn 512 Varying Const
- D parmRegEx 512 Varying Const
- D parmIsLike N
- D valInNull 5I 0 Const
- D regExNull 5I 0 Const
- D rtnNull 5I 0
- D State 5
- D FuncName 517 Varying
- D SpecName 128 Varying
- D MsgTxt 70 Varying
-
-
- // Prototype for SQL function:
- // REGEXP_LIKE(varchar(512), varchar(512), varchar(8))
- // SQL-style parameters are used so that NULL may be
- // returned as a result.
-
- D regexp_like_opts...
- D PR ExtProc('regexp_like_opts')
- D parmValIn 512 Varying Const
- D parmRegEx 512 Varying Const
- D parmOpts 8 Varying Const
- D parmIsLike N
- D valInNull 5I 0 Const
- D regExNull 5I 0 Const
- D optNull 5I 0 Const
- D rtnNull 5I 0
- D State 5
- D FuncName 517 Varying
- D SpecName 128 Varying
- D MsgTxt 70 Varying
-
-
-
- P regexp_like B Export
- D regexp_like PI
- D parmValIn 512 Varying Const
- D parmRegEx 512 Varying Const
- D parmIsLike N
- D valInNull 5I 0 Const
- D regExNull 5I 0 Const
- D rtnNull 5I 0
- D State 5
- D FuncName 517 Varying
- D SpecName 128 Varying
- D MsgTxt 70 Varying
-
-
- /Free
-
- // Called with only two parameters. So, simply call regexp_like_opts
- // with the same two inputs, plus a NULL parameter 3 (options). It
- // will know what to do.
-
- regexp_like_opts(parmValIn: parmRegEx: *Blank
- : parmIsLike
- : valInNull: regExNull: IS_NULL
- : rtnNull
- : State: FuncName: SpecName
- : msgTxt
- ) ;
- Return ;
- /End-free
-
- P regexp_like E
-
-
- // This is where all the work gets done. We strive for efficient
- // operation by maintaining the previous regular expression and its
- // compiled version in memory. We will free and recompile the compiled
- // version only when the pattern and/or options differ from previous use.
- //
- // Note that this may result in a memory leak, amounting to the memory
- // consumed by the compiled regex on final call. (Presumably, this will
- // be cleaned up at the same time that this procedure's static memory is
- // wiped, but I have no evidence of that, especially since the memory
- // used by the compiled regular expression is obtained via alloc().)
- // If you don't want that effect then change /UNDEFINE to /DEFINE below
-
- /UNDEFINE NO_MEM_LEAK_PLEASE
-
- P regexp_like_opts...
- P B Export
- D regexp_like_opts...
- D PI
- D parmValIn 512 Varying Const
- D parmRegEx 512 Varying Const
- D parmOpts 8 Varying Const
- D parmIsLike N
- D valInNull 5I 0 Const
- D regExNull 5I 0 Const
- D optNull 5I 0 Const
- D rtnNull 5I 0
- D State 5
- D FuncName 517 Varying
- D SpecName 128 Varying
- D MsgTxt 70 Varying
-
- D rc S 10I 0
- D prevRegEx S Like(parmRegEx)
- /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
- D Static
- /ENDIF
- D prevOpts S Like(parmOpts)
- /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
- D Static
- /EndIF
- /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
- D firstPass S N Inz(*On) Static
- /EndIF
- D compiled S N Inz(*Off)
- /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
- D Static
- /EndIF
- D currOpts S Like(parmOpts)
- D repeatRegEx S N Inz(*Off)
- D myRegEx DS LikeDS(regex_t)
- D match DS LikeDS(regMatch_T) Inz
- D regExFlags S 10I 0
- D errBuff S 256
- D len S 10I 0
- D P S 10I 0
-
- /Free
- rtnNull = NOT_NULL ;
- If valInNull = IS_NULL or regExNull = IS_NULL ;
- rtnNull = IS_NULL ;
- Return ;
- EndIF ;
- If optNull = IS_NULL ;
- currOpts = *Blank ;
- Else ;
- currOpts = parmOpts ;
- EndIF ;
- /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
- If firstPass ;
- firstPass = *Off ;
- repeatRegEx = *Off ;
- Else ;
- repeatRegEx = (parmRegEx = prevRegEx)
- and (currOpts = prevOpts) ;
- If Compiled and (Not repeatRegEx) ;
- regFree(myRegex) ;
- EndIF ;
- EndIF ;
- If (Not repeatRegEx) or (not Compiled) ;
- /EndIF
- regExFlags = REG_EXTENDED + REG_NOSUB ;
- If %Scan('i': currOpts) > *Zero ;
- regExFlags += REG_ICASE ;
- EndIF ;
- If %Scan('n': currOpts) > *Zero ;
- regExFlags += REG_NEWLINE ;
- EndIF ;
- If %Scan('m': currOpts) > *Zero ;
- regExFlags += REG_NOTBOL + REG_NOTEOL ;
- EndIF ;
- rc = regcomp(myRegEx: parmRegEx: regExFlags) ;
- Compiled = (rc = *Zero) ;
- If Not Compiled ;
- len = regerror(rc: myRegEx
- : %Addr(errBuff): %Size(errBuff)) ;
- // If you're going to write the regcomp error to the
- // joblog or whatever, here is the place to do that.
- // Error message is %Subst(errBuff: 1: len)
- rtnNull = IS_NULL ;
- Return ;
- EndIF ;
- prevRegEx = parmRegEx ;
- prevOpts = currOpts ;
- /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
- EndIF ;
- /ENDIF
- If Compiled ;
- rc = regexec(myRegEx // Attempt to match string to pattern
- : parmValIn
- : *Zero
- : match
- : *Zero
- ) ;
- parmIsLike = (rc = *Zero) ;
- Else ;
- rtnNull = IS_NULL ;
- EndIF ;
- /IF DEFINED(NO_MEM_LEAK_PLEASE)
- regFree(myRegex) ;
- /ENDIF
- /End-free
-
- P regexp_like_opts...
- P E
-
-
|
|