midrange.com code scratchpad
Name:
REGEXPLIKE Provide an SQL function that emulate's Oracle's REGEXP_LIKE capability
Scriptlanguage:
Plain Text
Tabwidth:
4
Date:
09/19/2012 01:04:52 am
IP:
Logged
Description:
Procedure to implement an SQL UDF that will test the values of columns via regular expressions. Returns '0' if no match; '1' if match, or NULL if parameters not acceptable. Ideally, will be used in WHERE clause, such as in:

SELECT FIRST_NAME FROM NAME_TABLE WHERE REGEXP_LIKE(FIRST_NAME, '^JOH{0,1}N', 'i') = '1'

(above will match any name starting with JOHN or JON, ignoring case differences.)

Another example: to find a string of digits at least 6 wide:

SELECT SOMETHING FROM SOME_TABLE WHERE REGEXP_LIKE(SOME_COLUMN, '[0-9]{6}') = '1'

The first parameter is the string to be compared.
Second parameter is the regular expression.
Third parameter may contain the following characters:
i = Ignore case differences
n = Period may match a new-line character
m = ^ and $ do not match Beginning-of-line and End-of-line, respectively

This is a reasonable emulation of the Oracle REGEXP_LIKE function. For more documentation on that, see
http://docs.oracle.com/cd/B14117_01/server.101/b10759/conditions018.htm
Code:
  1.      H/TITLE SQL function provides REGEXP_LIKE capability
  2.      H NOMAIN
  3.      H OPTION(*NOSHOWCPY:*NOEXPDDS:*NODEBUGIO:*SRCSTMT) DEBUG
  4.      H BNDDIR('QC2LE')
  5.  
  6.        // Copyright (C) 2012 Dennis Lovelady, All rights reserved.
  7.        //
  8.        // Released to public domain 18-Sep-2012
  9.  
  10.        // *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
  11.        //
  12.        // SQL function to simulate Oracle's REGEXP_LIKE capability
  13.        //
  14.        // ---Log---------------------------------------Author-------Date---
  15.        // Original version                             Lovelady   18Sep2010
  16.        // ---End of log----------------------------------------------------
  17.        // *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
  18.        //
  19.        //    CRTRPGMOD <lib>/regex_like SRCFILE(<lib>/QRPGLESRC)
  20.        //    CRTSRVPGM <lib>/regex_like
  21.        //              MODULE(<lib>/regex_like) EXPORT(*ALL)
  22.        //
  23.        //
  24.        //      CREATE FUNCTION <lib>/regexp_like(varchar(512)
  25.        //                                      , varchar(512))
  26.        //      RETURNS  char(1)
  27.        //      RETURNS  NULL ON NULL INPUT
  28.        //      LANGUAGE RPGLE
  29.        //      EXTERNAL NAME '<lib>/REGEXPLIKE(regexp_like)'
  30.        //      IS DETERMINISTIC
  31.        //      NO SQL
  32.        //      NO EXTERNAL ACTION
  33.        //      PARAMETER STYLE GENERAL
  34.        //      ALLOW PARALLEL
  35.        //      NOT FENCED
  36.        //
  37.        //
  38.        // Note that the function that handles three input parameters
  39.        //      (string, pattern, options)
  40.        //      does NOT use "RETURNS NULL ON NULL INPUT" option, and
  41.        //      instead uses "CALLED ON NULL INPUT".  This is because
  42.        //      we actually treat a NULL third parameter as though it
  43.        //      was not passed, processing exactly like it wasn't there.
  44.        //
  45.        //      CREATE FUNCTION <lib>/REGEXPLIKE(varchar(512)
  46.        //                                      , varchar(512)
  47.        //                                      , varchar(8))
  48.        //      RETURNS  char(1)
  49.        //      CALLED ON NULL INPUT
  50.        //      LANGUAGE RPGLE
  51.        //      EXTERNAL NAME '<lib>/regex_like(regexp_like_opts)'
  52.        //      IS DETERMINISTIC
  53.        //      NO SQL
  54.        //      NO EXTERNAL ACTION
  55.        //      PARAMETER STYLE GENERAL
  56.        //      ALLOW PARALLEL
  57.        //      NOT FENCED
  58.  
  59.  
  60.        // Scott Klement's REGEX.H module included here
  61.        // If you have or can find that module, then (great!) plug it
  62.        // in here by replacing the following asterisk with a slash,
  63.        // and delete following lines until <<<END OF REGEX_H>>>
  64.  
  65.       *Include REGEX_H
  66.        //
  67.        //  ** Header file for calling the "Regular Expression" functions
  68.        //  **   provided by the ILE C Runtime Library from an RPG IV
  69.        //  **   program.                 Scott Klement, 2001-05-04
  70.        //  **                       Converted to qualified DS 2003-11-29
  71.        //  **
  72.  
  73.        //******************************************************************
  74.        // Regular Expressions (REGEXP) stuff
  75.        //******************************************************************
  76.  
  77.        // For information about how regular expressions work, and the
  78.        // difference between BASIC and EXTENDED, please see the following
  79.        // link:  http://en.wikipedia.org/wiki/Regular_expression
  80.  
  81.       **------------------------------------------------------------
  82.       * cflags for regcomp()
  83.       **------------------------------------------------------------
  84.      D REG_BASIC       C                   0
  85.      D REG_EXTENDED    C                   1
  86.      D REG_ICASE       C                   2
  87.      D REG_NEWLINE     C                   4
  88.      D REG_NOSUB       C                   8
  89.  
  90.  
  91.       **------------------------------------------------------------
  92.       * eflags for regexec()
  93.       **------------------------------------------------------------
  94.      D REG_NOTBOL      C                   256
  95.      D REG_NOTEOL      C                   512
  96.  
  97.  
  98.       **------------------------------------------------------------
  99.       *  errors returned
  100.       **------------------------------------------------------------
  101.      D REG_NOMATCH     C                   1
  102.      D REG_BADPAT      C                   2
  103.      D REG_ECOLLATE    C                   3
  104.      D REG_ECTYPE      C                   4
  105.      D REG_EESCAPE     C                   5
  106.      D REG_ESUBREG     C                   6
  107.      D REG_EBRACK      C                   7
  108.      D REG_EPAREN      C                   8
  109.      D REG_EBRACE      C                   9
  110.      D REG_BADBR       C                   10
  111.      D REG_ERANGE      C                   11
  112.      D REG_ESPACE      C                   12
  113.      D REG_BADRPT      C                   13
  114.      D REG_ECHAR       C                   14
  115.      D REG_EBOL        C                   15
  116.      D REG_EEOL        C                   16
  117.      D REG_ECOMP       C                   17
  118.      D REG_EEXEC       C                   18
  119.  
  120.  
  121.       **------------------------------------------------------------
  122.       *  Structure of a compiled regular expression:
  123.       *
  124.       *     #define __REG_SUBEXP_MAX       9
  125.       *     typedef struct {
  126.       *         size_t        re_nsub;
  127.       *         void          *re_comp;
  128.       *         int           re_cflags;
  129.       *         size_t        re_erroff;
  130.       *         size_t        re_len;
  131.       *         _LC_colval_t  re_ucoll[2];
  132.       *         void          *re_lsub[__REG_SUBEXP_MAX+1];
  133.       *         void          *re_esub[__REG_SUBEXP_MAX+1];
  134.       *         unsigned char re_map[256];
  135.       *         mbstate_t     re_shift;
  136.       *         short         re_dbcs;
  137.       *     } regex_t;
  138.       **------------------------------------------------------------
  139.      D REG_SUBEXP_MAX  C                   10
  140.      D regex_t         DS                  qualified
  141.      D                                     align based(prototype_only)
  142.      D   re_nsub                     10I 0
  143.      D   re_comp                       *
  144.      D   re_cflags                   10I 0
  145.      D   re_erroff                   10I 0
  146.      D   re_len                      10I 0
  147.      D   re_ucoll                    10I 0 dim(2)
  148.      D   re_lsub                       *   DIM(REG_SUBEXP_MAX)
  149.      D   re_esub                       *   DIM(REG_SUBEXP_MAX)
  150.      D   re_map                     256A
  151.      D   re_shift                     5I 0
  152.      D   re_dbcs                      5I 0
  153.  
  154.  
  155.       **------------------------------------------------------------
  156.       *  structure used to report matches found by regexec()
  157.       *
  158.       *     typedef struct {
  159.       *         _off_t     rm_so; /* offset of substring             */
  160.       *         mbstate_t  rm_ss; /* shift state at start of subst   */
  161.       *         _off_t     rm_eo; /* offset of next char after subst */
  162.       *         mbstate_t  rm_es; /* shift state at end of subst     */
  163.       *     } regmatch_t;
  164.       *
  165.       * NOTE: It's important to remember that C starts numbering
  166.       *    string positions with '0' and RPG starts with '1'.
  167.       *    Thus, rm_so+1 is the first char in substring, rm_eo is
  168.       *    the last char in the substring in RPG.
  169.       **------------------------------------------------------------
  170.      D regmatch_t      DS                  qualified
  171.      D                                     align based(prototype_only)
  172.      D   rm_so                       10I 0
  173.      D   rm_ss                        5I 0
  174.      D   rm_eo                       10I 0
  175.      D   rm_es                        5I 0
  176.  
  177.  
  178.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  179.       * regcomp() -- Compile a Regular Expression ("RE")
  180.       *
  181.       *     int regcomp(regex_t *preg, const char *pattern,
  182.       *              int cflags);
  183.       *
  184.       * where:
  185.       *       preg (output) = the compiled regular expression.
  186.       *    pattern (input)  = the RE to be compiled.
  187.       *     cflags (input)  = the sum of the cflag constants
  188.       *                       (listed above) for this RE.
  189.       *
  190.       * Returns 0 = success, otherwise an error number.
  191.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  192.      D regcomp         PR            10I 0 extproc('regcomp')
  193.      D   preg                              like(regex_t)
  194.      D   pattern                       *   value options(*string)
  195.      D   cflags                      10I 0 value
  196.  
  197.  
  198.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  199.       * regexec() -- Execute a compiled Regular Expression ("RE")
  200.       *
  201.       *     int regexec(const regex_t *preg, const char *string,
  202.       *              size_t nmatch, regmatch_t *pmatch, int eflags);
  203.       *
  204.       * where:
  205.       *       preg (input)  = the compiled regular expression
  206.       *                       (the output of regcomp())
  207.       *     string (input)  = string to run the RE upon
  208.       *     nmatch (input)  = the number of matches to return.
  209.       *     pmatch (output) = array of regmatch_t DS's
  210.       *                       showing what matches were found.
  211.       *     eflags (input)  = the sum of the flags (constants
  212.       *                       provided above) modifying the RE
  213.       *
  214.       * Returns 0 = success, otherwise an error number.
  215.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  216.      D regexec         PR            10I 0 extproc('regexec')
  217.      D   preg                              like(regex_t) const
  218.      D   string                        *   value options(*string)
  219.      D   nmatch                      10U 0 value
  220.      D   pmatch                            like(regmatch_t) dim(100)
  221.      D                                     options(*varsize)
  222.      D   eflags                      10I 0 value
  223.  
  224.  
  225.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  226.       * regerror() -- return error information from regcomp/regexec
  227.       *
  228.       *   size_t regerror(int errcode, const regex_t *preg,
  229.       *              char *errbuf, size_t errbuf_size);
  230.       *
  231.       *  where:
  232.       *    errcode (input)  = the error code to return info on
  233.       *                      (obtained as the return value from
  234.       *                      either regcomp() or regexec())
  235.       *       preg (input)  = the (compiled) RE to return the
  236.       *                      error for.
  237.       *     errbuf (output) = buffer containing human-readable
  238.       *                      error message.
  239.       * errbuf_size (input) = size of errbuf (max length of msg
  240.       *                      that will be returned)
  241.       *
  242.       * returns:  length of buffer needed to get entire error msg
  243.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  244.      D regerror        PR            10U 0 extproc('regerror')
  245.      D   errcode                     10I 0 value
  246.      D   preg                              like(regex_t) const
  247.      D   errbuf                        *   value
  248.      D   errbuf_size                 10I 0 value
  249.  
  250.  
  251.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  252.       * regfree() -- free memory locked by Regular Expression
  253.       *
  254.       *    void regfree(regex_t *preg);
  255.       *
  256.       *   where:
  257.       *        preg (input) = regular expression to free mem for.
  258.       *
  259.       *   NOTE:  regcomp() will always allocate extra memory
  260.       *        to be pointed to by the various pointers in
  261.       *        the regex_t structure.  If you don't call this,
  262.       *        that memory will never be returned to the system!
  263.       *+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  264.      D regfree         PR                  extproc('regfree')
  265.      D   preg                              like(regex_t)
  266.  
  267.        //******************************************************************
  268.        // End of Regular Expressions (REGEXP) stuff
  269.        //******************************************************************
  270.  
  271.        // <<<END OF REGEX_H>>>
  272.  
  273.      D IS_NULL         C                   -1
  274.      D NOT_NULL        C                   *Zero
  275.  
  276.        // Prototype for SQL function:
  277.        //     REGEXP_LIKE(varchar(512), varchar(512))
  278.        // SQL-style parameters are used so that NULL may be
  279.        // returned as a result.
  280.  
  281.      D regexp_like     PR                  ExtProc('regexp_like')
  282.      D  parmValIn                   512    Varying Const
  283.      D  parmRegEx                   512    Varying Const
  284.      D  parmIsLike                     N
  285.      D  valInNull                     5I 0 Const
  286.      D  regExNull                     5I 0 Const
  287.      D  rtnNull                       5I 0
  288.      D  State                         5
  289.      D  FuncName                    517    Varying
  290.      D  SpecName                    128    Varying
  291.      D  MsgTxt                       70    Varying
  292.  
  293.  
  294.        // Prototype for SQL function:
  295.        //     REGEXP_LIKE(varchar(512), varchar(512), varchar(8))
  296.        // SQL-style parameters are used so that NULL may be
  297.        // returned as a result.
  298.  
  299.      D regexp_like_opts...
  300.      D                 PR                  ExtProc('regexp_like_opts')
  301.      D  parmValIn                   512    Varying Const
  302.      D  parmRegEx                   512    Varying Const
  303.      D  parmOpts                      8    Varying Const
  304.      D  parmIsLike                     N
  305.      D  valInNull                     5I 0 Const
  306.      D  regExNull                     5I 0 Const
  307.      D  optNull                       5I 0 Const
  308.      D  rtnNull                       5I 0
  309.      D  State                         5
  310.      D  FuncName                    517    Varying
  311.      D  SpecName                    128    Varying
  312.      D  MsgTxt                       70    Varying
  313.  
  314.  
  315.  
  316.      P regexp_like     B                   Export
  317.      D regexp_like     PI
  318.      D  parmValIn                   512    Varying Const
  319.      D  parmRegEx                   512    Varying Const
  320.      D  parmIsLike                     N
  321.      D  valInNull                     5I 0 Const
  322.      D  regExNull                     5I 0 Const
  323.      D  rtnNull                       5I 0
  324.      D  State                         5
  325.      D  FuncName                    517    Varying
  326.      D  SpecName                    128    Varying
  327.      D  MsgTxt                       70    Varying
  328.  
  329.  
  330.       /Free
  331.  
  332.        // Called with only two parameters.  So, simply call regexp_like_opts
  333.        // with the same two inputs, plus a NULL parameter 3 (options).  It
  334.        // will know what to do.
  335.  
  336.        regexp_like_opts(parmValIn: parmRegEx: *Blank
  337.                       : parmIsLike
  338.                       : valInNull: regExNull: IS_NULL
  339.                       : rtnNull
  340.                       : State: FuncName: SpecName
  341.                       : msgTxt
  342.                        ) ;
  343.        Return ;
  344.       /End-free
  345.  
  346.      P regexp_like     E
  347.  
  348.  
  349.        // This is where all the work gets done.  We strive for efficient
  350.        // operation by maintaining the previous regular expression and its
  351.        // compiled version in memory.  We will free and recompile the compiled
  352.        // version only when the pattern and/or options differ from previous use.
  353.        //
  354.        // Note that this may result in a memory leak, amounting to the memory
  355.        // consumed by the compiled regex on final call.  (Presumably, this will
  356.        // be cleaned up at the same time that this procedure's static memory is
  357.        // wiped, but I have no evidence of that, especially since the memory
  358.        // used by the compiled regular expression is obtained via alloc().)
  359.        //  If you don't want that effect then change /UNDEFINE to /DEFINE below
  360.  
  361.       /UNDEFINE NO_MEM_LEAK_PLEASE
  362.  
  363.      P regexp_like_opts...
  364.      P                 B                   Export
  365.      D regexp_like_opts...
  366.      D                 PI
  367.      D  parmValIn                   512    Varying Const
  368.      D  parmRegEx                   512    Varying Const
  369.      D  parmOpts                      8    Varying Const
  370.      D  parmIsLike                     N
  371.      D  valInNull                     5I 0 Const
  372.      D  regExNull                     5I 0 Const
  373.      D  optNull                       5I 0 Const
  374.      D  rtnNull                       5I 0
  375.      D  State                         5
  376.      D  FuncName                    517    Varying
  377.      D  SpecName                    128    Varying
  378.      D  MsgTxt                       70    Varying
  379.  
  380.      D rc              S             10I 0
  381.      D prevRegEx       S                   Like(parmRegEx)
  382.       /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
  383.      D                                     Static
  384.       /ENDIF
  385.      D prevOpts        S                   Like(parmOpts)
  386.       /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
  387.      D                                     Static
  388.       /EndIF
  389.       /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
  390.      D firstPass       S               N   Inz(*On) Static
  391.       /EndIF
  392.      D compiled        S               N   Inz(*Off)
  393.       /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
  394.      D                                     Static
  395.       /EndIF
  396.      D currOpts        S                   Like(parmOpts)
  397.      D repeatRegEx     S               N   Inz(*Off)
  398.      D myRegEx         DS                  LikeDS(regex_t)
  399.      D match           DS                  LikeDS(regMatch_T) Inz
  400.      D regExFlags      S             10I 0
  401.      D errBuff         S            256
  402.      D len             S             10I 0
  403.      D P               S             10I 0
  404.  
  405.       /Free
  406.        rtnNull = NOT_NULL ;
  407.        If valInNull = IS_NULL or regExNull = IS_NULL ;
  408.           rtnNull = IS_NULL ;
  409.           Return ;
  410.        EndIF ;
  411.        If optNull = IS_NULL ;
  412.            currOpts = *Blank ;
  413.        Else ;
  414.            currOpts = parmOpts ;
  415.        EndIF ;
  416.       /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
  417.        If firstPass ;
  418.           firstPass = *Off ;
  419.           repeatRegEx = *Off ;
  420.        Else ;
  421.           repeatRegEx = (parmRegEx = prevRegEx)
  422.                       and (currOpts = prevOpts) ;
  423.           If Compiled and (Not repeatRegEx) ;
  424.              regFree(myRegex) ;
  425.           EndIF ;
  426.        EndIF ;
  427.        If (Not repeatRegEx) or (not Compiled) ;
  428.       /EndIF
  429.           regExFlags = REG_EXTENDED + REG_NOSUB ;
  430.           If %Scan('i': currOpts) > *Zero ;
  431.              regExFlags += REG_ICASE ;
  432.           EndIF ;
  433.           If %Scan('n': currOpts) > *Zero ;
  434.              regExFlags += REG_NEWLINE ;
  435.           EndIF ;
  436.           If %Scan('m': currOpts) > *Zero ;
  437.              regExFlags += REG_NOTBOL + REG_NOTEOL ;
  438.           EndIF ;
  439.           rc = regcomp(myRegEx: parmRegEx: regExFlags) ;
  440.           Compiled = (rc = *Zero) ;
  441.           If Not Compiled ;
  442.              len = regerror(rc: myRegEx
  443.                           : %Addr(errBuff): %Size(errBuff)) ;
  444.              // If you're going to write the regcomp error to the
  445.              // joblog or whatever, here is the place to do that.
  446.              // Error message is %Subst(errBuff: 1: len)
  447.              rtnNull = IS_NULL ;
  448.              Return ;
  449.           EndIF ;
  450.           prevRegEx = parmRegEx ;
  451.           prevOpts  = currOpts ;
  452.       /IF NOT DEFINED(NO_MEM_LEAK_PLEASE)
  453.        EndIF ;
  454.       /ENDIF
  455.        If Compiled ;
  456.           rc = regexec(myRegEx      // Attempt to match string to pattern
  457.                      : parmValIn
  458.                      : *Zero
  459.                      : match
  460.                      : *Zero
  461.                       ) ;
  462.           parmIsLike = (rc = *Zero) ;
  463.        Else ;
  464.           rtnNull = IS_NULL ;
  465.        EndIF ;
  466.       /IF DEFINED(NO_MEM_LEAK_PLEASE)
  467.        regFree(myRegex) ;
  468.       /ENDIF
  469.       /End-free
  470.  
  471.      P regexp_like_opts...
  472.      P                 E
  473.  
  474.  
© 2004-2019 by midrange.com generated in 0.007s valid xhtml & css