Summary of common functions of PostgreSQL regular expressions Regular expressions are undoubtedly a very useful tool for programs that require complex data processing. This article focuses on explaining some commonly used regular expression functions of PostgreSQL and some functions in the source code. Directory structure of the regular related part [root@localhost regex]# pwd /opt/hgdb-core/src/include/regex [root@localhost regex]# ll total 40 -rw-r--r--. 1 postgres postgres 3490 Mar 19 19:00 regcustom.h -rw-r--r--. 1 postgres postgres 1332 Mar 19 18:59 regerrs.h -rw-r--r--. 1 postgres postgres 6703 Mar 19 19:00 regex.h -rw-r--r--. 1 postgres postgres 2353 Mar 19 19:00 regexport.h -rw-r--r--. 1 postgres postgres 16454 Mar 19 19:00 regguts.h Regular expression compilation, matching, release, and error information related files will be introduced in detail later [root@localhost regex]# pwd /opt/hgdb-core/src/backend/regex [root@localhost regex]# ll reg*.c -rw-r--r--. 1 postgres postgres 55851 Mar 19 19:00 regcomp.c -rw-r--r--. 1 postgres postgres 3671 Mar 19 18:59 regerror.c -rw-r--r--. 1 postgres postgres 34873 Mar 19 19:00 regexec.c -rw-r--r--. 1 postgres postgres 2123 Mar 19 18:59 regfree.c [root@localhost regex]# Built-in functions are implemented in regexp.c [root@localhost adt]# pwd /opt/hgdb-core/src/backend/utils/adt [root@localhost adt]# ll regexp.c -rw-r--r--. 1 postgres postgres 34863 Apr 12 02:29 regexp.c [root@localhost adt]# Built-in function declaration: /* src/include/catalog/pg_proc.h */ DATA(insert OID = 2073 ( substring PGNSP PGUID 12 1 0 0 0 fffftfi 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ textregexsubstr _null_ _null_ _null_ )); DESCR("extract text matching regular expression"); DATA(insert OID = 2074 ( substring PGNSP PGUID 14 1 0 0 0 fffftfi 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_ "select pg_catalog.substring($1, pg_catalog.similar_escape($2, $3))" _null_ _null_ _null_ )); DESCR("extract text matching SQL99 regular expression"); DATA(insert OID = 2284 ( regexp_replace PGNSP PGUID 12 1 0 0 0 fffftfi 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace_noopt _null_ _null_ _null_ )); DESCR("replace text using regexp"); DATA(insert OID = 2285 ( regexp_replace PGNSP PGUID 12 1 0 0 0 fffftfi 4 0 25 "25 25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace _null_ _null_ _null_ )); DESCR("replace text using regexp"); DATA(insert OID = 2763 ( regexp_matches PGNSP PGUID 12 1 1 0 0 fffftti 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches_no_flags _null_ _null_ _null_ )); DESCR("find all match groups for regexp"); DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 0 0 fffftti 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ )); DESCR("find all match groups for regexp"); DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 fffftti 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ )); DESCR("split string by pattern"); DATA(insert OID = 2766 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 fffftti 3 0 25 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table _null_ _null_ _null_ )); DESCR("split string by pattern"); DATA(insert OID = 2767 ( regexp_split_to_array PGNSP PGUID 12 1 0 0 0 fffftfi 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_array_no_flags _null_ _null_ _null_ )); DESCR("split string by pattern"); DATA(insert OID = 2768 ( regexp_split_to_array PGNSP PGUID 12 1 0 0 0 fffftfi 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_array _null_ _null_ _null_ )); Parameter types and return value types: postgres=# select oid,typname from pg_type where oid = 25 or oid = 1009; oid | typname ------+--------- 25 | text 1009 | _text (2 rows) The substring(string from pattern) function provides a method for extracting a substring from a string that matches a POSIX regular expression pattern. It returns NULL if there is no match, otherwise the part of the text that matched the pattern. The regexp_replace(source, pattern, replacement [, flags ]) function provides functionality to replace a substring matching a POSIX regular expression pattern with new text. The regexp_matches(string, pattern[, flags ]) function returns an array of text containing all substrings that match the POSIX regular expression pattern. The regexp_split_to_table(string, pattern[, flags ]) function splits a string using a POSIX regular expression pattern as a delimiter. The return result is a string. . The regexp_split_to_array (string, pattern[, flags ]) function behaves the same as regexp_split_to_table, but returns the result as a text array. Please refer to the user manual for specific usage. src/include/regex/regex.h regex_t structure /* the biggie, a compiled RE (or rather, a front end to same) */ typedef struct { int re_magic; /* magic number */ size_t re_nsub; /* number of subexpressions */ long re_info; /* information about RE */ #define REG_UBACKREF 000001 #define REG_ULOOKAHEAD 000002 #define REG_UBOUNDS 000004 #define REG_UBRACES 000010 #define REG_UBSALNUM 000020 #define REG_UPBOTCH 000040 #define REG_UBBS 000100 #define REG_UNONPOSIX 000200 #define REG_UUNSPEC 000400 #define REG_UUNPORT 001000 #define REG_ULOCALE 002000 #define REG_UEMPTYMATCH 004000 #define REG_UIMPOSSIBLE 010000 #define REG_USHORTEST 020000 int re_csize; /* sizeof(character) */ char *re_endp; /* backward compatibility kludge */ Oid re_collation; /* Collation that defines LC_CTYPE behavior */ /* the rest is opaque pointers to hidden innards */ char *re_guts; /* `char *' is more portable than `void *' */ char *re_fns; } regex_t; Store compiled regular expressions regmatch_t structure /* result reporting (may acquire more fields later) */ typedef struct { regoff_t rm_so; /* start of substring */ regoff_t rm_eo; /* end of substring */ } regmatch_t; typedef long regoff_t; The member rm_so stores the starting position of the matching text string in the target string, and rm_eo stores the ending position. Usually we define a group of such structures in the form of an array. There are several main function declarations: /* * the prototypes for exported functions */ extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid); extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int); extern int pg_regprefix(regex_t *, pg_wchar **, size_t *); extern void pg_regfree(regex_t *); extern size_t pg_regerror(int, const regex_t *, char *, size_t); extern void pg_set_regex_collation(Oid collation); Commonly used functions for processing regular expressions are pg_regcomp(), pg_regexec(), pg_regfree(), and pg_regerror(). General processing steps: compile regular expression pg_regcomp(), match regular expression pg_regexec(), release regular expression pg_regfree(). pg_regerror(): When an error occurs while executing regcomp or regexec, this function can be called to return a string containing the error message. Parameter Description int pg_regcomp(regex_t *re, const chr *string, /* regular expression string*/ size_t len, /* Regular expression string length*/ int flags, Oid collation) int pg_regexec(regex_t *re, /* Regular expression compiled with regcomp function*/ const chr *string, /* target string*/ size_t len, /* target string length*/ size_t search_start, /* Matching start position*/ rm_detail_t *details, /* NULL */ size_t nmatch, /* is the length of the regmatch_t structure array*/ regmatch_t pmatch[], /* regmatch_t type structure array, storing the position information of the matching text string*/ int flags) flags src/backend/utils/adt/regexp.c /* all the options of interest for regex functions */ typedef struct pg_re_flags { int cflags; /* compile flags for Spencer's regex code */ bool glob; /* do it globally (for each occurrence) */ } pg_re_flags; /* * parse_re_flags - parse the options argument of regexp_matches and friends * * flags --- output argument, filled with desired options * opts --- TEXT object, or NULL for defaults * * This accepts all the options allowed by any of the callers; callers that * don't want some have to reject them after the fact. */ static void parse_re_flags(pg_re_flags *flags, text *opts) { /* regex flavor is always folded into the compile flags */ flags->cflags = REG_ADVANCED; flags->glob = false; if (opts) { char *opt_p = VARDATA_ANY(opts); int opt_len = VARSIZE_ANY_EXHDR(opts); int i; for (i = 0; i < opt_len; i++) { switch (opt_p[i]) { case 'g': flags->glob = true; break; case 'b': /* BREs (but why???) */ flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE); break; case 'c': /* case sensitive */ flags->cflags &= ~REG_ICASE; break; case 'e': /* plain EREs */ flags->cflags |= REG_EXTENDED; flags->cflags &= ~(REG_ADVANCED | REG_QUOTE); break; case 'i': /* case insensitive */ flags->cflags |= REG_ICASE; break; case 'm': /* Perloid synonym for n */ case 'n': /* \n affects ^ $ . [^ */ flags->cflags |= REG_NEWLINE; break; case 'p': /* ~Perl, \n affects . [^ */ flags->cflags |= REG_NLSTOP; flags->cflags &= ~REG_NLANCH; break; case 'q': /* literal string */ flags->cflags |= REG_QUOTE; flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED); break; case 's': /* single line, \n ordinary */ flags->cflags &= ~REG_NEWLINE; break; case 't': /* tight syntax */ flags->cflags &= ~REG_EXPANDED; break; case 'w': /* weird, \n affects ^ $ only */ flags->cflags &= ~REG_NLSTOP; flags->cflags |= REG_NLANCH; break; case 'x': /* expanded syntax */ flags->cflags |= REG_EXPANDED; break; default: ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regexp option: \"%c\"", opt_p[i]))); break; } } } }
The above is a detailed example of the commonly used functions of PostgreSQL regular expressions. If you have any questions, please leave a message or discuss in the community of this site. Thank you for reading and I hope it can help you. Thank you for your support of this site! You may also be interested in:
|
>>: VUE+Canvas implements the sample code of the desktop pinball brick-breaking game
Table of contents Symbol Data Type The reason why...
Effect html <div class="sp-container"...
JavaScript clicks to change the shape of the pict...
Use the Linux chmod command to control who can ac...
Optgroup is used in the select tag to make the dro...
Case Description: - Use tables to achieve page ef...
virtualenv is a tool for creating isolated Python...
Whether the a tag opens a new page: (1) Baidu Ency...
1. Reason I just needed to reinstall MySQL on a n...
This tutorial shares the installation and configu...
Article Structure 1. Preparation 2. Install Java ...
The PC version of React was refactored to use Ama...
You can use yum to install all dependencies toget...
How to view files in a docker image 1. If it is a...
It is very common to highlight images on a page. ...