C/C++ Regex Replace Function

The aim is to implement a regex replace function that works in C/C++, similar to PHP's preg_replace and Perl's '=~' operator. The function is called regexreplace and supports groupings and back references. It is built on the POSIX regcomp and regexec functions, is simple and easy to use, and doesn't need Boost: one C/C++ function that does regular expression matching and replacement.

int regexreplace(char *pattern, char* replacement, char* instring,  char** outstring, int maxlength);

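It accepts the pattern to be matched, the replacement, the input string, a char** outstring that receives the result, and maxlength, which is the limit on the size of outstring. It returns 0 on success; the caller must then free() the returned string, as the example below does.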

Here is an example of how it can be used. Note that #include "regexreplace.h" is required, because the functions are defined in that header.
 
 1. #include <stdlib.h>
 2. #include <stdio.h>
 3. #include <string.h>
 4. 
 5. #include "regexreplace.h"
 6. 
 7. int main(int vv, char** cc){
 8. 
 9.     char* result;
10. 
11.     if(regexreplace("shaped_\\([0-9]*\\)_\\([0-9]*\\)", "quota_policer(\\1,\\2)", "shaped_500_400  and shaped_900_100", &result, 256)==0){
12.         printf("====>%s<===\n",result); 
13.         free(result);
14.     }
15. 
16.     if(regexreplace("\\([0-9][0-9]*\\)_\\([abc]\\)", "quota_policer(\\1,\\2)", "hello_100_c mr barbie 987_a naf", &result, 256)==0){
17.         printf("====>%s<===\n",result); 
18.         free(result);
19.     }
20. 
21.     if(regexreplace("\\([0-9][0-9]*\\)_\\([abc]\\)", "quota_policer(\\1,\\2-\\3)", "hello_100_c mr barbie 987_a naf", &result, 256)==0){
22.         printf("====>%s<===\n",result); 
23.         free(result);
24.     }
25. 
26.     if(regexreplace("\\([0-9][0-9]*\\)_\\([abc]\\)", "", "hello_100_c mr barbie 987_a naf", &result, 256)==0){
27.         printf("====>%s<===\n",result); 
28.         free(result);
29.     }
30. 
31.     if(regexreplace("shaped_\\([0-9]*\\)_\\([0-9]*\\)", "quota_policer(\\1,\\2)", "shaped_500_400  and shaped_900_100", &result, 24)==0){
32.         printf("====>%s<===\n",result); 
33.         free(result);
34.     }
35. }
Hide line numbers

Below is the listing of the header file regexreplace.h. The idea was to keep it simple and not have to manage several arrays and copy between them.
 1. #ifndef REGEXREPLACE_H
 2. #define REGEXREPLACE
 3. #include <string.h>
 4. #include <stdlib.h>
 5. #include <regex.h>
 6. 
 7. #define MAX_NMATCH    10
 8. typedef struct{
 9.     int offset;
10.     regmatch_t pmatch[MAX_NMATCH];
11. } regmatch_tt;
/*
 * Splice a replacement into a NUL-terminated string, in place.
 *
 * The first sl characters at s are replaced by the first rl bytes of r; the
 * rest of the string, including its terminator, is shifted to fit.
 *
 * s  - start of the span to replace, inside a writable NUL-terminated buffer.
 *      When rl > sl the caller must already have made room for the growth.
 * sl - number of characters to remove at s. Values beyond the end of the
 *      string are clamped to the end; negative values are treated as 0.
 * r  - bytes to insert. They need not be NUL-terminated and must not overlap
 *      the destination (they are copied with memcpy).
 * rl - number of bytes to take from r; negative values are treated as 0.
 *
 * Returns the change in string length: bytes inserted minus characters removed.
 */
int s1(char* s, int sl, const char* r, int rl){
    size_t len = strlen(s);
    size_t cut = sl > 0 ? (size_t)sl : 0;
    size_t ins = rl > 0 ? (size_t)rl : 0;

    /* Never remove more than the string holds; otherwise len - cut would wrap around. */
    if (cut > len)
        cut = len;

    /* Shift the tail first (regions may overlap), then drop the new bytes in. */
    memmove(s + ins, s + cut, len - cut + 1);
    if (ins > 0)
        memcpy(s, r, ins);
    return (int)ins - (int)cut;
}
/*
 * Resize a heap buffer to s bytes, refusing to go beyond a caller-set limit.
 *
 * x   - address of the buffer pointer; *x may be NULL, in which case a new
 *       buffer is allocated. *x is updated only when the resize succeeds.
 * s   - requested size in bytes; must be positive.
 * max - largest size that is allowed.
 *
 * Returns 0 on success. Returns -1 when s is not positive, when s exceeds max,
 * or when realloc() fails; in all of these cases *x is left untouched and still
 * owns its original memory.
 */
int reallocx(char** x, int s, int max){
    char *tmp;

    /*
     * Reject non-positive sizes before calling realloc(): a zero-size request
     * may free the buffer and return NULL, which would leave *x dangling, and
     * a negative size would turn into a huge size_t.
     */
    if (s <= 0 || s > max)
        return -1;

    tmp = realloc(*x, (size_t)s);
    if (tmp == NULL)
        return -1;

    *x = tmp;
    return 0;
}
30. /* Use regexec, but to match all occurrences */
31. int regexec_matchall(regex_t* pregx, const char* instring, size_t nmatch_t, regmatch_tt * pmatcht, int flags ){
32.     int offset, counter;
33.     offset = 0;
34.     counter = 0;
35.     while (regexec (pregx, instring+offset, MAX_NMATCH, pmatcht[counter].pmatch, REG_EXTENDED)==0 && counter<nmatch_t) {  /* While matches found. */
36.         pmatcht[counter].offset = offset;
37.         offset+= pmatcht[counter].pmatch[0].rm_eo;
38.         counter++;
39.     }
40.     while(counter<nmatch_t-1){
41.         pmatcht[counter].offset=-1;
42.         counter++;
43.     }
44.     if(offset == 0)
45.         return -1;
46.     else
47.         return 0;
48. }
49. /* The function that actually does the work */
50. int preg_replace(regex_t* pregxo, char* replacement,  regex_t*  pregx, char* tomatchx, char** out, int maxlength){
51. 
52.     int i,j,k, offset, offset2; 
53. 
54.     regmatch_tt pmatcht[MAX_NMATCH];
55.     regmatch_tt pmatchto[MAX_NMATCH];
56. 
57.     regmatch_tt* pt, *mt, *xt;
58.     regmatch_t * p,  *m,  *x;
59. 
60.     char    tmpvalue2[256];
61.     char    *c;
62.     char    *tomatch = 0;
63.     int    deltalen;
64.     char    *tmpreplacement = 0;
65. 
66.     if(reallocx(&tomatch, strlen(tomatchx)+1,maxlength)==-1)
67.         goto FINISHED;
68.     else
69.         strcpy(tomatch, tomatchx);
70. 
71. 
72.     if(regexec_matchall(pregxo, replacement, 10, pmatchto, 0)!=0){
73.     }
74.     if(regexec_matchall(pregx, tomatch, 10, pmatcht, 0)!=0){
75.         return -1;
76.     }
77. 
78.     offset=0;
79. 
80.     for(i = 0 ; i < MAX_NMATCH &&  pmatcht[i].offset!=-1; i++){
81.         if(reallocx(&tmpreplacement, strlen(replacement)+1,maxlength)==-1)
82.             goto FINISHED;
83.         else
84.             strcpy(tmpreplacement, replacement);
85.         offset2=0;
86. 
87.         for( k = 0 ;  k < MAX_NMATCH && pmatchto[k].offset !=-1 ;k ++){
88.             pt = &pmatchto[k];
89.             p = &pt->pmatch[0];
90.             
91.             memset(tmpvalue2, 0, 256);
92.             strncpy(tmpvalue2, &tmpreplacement[p->rm_so+pt->offset+offset2], p->rm_eo-p->rm_so);
93. 
94.             while((c=strchr(tmpvalue2, '\\'))!=0)
95.                 *c='0';
96.             j = atoi(tmpvalue2);
97.             if(j< MAX_NMATCH && pmatcht[i].pmatch[j].rm_so !=-1 ){
98.                 mt = &pmatcht[i];
99.                 m = &mt->pmatch[j];
100.                 deltalen=(m->rm_eo-m->rm_so)-(p->rm_eo-p->rm_so);
101.                 if(deltalen>0 && reallocx(&tmpreplacement,strlen(tmpreplacement)+deltalen+1, maxlength)==-1)
102.                     goto FINISHED;
103.                 offset2+=s1(&tmpreplacement[p->rm_so+pt->offset+offset2], p->rm_eo-p->rm_so, 
104.                         &tomatch[m->rm_so+mt->offset+offset], m->rm_eo-m->rm_so);
105.             }
106.             else{
107.                 char* x ="";
108.                 offset2+=s1(&tmpreplacement[p->rm_so+pt->offset+offset2], p->rm_eo-p->rm_so, x, 0);
109.             }
110.         }    
111.         xt = &pmatcht[i];
112.         x = &xt->pmatch[0];
113. 
114.         deltalen=(strlen(tmpreplacement)-(x->rm_eo-x->rm_so));
115.         if(deltalen> 0 && reallocx(&tomatch, strlen(tomatch)+deltalen+1,maxlength)==-1)
116.             goto FINISHED;
117.         offset+=s1(&tomatch[x->rm_so+xt->offset+offset], x->rm_eo-x->rm_so, tmpreplacement, strlen(tmpreplacement ) );
118.         free(tmpreplacement);
119.         tmpreplacement=0;
120. 
121.     }
122.     *out=tomatch;
123.     return 0;
124. FINISHED:
125.     if(tomatch)
126.         free(tomatch);
127.     if(tmpreplacement)
128.         free(tmpreplacement);
129.     return -1;
130. }
/*
 * Replace every match of a regular expression in a string.
 *
 * pattern     - pattern to search for; it is compiled with regcomp() and no
 *               flags, i.e. as a POSIX basic regular expression.
 * replacement - replacement text; \1, \2, ... insert what the corresponding
 *               group of the pattern captured.
 * instring    - input string; not modified.
 * outstring   - on success receives a newly allocated result that the caller
 *               must free(); not written on failure.
 * maxlength   - size limit passed on to preg_replace() for the buffers it
 *               allocates.
 *
 * Returns 0 on success. Returns -1 when an argument is NULL, when a pattern
 * does not compile, or when preg_replace() fails.
 */
int regexreplace(char * pattern, char* replacement, char* instring,  char** outstring, int maxlength){
    /* Finds back-reference tokens in the replacement: a backslash followed by one or more digits. */
    const char* patterno = "\\(\\\\[0-9][0-9]*\\)";
    regex_t pregx, pregxo;
    int retvalue;

    if (pattern == NULL || replacement == NULL || instring == NULL || outstring == NULL)
        return -1;

    if (regcomp(&pregxo, patterno, 0) != 0)
        return -1;
    if (regcomp(&pregx, pattern, 0) != 0) {
        /* The first pattern compiled fine and must not be leaked. */
        regfree(&pregxo);
        return -1;
    }

    retvalue = preg_replace(&pregxo, replacement, &pregx, instring, outstring, maxlength);

    regfree(&pregx);
    regfree(&pregxo);
    return retvalue;
}
145. #endif
Hide line numbers