mirror of
				https://github.com/golang/go.git
				synced 2025-10-31 16:50:58 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			154 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			154 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions are met:
 | |
| 
 | |
|     * Redistributions of source code must retain the above copyright
 | |
|     notice, this list of conditions and the following disclaimer.
 | |
| 
 | |
|     * Redistributions in binary form must reproduce the above copyright
 | |
|     notice, this list of conditions and the following disclaimer in the
 | |
|     documentation and/or other materials provided with the distribution.
 | |
| 
 | |
|     * Neither the name of "The Computer Language Benchmarks Game" nor the
 | |
|     name of "The Computer Language Shootout Benchmarks" nor the names of
 | |
|     its contributors may be used to endorse or promote products derived
 | |
|     from this software without specific prior written permission.
 | |
| 
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 | |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | |
| ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 | |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 | |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 | |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 | |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 | |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | |
| POSSIBILITY OF SUCH DAMAGE.
 | |
| */
 | |
| 
 | |
| /*
 | |
| ** The Computer Language Shootout
 | |
| ** http://shootout.alioth.debian.org/
 | |
| ** contributed by Mike Pall
 | |
| **
 | |
| ** regex-dna benchmark using PCRE
 | |
| **
 | |
| ** compile with:
 | |
| **   gcc -O3 -fomit-frame-pointer -o regexdna regexdna.c -lpcre
 | |
| */
 | |
| 
 | |
| #define __USE_STRING_INLINES
 | |
| #include <stdio.h>
 | |
| #include <string.h>
 | |
| #include <stdlib.h>
 | |
| #include <pcre.h>
 | |
| 
 | |
| typedef struct fbuf {
 | |
|   char *buf;
 | |
|   size_t size, len;
 | |
| } fbuf_t;
 | |
| 
 | |
| static void fb_init(fbuf_t *b)
 | |
| {
 | |
|   b->buf = NULL;
 | |
|   b->len = b->size = 0;
 | |
| }
 | |
| 
 | |
| static char *fb_need(fbuf_t *b, size_t need)
 | |
| {
 | |
|   need += b->len;
 | |
|   if (need > b->size) {
 | |
|     if (b->size == 0) b->size = need;
 | |
|     else while (need > b->size) b->size += b->size;
 | |
|     if (!(b->buf = realloc(b->buf, b->size))) exit(1);
 | |
|   }
 | |
|   return b->buf+b->len;
 | |
| }
 | |
| 
 | |
| #define FB_MINREAD	(3<<16)
 | |
| 
 | |
| /* Read all of a stdio stream into dst buffer. */
 | |
| static size_t fb_readall(fbuf_t *dst, FILE *fp)
 | |
| {
 | |
|   char *dp;
 | |
|   int n;
 | |
|   for (dp = fb_need(dst, FB_MINREAD);
 | |
|        (n = fread(dp, 1, dst->size-dst->len, fp)) > 0;
 | |
|        dp = fb_need(dst, FB_MINREAD)) dst->len += n;
 | |
|   if (ferror(fp)) exit(1);
 | |
|   return dst->len;
 | |
| }
 | |
| 
 | |
| /* Substitute pattern p with replacement r, copying from src to dst buffer. */
 | |
| static size_t fb_subst(fbuf_t *dst, fbuf_t *src, const char *p, const char *r)
 | |
| {
 | |
|   pcre *re;
 | |
|   pcre_extra *re_ex;
 | |
|   const char *re_e;
 | |
|   char *dp;
 | |
|   int re_eo, m[3], pos, rlen, clen;
 | |
|   if (!(re = pcre_compile(p, PCRE_CASELESS, &re_e, &re_eo, NULL))) exit(1);
 | |
|   re_ex = pcre_study(re, 0, &re_e);
 | |
|   for (dst->len = 0, rlen = strlen(r), pos = 0;
 | |
|        pcre_exec(re, re_ex, src->buf, src->len, pos, 0, m, 3) >= 0;
 | |
|        pos = m[1]) {
 | |
|     clen = m[0]-pos;
 | |
|     dp = fb_need(dst, clen+rlen);
 | |
|     dst->len += clen+rlen;
 | |
|     memcpy(dp, src->buf+pos, clen);
 | |
|     memcpy(dp+clen, r, rlen);
 | |
|   }
 | |
|   clen = src->len-pos;
 | |
|   dp = fb_need(dst, clen);
 | |
|   dst->len += clen;
 | |
|   memcpy(dp, src->buf+pos, clen);
 | |
|   return dst->len;
 | |
| }
 | |
| 
 | |
| /* Count all matches with pattern p in src buffer. */
 | |
| static int fb_countmatches(fbuf_t *src, const char *p)
 | |
| {
 | |
|   pcre *re;
 | |
|   pcre_extra *re_ex;
 | |
|   const char *re_e;
 | |
|   int re_eo, m[3], pos, count;
 | |
|   if (!(re = pcre_compile(p, PCRE_CASELESS, &re_e, &re_eo, NULL))) exit(1);
 | |
|   re_ex = pcre_study(re, 0, &re_e);
 | |
|   for (count = 0, pos = 0;
 | |
|        pcre_exec(re, re_ex, src->buf, src->len, pos, 0, m, 3) >= 0;
 | |
|        pos = m[1]) count++;
 | |
|   return count;
 | |
| }
 | |
| 
 | |
| static const char *variants[] = {
 | |
|   "agggtaaa|tttaccct",         "[cgt]gggtaaa|tttaccc[acg]",
 | |
|   "a[act]ggtaaa|tttacc[agt]t", "ag[act]gtaaa|tttac[agt]ct",
 | |
|   "agg[act]taaa|ttta[agt]cct", "aggg[acg]aaa|ttt[cgt]ccct",
 | |
|   "agggt[cgt]aa|tt[acg]accct", "agggta[cgt]a|t[acg]taccct",
 | |
|   "agggtaa[cgt]|[acg]ttaccct", NULL
 | |
| };
 | |
| 
 | |
| static const char *subst[] = {
 | |
|   "B", "(c|g|t)", "D", "(a|g|t)",   "H", "(a|c|t)", "K", "(g|t)",
 | |
|   "M", "(a|c)",   "N", "(a|c|g|t)", "R", "(a|g)",   "S", "(c|g)",
 | |
|   "V", "(a|c|g)", "W", "(a|t)",     "Y", "(c|t)",   NULL
 | |
| };
 | |
| 
 | |
| int main(int argc, char **argv)
 | |
| {
 | |
|   fbuf_t seq[2];
 | |
|   const char **pp;
 | |
|   size_t ilen, clen, slen;
 | |
|   int flip;
 | |
|   fb_init(&seq[0]);
 | |
|   fb_init(&seq[1]);
 | |
|   ilen = fb_readall(&seq[0], stdin);
 | |
|   clen = fb_subst(&seq[1], &seq[0], ">.*|\n", "");
 | |
|   for (pp = variants; *pp; pp++)
 | |
|     printf("%s %d\n", *pp, fb_countmatches(&seq[1], *pp));
 | |
|   for (slen = 0, flip = 1, pp = subst; *pp; pp += 2, flip = 1-flip)
 | |
|     slen = fb_subst(&seq[1-flip], &seq[flip], *pp, pp[1]);
 | |
|   printf("\n%zu\n%zu\n%zu\n", ilen, clen, slen);
 | |
|   return 0;
 | |
| }
 | 
