[Sc-devel] regexp support revisited :)

Dan Stowell danstowell at gmail.com
Sat Nov 3 16:07:08 PST 2007


Hi -

I'm currently ill and not doing any coding at the moment, so I can't
test this, sorry. But one thing occurs to me: instead of returning the
length of the matched text, could you return the text itself?

e.g.
"sesame seeds".findRegexp("s.*e")
[[0, "se"], [2, "same"], [7, "see"]]

This would give more flexibility (because of course it's easy to find
the lengths from the above result).

Dan


2007/11/3, Florian Schmidt <mista.tapas at gmx.net>:
> On Saturday 03 November 2007, Florian Schmidt wrote:
> > On Saturday 03 November 2007, Florian Schmidt wrote:
> > > Erm, nah, my logic was just wrong ;)
> > >
> > > Soz for the noise :)
> >
> > Ok, here you go. Regular expression matching including subexpressions:
>
> patch with a small bugfix included attached..
>
> Index: build/SCClassLibrary/Common/Collections/String.sc
> ===================================================================
> --- build/SCClassLibrary/Common/Collections/String.sc   (revision 6504)
> +++ build/SCClassLibrary/Common/Collections/String.sc   (working copy)
> @@ -126,6 +126,12 @@
>         containsi { arg string, offset = 0;
>                 ^this.find(string, true, offset).notNil
>         }
> +
> +  findRegexp { arg regexp, offset = 0;
> +       _String_FindRegexp
> +       ^this.primitiveFailed
> +  }
> +
>         find { arg string, ignoreCase = false, offset = 0;
>                 _String_Find
>                 ^this.primitiveFailed
> Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
> ===================================================================
> --- Source/lang/LangPrimSource/PyrStringPrim.cpp        (revision 6504)
> +++ Source/lang/LangPrimSource/PyrStringPrim.cpp        (working copy)
> @@ -40,6 +40,9 @@
>  # include <regex.h>
>  #endif
>
> +#include <boost/regex.hpp>
> +#include <string>
> +
>  int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
>  int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
>  {
> @@ -178,6 +181,101 @@
>         return(0);
>  }
>
> +
> +int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
> +{
> +       int err;
> +
> +       PyrSlot *a = g->sp - 2; // source string
> +       PyrSlot *b = g->sp - 1; // pattern
> +       PyrSlot *c = g->sp;     // offset
> +
> +       // std::cout << " num of args: " <<  g->numpop << std::endl;
> +
> +       if (!isKindOfSlot(b, class_string)) return errWrongType;
> +
> +       int offset = c->ui;
> +
> +       char *string = (char*)malloc(a->uo->size + 1);
> +       err = slotStrVal(a, string, a->uo->size + 1);
> +       if (err) return err;
> +
> +       char *pattern = (char*)malloc(b->uo->size + 1);
> +       err = slotStrVal(b, pattern, b->uo->size + 1);
> +       if (err) return err;
> +
> +
> +       // std::cout << "input string: " << string << std::endl;
> +       // std::cout << "     pattern: " << pattern << std::endl;
> +
> +       // std::cout << "      offset: " << offset << std::endl;
> +
> +       std::string stringstring(string);
> +       std::string::const_iterator start, end;
> +
> +       start = stringstring.begin() + offset;
> +       end = stringstring.end();
> +
> +       boost::match_results<std::string::const_iterator> what;
> +       boost::match_flag_type flags = boost::match_default;
> +
> +       boost::regex expression(pattern);
> +
> +       int match_start = 0;
> +       int match_length = 0;
> +
> +       bool matched = boost::regex_search(start, end, what, expression, flags);
> +
> +       PyrObject *result_array = newPyrArray(g->gc, what.size(), 0, true);
> +       result_array->size = 0;
> +
> +
> +       if (matched)
> +       {
> +               for (size_t i = 0; i < what.size(); ++i)
> +               {
> +                       if (what[0].matched == false)
> +                       {
> +                               // std::cout << "oooh should we ever be here?" << std::endl;
> +                               result_array->size++;
> +                               SetNil(result_array->slots+i);
> +                       }
> +                       else
> +                       {
> +                               result_array->size++;
> +
> +                               // std::cout << "match!!"  << std::endl;
> +                               match_start =  what[i].first - start;
> +                               match_length = what[i].second - what[i].first;
> +
> +                               PyrObject *array = newPyrArray(g->gc, 2, 0, true);
> +                               array->size = 2;
> +
> +                               SetInt(array->slots, match_start);
> +                               SetInt(array->slots + 1, match_length);
> +
> +                               SetObject(result_array->slots + i, array);
> +
> +                               // std::cout << match_start << std::endl;
> +                               // std::cout << match_length << std::endl;
> +                       }
> +               }
> +       }
> +       else
> +       {
> +               // std::cout << "mismatch" << std::endl;
> +               SetObject(a, result_array);
> +               return errNone;
> +       }
> +
> +       SetObject(a, result_array);
> +
> +       //g->gc->GCWrite(array, a);
> +
> +       return errNone;
> +}
> +
> +
>  int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
>  {
>         int err, start, end;
> @@ -622,11 +720,12 @@
>         definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);
>         definePrimitive(base, index++, "_String_AsCompileString",
> prString_AsCompileString, 1, 0);
>         definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
> -    definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
> -    definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
> +       definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
> +       definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
>         definePrimitive(base, index++, "_String_FindBackwards",
> prString_FindBackwards, 4, 0);
> -    definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
> +       definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
>         definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
> +       definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3,
> 0);
>         definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
>         definePrimitive(base, index++, "_String_GetResourceDirPath",
> prString_GetResourceDirPath, 1, 0);
>         definePrimitive(base, index++, "_String_StandardizePath",
> prString_StandardizePath, 1, 0);
> Index: SConstruct
> ===================================================================
> --- SConstruct  (revision 6504)
> +++ SConstruct  (working copy)
> @@ -790,7 +790,7 @@
>                 '#Headers/server',
>                 '#Source/lang/LangSource/Bison'],
>      CPPDEFINES = [['USE_SC_TERMINAL_CLIENT', env['TERMINAL_CLIENT']]],
> -    LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm'],
> +    LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm', 'boost_regex'],
>      LIBPATH = 'build'
>      )
>  if PLATFORM == 'darwin':
>
>
>
> --
> Palimm Palimm!
> http://tapas.affenbande.org
>
> _______________________________________________
> Sc-devel mailing list
> Sc-devel at create.ucsb.edu
> http://www.create.ucsb.edu/mailman/listinfo/sc-devel
>
>
>


-- 
http://www.mcld.co.uk


More information about the Sc-devel mailing list