[Sc-devel] regexp support revisited :)

Jan Trutzschler sc3 at sampleAndHold.org
Tue Nov 20 22:41:11 PST 2007


Hi,
this seems to go in the direction of the advanced find, which I'm
about to commit in the coming days. I did not go too deep into regex, but
since advanced find has these abilities I would be adding:
libicucore.dylib, which is indeed in the system already (is that
actually the gnu regex?)
along with
< http://aarone.org/cocoaicu/ >

Jan


On Nov 20, 2007, at 10:27 PM, Dan Stowell wrote:

> Florian,
>
> I'm just having a look at this code. The code looks fine, but I
> wonder: why did you use boost regex rather than gnu regex? Gnu regex
> comes bundled with the Mac OSX dev libs (because it's bundled with gcc
> I think) so would be a really easy dependency, while boost would be
> adding a proper extra dependency. I'm not very familiar with these
> libs so maybe the answer is obvious. I'd prefer not to add
> dependencies to SC without good reason though.
>
> Dan
>
> 2007/11/4, Florian Schmidt <mista.tapas at gmx.net>:
>> On Saturday 03 November 2007, Florian Schmidt wrote:
>>
>>>> Ok, this version (see attachment or below) shows this behaviour  
>>>> besides
>>>> some additional bugfixes ;) This version also behaves more like the
>>>> String.find() in that it doesn't return the match position  
>>>> relative to
>>>> the offset anymore but also relative to the start of the string..
>>>
>>> Oops, this version has serious troubles with the GC ;) Any advice?
>>
>> Ok, latest version.. This one does actually seem to work :)
>>
>> Index: build/SCClassLibrary/Common/Collections/String.sc
>> ===================================================================
>> --- build/SCClassLibrary/Common/Collections/String.sc   (revision  
>> 6504)
>> +++ build/SCClassLibrary/Common/Collections/String.sc   (working  
>> copy)
>> @@ -126,6 +126,23 @@
>>         containsi { arg string, offset = 0;
>>                 ^this.find(string, true, offset).notNil
>>         }
>> +
>> +  findRegexp { arg regexp, offset = 0;
>> +       _String_FindRegexp
>> +       ^this.primitiveFailed
>> +  }
>> +       findAllRegexp { arg string, offset = 0;
>> +               var indices = [], i=[];
>> +               while {
>> +                       i = this.findRegexp(string, offset);
>> +                       i.notNil
>> +               }{
>> +                       indices = indices.add(i);
>> +                       offset = i[0][0] + 1;
>> +               }
>> +               ^indices
>> +       }
>> +
>>         find { arg string, ignoreCase = false, offset = 0;
>>                 _String_Find
>>                 ^this.primitiveFailed
>> Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
>> ===================================================================
>> --- Source/lang/LangPrimSource/PyrStringPrim.cpp        (revision  
>> 6504)
>> +++ Source/lang/LangPrimSource/PyrStringPrim.cpp        (working  
>> copy)
>> @@ -40,6 +40,9 @@
>>  # include <regex.h>
>>  #endif
>>
>> +#include <boost/regex.hpp>
>> +#include <string>
>> +
>>  int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
>>  int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
>>  {
>> @@ -178,6 +181,117 @@
>>         return(0);
>>  }
>>
>> +
>> +int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
>> +{
>> +       int err;
>> +
>> +       PyrSlot *a = g->sp - 2; // source string
>> +       PyrSlot *b = g->sp - 1; // pattern
>> +       PyrSlot *c = g->sp;     // offset
>> +
>> +       // std::cout << " num of args: " <<  g->numpop << std::endl;
>> +
>> +       if (!isKindOfSlot(b, class_string) || (c->utag != tagInt))  
>> return
>> errWrongType;
>> +
>> +       int offset = c->ui;
>> +
>> +       char *string = (char*)malloc(a->uo->size + 1);
>> +       err = slotStrVal(a, string, a->uo->size + 1);
>> +       if (err) return err;
>> +
>> +       char *pattern = (char*)malloc(b->uo->size + 1);
>> +       err = slotStrVal(b, pattern, b->uo->size + 1);
>> +       if (err) return err;
>> +
>> +
>> +       // std::cout << "input string: " << string << std::endl;
>> +       // std::cout << "     pattern: " << pattern << std::endl;
>> +
>> +       // std::cout << "      offset: " << offset << std::endl;
>> +
>> +       std::string stringstring(string);
>> +       std::string::const_iterator start, end;
>> +
>> +       start = stringstring.begin() + offset;
>> +       end = stringstring.end();
>> +
>> +       if (start >= end)
>> +       {
>> +               SetNil(a);
>> +               return errNone;
>> +       }
>> +
>> +       try
>> +       {
>> +               boost::match_results<std::string::const_iterator>  
>> what;
>> +               boost::match_flag_type flags = boost::match_default;
>> +
>> +               boost::regex expression(pattern);
>> +
>> +               bool matched = boost::regex_search(start, end,  
>> what, expression, flags);
>> +
>> +               PyrObject *result_array = newPyrArray(g->gc,  
>> what.size(), 0, true);
>> +               result_array->size = 0;
>> +
>> +
>> +               if (matched)
>> +               {
>> +                       for (size_t i = 0; i < what.size(); ++i)
>> +                       {
>> +                               if (what[0].matched == false)
>> +                               {
>> +                                       result_array->size++;
>> +                                       SetNil(result_array->slots 
>> +i);
>> +                               }
>> +                               else
>> +                               {
>> +                                       result_array->size++;
>> +
>> +                                       int match_start =  what 
>> [i].first - start;
>> +                                       int match_length = what 
>> [i].second - what[i].first;
>> +
>> +                                       char *match = (char*)malloc 
>> (match_length + 1);
>> +                                       strncpy(match, string +  
>> offset + match_start, match_length);
>> +                                       match[match_length] = 0;
>> +
>> +                                       PyrObject *array =  
>> newPyrArray(g->gc, 2, 0, true);
>> +                                       array->size = 2;
>> +
>> +                                       SetInt(array->slots,  
>> match_start + offset);
>> +
>> +                                       PyrObject *matched_string  
>> = (PyrObject*)newPyrString(g->gc, match, 0,
>> true);
>> +                                       SetObject(array->slots+1,  
>> matched_string);
>> +                                       g->gc->GCWrite 
>> (matched_string, array->slots + 1);
>> +
>> +                                       SetObject(result_array- 
>> >slots + i, array);
>> +                                       g->gc->GCWrite(array,  
>> result_array->slots + i);
>> +                               }
>> +                       }
>> +               }
>> +               else
>> +               {
>> +                       SetNil(a);
>> +                       return errNone;
>> +               }
>> +
>> +               SetObject(a, result_array);
>> +               g->gc->GCWrite(result_array,a);
>> +
>> +               return errNone;
>> +       }
>> +       catch (boost::regex_error e)
>> +       {
>> +               SetNil(a);
>> +               return errNone;
>> +       }
>> +
>> +       SetNil(a);
>> +
>> +       return errNone;
>> +}
>> +
>> +
>>  int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
>>  {
>>         int err, start, end;
>> @@ -622,11 +736,12 @@
>>         definePrimitive(base, index++, "_String_AsFloat",  
>> prString_AsFloat, 1, 0);
>>         definePrimitive(base, index++, "_String_AsCompileString",
>> prString_AsCompileString, 1, 0);
>>         definePrimitive(base, index++, "_String_Getenv",  
>> prString_Getenv, 1, 0);
>> -    definePrimitive(base, index++, "_String_Setenv",  
>> prString_Setenv, 2, 0);
>> -    definePrimitive(base, index++, "_String_Find", prString_Find,  
>> 4, 0);
>> +       definePrimitive(base, index++, "_String_Setenv",  
>> prString_Setenv, 2, 0);
>> +       definePrimitive(base, index++, "_String_Find",  
>> prString_Find, 4, 0);
>>         definePrimitive(base, index++, "_String_FindBackwards",
>> prString_FindBackwards, 4, 0);
>> -    definePrimitive(base, index++, "_String_Format",  
>> prString_Format, 2, 0);
>> +       definePrimitive(base, index++, "_String_Format",  
>> prString_Format, 2, 0);
>>         definePrimitive(base, index++, "_String_Regexp",  
>> prString_Regexp, 4, 0);
>> +       definePrimitive(base, index++, "_String_FindRegexp",  
>> prString_FindRegexp, 3,
>> 0);
>>         definePrimitive(base, index++, "_StripRtf", prStripRtf, 1,  
>> 0);
>>         definePrimitive(base, index++, "_String_GetResourceDirPath",
>> prString_GetResourceDirPath, 1, 0);
>>         definePrimitive(base, index++, "_String_StandardizePath",
>> prString_StandardizePath, 1, 0);
>> Index: SConstruct
>> ===================================================================
>> --- SConstruct  (revision 6504)
>> +++ SConstruct  (working copy)
>> @@ -790,7 +790,7 @@
>>                 '#Headers/server',
>>                 '#Source/lang/LangSource/Bison'],
>>      CPPDEFINES = [['USE_SC_TERMINAL_CLIENT', env 
>> ['TERMINAL_CLIENT']]],
>> -    LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm'],
>> +    LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm',  
>> 'boost_regex'],
>>      LIBPATH = 'build'
>>      )
>>  if PLATFORM == 'darwin':
>>
>>
>> --
>> Palimm Palimm!
>> http://tapas.affenbande.org
>>
>> _______________________________________________
>> Sc-devel mailing list
>> Sc-devel at create.ucsb.edu
>> http://www.create.ucsb.edu/mailman/listinfo/sc-devel
>>
>>
>>
>
>
> -- 
> http://www.mcld.co.uk
> _______________________________________________
> Sc-devel mailing list
> Sc-devel at create.ucsb.edu
> http://www.create.ucsb.edu/mailman/listinfo/sc-devel



More information about the Sc-devel mailing list