[Sc-devel] regexp support revisited :)
Dan Stowell
danstowell at gmail.com
Tue Nov 20 21:27:30 PST 2007
Florian,
I'm just having a look at this code. The code looks fine, but I
wonder: why did you use Boost regex rather than GNU regex? GNU regex
comes bundled with the Mac OS X dev libs (because it's bundled with gcc,
I think), so it would be a really easy dependency, while Boost would
add a genuine extra dependency. I'm not very familiar with these
libs, so maybe the answer is obvious. I'd prefer not to add
dependencies to SC without good reason, though.
Dan
2007/11/4, Florian Schmidt <mista.tapas at gmx.net>:
> On Saturday 03 November 2007, Florian Schmidt wrote:
>
> > > Ok, this version (see attachment or below) shows this behaviour, besides
> > > some additional bugfixes ;) This version also behaves more like
> > > String.find() in that it no longer returns the match position relative to
> > > the offset, but instead relative to the start of the string.
> >
> > Oops, this version has serious troubles with the GC ;) Any advice?
>
> Ok, latest version.. This one does actually seem to work :)
>
> Index: build/SCClassLibrary/Common/Collections/String.sc
> ===================================================================
> --- build/SCClassLibrary/Common/Collections/String.sc (revision 6504)
> +++ build/SCClassLibrary/Common/Collections/String.sc (working copy)
> @@ -126,6 +126,23 @@
> containsi { arg string, offset = 0;
> ^this.find(string, true, offset).notNil
> }
> +
> + findRegexp { arg regexp, offset = 0;
> + _String_FindRegexp
> + ^this.primitiveFailed
> + }
> + findAllRegexp { arg string, offset = 0;
> + var indices = [], i=[];
> + while {
> + i = this.findRegexp(string, offset);
> + i.notNil
> + }{
> + indices = indices.add(i);
> + offset = i[0][0] + 1;
> + }
> + ^indices
> + }
> +
> find { arg string, ignoreCase = false, offset = 0;
> _String_Find
> ^this.primitiveFailed
> Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
> ===================================================================
> --- Source/lang/LangPrimSource/PyrStringPrim.cpp (revision 6504)
> +++ Source/lang/LangPrimSource/PyrStringPrim.cpp (working copy)
> @@ -40,6 +40,9 @@
> # include <regex.h>
> #endif
>
> +#include <boost/regex.hpp>
> +#include <string>
> +
> int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
> int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
> {
> @@ -178,6 +181,117 @@
> return(0);
> }
>
> +
> +int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
> +{
> + int err;
> +
> + PyrSlot *a = g->sp - 2; // source string
> + PyrSlot *b = g->sp - 1; // pattern
> + PyrSlot *c = g->sp; // offset
> +
> + // std::cout << " num of args: " << g->numpop << std::endl;
> +
> + if (!isKindOfSlot(b, class_string) || (c->utag != tagInt)) return
> errWrongType;
> +
> + int offset = c->ui;
> +
> + char *string = (char*)malloc(a->uo->size + 1);
> + err = slotStrVal(a, string, a->uo->size + 1);
> + if (err) return err;
> +
> + char *pattern = (char*)malloc(b->uo->size + 1);
> + err = slotStrVal(b, pattern, b->uo->size + 1);
> + if (err) return err;
> +
> +
> + // std::cout << "input string: " << string << std::endl;
> + // std::cout << " pattern: " << pattern << std::endl;
> +
> + // std::cout << " offset: " << offset << std::endl;
> +
> + std::string stringstring(string);
> + std::string::const_iterator start, end;
> +
> + start = stringstring.begin() + offset;
> + end = stringstring.end();
> +
> + if (start >= end)
> + {
> + SetNil(a);
> + return errNone;
> + }
> +
> + try
> + {
> + boost::match_results<std::string::const_iterator> what;
> + boost::match_flag_type flags = boost::match_default;
> +
> + boost::regex expression(pattern);
> +
> + bool matched = boost::regex_search(start, end, what, expression, flags);
> +
> + PyrObject *result_array = newPyrArray(g->gc, what.size(), 0, true);
> + result_array->size = 0;
> +
> +
> + if (matched)
> + {
> + for (size_t i = 0; i < what.size(); ++i)
> + {
> + if (what[0].matched == false)
> + {
> + result_array->size++;
> + SetNil(result_array->slots+i);
> + }
> + else
> + {
> + result_array->size++;
> +
> + int match_start = what[i].first - start;
> + int match_length = what[i].second - what[i].first;
> +
> + char *match = (char*)malloc(match_length + 1);
> + strncpy(match, string + offset + match_start, match_length);
> + match[match_length] = 0;
> +
> + PyrObject *array = newPyrArray(g->gc, 2, 0, true);
> + array->size = 2;
> +
> + SetInt(array->slots, match_start + offset);
> +
> + PyrObject *matched_string = (PyrObject*)newPyrString(g->gc, match, 0,
> true);
> + SetObject(array->slots+1, matched_string);
> + g->gc->GCWrite(matched_string, array->slots + 1);
> +
> + SetObject(result_array->slots + i, array);
> + g->gc->GCWrite(array, result_array->slots + i);
> + }
> + }
> + }
> + else
> + {
> + SetNil(a);
> + return errNone;
> + }
> +
> + SetObject(a, result_array);
> + g->gc->GCWrite(result_array,a);
> +
> + return errNone;
> + }
> + catch (boost::regex_error e)
> + {
> + SetNil(a);
> + return errNone;
> + }
> +
> + SetNil(a);
> +
> + return errNone;
> +}
> +
> +
> int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
> {
> int err, start, end;
> @@ -622,11 +736,12 @@
> definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);
> definePrimitive(base, index++, "_String_AsCompileString",
> prString_AsCompileString, 1, 0);
> definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
> - definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
> - definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
> + definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
> + definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
> definePrimitive(base, index++, "_String_FindBackwards",
> prString_FindBackwards, 4, 0);
> - definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
> + definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
> definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
> + definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3,
> 0);
> definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
> definePrimitive(base, index++, "_String_GetResourceDirPath",
> prString_GetResourceDirPath, 1, 0);
> definePrimitive(base, index++, "_String_StandardizePath",
> prString_StandardizePath, 1, 0);
> Index: SConstruct
> ===================================================================
> --- SConstruct (revision 6504)
> +++ SConstruct (working copy)
> @@ -790,7 +790,7 @@
> '#Headers/server',
> '#Source/lang/LangSource/Bison'],
> CPPDEFINES = [['USE_SC_TERMINAL_CLIENT', env['TERMINAL_CLIENT']]],
> - LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm'],
> + LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm', 'boost_regex'],
> LIBPATH = 'build'
> )
> if PLATFORM == 'darwin':
>
>
> --
> Palimm Palimm!
> http://tapas.affenbande.org
>
> _______________________________________________
> Sc-devel mailing list
> Sc-devel at create.ucsb.edu
> http://www.create.ucsb.edu/mailman/listinfo/sc-devel
>
>
>
--
http://www.mcld.co.uk
More information about the Sc-devel mailing list