[Sc-devel] regexp support revisited :)
Florian Schmidt
mista.tapas at gmx.net
Sat Nov 3 22:54:40 PST 2007
On Saturday 03 November 2007, Florian Schmidt wrote:
> > e.g.
> > "sesame seeds".findRegExp("s.*e")
> > [[0, "se"], [2, "same"], [7, "see"]]
> >
> > This would give more flexibility (because of course it's easy to find
> > the lengths from the above result).
>
> Sure. I thought about this, too, for the same reason. But i was too lazy to
> look at how to create strings ;)
Ok, this version (see attachment or below) shows this behaviour, besides some
additional bugfixes ;) This version also behaves more like String.find()
in that it no longer returns the match position relative to the offset,
but instead relative to the start of the string.
"foobar".findRegexp("o*bar")
[ [ 1, oobar ] ]
"32424 334 /**aaaaaa*/".findRegexp("/\\*\\*a*\\*/")
[ [ 10, /**aaaaaa*/ ] ]
"foobar".findRegexp("(o*)(bar)")
[ [ 1, oobar ], [ 1, oo ], [ 3, bar ] ]
"aaaabaaa".findAllRegexp("a+")
[ [ [ 0, aaaa ] ], [ [ 1, aaa ] ], [ [ 2, aa ] ], [ [ 3, a ] ], [ [ 5,
aaa ] ], [ [ 6, aa ] ], [ [ 7, a ] ] ]
Here are also some docs, which I don't know how to get into the Help file (using
an HTML editor?? - yuk!!)
I guess one could think about exposing some more features of the boost regex
classes. Tell me what you think..
----------------------- SNIP
findRegexp(expression, offset)
POSIX regular expression matching using libboost. Returns a list of submatches,
or nil if nothing matches or on error. A submatch consists of a List [offset, match].
The first submatch simply consists of the matched string of the whole expression, while
subsequent submatches consist of the matches corresponding to subexpressions.
Examples without subexpressions:
// Simple expression:
"foobar".findRegexp("o*bar")
// We have to escape special chars twice * -> \\*:
"32424 334 /**aaaaaa*/".findRegexp("/\\*\\*a*\\*/")
Examples with subexpressions (use brackets to mark subexpressions):
// This should make it obvious what happens :)
"foobar".findRegexp("(o*)(bar)")
findAllRegexp(expression, offset)
Iterates over the String to find all matches of a pattern (there might be some
characters of the source string contained in more than one match).
Examples:
"aaaabaaa".findAllRegexp("a+")
----------------------- SNIP
Index: build/SCClassLibrary/Common/Collections/String.sc
===================================================================
--- build/SCClassLibrary/Common/Collections/String.sc (revision 6504)
+++ build/SCClassLibrary/Common/Collections/String.sc (working copy)
@@ -126,6 +126,23 @@
containsi { arg string, offset = 0;
^this.find(string, true, offset).notNil
}
+
+ findRegexp { arg regexp, offset = 0;
+ _String_FindRegexp
+ ^this.primitiveFailed
+ }
+ findAllRegexp { arg string, offset = 0;
+ var indices = [], i=[];
+ while {
+ i = this.findRegexp(string, offset);
+ i.notNil
+ }{
+ indices = indices.add(i);
+ offset = i[0][0] + 1;
+ }
+ ^indices
+ }
+
find { arg string, ignoreCase = false, offset = 0;
_String_Find
^this.primitiveFailed
Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
===================================================================
--- Source/lang/LangPrimSource/PyrStringPrim.cpp (revision 6504)
+++ Source/lang/LangPrimSource/PyrStringPrim.cpp (working copy)
@@ -40,6 +40,9 @@
# include <regex.h>
#endif
+#include <boost/regex.hpp>
+#include <string>
+
int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
{
@@ -178,6 +181,114 @@
return(0);
}
+
+int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
+{
+ int err;
+
+ PyrSlot *a = g->sp - 2; // source string
+ PyrSlot *b = g->sp - 1; // pattern
+ PyrSlot *c = g->sp; // offset
+
+ // std::cout << " num of args: " << g->numpop << std::endl;
+
+ if (!isKindOfSlot(b, class_string) || (c->utag != tagInt)) return
errWrongType;
+
+ int offset = c->ui;
+
+ char *string = (char*)malloc(a->uo->size + 1);
+ err = slotStrVal(a, string, a->uo->size + 1);
+ if (err) return err;
+
+ char *pattern = (char*)malloc(b->uo->size + 1);
+ err = slotStrVal(b, pattern, b->uo->size + 1);
+ if (err) return err;
+
+
+ // std::cout << "input string: " << string << std::endl;
+ // std::cout << " pattern: " << pattern << std::endl;
+
+ // std::cout << " offset: " << offset << std::endl;
+
+ std::string stringstring(string);
+ std::string::const_iterator start, end;
+
+ start = stringstring.begin() + offset;
+ end = stringstring.end();
+
+ if (start >= end)
+ {
+ SetNil(a);
+ return errNone;
+ }
+
+ try
+ {
+ boost::match_results<std::string::const_iterator> what;
+ boost::match_flag_type flags = boost::match_default;
+
+ boost::regex expression(pattern);
+
+ bool matched = boost::regex_search(start, end, what, expression, flags);
+
+ PyrObject *result_array = newPyrArray(g->gc, what.size(), 0, true);
+ result_array->size = 0;
+
+
+ if (matched)
+ {
+ for (size_t i = 0; i < what.size(); ++i)
+ {
+ if (what[0].matched == false)
+ {
+ result_array->size++;
+ SetNil(result_array->slots+i);
+ }
+ else
+ {
+ result_array->size++;
+
+ int match_start = what[i].first - start;
+ int match_length = what[i].second - what[i].first;
+
+ char *match = (char*)malloc(match_length + 1);
+ strncpy(match, string + offset + match_start, match_length);
+ match[match_length] = 0;
+
+ PyrObject *array = newPyrArray(g->gc, 2, 0, true);
+ array->size = 2;
+
+ SetInt(array->slots, match_start + offset);
+
+ PyrObject *matched_string = (PyrObject*)newPyrString(g->gc, match, 0,
true);
+ SetObject(array->slots+1, matched_string);
+
+ SetObject(result_array->slots + i, array);
+ }
+ }
+ }
+ else
+ {
+ SetNil(a);
+ return errNone;
+ }
+
+ SetObject(a, result_array);
+
+ return errNone;
+ }
+ catch (boost::regex_error e)
+ {
+ SetNil(a);
+ return errNone;
+ }
+
+ SetNil(a);
+
+ return errNone;
+}
+
+
int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
{
int err, start, end;
@@ -622,11 +733,12 @@
definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);
definePrimitive(base, index++, "_String_AsCompileString",
prString_AsCompileString, 1, 0);
definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
- definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
- definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
+ definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
+ definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
definePrimitive(base, index++, "_String_FindBackwards",
prString_FindBackwards, 4, 0);
- definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
+ definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
+ definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3,
0);
definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
definePrimitive(base, index++, "_String_GetResourceDirPath",
prString_GetResourceDirPath, 1, 0);
definePrimitive(base, index++, "_String_StandardizePath",
prString_StandardizePath, 1, 0);
Index: SConstruct
===================================================================
--- SConstruct (revision 6504)
+++ SConstruct (working copy)
@@ -790,7 +790,7 @@
'#Headers/server',
'#Source/lang/LangSource/Bison'],
CPPDEFINES = [['USE_SC_TERMINAL_CLIENT', env['TERMINAL_CLIENT']]],
- LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm'],
+ LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm', 'boost_regex'],
LIBPATH = 'build'
)
if PLATFORM == 'darwin':
--
Palimm Palimm!
http://tapas.affenbande.org
-------------- next part --------------
A non-text attachment was scrubbed...
Name: sc-regexp-patch.diff
Type: text/x-diff
Size: 5547 bytes
Desc: not available
Url : http://www.create.ucsb.edu/pipermail/sc-devel/attachments/20071103/d78a4ae3/sc-regexp-patch-0001.bin
More information about the Sc-devel
mailing list