[Sc-devel] regexp support revisited :)
Florian Schmidt
mista.tapas at gmx.net
Sat Nov 3 16:12:45 PST 2007
On Saturday 03 November 2007, Florian Schmidt wrote:
> Erm, nah, my logic was just wrong ;)
>
> Soz for the noise :)
Ok, here you go. Regular expression matching including subexpressions:
"aaaafoobaaaarxxxxx".findRegexp("(fo*)(ba*)")
[ [ 4, 8 ], [ 4, 3 ], [ 7, 5 ] ]
The first pair in the list is the full match. The second pair is the first
subexpression match, etc.
Each pair is the start and length of a match; the start is counted relative to the offset:
"aaaafoobaaaarxxxxx".findRegexp("(fo*)(ba*)",3)
[ [ 1, 8 ], [ 1, 3 ], [ 4, 5 ] ]
I'm not too sure about the GC part of the story. So if anyone wants to look it
over, please go for it :)
Also one should probably check correctly for boost_regex and only
conditionally compile this code. I just added it to the common LIBS thing in
SConstruct [see bottom of patch]..
Patch included and attached..
Have fun,
Flo
Index: build/SCClassLibrary/Common/Collections/String.sc
===================================================================
--- build/SCClassLibrary/Common/Collections/String.sc (revision 6504)
+++ build/SCClassLibrary/Common/Collections/String.sc (working copy)
@@ -126,6 +126,12 @@
containsi { arg string, offset = 0;
^this.find(string, true, offset).notNil
}
+
+ findRegexp { arg regexp, offset = 0;
+ _String_FindRegexp
+ ^this.primitiveFailed
+ }
+
find { arg string, ignoreCase = false, offset = 0;
_String_Find
^this.primitiveFailed
Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
===================================================================
--- Source/lang/LangPrimSource/PyrStringPrim.cpp (revision 6504)
+++ Source/lang/LangPrimSource/PyrStringPrim.cpp (working copy)
@@ -40,6 +40,9 @@
# include <regex.h>
#endif
+#include <boost/regex.hpp>
+#include <string>
+
int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
{
@@ -178,6 +181,98 @@
return(0);
}
+
+int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
+{
+ int err;
+
+ PyrSlot *a = g->sp - 2; // source string
+ PyrSlot *b = g->sp - 1; // pattern
+ PyrSlot *c = g->sp; // offset
+
+ // std::cout << " num of args: " << g->numpop << std::endl;
+
+ if (!isKindOfSlot(b, class_string)) return errWrongType;
+
+ int offset = c->ui;
+
+ char *string = (char*)malloc(a->uo->size + 1);
+ err = slotStrVal(a, string, a->uo->size + 1);
+ if (err) return err;
+
+ char *pattern = (char*)malloc(b->uo->size + 1);
+ err = slotStrVal(b, pattern, b->uo->size + 1);
+ if (err) return err;
+
+
+ // std::cout << "input string: " << string << std::endl;
+ // std::cout << " pattern: " << pattern << std::endl;
+
+ // std::cout << " offset: " << offset << std::endl;
+
+ std::string stringstring(string);
+ std::string::const_iterator start, end;
+
+ start = stringstring.begin() + offset;
+ end = stringstring.end();
+
+ boost::match_results<std::string::const_iterator> what;
+ boost::match_flag_type flags = boost::match_default;
+
+ boost::regex expression(pattern);
+
+ int match_start = 0;
+ int match_length = 0;
+
+ PyrObject *result_array = newPyrArray(g->gc, what.size(), 0, true);
+ result_array->size = 0;
+
+
+ if (boost::regex_search(start, end, what, expression, flags))
+ {
+ for (size_t i = 0; i < what.size(); ++i)
+ {
+ if (what[0].matched == false)
+ {
+ // std::cout << "oooh should we ever be here?" << std::endl;
+ return errNone;
+ }
+ else
+ {
+ result_array->size++;
+
+ // std::cout << "match!!" << std::endl;
+ match_start = what[i].first - start;
+ match_length = what[i].second - what[i].first;
+
+ PyrObject *array = newPyrArray(g->gc, 2, 0, true);
+ array->size = 2;
+
+ SetInt(array->slots, match_start);
+ SetInt(array->slots + 1, match_length);
+
+ SetObject(result_array->slots + i, array);
+
+ // std::cout << match_start << std::endl;
+ // std::cout << match_length << std::endl;
+ }
+ }
+ }
+ else
+ {
+ // std::cout << "mismatch" << std::endl;
+ SetObject(a, result_array);
+ return errNone;
+ }
+
+ SetObject(a, result_array);
+
+ //g->gc->GCWrite(array, a);
+
+ return errNone;
+}
+
+
int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
{
int err, start, end;
@@ -622,11 +717,12 @@
definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);
definePrimitive(base, index++, "_String_AsCompileString",
prString_AsCompileString, 1, 0);
definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
- definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
- definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
+ definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
+ definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
definePrimitive(base, index++, "_String_FindBackwards",
prString_FindBackwards, 4, 0);
- definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
+ definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
+ definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3,
0);
definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
definePrimitive(base, index++, "_String_GetResourceDirPath",
prString_GetResourceDirPath, 1, 0);
definePrimitive(base, index++, "_String_StandardizePath",
prString_StandardizePath, 1, 0);
Index: SConstruct
===================================================================
--- SConstruct (revision 6504)
+++ SConstruct (working copy)
@@ -790,7 +790,7 @@
'#Headers/server',
'#Source/lang/LangSource/Bison'],
CPPDEFINES = [['USE_SC_TERMINAL_CLIENT', env['TERMINAL_CLIENT']]],
- LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm'],
+ LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm', 'boost_regex'],
LIBPATH = 'build'
)
if PLATFORM == 'darwin':
--
Palimm Palimm!
http://tapas.affenbande.org
-------------- next part --------------
A non-text attachment was scrubbed...
Name: sc-regexp-patch.diff
Type: text/x-diff
Size: 5102 bytes
Desc: not available
Url : http://www.create.ucsb.edu/pipermail/sc-devel/attachments/20071103/4e8dcc02/sc-regexp-patch.bin
More information about the Sc-devel
mailing list