[Sc-devel] regexp support revisited :)

Florian Schmidt mista.tapas at gmx.net
Sat Nov 3 16:35:18 PST 2007


On Saturday 03 November 2007, Florian Schmidt wrote:
> On Saturday 03 November 2007, Florian Schmidt wrote:
> > Erm, nah, my logic was just wrong ;)
> >
> > Soz for the noise :)
>
> Ok, here you go. Regular expression matching including subexpressions:

A patch that also includes a small bugfix is attached.

Index: build/SCClassLibrary/Common/Collections/String.sc
===================================================================
--- build/SCClassLibrary/Common/Collections/String.sc	(revision 6504)
+++ build/SCClassLibrary/Common/Collections/String.sc	(working copy)
@@ -126,6 +126,12 @@
 	containsi { arg string, offset = 0;
 		^this.find(string, true, offset).notNil
 	}
+  
+	findRegexp { arg regexp, offset = 0; // regexp search via the _String_FindRegexp primitive
+		_String_FindRegexp
+		^this.primitiveFailed
+	}
+
 	find { arg string, ignoreCase = false, offset = 0;
 		_String_Find
 		^this.primitiveFailed
Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
===================================================================
--- Source/lang/LangPrimSource/PyrStringPrim.cpp	(revision 6504)
+++ Source/lang/LangPrimSource/PyrStringPrim.cpp	(working copy)
@@ -40,6 +40,9 @@
 # include <regex.h>
 #endif
 
+#include <boost/regex.hpp>
+#include <string>
+
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
 {
@@ -178,6 +181,101 @@
 	return(0);
 }
 
+
+/**
+ * Primitive behind String:findRegexp: searches the receiver for the first
+ * match of a regular expression, starting at a character offset.
+ *
+ * Stack: g->sp - 2 is the receiver, g->sp - 1 the pattern, g->sp the offset.
+ * On success the receiver slot is replaced by an Array with one entry per
+ * (sub)expression: [start, length], with start counted from offset, or nil
+ * for a subexpression that took no part in the match. The Array is empty if
+ * nothing matched or if offset lies outside the string.
+ *
+ * Returns errNone on success, errWrongType if the pattern is not a String or
+ * the regex engine throws, or whatever error slotStrVal reports.
+ */
+int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
+{
+	PyrSlot *a = g->sp - 2; // source string
+	PyrSlot *b = g->sp - 1; // pattern
+	PyrSlot *c = g->sp;     // offset
+
+	if (!isKindOfSlot(b, class_string)) return errWrongType;
+
+	// NOTE(review): the offset slot is read without a type check -- confirm
+	// that callers always pass an Integer.
+	int offset = c->ui;
+
+	// std::string owns the scratch buffers, so every return path releases them.
+	std::string subject(a->uo->size + 1, '\0');
+	int err = slotStrVal(a, &subject[0], a->uo->size + 1);
+	if (err) return err;
+	// Trim to the C-string length, as constructing from a char* would.
+	subject.resize(std::char_traits<char>::length(subject.c_str()));
+
+	std::string pattern(b->uo->size + 1, '\0');
+	err = slotStrVal(b, &pattern[0], b->uo->size + 1);
+	if (err) return err;
+	pattern.resize(std::char_traits<char>::length(pattern.c_str()));
+
+	// An offset outside [0, length] would yield an invalid iterator, so such
+	// a search is reported as "no match" rather than attempted.
+	bool in_range = offset >= 0 && static_cast<size_t>(offset) <= subject.size();
+	std::string::const_iterator start = subject.begin() + (in_range ? offset : 0);
+	std::string::const_iterator end = subject.end();
+
+	boost::regex expression;
+	boost::match_results<std::string::const_iterator> what;
+	bool matched = false;
+
+	try
+	{
+		expression.assign(pattern);
+		matched = in_range && boost::regex_search(start, end, what, expression, boost::match_default);
+	}
+	catch (const std::exception &)
+	{
+		// Fail the primitive instead of letting a regex exception (e.g. for a
+		// malformed pattern) unwind through the caller.
+		// NOTE(review): errWrongType is the only failure code visible here --
+		// substitute a more specific one if the project defines it.
+		return errWrongType;
+	}
+
+	// `what` is only meaningful after a successful search.
+	size_t num_groups = matched ? what.size() : 0;
+	PyrObject *result_array = newPyrArray(g->gc, num_groups, 0, true);
+	result_array->size = 0;
+
+	for (size_t i = 0; i < num_groups; ++i)
+	{
+		// Check group i itself: an optional group can be unmatched even
+		// though the overall expression matched.
+		if (what[i].matched)
+		{
+			int match_start = what[i].first - start;
+			int match_length = what[i].second - what[i].first;
+			PyrObject *array = newPyrArray(g->gc, 2, 0, true);
+			array->size = 2;
+			SetInt(array->slots, match_start);
+			SetInt(array->slots + 1, match_length);
+			SetObject(result_array->slots + i, array);
+		}
+		else
+		{
+			SetNil(result_array->slots + i);
+		}
+		result_array->size++; // count slot i only once it has been filled
+	}
+
+	// NOTE(review): no GC write barrier is issued for the stores above --
+	// confirm against the interpreter's GC rules.
+	SetObject(a, result_array);
+	return errNone;
+}
+
+
 int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
 {
 	int err, start, end;
@@ -622,11 +720,12 @@
 	definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);	
 	definePrimitive(base, index++, "_String_AsCompileString", prString_AsCompileString, 1, 0);	
 	definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
-    definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
-    definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
+	definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
+	definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
 	definePrimitive(base, index++, "_String_FindBackwards", prString_FindBackwards, 4, 0);
-    definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
+	definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
 	definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
+	definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3, 0);
 	definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
 	definePrimitive(base, index++, "_String_GetResourceDirPath", prString_GetResourceDirPath, 1, 0);
 	definePrimitive(base, index++, "_String_StandardizePath", prString_StandardizePath, 1, 0);	
Index: SConstruct
===================================================================
--- SConstruct	(revision 6504)
+++ SConstruct	(working copy)
@@ -790,7 +790,7 @@
                '#Headers/server',
                '#Source/lang/LangSource/Bison'],
     CPPDEFINES = [['USE_SC_TERMINAL_CLIENT', env['TERMINAL_CLIENT']]],
-    LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm'],
+    LIBS = ['common', 'scsynth', 'pthread', 'dl', 'm', 'boost_regex'],
     LIBPATH = 'build'
     )
 if PLATFORM == 'darwin':



-- 
Palimm Palimm!
http://tapas.affenbande.org
-------------- next part --------------
A non-text attachment was scrubbed...
Name: sc-regexp-patch.diff
Type: text/x-diff
Size: 5173 bytes
Desc: not available
Url : http://www.create.ucsb.edu/pipermail/sc-devel/attachments/20071103/aea1c7e9/sc-regexp-patch.bin


More information about the Sc-devel mailing list