[Sc-devel] regexp support revisited :)
Jan Trutzschler
sc3 at sampleAndHold.org
Wed Nov 21 13:43:01 PST 2007
ported a version to icu, so it would go nicely along with the
advanced find ...
diff not attached, since i had to move it into another file, because
of tricky linking issues.
code is more or less taken from Flo, only replaced all the regex calls.
On Nov 21, 2007, at 1:10 AM, Florian Schmidt wrote:
> On Tuesday 20 November 2007, Dan Stowell wrote:
>> Florian,
>>
>> I'm just having a look at this code. The code looks fine, but I
>> wonder: why did you use boost regex rather than gnu regex? Gnu regex
>
> Simply because of the ease of using it.. And because i thought
> boost_regex
> wasn't such a bad dependency because it's all standard c++ and thus
> pretty
> much perfectly portable.. Plus boost regex has some more features which
> weren't yet exposed..
>
>> comes bundled with the Mac OSX dev libs (because it's bundled with
>> gcc
>> I think) so would be a really easy dependency, while boost would be
>> adding a proper extra dependency. I'm not very familiar with these
>> libs so maybe the answer is obvious. I'd prefer not to add
>> dependencies to SC without good reason though.
>
> I can understand that.. Here's a glibc version. This is largely
> untested
> because it's a ten minute hack ;) You guys probably want to take a
> look at
> the regexp manpage and think about exposing some of the more advanced
> options.. This code has REG_EXTENDED turned on for now.. I also
> feel a bit
> uneasy about the macro i define [and later undefine].. Changes anyone?
>
> Flo
>
> Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
> ===================================================================
> --- Source/lang/LangPrimSource/PyrStringPrim.cpp (revision 6504)
> +++ Source/lang/LangPrimSource/PyrStringPrim.cpp (working copy)
> @@ -40,6 +40,8 @@
> # include <regex.h>
> #endif
>
> +#include <string>
> +
> int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
> int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
> {
> @@ -178,6 +180,102 @@
> return(0);
> }
>
> +int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
> +{
> + int err;
> +
> + PyrSlot *a = g->sp - 2; // source string
> + PyrSlot *b = g->sp - 1; // pattern
> + PyrSlot *c = g->sp; // offset
> +
> + // std::cout << " num of args: " << g->numpop << std::endl;
> +
> + if (!isKindOfSlot(b, class_string) || (c->utag != tagInt)) return errWrongType;
> +
> + int offset = c->ui;
> +
> + char *string = (char*)malloc(a->uo->size + 1);
> + err = slotStrVal(a, string, a->uo->size + 1);
> + if (err) return err;
> +
> + if (offset > strlen(string))
> + {
> + free(string);
> + SetNil(a);
> + return errNone;
> + }
> +
> + char *pattern = (char*)malloc(b->uo->size + 1);
> + err = slotStrVal(b, pattern, b->uo->size + 1);
> + if (err) return err;
> +
> + regex_t compiled_pattern;
> +
> + /* Need different options, see man regcomp ;) */
> + if (regcomp(&compiled_pattern, pattern, REG_EXTENDED) != 0)
> + {
> + free(string);
> + free(pattern);
> + SetNil(a);
> + return errNone;
> + }
> +
> + // TODO: fix arbitrary limit here..
> + #define MAX_NUM_OF_MATCHES 100
> + regmatch_t matches[MAX_NUM_OF_MATCHES];
> +
> + /* want more options, see man regexec */
> + if (regexec(&compiled_pattern, string + offset, MAX_NUM_OF_MATCHES, matches, 0) != 0)
> + {
> + free(string);
> + free(pattern);
> + SetNil(a);
> + return errNone;
> + }
> +
> + // std::cout << "input string: " << string << std::endl;
> + // std::cout << " pattern: " << pattern << std::endl;
> +
> + // std::cout << " offset: " << offset << std::endl;
> +
> +
> + PyrObject *result_array = newPyrArray(g->gc, MAX_NUM_OF_MATCHES, 0, true);
> + result_array->size = 0;
> +
> + for (size_t i = 0; (matches[i].rm_so != -1) && (i < MAX_NUM_OF_MATCHES); i++)
> + {
> + result_array->size++;
> +
> + int match_start = matches[i].rm_so;
> + int match_length = matches[i].rm_eo - matches[i].rm_so;
> +
> + char *match = (char*)malloc(match_length + 1);
> + strncpy(match, string + offset + match_start, match_length);
> + match[match_length] = 0;
> +
> + PyrObject *array = newPyrArray(g->gc, 2, 0, true);
> + array->size = 2;
> +
> + SetInt(array->slots, match_start + offset);
> +
> + PyrObject *matched_string = (PyrObject*)newPyrString(g->gc, match, 0, true);
> + SetObject(array->slots+1, matched_string);
> + g->gc->GCWrite(matched_string, array->slots + 1);
> +
> + SetObject(result_array->slots + i, array);
> + g->gc->GCWrite(array, result_array->slots + i);
> + }
> +
> + SetObject(a, result_array);
> + g->gc->GCWrite(result_array,a);
> +
> + #undef MAX_NUM_OF_MATCHES
> + free(string);
> + free(pattern);
> +
> + return errNone;
> +}
> +
> int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
> {
> int err, start, end;
> @@ -622,11 +720,12 @@
> definePrimitive(base, index++, "_String_AsFloat",
> prString_AsFloat, 1, 0);
> definePrimitive(base, index++, "_String_AsCompileString",
> prString_AsCompileString, 1, 0);
> definePrimitive(base, index++, "_String_Getenv", prString_Getenv,
> 1, 0);
> - definePrimitive(base, index++, "_String_Setenv",
> prString_Setenv, 2, 0);
> - definePrimitive(base, index++, "_String_Find", prString_Find,
> 4, 0);
> + definePrimitive(base, index++, "_String_Setenv", prString_Setenv,
> 2, 0);
> + definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
> definePrimitive(base, index++, "_String_FindBackwards",
> prString_FindBackwards, 4, 0);
> - definePrimitive(base, index++, "_String_Format",
> prString_Format, 2, 0);
> + definePrimitive(base, index++, "_String_Format", prString_Format,
> 2, 0);
> definePrimitive(base, index++, "_String_Regexp", prString_Regexp,
> 4, 0);
> + definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3, 0);
> definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
> definePrimitive(base, index++, "_String_GetResourceDirPath",
> prString_GetResourceDirPath, 1, 0);
> definePrimitive(base, index++, "_String_StandardizePath",
> prString_StandardizePath, 1, 0);
>
>
> --
> Palimm Palimm!
> http://tapas.affenbande.org
> <sc-regexp-patch_posix.diff>
> _______________________________________________
> Sc-devel mailing list
> Sc-devel at create.ucsb.edu
> http://www.create.ucsb.edu/mailman/listinfo/sc-devel
More information about the Sc-devel
mailing list