1 /**
2  * Contains the obsolete pattern matching functions from Phobos'
3  * `std.string`.
4  */
5 module undead..string;
6 
7 import std.traits;
8 
9 /***********************************************
10  * See if character c is in the pattern.
11  * Patterns:
12  *
13  *  A $(I pattern) is an array of characters much like a $(I character
14  *  class) in regular expressions. A sequence of characters
15  *  can be given, such as "abcde". The '-' can represent a range
16  *  of characters, as "a-e" represents the same pattern as "abcde".
17  *  "a-fA-F0-9" represents all the hex characters.
18  *  If the first character of a pattern is '^', then the pattern
19  *  is negated, i.e. "^0-9" means any character except a digit.
20  *  The functions inPattern, $(B countchars), $(B removeschars),
21  *  and $(B squeeze) use patterns.
22  *
23  * Note: In the future, the pattern syntax may be improved
24  *  to be more like regular expression character classes.
25  */
26 bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc
27 if (isSomeString!S)
28 {
29     bool result = false;
30     int range = 0;
31     dchar lastc;
32 
33     foreach (size_t i, dchar p; pattern)
34     {
35         if (p == '^' && i == 0)
36         {
37             result = true;
38             if (i + 1 == pattern.length)
39                 return (c == p);    // or should this be an error?
40         }
41         else if (range)
42         {
43             range = 0;
44             if (lastc <= c && c <= p || c == p)
45                 return !result;
46         }
47         else if (p == '-' && i > result && i + 1 < pattern.length)
48         {
49             range = 1;
50             continue;
51         }
52         else if (c == p)
53             return !result;
54         lastc = p;
55     }
56     return result;
57 }
58 
59 
60 @safe pure @nogc unittest
61 {
62     assertCTFEable!(
63     {
64     assert(inPattern('x', "x") == 1);
65     assert(inPattern('x', "y") == 0);
66     assert(inPattern('x', string.init) == 0);
67     assert(inPattern('x', "^y") == 1);
68     assert(inPattern('x', "yxxy") == 1);
69     assert(inPattern('x', "^yxxy") == 0);
70     assert(inPattern('x', "^abcd") == 1);
71     assert(inPattern('^', "^^") == 0);
72     assert(inPattern('^', "^") == 1);
73     assert(inPattern('^', "a^") == 1);
74     assert(inPattern('x', "a-z") == 1);
75     assert(inPattern('x', "A-Z") == 0);
76     assert(inPattern('x', "^a-z") == 0);
77     assert(inPattern('x', "^A-Z") == 1);
78     assert(inPattern('-', "a-") == 1);
79     assert(inPattern('-', "^A-") == 0);
80     assert(inPattern('a', "z-a") == 1);
81     assert(inPattern('z', "z-a") == 1);
82     assert(inPattern('x', "z-a") == 0);
83     });
84 }
85 
86 
87 /**
88  * See if character c is in the intersection of the patterns.
89  */
90 bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc
91 if (isSomeString!S)
92 {
93     foreach (string pattern; patterns)
94     {
95         if (!inPattern(c, pattern))
96         {
97             return false;
98         }
99     }
100     return true;
101 }
102 
103 
104 /**
105  * Count characters in s that match pattern.
106  */
107 size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc
108 if (isSomeString!S && isSomeString!S1)
109 {
110     size_t count;
111     foreach (dchar c; s)
112     {
113         count += inPattern(c, pattern);
114     }
115     return count;
116 }
117 
118 @safe pure @nogc unittest
119 {
120     assertCTFEable!(
121     {
122     assert(countchars("abc", "a-c") == 3);
123     assert(countchars("hello world", "or") == 3);
124     });
125 }
126 
127 
128 /**
129  * Return string that is s with all characters removed that match pattern.
130  */
131 S removechars(S)(S s, in S pattern) @safe pure
132 if (isSomeString!S)
133 {
134     import std.utf : encode;
135 
136     Unqual!(typeof(s[0]))[] r;
137     bool changed = false;
138 
139     foreach (size_t i, dchar c; s)
140     {
141         if (inPattern(c, pattern))
142         {
143             if (!changed)
144             {
145                 changed = true;
146                 r = s[0 .. i].dup;
147             }
148             continue;
149         }
150         if (changed)
151         {
152             encode(r, c);
153         }
154     }
155     if (changed)
156         return r;
157     else
158         return s;
159 }
160 
161 @safe pure unittest
162 {
163     assertCTFEable!(
164     {
165     assert(removechars("abc", "a-c").length == 0);
166     assert(removechars("hello world", "or") == "hell wld");
167     assert(removechars("hello world", "d") == "hello worl");
168     assert(removechars("hah", "h") == "a");
169     });
170 }
171 
172 @safe pure unittest
173 {
174     assert(removechars("abc", "x") == "abc");
175 }
176 
177 
178 /***************************************************
179  * Return string where sequences of a character in s[] from pattern[]
180  * are replaced with a single instance of that character.
181  * If pattern is null, it defaults to all characters.
182  */
183 S squeeze(S)(S s, in S pattern = null)
184 {
185     import std.utf : encode, stride;
186 
187     Unqual!(typeof(s[0]))[] r;
188     dchar lastc;
189     size_t lasti;
190     int run;
191     bool changed;
192 
193     foreach (size_t i, dchar c; s)
194     {
195         if (run && lastc == c)
196         {
197             changed = true;
198         }
199         else if (pattern is null || inPattern(c, pattern))
200         {
201             run = 1;
202             if (changed)
203             {
204                 if (r is null)
205                     r = s[0 .. lasti].dup;
206                 encode(r, c);
207             }
208             else
209                 lasti = i + stride(s, i);
210             lastc = c;
211         }
212         else
213         {
214             run = 0;
215             if (changed)
216             {
217                 if (r is null)
218                     r = s[0 .. lasti].dup;
219                 encode(r, c);
220             }
221         }
222     }
223     return changed ? ((r is null) ? s[0 .. lasti] : cast(S) r) : s;
224 }
225 
226 @system pure unittest
227 {
228     assertCTFEable!(
229     {
230     string s;
231 
232     assert(squeeze("hello") == "helo");
233 
234     s = "abcd";
235     assert(squeeze(s) is s);
236     s = "xyzz";
237     assert(squeeze(s).ptr == s.ptr); // should just be a slice
238 
239     assert(squeeze("hello goodbyee", "oe") == "hello godbye");
240     });
241 }
242 
243 /***************************************************************
244  Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
245  s) that does not match $(D_PARAM pattern) (in the terminology used by
246  $(REF inPattern, std,string)). Updates $(D_PARAM s =
247  s[pos..$]). Returns the slice from the beginning of the original
248  (before update) string up to, and excluding, $(D_PARAM pos).
249 
250 The $(D_PARAM munch) function is mostly convenient for skipping
251 certain category of characters (e.g. whitespace) when parsing
252 strings. (In such cases, the return value is not used.)
253  */
254 S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc
255 {
256     size_t j = s.length;
257     foreach (i, dchar c; s)
258     {
259         if (!inPattern(c, pattern))
260         {
261             j = i;
262             break;
263         }
264     }
265     scope(exit) s = s[j .. $];
266     return s[0 .. j];
267 }
268 
269 ///
270 @safe pure @nogc unittest
271 {
272     string s = "123abc";
273     string t = munch(s, "0123456789");
274     assert(t == "123" && s == "abc");
275     t = munch(s, "0123456789");
276     assert(t == "" && s == "abc");
277 }
278 
279 @safe pure @nogc unittest
280 {
281     string s = "123€abc";
282     string t = munch(s, "0123456789");
283     assert(t == "123" && s == "€abc");
284     t = munch(s, "0123456789");
285     assert(t == "" && s == "€abc");
286     t = munch(s, "£$€¥");
287     assert(t == "€" && s == "abc");
288 }
289 
290 // helper function for unit tests
291 private @property void assertCTFEable(alias dg)()
292 {
293     static assert({ cast(void) dg(); return true; }());
294     cast(void) dg();
295 }