/**
 * Contains the obsolete pattern matching functions from Phobos'
 * `std.string`.
 */
module undead.string;

import std.traits;

/***********************************************
 * See if character c is in the pattern.
 * Patterns:
 *
 *  A $(I pattern) is an array of characters much like a $(I character
 *  class) in regular expressions. A sequence of characters
 *  can be given, such as "abcde". The '-' can represent a range
 *  of characters, as "a-e" represents the same pattern as "abcde".
 *  "a-fA-F0-9" represents all the hex characters.
 *  If the first character of a pattern is '^', then the pattern
 *  is negated, i.e. "^0-9" means any character except a digit.
 *  The functions inPattern, $(B countchars), $(B removechars),
 *  and $(B squeeze) use patterns.
 *
 *  A '-' is only treated as a range operator when it is neither the
 *  first character of the pattern (not counting a leading '^') nor the
 *  last one; otherwise it is matched literally. A reversed range such
 *  as "z-a" only matches its two end points.
 *
 * Note: In the future, the pattern syntax may be improved
 *  to be more like regular expression character classes.
 *
 * Params:
 *  c = the character to test
 *  pattern = the pattern to test `c` against
 *
 * Returns:
 *  `true` if `c` matches `pattern`, `false` otherwise. A pattern that
 *  consists of a lone "^" matches only the character '^'.
 */
bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc
if (isSomeString!S)
{
    // `result` is the value returned when nothing in the pattern matches c:
    // false for a plain pattern, true for a negated ('^'-prefixed) one.
    bool result = false;
    int range = 0;      // non-zero while the previous pattern character was a range '-'
    dchar lastc;        // last literal character seen; lower bound of a pending range

    foreach (size_t i, dchar p; pattern)
    {
        if (p == '^' && i == 0)
        {
            result = true;
            if (i + 1 == pattern.length)
                return (c == p); // or should this be an error?
        }
        else if (range)
        {
            // p is the upper bound of the range opened by the preceding '-'.
            range = 0;
            if (lastc <= c && c <= p || c == p)
                return !result;
        }
        else if (p == '-' && i > result && i + 1 < pattern.length)
        {
            // `i > result` skips a '-' that directly follows the leading '^'
            // (or starts the pattern); `i + 1 < length` skips a trailing '-'.
            range = 1;
            continue;   // keep lastc pointing at the range's lower bound
        }
        else if (c == p)
            return !result;
        lastc = p;
    }
    return result;
}


@safe pure @nogc unittest
{
    assertCTFEable!(
    {
    assert(inPattern('x', "x") == 1);
    assert(inPattern('x', "y") == 0);
    assert(inPattern('x', string.init) == 0);
    assert(inPattern('x', "^y") == 1);
    assert(inPattern('x', "yxxy") == 1);
    assert(inPattern('x', "^yxxy") == 0);
    assert(inPattern('x', "^abcd") == 1);
    assert(inPattern('^', "^^") == 0);
    assert(inPattern('^', "^") == 1);
    assert(inPattern('^', "a^") == 1);
    assert(inPattern('x', "a-z") == 1);
    assert(inPattern('x', "A-Z") == 0);
    assert(inPattern('x', "^a-z") == 0);
    assert(inPattern('x', "^A-Z") == 1);
    assert(inPattern('-', "a-") == 1);
    assert(inPattern('-', "^A-") == 0);
    assert(inPattern('a', "z-a") == 1);
    assert(inPattern('z', "z-a") == 1);
    assert(inPattern('x', "z-a") == 0);
    });
}


/**
 * See if character c is in the intersection of the patterns.
 *
 * Params:
 *  c = the character to test
 *  patterns = the patterns that `c` must all match
 *
 * Returns:
 *  `true` if `c` matches every pattern in `patterns` (which is trivially
 *  the case when `patterns` is empty), `false` as soon as one pattern
 *  does not match.
 */
bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc
if (isSomeString!S)
{
    // The element type is inferred rather than spelled out as `string`, so
    // that every string type accepted by the constraint (wstring, dstring,
    // char[], ...) can actually be instantiated.
    foreach (pattern; patterns)
    {
        if (!inPattern(c, pattern))
        {
            return false;
        }
    }
    return true;
}

@safe pure unittest
{
    assert(inPattern('x', ["a-z", "^0-9"]));
    assert(!inPattern('5', ["a-z0-9", "^0-9"]));
    assert(inPattern('x', ["a-z"w, "^0-9"w]));
    assert(!inPattern('x', ["a-z"d, "^x"d]));
}


/**
 * Count characters in s that match pattern.
 *
 * Params:
 *  s = the string to scan
 *  pattern = the pattern each character of `s` is tested against
 *
 * Returns:
 *  The number of characters (decoded as `dchar`) in `s` for which
 *  `inPattern` returns `true`.
 */
size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc
if (isSomeString!S && isSomeString!S1)
{
    size_t count;
    foreach (dchar c; s)
    {
        count += inPattern(c, pattern);
    }
    return count;
}

@safe pure @nogc unittest
{
    assertCTFEable!(
    {
    assert(countchars("abc", "a-c") == 3);
    assert(countchars("hello world", "or") == 3);
    });
}


/**
 * Return string that is s with all characters removed that match pattern.
 *
 * Params:
 *  s = the string to filter
 *  pattern = the pattern selecting the characters to remove
 *
 * Returns:
 *  `s` itself if no character matches `pattern`; otherwise a newly
 *  allocated string holding the characters of `s` that do not match.
 */
S removechars(S)(S s, in S pattern) @safe pure
if (isSomeString!S)
{
    import std.utf : encode;

    Unqual!(typeof(s[0]))[] r;
    bool changed = false;

    foreach (size_t i, dchar c; s)
    {
        if (inPattern(c, pattern))
        {
            if (!changed)
            {
                // First removal: copy the untouched prefix once, lazily.
                changed = true;
                r = s[0 .. i].dup;
            }
            continue;
        }
        if (changed)
        {
            encode(r, c);
        }
    }
    if (changed)
        return r;
    else
        return s;
}

@safe pure unittest
{
    assertCTFEable!(
    {
    assert(removechars("abc", "a-c").length == 0);
    assert(removechars("hello world", "or") == "hell wld");
    assert(removechars("hello world", "d") == "hello worl");
    assert(removechars("hah", "h") == "a");
    });
}

@safe pure unittest
{
    assert(removechars("abc", "x") == "abc");
}


/***************************************************
 * Return string where sequences of a character in s[] from pattern[]
 * are replaced with a single instance of that character.
 * If pattern is null, it defaults to all characters.
 *
 * Params:
 *  s = the string to squeeze
 *  pattern = the pattern selecting which characters are squeezed
 *
 * Returns:
 *  `s` itself if nothing was squeezed, a slice of `s` if only the tail
 *  of `s` had to be dropped, and a newly allocated string otherwise.
 */
S squeeze(S)(S s, in S pattern = null)
{
    import std.utf : encode, stride;

    Unqual!(typeof(s[0]))[] r;  // output buffer, allocated on demand
    dchar lastc;                // character of the current run
    size_t lasti;               // end (in code units) of the prefix of s kept verbatim
    int run;                    // non-zero while inside a run of a pattern character
    bool changed;               // set once at least one character has been dropped

    foreach (size_t i, dchar c; s)
    {
        if (run && lastc == c)
        {
            // Repeated character of the current run: drop it.
            changed = true;
        }
        else if (pattern is null || inPattern(c, pattern))
        {
            run = 1;
            if (changed)
            {
                if (r is null)
                    r = s[0 .. lasti].dup;
                encode(r, c);
            }
            else
                lasti = i + stride(s, i);
            lastc = c;
        }
        else
        {
            run = 0;
            if (changed)
            {
                if (r is null)
                    r = s[0 .. lasti].dup;
                encode(r, c);
            }
        }
    }
    return changed ? ((r is null) ? s[0 .. lasti] : cast(S) r) : s;
}

@system pure unittest
{
    assertCTFEable!(
    {
    string s;

    assert(squeeze("hello") == "helo");

    s = "abcd";
    assert(squeeze(s) is s);
    s = "xyzz";
    assert(squeeze(s).ptr == s.ptr); // should just be a slice

    assert(squeeze("hello goodbyee", "oe") == "hello godbye");
    });
}

/***************************************************************
 Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
 s) that does not match $(D_PARAM pattern) (in the terminology used by
 $(REF inPattern, std,string)). Updates $(D_PARAM s =
 s[pos..$]). Returns the slice from the beginning of the original
 (before update) string up to, and excluding, $(D_PARAM pos).

 The $(D_PARAM munch) function is mostly convenient for skipping
 certain categories of characters (e.g. whitespace) when parsing
 strings. (In such cases, the return value is not used.)
 */
S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc
{
    // Defaults to the full length, i.e. every character matched.
    size_t j = s.length;
    foreach (i, dchar c; s)
    {
        if (!inPattern(c, pattern))
        {
            j = i;
            break;
        }
    }
    // The return expression is evaluated first; s is advanced on scope exit.
    scope(exit) s = s[j .. $];
    return s[0 .. j];
}

///
@safe pure @nogc unittest
{
    string s = "123abc";
    string t = munch(s, "0123456789");
    assert(t == "123" && s == "abc");
    t = munch(s, "0123456789");
    assert(t == "" && s == "abc");
}

@safe pure @nogc unittest
{
    string s = "123€abc";
    string t = munch(s, "0123456789");
    assert(t == "123" && s == "€abc");
    t = munch(s, "0123456789");
    assert(t == "" && s == "€abc");
    t = munch(s, "£$€¥");
    assert(t == "€" && s == "abc");
}

// helper function for unit tests: runs dg both at compile time (CTFE)
// and at run time.
private @property void assertCTFEable(alias dg)()
{
    static assert({ cast(void) dg(); return true; }());
    cast(void) dg();
}