more.format source code

1 module more.format;
2 
/**
Selects between lower-case and upper-case output for formatters whose result
contains letters, such as hexadecimal digits.
*/
enum Case
{
    lower,
    upper,
}
10 
/**
An alias to the common sink delegate used for string formatting.

A sink is called with pieces of text as `const(char)[]` slices; the sink-based
`toString` methods in this module take a delegate of this shape.
*/
alias StringSink = void delegate(const(char)[]);
15 
/**
Wraps a delegate so that the delegate can be used as a formattable value.

The wrapper exposes a sink-based `toString` that simply forwards the sink to
the stored delegate. A common use case is offering several different ways to
format the same class.

Example:
---
import std.stdio : writefln;

class Foo
{
    DelegateFormatter formatPretty() { return DelegateFormatter(&prettyFormatter); }
    private void prettyFormatter(StringSink sink)
    {
        sink("the pretty format");
    }

    DelegateFormatter formatUgly() { return DelegateFormatter(&uglyFormatter); }
    private void uglyFormatter(StringSink sink)
    {
        sink("the ugly format");
    }
}
// A class reference must be initialized before its methods can be called.
auto foo = new Foo();
writefln("foo pretty = %s", foo.formatPretty());
writefln("foo ugly   = %s", foo.formatUgly());
---

*/
struct DelegateFormatter
{
    /// The delegate that writes the formatted text into the sink it is given.
    void delegate(StringSink) formatter;

    /// Produces the formatted text by handing `sink` to `formatter`.
    void toString(StringSink sink) const
    {
        this.formatter(sink);
    }
}
50 
/**
Appends a formatted string to a character output range.

Params:
    outputRange = the output range that receives the formatted characters
    fmt         = a `std.format` format string
    args        = the values referenced by `fmt`
*/
void putf(R, U...)(auto ref R outputRange, string fmt, U args)
{
    import std.format : formattedWrite;
    // Pass the range itself instead of the address of `put!(const(char)[])`:
    // taking that address only compiles when `put` is a template member, which
    // rejects ranges with a plain `put` method as well as delegate sinks.
    formattedWrite(outputRange, fmt, args);
}
59 
/**
Converts a 4-bit nibble to the corresponding hex character.

Params:
    case_ = selects the letters used for the values 10 through 15:
            `a`-`f` for `Case.lower` (the default), `A`-`F` for `Case.upper`
    b     = the nibble to convert; the `in` contract requires `b <= 0x0F`

Returns: `'0'`-`'9'` for the values 0 through 9, otherwise a letter in the
selected case.
*/
char toHex(Case case_ = Case.lower)(ubyte b) in { assert(b <= 0x0F); } body
{
    /*
    NOTE: another implementation could be to use a hex table such as:
       return "0123456789ABCDEF"[b];
    However, the original author's reasoning is that the table lookup might be
    slightly worse, since it would require the string table to be loaded into
    the processor cache, whereas the arithmetic form may take more
    instructions but keeps all of the code in one place, which helps cache
    locality.

    On processors without cache (such as the 6502), the table lookup approach
    would likely be faster.
    */
    enum char firstLetter = (case_ == Case.lower) ? 'a' : 'A';
    if(b <= 9)
    {
        return cast(char)('0' + b);
    }
    return cast(char)(firstLetter + (b - 10));
}
unittest
{
    // default case is lower
    assert(toHex(0x0) == '0');
    assert(toHex(0x9) == '9');
    assert(toHex(0xA) == 'a');
    assert(toHex(0xF) == 'f');
    // explicit upper case
    assert(toHex!(Case.upper)(0xA) == 'A');
    assert(toHex!(Case.upper)(0xF) == 'F');
}
/// `toHex` fixed to lower-case letters.
alias toHexLower = toHex!(Case.lower);
/// `toHex` fixed to upper-case letters.
alias toHexUpper = toHex!(Case.upper);
96 
/**
Returns: `true` when `c` lies outside the printable ASCII span `' '` .. `'~'`,
i.e. it is below `' '` or above `'~'`; `false` otherwise.
*/
bool asciiIsUnreadable(char c) pure nothrow @nogc @safe
{
    // A char cannot exceed 0xFF, so "not printable" needs no upper bound check.
    const bool printable = (c >= ' ') && (c <= '~');
    return !printable;
}
/**
Writes the escaped form of an unreadable character to `sink`.

Carriage return, tab, newline and NUL are written as `\r`, `\t`, `\n` and
`\0`. Every other character is written as `\x` followed by two upper-case hex
digits.

Params:
    sink = receives the escape sequence
    c    = the character to escape; the `in` contract requires
           `asciiIsUnreadable(c)`
*/
void asciiWriteUnreadable(scope void delegate(const(char)[]) sink, char c)
    in { assert(asciiIsUnreadable(c)); } body
{
    switch(c)
    {
        case '\r': sink("\\r"); return;
        case '\t': sink("\\t"); return;
        case '\n': sink("\\n"); return;
        case '\0': sink("\\0"); return;
        default: break;
    }

    // No short form: fall back to \xHH.
    char[4] escape;
    escape[0] = '\\';
    escape[1] = 'x';
    escape[2] = toHexUpper(c >> 4);
    escape[3] = toHexUpper(c & 0xF);
    sink(escape[]);
}
/**
Writes the characters in `[ptr, limit)` to `sink`, replacing each character
for which `asciiIsUnreadable` is true with the output of
`asciiWriteUnreadable`.

Consecutive readable characters are passed to `sink` as one slice.

Params:
    sink  = receives the readable runs and the escape sequences
    ptr   = first character to write
    limit = one past the last character to write
*/
void asciiWriteEscaped(scope void delegate(const(char)[]) sink, const(char)* ptr, const char* limit)
{
    // Start of the readable run that has not been handed to the sink yet.
    auto pending = ptr;

    while(ptr < limit)
    {
        const c = *ptr;
        if(asciiIsUnreadable(c))
        {
            if(ptr > pending)
            {
                sink(pending[0..ptr-pending]);
            }
            asciiWriteUnreadable(sink, c);
            // The next readable run starts right after the escaped character.
            pending = ptr + 1;
        }
        ptr++;
    }

    if(ptr > pending)
    {
        sink(pending[0..ptr-pending]);
    }
}
/**
Returns a value whose sink-based `toString` writes `str` through
`asciiWriteEscaped`.

The returned value stores pointers into `str`; it does not copy the text.
*/
auto asciiFormatEscaped(const(char)[] str)
{
    static struct Formatter
    {
        const(char)* str;   // first character
        const(char)* limit; // one past the last character
        void toString(scope void delegate(const(char)[]) sink) const
        {
            asciiWriteEscaped(sink, str, limit);
        }
    }
    const(char)* start = str.ptr;
    return Formatter(start, start + str.length);
}
155 
/**
Returns: `true` when `c` is an ASCII control character (below `' '`, or DEL
0x7F), `false` when it is a printable ASCII character.

Code points above 0x7F are not supported yet and trigger `assert(0)`.
*/
bool utf8IsUnreadable(dchar c) pure nothrow @nogc @safe
{
    // DEL is an ASCII control character; treat it like the other control
    // characters (as asciiIsUnreadable does) instead of falling into the
    // "not implemented" assert below.
    if(c < ' ' || c == 0x7F) return true; // unreadable
    if(c < 0x7F) return false; // readable
    assert(0, "utf8IsUnreadable not fully implemented");
}
/**
Writes the escaped form of an unreadable code point to `sink`.

Carriage return, tab, newline and NUL are written as `\r`, `\t`, `\n` and
`\0`. Other code points below 0xFF are written as `\x` followed by two
upper-case hex digits. Code points of 0xFF and above are not implemented and
trigger `assert(0)`.

Params:
    sink = receives the escape sequence
    c    = the code point to escape; the `in` contract requires
           `utf8IsUnreadable(c)`
*/
void utf8WriteUnreadable(scope void delegate(const(char)[]) sink, dchar c)
    in { assert(utf8IsUnreadable(c)); } body
{
    switch(c)
    {
        case '\r': sink("\\r"); return;
        case '\t': sink("\\t"); return;
        case '\n': sink("\\n"); return;
        case '\0': sink("\\0"); return;
        default: break;
    }

    if(c >= 0xFF)
    {
        assert(0, "not implemented");
    }

    // The value fits in one byte here, so two hex digits are enough.
    const char narrow = cast(char)c;
    char[4] escape;
    escape[0] = '\\';
    escape[1] = 'x';
    escape[2] = toHexUpper(narrow >> 4);
    escape[3] = toHexUpper(narrow & 0xF);
    sink(escape[]);
}
/**
Writes the UTF-8 text in `[ptr, limit)` to `sink`, replacing each code point
for which `utf8IsUnreadable` is true with the output of
`utf8WriteUnreadable`.

Consecutive readable code points are passed to `sink` as one slice.

Params:
    sink  = receives the readable runs and the escape sequences
    ptr   = first byte of the text
    limit = one past the last byte of the text
*/
void utf8WriteEscaped(scope void delegate(const(char)[]) sink, const(char)* ptr, const char* limit)
{
    import more.utf8 : decodeUtf8;

    // Start of the readable run that has not been handed to the sink yet.
    auto flushPtr = ptr;

    void flush()
    {
        if(ptr > flushPtr)
        {
            sink(flushPtr[0..ptr-flushPtr]);
            flushPtr = ptr;
        }
    }

    while(ptr < limit)
    {
        // NOTE(review): decodeUtf8 is assumed to advance nextPtr past the
        // decoded sequence -- confirm against more.utf8.
        const(char)* nextPtr = ptr;
        auto c = decodeUtf8(&nextPtr);
        if(utf8IsUnreadable(c))
        {
            flush();
            sink.utf8WriteUnreadable(c);
            // Skip the raw bytes of the escaped code point; otherwise the next
            // flush would write them again after the escape sequence.
            flushPtr = nextPtr;
        }
        ptr = nextPtr;
    }
    flush();
}
/**
Returns a value whose sink-based `toString` writes `str` through
`utf8WriteEscaped`.

The returned value stores pointers into `str`; it does not copy the text.
*/
auto utf8FormatEscaped(const(char)[] str)
{
    static struct Formatter
    {
        const(char)* str;   // first byte
        const(char)* limit; // one past the last byte
        void toString(scope void delegate(const(char)[]) sink) const
        {
            utf8WriteEscaped(sink, str, limit);
        }
    }
    const(char)* start = str.ptr;
    return Formatter(start, start + str.length);
}
/**
Returns a value whose sink-based `toString` writes the single code point `c`
through `utf8WriteEscaped`.

The code point is encoded with `encodeUtf8` into a buffer stored inside the
returned value.
*/
auto utf8FormatEscaped(dchar c)
{
    static struct Formatter
    {
        char[4] buffer; // encoded form of the code point
        ubyte size;     // value returned by encodeUtf8; used as the byte count
        this(dchar c)
        {
            import more.utf8 : encodeUtf8;
            size = encodeUtf8(buffer.ptr, c);
        }
        void toString(scope void delegate(const(char)[]) sink) const
        {
            const(char)* start = buffer.ptr;
            utf8WriteEscaped(sink, start, start + size);
        }
    }
    return Formatter(c);
}
241 
/**
Returns a value whose sink-based `toString` writes every element of `array`
as two hex digits, high nibble first.

Params:
    case_ = letter case of the hex digits (lower case by default)
    array = the elements to format; the element type must be one byte in size
*/
auto formatHex(Case case_ = Case.lower, T)(const(T)[] array) if(T.sizeof == 1)
{
    // `static` keeps the struct from becoming a nested struct with a hidden
    // context field for this function's frame, which it never uses.
    static struct Formatter
    {
        const(T)[] array;
        void toString(scope void delegate(const(char)[]) sink) const
        {
            char[2] chars;
            foreach(value; array)
            {
                // View the element as an unsigned byte so that both nibbles
                // are in the range 0 .. 15.
                const ubyte bits = cast(ubyte)value;
                chars[0] = toHex!case_(bits >> 4);
                chars[1] = toHex!case_(bits & 0xF);
                sink(chars[]);
            }
        }
    }
    return Formatter(array);
}
260 
/**
Policy-based escape formatter.

Returns a value whose sink-based `toString` writes `str`, replacing every
character that the policy `Hooks` wants escaped with the escape sequence the
policy produces.

`Hooks` must provide:
$(UL
  $(LI `escapeBufferLength`: compile-time length of the escape buffer)
  $(LI `initEscapeBuffer(char*)`: called once with the buffer before any
       character is checked)
  $(LI `escapeCheck(char*, char)`: called for every character; returns 0 when
       the character needs no escaping, otherwise the number of buffer
       characters to write in its place)
)
*/
auto formatEscapeByPolicy(Hooks)(const(char)[] str)
{
    // `static` keeps the struct from becoming a nested struct with a hidden
    // context field for this function's frame, which it never uses.
    static struct Formatter
    {
        const(char)[] str;
        void toString(scope void delegate(const(char)[]) sink) const
        {
            // size_t indices: int indices would overflow on inputs longer
            // than int.max and silently truncate the output.
            size_t from = 0;
            size_t to = 0;
            char[Hooks.escapeBufferLength] buffer;
            Hooks.initEscapeBuffer(buffer.ptr);

            for(; to < str.length; to++)
            {
                auto escapeLength = Hooks.escapeCheck(buffer.ptr, str[to]);
                if(escapeLength > 0)
                {
                    // Write the unescaped run that precedes this character.
                    if(to > from)
                    {
                        sink(str[from..to]);
                    }
                    sink(buffer[0..escapeLength]);
                    from = to + 1;
                }
            }
            // Write whatever follows the last escaped character.
            if(to > from)
            {
                sink(str[from..to]);
            }
        }
    }
    return Formatter(str);
}
/**
Returns a value whose sink-based `toString` writes `str` with `escapePrefix`
inserted in front of every character that occurs in `escapeSet`.

Params:
    escapePrefix = text written before each escaped character
    escapeSet    = the characters that must be escaped
    str          = the text to format
*/
auto formatEscapeSet(string escapePrefix, string escapeSet)(const(char)[] str)
{
    // Policy for formatEscapeByPolicy: the escape buffer always starts with
    // the prefix; only its last slot changes from one character to the next.
    static struct Hooks
    {
        enum escapeBufferLength = escapePrefix.length + 1;
        static void initEscapeBuffer(char* escapeBuffer) pure
        {
            escapeBuffer[0..escapePrefix.length] = escapePrefix[];
        }
        static auto escapeCheck(char* escapeBuffer, char charToCheck) pure
        {
            enum escapedLength = escapePrefix.length + 1;
            foreach(candidate; escapeSet)
            {
                if(candidate != charToCheck)
                {
                    continue;
                }
                escapeBuffer[escapePrefix.length] = charToCheck;
                return escapedLength;
            }
            return 0; // char should not be escaped
        }
    }
    return formatEscapeByPolicy!Hooks(str);
}
unittest
{
    import more.test;
    mixin(scopedTest!"format");

    import std.format : format;

    // Escape backslash and single quote with a backslash prefix.
    alias escape = formatEscapeSet!(`\`, `\'`);

    // Each entry is [input, expected output].
    static immutable string[2][] cases = [
        [``, ``],
        [`a`, `a`],
        [`abcd`, `abcd`],

        [`'`, `\'`],
        [`\`, `\\`],
        [`'\`, `\'\\`],
        [`a'\`, `a\'\\`],
        [`'a\`, `\'a\\`],
        [`'\a`, `\'\\a`],
        [`abcd'\`, `abcd\'\\`],
        [`'abcd\`, `\'abcd\\`],
        [`'\abcd`, `\'\\abcd`],
    ];
    foreach(testCase; cases)
    {
        assert(testCase[1] == format("%s", escape(testCase[0])));
    }
}
337     assert(`\'\\abcd` == format("%s", formatEscapeSet!(`\`, `\'`)(`'\abcd`)));
338 }