001 /*
002 * Copyright 2015-2017 UnboundID Corp.
003 * All Rights Reserved.
004 */
005 /*
006 * Copyright (C) 2015-2017 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021 package com.unboundid.util.json;
022
023
024
025 import com.unboundid.util.ByteStringBuffer;
026 import com.unboundid.util.NotMutable;
027 import com.unboundid.util.StaticUtils;
028 import com.unboundid.util.ThreadSafety;
029 import com.unboundid.util.ThreadSafetyLevel;
030
031
032
033 /**
034 * This class provides an implementation of a JSON value that represents a
035 * string of Unicode characters. The string representation of a JSON string
036 * must start and end with the double quotation mark character, and a Unicode
037 * (preferably UTF-8) representation of the string between the quotes. The
038 * following special characters must be escaped:
039 * <UL>
040 * <LI>
041 * The double quotation mark (Unicode character U+0022) must be escaped as
042 * either {@code \"} or {@code \}{@code u0022}.
043 * </LI>
044 * <LI>
045 * The backslash (Unicode character U+005C) must be escaped as either
046 * {@code \\} or {@code \}{@code u005C}.
047 * </LI>
048 * <LI>
049 * All ASCII control characters (Unicode characters U+0000 through U+001F)
050 * must be escaped. They can all be escaped by prefixing the
051 * four-hexadecimal-digit Unicode character code with {@code \}{@code u},
052 * like {@code \}{@code u0000} to represent the ASCII null character U+0000.
053 * For certain characters, a more user-friendly escape sequence is also
054 * defined:
055 * <UL>
056 * <LI>
057 * The horizontal tab character can be escaped as either {@code \t} or
058 * {@code \}{@code u0009}.
059 * </LI>
060 * <LI>
061 * The newline character can be escaped as either {@code \n} or
062 * {@code \}{@code u000A}.
063 * </LI>
064 * <LI>
065 * The formfeed character can be escaped as either {@code \f} or
066 * {@code \}{@code u000C}.
067 * </LI>
068 * <LI>
069 * The carriage return character can be escaped as either {@code \r} or
070 * {@code \}{@code u000D}.
071 * </LI>
072 * </UL>
073 * </LI>
074 * </UL>
075 * In addition, any other character may optionally be escaped by placing the
076 * {@code \}{@code u} prefix in front of each four-hexadecimal digit sequence in
077 * the UTF-16 representation of that character. For example, the "LATIN SMALL
078 * LETTER N WITH TILDE" character U+00F1 may be escaped as
079 * {@code \}{@code u00F1}, while the "MUSICAL SYMBOL G CLEF" character U+1D11E
080 * may be escaped as {@code \}{@code uD834}{@code \}{@code uDD1E}. And while
081 * the forward slash character is not required to be escaped in JSON strings, it
082 * can be escaped using {@code \/} as a more human-readable alternative to
083 * {@code \}{@code u002F}.
084 * <BR><BR>
085 * The string provided to the {@link #JSONString(String)} constructor should not
086 * have any escaping performed, and the string returned by the
087 * {@link #stringValue()} method will not have any escaping performed. These
088 * methods work with the Java string that is represented by the JSON string.
089 * <BR><BR>
090 * If this JSON string was parsed from the string representation of a JSON
091 * object, then the value returned by the {@link #toString()} method (or
092 * appended to the buffer provided to the {@link #toString(StringBuilder)}
093 * method) will be the string representation used in the JSON object that was
094 * parsed. Otherwise, this class will generate an appropriate string
095 * representation, which will be surrounded by quotation marks and will have the
096 * minimal required encoding applied.
097 * <BR><BR>
098 * The string returned by the {@link #toNormalizedString()} method (or appended
099 * to the buffer provided to the {@link #toNormalizedString(StringBuilder)}
100 * method) will be generated by converting it to lowercase, surrounding it with
101 * quotation marks, and using the {@code \}{@code u}-style escaping for all
102 * characters other than the following (as contained in the LDAP printable
103 * character set defined in <A HREF="http://www.ietf.org/rfc/rfc4517.txt">RFC
104 * 4517</A> section 3.2, and indicated by the
105 * {@link StaticUtils#isPrintable(char)} method):
106 * <UL>
107 * <LI>All uppercase ASCII alphabetic letters (U+0041 through U+005A).</LI>
108 * <LI>All lowercase ASCII alphabetic letters (U+0061 through U+007A).</LI>
109 * <LI>All ASCII numeric digits (U+0030 through U+0039).</LI>
110 * <LI>The ASCII space character U+0020.</LI>
111 * <LI>The ASCII single quote (aka apostrophe) character U+0027.</LI>
112 * <LI>The ASCII left parenthesis character U+0028.</LI>
113 * <LI>The ASCII right parenthesis character U+0029.</LI>
114 * <LI>The ASCII plus sign character U+002B.</LI>
115 * <LI>The ASCII comma character U+002C.</LI>
116 * <LI>The ASCII minus sign (aka hyphen) character U+002D.</LI>
117 * <LI>The ASCII period character U+002E.</LI>
118 * <LI>The ASCII forward slash character U+002F.</LI>
119 * <LI>The ASCII colon character U+003A.</LI>
120 * <LI>The ASCII equals sign character U+003D.</LI>
121 * <LI>The ASCII question mark character U+003F.</LI>
122 * </UL>
123 */
124 @NotMutable()
125 @ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
126 public final class JSONString
127 extends JSONValue
128 {
129 /**
130 * The serial version UID for this serializable class.
131 */
132 private static final long serialVersionUID = -4677194657299153890L;
133
134
135
136 // The JSON-formatted string representation for this JSON string. It will be
137 // surrounded by quotation marks and any necessary escaping will have been
138 // performed.
139 private String jsonStringRepresentation;
140
141 // The string value for this object.
142 private final String value;
143
144
145
146 /**
147 * Creates a new JSON string.
148 *
149 * @param value The string to represent in this JSON value. It must not be
150 * {@code null}.
151 */
152 public JSONString(final String value)
153 {
154 this.value = value;
155 jsonStringRepresentation = null;
156 }
157
158
159
160 /**
161 * Creates a new JSON string. This method should be used for strings parsed
162 * from the string representation of a JSON object.
163 *
164 * @param javaString The Java string to represent.
165 * @param jsonString The JSON string representation to use for the Java
166 * string.
167 */
168 JSONString(final String javaString, final String jsonString)
169 {
170 value = javaString;
171 jsonStringRepresentation = jsonString;
172 }
173
174
175
176 /**
177 * Retrieves the string value for this object. This will be the interpreted
178 * value, without the surrounding quotation marks or escaping.
179 *
180 * @return The string value for this object.
181 */
182 public String stringValue()
183 {
184 return value;
185 }
186
187
188
189 /**
190 * {@inheritDoc}
191 */
192 @Override()
193 public int hashCode()
194 {
195 return stringValue().hashCode();
196 }
197
198
199
200 /**
201 * {@inheritDoc}
202 */
203 @Override()
204 public boolean equals(final Object o)
205 {
206 if (o == this)
207 {
208 return true;
209 }
210
211 if (o instanceof JSONString)
212 {
213 final JSONString s = (JSONString) o;
214 return value.equals(s.value);
215 }
216
217 return false;
218 }
219
220
221
222 /**
223 * Indicates whether the value of this JSON string matches that of the
224 * provided string, optionally ignoring differences in capitalization.
225 *
226 * @param s The JSON string to compare against this JSON string.
227 * It must not be {@code null}.
228 * @param ignoreCase Indicates whether to ignore differences in
229 * capitalization.
230 *
231 * @return {@code true} if the value of this JSON string matches the value of
232 * the provided string (optionally ignoring differences in
233 * capitalization), or {@code false} if not.
234 */
235 public boolean equals(final JSONString s, final boolean ignoreCase)
236 {
237 if (ignoreCase)
238 {
239 return value.equalsIgnoreCase(s.value);
240 }
241 else
242 {
243 return value.equals(s.value);
244 }
245 }
246
247
248
249 /**
250 * {@inheritDoc}
251 */
252 @Override()
253 public boolean equals(final JSONValue v, final boolean ignoreFieldNameCase,
254 final boolean ignoreValueCase,
255 final boolean ignoreArrayOrder)
256 {
257 return ((v instanceof JSONString) &&
258 equals((JSONString) v, ignoreValueCase));
259 }
260
261
262
263 /**
264 * Retrieves a string representation of this JSON string as it should appear
265 * in a JSON object, including the surrounding quotation marks and any
266 * appropriate escaping To obtain the string to which this value refers
267 * without the surrounding quotation marks or escaping, use the
268 * {@link #stringValue()} method.
269 * <BR><BR>
270 * If the object containing this string was decoded from a string, then this
271 * method will use the same string representation as in that original object.
272 * Otherwise, the string representation will be constructed.
273 *
274 * @return A string representation of this value as it should appear in a
275 * JSON object.
276 */
277 @Override()
278 public String toString()
279 {
280 if (jsonStringRepresentation == null)
281 {
282 final StringBuilder buffer = new StringBuilder();
283 toString(buffer);
284 jsonStringRepresentation = buffer.toString();
285 }
286
287 return jsonStringRepresentation;
288 }
289
290
291
292 /**
293 * Appends a string representation of this JSON string as it should appear
294 * in a JSON object, including the surrounding quotation marks and any
295 * appropriate escaping, to the provided buffer. To obtain the string to
296 * which this value refers without the surrounding quotation marks or
297 * escaping, use the {@link #stringValue()} method.
298 * <BR><BR>
299 * If the object containing this string was decoded from a string, then this
300 * method will use the same string representation as in that original object.
301 * Otherwise, the string representation will be constructed.
302 *
303 * @param buffer The buffer to which the information should be appended.
304 */
305 @Override()
306 public void toString(final StringBuilder buffer)
307 {
308 if (jsonStringRepresentation != null)
309 {
310 buffer.append(jsonStringRepresentation);
311 }
312 else
313 {
314 final boolean emptyBufferProvided = (buffer.length() == 0);
315 encodeString(value, buffer);
316
317 if (emptyBufferProvided)
318 {
319 jsonStringRepresentation = buffer.toString();
320 }
321 }
322 }
323
324
325
326 /**
327 * Retrieves a single-line representation of this JSON string as it should
328 * appear in a JSON object, including the surrounding quotation marks and any
329 * appropriate escaping. To obtain the string to which this value refers
330 * without the surrounding quotation marks or escaping, use the
331 * {@link #stringValue()} method.
332 *
333 * @return A single-line representation of this value as it should appear in
334 * a JSON object.
335 */
336 @Override()
337 public String toSingleLineString()
338 {
339 return toString();
340 }
341
342
343
344 /**
345 * Appends a single-line string representation of this JSON string as it
346 * should appear in a JSON object, including the surrounding quotation marks
347 * and any appropriate escaping, to the provided buffer. To obtain the string
348 * to which this value refers without the surrounding quotation marks or
349 * escaping, use the {@link #stringValue()} method.
350 *
351 * @param buffer The buffer to which the information should be appended.
352 */
353 @Override()
354 public void toSingleLineString(final StringBuilder buffer)
355 {
356 toString(buffer);
357 }
358
359
360
361 /**
362 * Appends a minimally-escaped JSON representation of the provided string to
363 * the given buffer. When escaping is required, the most user-friendly form
364 * of escaping will be used.
365 *
366 * @param s The string to be encoded.
367 * @param buffer The buffer to which the encoded representation should be
368 * appended.
369 */
370 static void encodeString(final String s, final StringBuilder buffer)
371 {
372 buffer.append('"');
373
374 for (final char c : s.toCharArray())
375 {
376 switch (c)
377 {
378 case '"':
379 buffer.append("\\\"");
380 break;
381 case '\\':
382 buffer.append("\\\\");
383 break;
384 case '\b': // backspace
385 buffer.append("\\b");
386 break;
387 case '\f': // formfeed
388 buffer.append("\\f");
389 break;
390 case '\n': // newline
391 buffer.append("\\n");
392 break;
393 case '\r': // carriage return
394 buffer.append("\\r");
395 break;
396 case '\t': // horizontal tab
397 buffer.append("\\t");
398 break;
399 default:
400 if (c <= '\u001F')
401 {
402 buffer.append("\\u");
403 buffer.append(String.format("%04X", (int) c));
404 }
405 else
406 {
407 buffer.append(c);
408 }
409 break;
410 }
411 }
412
413 buffer.append('"');
414 }
415
416
417
418 /**
419 * Appends a minimally-escaped JSON representation of the provided string to
420 * the given buffer. When escaping is required, the most user-friendly form
421 * of escaping will be used.
422 *
423 * @param s The string to be encoded.
424 * @param buffer The buffer to which the encoded representation should be
425 * appended.
426 */
427 static void encodeString(final String s, final ByteStringBuffer buffer)
428 {
429 buffer.append('"');
430
431 for (final char c : s.toCharArray())
432 {
433 switch (c)
434 {
435 case '"':
436 buffer.append("\\\"");
437 break;
438 case '\\':
439 buffer.append("\\\\");
440 break;
441 case '\b': // backspace
442 buffer.append("\\b");
443 break;
444 case '\f': // formfeed
445 buffer.append("\\f");
446 break;
447 case '\n': // newline
448 buffer.append("\\n");
449 break;
450 case '\r': // carriage return
451 buffer.append("\\r");
452 break;
453 case '\t': // horizontal tab
454 buffer.append("\\t");
455 break;
456 default:
457 if (c <= '\u001F')
458 {
459 buffer.append("\\u");
460 buffer.append(String.format("%04X", (int) c));
461 }
462 else
463 {
464 buffer.append(c);
465 }
466 break;
467 }
468 }
469
470 buffer.append('"');
471 }
472
473
474
475 /**
476 * Retrieves a normalized representation of this JSON string as it should
477 * appear in a JSON object, including the surrounding quotes and any
478 * appropriate escaping. The normalized representation will use the unescaped
479 * ASCII representation of all of the following characters:
480 * <UL>
481 * <LI>The letters a through z (ASCII character codes 0x61 through
482 * 0x7A).</LI>
483 * <LI>The digits 0 through 9 (ASCII character codes 0x30 through
484 * 0x39).</LI>
485 * <LI>The space (ASCII character code 0x20).</LI>
486 * <LI>The single quote (ASCII character code 0x27).</LI>
487 * <LI>The left parenthesis (ASCII character code 0x28).</LI>
488 * <LI>The right parenthesis (ASCII character code 0x29).</LI>
489 * <LI>The plus sign (ASCII character code 0x2B).</LI>
490 * <LI>The comma (ASCII character code 0x2C).</LI>
491 * <LI>The hyphen (ASCII character code 0x2D).</LI>
492 * <LI>The period (ASCII character code 0x2E).</LI>
493 * <LI>The forward slash (ASCII character code 0x2F).</LI>
494 * <LI>The colon (ASCII character code 0x3A).</LI>
495 * <LI>The equal sign (ASCII character code 0x3D).</LI>
496 * <LI>The question mark (ASCII character code 0x3F).</LI>
497 * </UL>
498 * All characters except those listed above will be escaped using their
499 * Unicode representation.
500 *
501 * @return A normalized representation of this JSON string as it should
502 * appear in a JSON object, including
503 */
504 @Override()
505 public String toNormalizedString()
506 {
507 final StringBuilder buffer = new StringBuilder();
508 toNormalizedString(buffer);
509 return buffer.toString();
510 }
511
512
513
514 /**
515 * Appends a normalized representation of this JSON string as it should
516 * appear in a JSON object, including the surrounding quotes and any
517 * appropriate escaping, to the provided buffer. The normalized
518 * representation will use the unescaped ASCII representation of all of the
519 * following characters:
520 * <UL>
521 * <LI>The letters a through z (ASCII character codes 0x61 through
522 * 0x7A).</LI>
523 * <LI>The digits 0 through 9 (ASCII character codes 0x30 through
524 * 0x39).</LI>
525 * <LI>The space (ASCII character code 0x20).</LI>
526 * <LI>The single quote (ASCII character code 0x27).</LI>
527 * <LI>The left parenthesis (ASCII character code 0x28).</LI>
528 * <LI>The right parenthesis (ASCII character code 0x29).</LI>
529 * <LI>The plus sign (ASCII character code 0x2B).</LI>
530 * <LI>The comma (ASCII character code 0x2C).</LI>
531 * <LI>The hyphen (ASCII character code 0x2D).</LI>
532 * <LI>The period (ASCII character code 0x2E).</LI>
533 * <LI>The forward slash (ASCII character code 0x2F).</LI>
534 * <LI>The colon (ASCII character code 0x3A).</LI>
535 * <LI>The equal sign (ASCII character code 0x3D).</LI>
536 * <LI>The question mark (ASCII character code 0x3F).</LI>
537 * </UL>
538 * All characters except those listed above will be escaped using their
539 * Unicode representation.
540 *
541 * @param buffer The buffer to which the information should be appended.
542 */
543 @Override()
544 public void toNormalizedString(final StringBuilder buffer)
545 {
546 buffer.append('"');
547
548 for (final char c : value.toLowerCase().toCharArray())
549 {
550 if (StaticUtils.isPrintable(c))
551 {
552 buffer.append(c);
553 }
554 else
555 {
556 buffer.append("\\u");
557 buffer.append(String.format("%04X", (int) c));
558 }
559 }
560
561 buffer.append('"');
562 }
563
564
565
566 /**
567 * {@inheritDoc}
568 */
569 @Override()
570 public void appendToJSONBuffer(final JSONBuffer buffer)
571 {
572 buffer.appendString(value);
573 }
574
575
576
577 /**
578 * {@inheritDoc}
579 */
580 @Override()
581 public void appendToJSONBuffer(final String fieldName,
582 final JSONBuffer buffer)
583 {
584 buffer.appendString(fieldName, value);
585 }
586 }