001 /*
002 * Copyright 2007-2017 UnboundID Corp.
003 * All Rights Reserved.
004 */
005 /*
006 * Copyright (C) 2008-2017 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021 package com.unboundid.ldif;
022
023
024
025 import java.io.BufferedReader;
026 import java.io.BufferedWriter;
027 import java.io.Closeable;
028 import java.io.File;
029 import java.io.FileInputStream;
030 import java.io.FileWriter;
031 import java.io.InputStream;
032 import java.io.InputStreamReader;
033 import java.io.IOException;
034 import java.text.ParseException;
035 import java.util.ArrayList;
036 import java.util.Collection;
037 import java.util.Iterator;
038 import java.util.HashSet;
039 import java.util.LinkedHashMap;
040 import java.util.List;
041 import java.util.Set;
042 import java.util.concurrent.BlockingQueue;
043 import java.util.concurrent.ArrayBlockingQueue;
044 import java.util.concurrent.TimeUnit;
045 import java.util.concurrent.atomic.AtomicBoolean;
046 import java.nio.charset.Charset;
047
048 import com.unboundid.asn1.ASN1OctetString;
049 import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule;
050 import com.unboundid.ldap.matchingrules.MatchingRule;
051 import com.unboundid.ldap.sdk.Attribute;
052 import com.unboundid.ldap.sdk.Control;
053 import com.unboundid.ldap.sdk.Entry;
054 import com.unboundid.ldap.sdk.Modification;
055 import com.unboundid.ldap.sdk.ModificationType;
056 import com.unboundid.ldap.sdk.LDAPException;
057 import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition;
058 import com.unboundid.ldap.sdk.schema.Schema;
059 import com.unboundid.util.AggregateInputStream;
060 import com.unboundid.util.Base64;
061 import com.unboundid.util.LDAPSDKThreadFactory;
062 import com.unboundid.util.ThreadSafety;
063 import com.unboundid.util.ThreadSafetyLevel;
064 import com.unboundid.util.parallel.AsynchronousParallelProcessor;
065 import com.unboundid.util.parallel.Result;
066 import com.unboundid.util.parallel.ParallelProcessor;
067 import com.unboundid.util.parallel.Processor;
068
069 import static com.unboundid.ldif.LDIFMessages.*;
070 import static com.unboundid.util.Debug.*;
071 import static com.unboundid.util.StaticUtils.*;
072 import static com.unboundid.util.Validator.*;
073
074 /**
075 * This class provides an LDIF reader, which can be used to read and decode
076 * entries and change records from a data source using the LDAP Data Interchange
077 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>.
078 * <BR>
079 * This class is not synchronized. If multiple threads read from the
080 * LDIFReader, they must be synchronized externally.
081 * <BR><BR>
082 * <H2>Example</H2>
083 * The following example iterates through all entries contained in an LDIF file
084 * and attempts to add them to a directory server:
085 * <PRE>
086 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile);
087 *
088 * int entriesRead = 0;
089 * int entriesAdded = 0;
090 * int errorsEncountered = 0;
091 * while (true)
092 * {
093 * Entry entry;
094 * try
095 * {
096 * entry = ldifReader.readEntry();
097 * if (entry == null)
098 * {
099 * // All entries have been read.
100 * break;
101 * }
102 *
103 * entriesRead++;
104 * }
105 * catch (LDIFException le)
106 * {
107 * errorsEncountered++;
108 * if (le.mayContinueReading())
109 * {
110 * // A recoverable error occurred while attempting to read an entry,
111 * // at or near line number le.getLineNumber()
112 * // The entry will be skipped, but we'll try to keep reading from the
113 * // LDIF file.
114 * continue;
115 * }
116 * else
117 * {
118 * // An unrecoverable error occurred while attempting to read an entry
119 * // at or near line number le.getLineNumber()
120 * // No further LDIF processing will be performed.
121 * break;
122 * }
123 * }
124 * catch (IOException ioe)
125 * {
126 * // An I/O error occurred while attempting to read from the LDIF file.
127 * // No further LDIF processing will be performed.
128 * errorsEncountered++;
129 * break;
130 * }
131 *
132 * LDAPResult addResult;
133 * try
134 * {
135 * addResult = connection.add(entry);
136 * // If we got here, then the change should have been processed
137 * // successfully.
138 * entriesAdded++;
139 * }
140 * catch (LDAPException le)
141 * {
142 * // If we got here, then the change attempt failed.
143 * addResult = le.toLDAPResult();
144 * errorsEncountered++;
145 * }
146 * }
147 *
148 * ldifReader.close();
149 * </PRE>
150 */
151 @ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
152 public final class LDIFReader
153 implements Closeable
154 {
155 /**
156 * The default buffer size (128KB) that will be used when reading from the
157 * data source.
158 */
159 public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;
160
161
162
163 /**
164 * When processing asynchronously, this determines how many of the allocated
165 * worker threads are used to parse each batch of read entries.
166 */
167 private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;
168
169
170
171 /**
172 * When processing asynchronously, this specifies the size of the pending and
173 * completed queues.
174 */
175 private static final int ASYNC_QUEUE_SIZE = 500;
176
177
178
179 /**
180 * Special entry used internally to signal that the LDIFReaderEntryTranslator
181 * has signalled that a read Entry should be skipped by returning null,
182 * which normally implies EOF.
183 */
184 private static final Entry SKIP_ENTRY = new Entry("cn=skipped");
185
186
187
/**
 * The base path that is prepended to relative paths by default. It is the
 * absolute form of the directory named by the {@code user.dir} system
 * property (or of "." when that property is not set), and it always ends
 * with the platform file separator.
 */
private static final String DEFAULT_RELATIVE_BASE_PATH;
static
{
  final String workingDirProperty = System.getProperty("user.dir");
  final File workingDir =
       new File((workingDirProperty == null) ? "." : workingDirProperty);

  // Guarantee exactly one trailing separator so that relative paths can be
  // appended directly.
  final String absolutePath = workingDir.getAbsolutePath();
  DEFAULT_RELATIVE_BASE_PATH = absolutePath.endsWith(File.separator)
       ? absolutePath
       : (absolutePath + File.separator);
}
216
217
218
219 // The buffered reader that will be used to read LDIF data.
220 private final BufferedReader reader;
221
222 // The behavior that should be exhibited when encountering duplicate attribute
223 // values.
224 private volatile DuplicateValueBehavior duplicateValueBehavior;
225
226 // A line number counter.
227 private long lineNumberCounter = 0;
228
229 // The change record translator to use, if any.
230 private final LDIFReaderChangeRecordTranslator changeRecordTranslator;
231
232 // The entry translator to use, if any.
233 private final LDIFReaderEntryTranslator entryTranslator;
234
235 // The schema that will be used when processing, if applicable.
236 private Schema schema;
237
238 // Specifies the base path that will be prepended to relative paths for file
239 // URLs.
240 private volatile String relativeBasePath;
241
242 // The behavior that should be exhibited with regard to illegal trailing
243 // spaces in attribute values.
244 private volatile TrailingSpaceBehavior trailingSpaceBehavior;
245
246 // True iff we are processing asynchronously.
247 private final boolean isAsync;
248
249 //
250 // The following only apply to asynchronous processing.
251 //
252
253 // Parses entries asynchronously.
254 private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
255 asyncParser;
256
257 // Set to true when the end of the input is reached.
258 private final AtomicBoolean asyncParsingComplete;
259
260 // The records that have been read and parsed.
261 private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
262 asyncParsedRecords;
263
264
265
/**
 * Creates a new LDIF reader that reads from the file with the given path.
 * Records are read and parsed synchronously.
 *
 * @param  path  The path to the LDIF file to read. It must not be
 *               {@code null}.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 */
public LDIFReader(final String path)
       throws IOException
{
  // Zero parse threads selects synchronous parsing.
  this(new FileInputStream(path), 0);
}
280
281
282
/**
 * Creates a new LDIF reader that reads from the file with the given path,
 * optionally parsing the LDIF records asynchronously.
 *
 * @param  path             The path to the LDIF file to read. It must not
 *                          be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF file. A value of zero
 *                          causes records to be read and parsed
 *                          synchronously. It must not be negative.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final String path, final int numParseThreads)
       throws IOException
{
  // The File-based constructor opens the stream in exactly the same way.
  this(new File(path), numParseThreads);
}
305
306
307
/**
 * Creates a new LDIF reader that reads from the given file. Records are
 * read and parsed synchronously.
 *
 * @param  file  The LDIF file to read. It must not be {@code null}.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 */
public LDIFReader(final File file)
       throws IOException
{
  // Zero parse threads selects synchronous parsing.
  this(file, 0);
}
322
323
324
/**
 * Creates a new LDIF reader that reads from the given file, optionally
 * parsing the LDIF records asynchronously. The file content is decoded as
 * UTF-8.
 *
 * @param  file             The LDIF file to read. It must not be
 *                          {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF file. A value of zero
 *                          causes records to be read and parsed
 *                          synchronously. It must not be negative.
 *
 * @throws  IOException  If the file cannot be opened for reading.
 */
public LDIFReader(final File file, final int numParseThreads)
       throws IOException
{
  // No entry translator: entries are returned unaltered.
  this(new FileInputStream(file), numParseThreads, null);
}
344
345
346
/**
 * Creates a new LDIF reader that reads from the given files, in the order
 * provided, optionally parsing the LDIF records asynchronously. The file
 * content is decoded as UTF-8 and no change record translator is used.
 *
 * @param  files            The LDIF files to read. It must not be
 *                          {@code null} or empty.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data. A value of zero
 *                          causes records to be read and parsed
 *                          synchronously. It must not be negative.
 * @param  entryTranslator  The translator to apply to entries before they
 *                          are returned. It may be {@code null} if entries
 *                          should be returned unaltered. When parsing in
 *                          parallel, translation is also done in parallel.
 *
 * @throws  IOException  If a problem occurs while opening the files for
 *                       reading.
 */
public LDIFReader(final File[] files, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
       throws IOException
{
  // UTF-8 is required by RFC 2849.
  this(files, numParseThreads, entryTranslator, null, "UTF-8");
}
373
374
375
/**
 * Creates a new LDIF reader that reads from the given files, in the order
 * provided, optionally parsing the LDIF records asynchronously. The file
 * content is decoded as UTF-8.
 *
 * @param  files                   The LDIF files to read. It must not be
 *                                 {@code null} or empty.
 * @param  numParseThreads         The number of threads to use to
 *                                 asynchronously read and parse the LDIF
 *                                 data. A value of zero causes records to
 *                                 be read and parsed synchronously. It
 *                                 must not be negative.
 * @param  entryTranslator         The translator to apply to entries before
 *                                 they are returned. It may be
 *                                 {@code null} if entries should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  changeRecordTranslator  The translator to apply to change records
 *                                 before they are returned. It may be
 *                                 {@code null} if change records should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 *
 * @throws  IOException  If a problem occurs while opening the files for
 *                       reading.
 */
public LDIFReader(final File[] files, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator)
       throws IOException
{
  // The stream-based constructor with the same translators applies the
  // UTF-8 character set required by RFC 2849.
  this(createAggregateInputStream(files), numParseThreads, entryTranslator,
       changeRecordTranslator);
}
415
416
417
/**
 * Creates a new LDIF reader that reads from the given files, in the order
 * provided, decoding them with the named character set and optionally
 * parsing the LDIF records asynchronously.
 *
 * @param  files                   The LDIF files to read. It must not be
 *                                 {@code null} or empty.
 * @param  numParseThreads         The number of threads to use to
 *                                 asynchronously read and parse the LDIF
 *                                 data. A value of zero causes records to
 *                                 be read and parsed synchronously. It
 *                                 must not be negative.
 * @param  entryTranslator         The translator to apply to entries before
 *                                 they are returned. It may be
 *                                 {@code null} if entries should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  changeRecordTranslator  The translator to apply to change records
 *                                 before they are returned. It may be
 *                                 {@code null} if change records should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  characterSet            The name of the character set to use when
 *                                 decoding the files, as understood by
 *                                 {@code Charset.forName}. It must not be
 *                                 {@code null}.
 *
 * @throws  IOException  If a problem occurs while opening the files for
 *                       reading.
 */
public LDIFReader(final File[] files, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator,
            final String characterSet)
       throws IOException
{
  // The files are opened first and the character set is resolved second,
  // then both are wrapped in a reader with the default buffer size.
  this(new BufferedReader(
            new InputStreamReader(createAggregateInputStream(files),
                 Charset.forName(characterSet)),
            DEFAULT_BUFFER_SIZE),
       numParseThreads, entryTranslator, changeRecordTranslator);
}
461
462
463
464 /**
465 * Creates a new aggregate input stream that will read data from the specified
466 * files. If there are multiple files, then a "padding" file will be inserted
467 * between them to ensure that there is at least one blank line between the
468 * end of one file and the beginning of another.
469 *
470 * @param files The files from which the data is to be read. It must not be
471 * {@code null} or empty.
472 *
473 * @return The input stream to use to read data from the provided files.
474 *
475 * @throws IOException If a problem is encountered while attempting to
476 * create the input stream.
477 */
478 private static InputStream createAggregateInputStream(final File... files)
479 throws IOException
480 {
481 if (files.length == 0)
482 {
483 throw new IOException(ERR_READ_NO_LDIF_FILES.get());
484 }
485 else if (files.length == 1)
486 {
487 return new FileInputStream(files[0]);
488 }
489 else
490 {
491 final File spacerFile =
492 File.createTempFile("ldif-reader-spacer", ".ldif");
493 spacerFile.deleteOnExit();
494
495 final BufferedWriter spacerWriter =
496 new BufferedWriter(new FileWriter(spacerFile));
497 try
498 {
499 spacerWriter.newLine();
500 spacerWriter.newLine();
501 }
502 finally
503 {
504 spacerWriter.close();
505 }
506
507 final File[] returnArray = new File[(files.length * 2) - 1];
508 returnArray[0] = files[0];
509
510 int pos = 1;
511 for (int i=1; i < files.length; i++)
512 {
513 returnArray[pos++] = spacerFile;
514 returnArray[pos++] = files[i];
515 }
516
517 return new AggregateInputStream(returnArray);
518 }
519 }
520
521
522
/**
 * Creates a new LDIF reader that reads from the provided input stream.
 * Records are read and parsed synchronously and entries are returned
 * unaltered.
 *
 * @param  inputStream  The input stream from which the data is to be read.
 *                      It must not be {@code null}.
 */
public LDIFReader(final InputStream inputStream)
{
  // Zero parse threads selects synchronous parsing; no entry translator.
  this(inputStream, 0, null);
}
534
535
536
/**
 * Creates a new LDIF reader that reads from the provided input stream,
 * optionally parsing the LDIF records asynchronously. The stream content
 * is decoded as UTF-8.
 *
 * @param  inputStream      The input stream from which the data is to be
 *                          read. It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data. A value of zero
 *                          causes records to be read and parsed
 *                          synchronously. It must not be negative.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final InputStream inputStream, final int numParseThreads)
{
  // The translator-aware constructor applies the UTF-8 character set
  // required by RFC 2849; no translators are used here.
  this(inputStream, numParseThreads, null, null);
}
559
560
561
/**
 * Creates a new LDIF reader that reads from the provided input stream,
 * optionally parsing the LDIF records asynchronously and applying the given
 * entry translator. The stream content is decoded as UTF-8 and no change
 * record translator is used.
 *
 * @param  inputStream      The input stream from which the data is to be
 *                          read. It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data. A value of zero
 *                          causes records to be read and parsed
 *                          synchronously. It must not be negative.
 * @param  entryTranslator  The translator to apply to entries before they
 *                          are returned. It may be {@code null} if entries
 *                          should be returned unaltered. When parsing in
 *                          parallel, translation is also done in parallel.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
                  final LDIFReaderEntryTranslator entryTranslator)
{
  // UTF-8 is required by RFC 2849.
  this(inputStream, numParseThreads, entryTranslator, null, "UTF-8");
}
587
588
589
/**
 * Creates a new LDIF reader that reads from the provided input stream,
 * optionally parsing the LDIF records asynchronously and applying the given
 * translators. The stream content is decoded as UTF-8.
 *
 * @param  inputStream             The input stream from which the data is
 *                                 to be read. It must not be {@code null}.
 * @param  numParseThreads         The number of threads to use to
 *                                 asynchronously read and parse the LDIF
 *                                 data. A value of zero causes records to
 *                                 be read and parsed synchronously. It
 *                                 must not be negative.
 * @param  entryTranslator         The translator to apply to entries before
 *                                 they are returned. It may be
 *                                 {@code null} if entries should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  changeRecordTranslator  The translator to apply to change records
 *                                 before they are returned. It may be
 *                                 {@code null} if change records should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator)
{
  // UTF-8 is required by RFC 2849. Java guarantees it's always available.
  this(new BufferedReader(
            new InputStreamReader(inputStream, Charset.forName("UTF-8")),
            DEFAULT_BUFFER_SIZE),
       numParseThreads, entryTranslator, changeRecordTranslator);
}
628
629
630
/**
 * Creates a new LDIF reader that reads from the provided input stream,
 * decoding it with the named character set and optionally parsing the LDIF
 * records asynchronously.
 *
 * @param  inputStream             The input stream from which the data is
 *                                 to be read. It must not be {@code null}.
 * @param  numParseThreads         The number of threads to use to
 *                                 asynchronously read and parse the LDIF
 *                                 data. A value of zero causes records to
 *                                 be read and parsed synchronously. It
 *                                 must not be negative.
 * @param  entryTranslator         The translator to apply to entries before
 *                                 they are returned. It may be
 *                                 {@code null} if entries should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  changeRecordTranslator  The translator to apply to change records
 *                                 before they are returned. It may be
 *                                 {@code null} if change records should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  characterSet            The name of the character set to use when
 *                                 decoding the stream, as understood by
 *                                 {@code Charset.forName}. It must not be
 *                                 {@code null}.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final InputStream inputStream, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator,
            final String characterSet)
{
  // Wrap the stream in a decoding reader with the default buffer size.
  this(
       new BufferedReader(
            new InputStreamReader(
                 inputStream,
                 Charset.forName(characterSet)),
            DEFAULT_BUFFER_SIZE),
       numParseThreads,
       entryTranslator,
       changeRecordTranslator);
}
674
675
676
/**
 * Creates a new LDIF reader that reads LDIF data from the provided buffered
 * reader. Records are read and parsed synchronously and entries are
 * returned unaltered. The encoding of the underlying Reader must be set to
 * "UTF-8" as required by RFC 2849.
 *
 * @param  reader  The buffered reader that will be used to read the LDIF
 *                 data. It must not be {@code null}.
 */
public LDIFReader(final BufferedReader reader)
{
  // Zero parse threads selects synchronous parsing; no entry translator.
  this(reader, 0, null);
}
689
690
691
/**
 * Creates a new LDIF reader that reads LDIF data from the provided buffered
 * reader, optionally parsing the LDIF records asynchronously. No
 * translators are applied. The encoding of the underlying Reader must be
 * set to "UTF-8" as required by RFC 2849.
 *
 * @param  reader           The buffered reader that will be used to read
 *                          the LDIF data. It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data. A value of zero
 *                          causes records to be read and parsed
 *                          synchronously. It must not be negative.
 *
 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
 *      constructor for more details about asynchronous processing.
 */
public LDIFReader(final BufferedReader reader, final int numParseThreads)
{
  // Neither an entry translator nor a change record translator is used.
  this(reader, numParseThreads, null, null);
}
711
712
713
/**
 * Creates a new LDIF reader that reads LDIF data from the provided buffered
 * reader, optionally parsing the LDIF records asynchronously and applying
 * the given entry translator. No change record translator is used. The
 * encoding of the underlying Reader must be set to "UTF-8" as required by
 * RFC 2849.
 *
 * @param  reader           The buffered reader that will be used to read
 *                          the LDIF data. It must not be {@code null}.
 * @param  numParseThreads  The number of threads to use to asynchronously
 *                          read and parse the LDIF data. A value of zero
 *                          implies the default behavior of reading and
 *                          parsing LDIF records synchronously when one of
 *                          the read methods is called. It must not be
 *                          negative. This should only be set to greater
 *                          than zero when performance analysis has
 *                          demonstrated that reading and parsing the LDIF
 *                          is a bottleneck, since the default synchronous
 *                          processing is normally fast enough. There is
 *                          little benefit in passing in a value greater
 *                          than four (unless there is an
 *                          LDIFReaderEntryTranslator that does
 *                          time-consuming processing).
 * @param  entryTranslator  The translator to apply to entries before they
 *                          are returned. It may be {@code null} if entries
 *                          should be returned unaltered. When parsing in
 *                          parallel, translation is also done in parallel.
 */
public LDIFReader(
            final BufferedReader reader,
            final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator)
{
  // No change record translator: change records are returned unaltered.
  this(reader, numParseThreads, entryTranslator, null);
}
749
750
751
/**
 * Creates a new LDIF reader that reads LDIF data from the provided buffered
 * reader, optionally parsing the LDIF records asynchronously and applying
 * the given translators. The encoding of the underlying Reader must be set
 * to "UTF-8" as required by RFC 2849.
 * <BR><BR>
 * The reader initially strips duplicate attribute values, rejects illegal
 * trailing spaces, and resolves relative paths against the default base
 * path. When {@code numParseThreads} is nonzero, a background thread that
 * reads lines from {@code reader} is started before this constructor
 * returns.
 *
 * @param  reader                  The buffered reader that will be used to
 *                                 read the LDIF data. It must not be
 *                                 {@code null}.
 * @param  numParseThreads         The number of threads to use to
 *                                 asynchronously read and parse the LDIF
 *                                 data. A value of zero causes records to
 *                                 be read and parsed synchronously. It
 *                                 must not be negative.
 * @param  entryTranslator         The translator to apply to entries before
 *                                 they are returned. It may be
 *                                 {@code null} if entries should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 * @param  changeRecordTranslator  The translator to apply to change records
 *                                 before they are returned. It may be
 *                                 {@code null} if change records should be
 *                                 returned unaltered. When parsing in
 *                                 parallel, translation is also done in
 *                                 parallel.
 */
public LDIFReader(final BufferedReader reader, final int numParseThreads,
            final LDIFReaderEntryTranslator entryTranslator,
            final LDIFReaderChangeRecordTranslator changeRecordTranslator)
{
  ensureNotNull(reader);
  ensureTrue(numParseThreads >= 0,
             "LDIFReader.numParseThreads must not be negative.");

  this.reader = reader;
  this.entryTranslator = entryTranslator;
  this.changeRecordTranslator = changeRecordTranslator;

  // Initial values for the configurable parsing behaviors.
  duplicateValueBehavior = DuplicateValueBehavior.STRIP;
  trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
  relativeBasePath = DEFAULT_RELATIVE_BASE_PATH;

  isAsync = (numParseThreads != 0);
  if (! isAsync)
  {
    // Synchronous mode: none of the asynchronous machinery is needed.
    asyncParser = null;
    asyncParsingComplete = null;
    asyncParsedRecords = null;
    return;
  }

  asyncParsingComplete = new AtomicBoolean(false);

  // Decodes entries in parallel.
  final LDAPSDKThreadFactory workerThreadFactory =
       new LDAPSDKThreadFactory("LDIFReader Worker", true, null);
  final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> recordDecoder =
       new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
            new RecordParser(), workerThreadFactory, numParseThreads,
            ASYNC_MIN_PER_PARSING_THREAD);

  // Unparsed records wait in this queue until they are decoded.
  final BlockingQueue<UnparsedLDIFRecord> unparsedRecords =
       new ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE);

  // The output queue must be a little more than twice as big as the input
  // queue to more easily handle being shutdown in the middle of processing
  // when the queues are full and threads are blocked.
  final int parsedQueueCapacity = 2 * ASYNC_QUEUE_SIZE + 100;
  asyncParsedRecords =
       new ArrayBlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>(
            parsedQueueCapacity);

  asyncParser =
       new AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
            unparsedRecords, recordDecoder, asyncParsedRecords);

  // Start reading lines only after every field above has been assigned.
  new LineReaderThread().start();
}
835
836
837
838 /**
839 * Reads entries from the LDIF file with the specified path and returns them
840 * as a {@code List}. This is a convenience method that should only be used
841 * for data sets that are small enough so that running out of memory isn't a
842 * concern.
843 *
844 * @param path The path to the LDIF file containing the entries to be read.
845 *
846 * @return A list of the entries read from the given LDIF file.
847 *
848 * @throws IOException If a problem occurs while attempting to read data
849 * from the specified file.
850 *
851 * @throws LDIFException If a problem is encountered while attempting to
852 * decode data read as LDIF.
853 */
854 public static List<Entry> readEntries(final String path)
855 throws IOException, LDIFException
856 {
857 return readEntries(new LDIFReader(path));
858 }
859
860
861
862 /**
863 * Reads entries from the specified LDIF file and returns them as a
864 * {@code List}. This is a convenience method that should only be used for
865 * data sets that are small enough so that running out of memory isn't a
866 * concern.
867 *
868 * @param file A reference to the LDIF file containing the entries to be
869 * read.
870 *
871 * @return A list of the entries read from the given LDIF file.
872 *
873 * @throws IOException If a problem occurs while attempting to read data
874 * from the specified file.
875 *
876 * @throws LDIFException If a problem is encountered while attempting to
877 * decode data read as LDIF.
878 */
879 public static List<Entry> readEntries(final File file)
880 throws IOException, LDIFException
881 {
882 return readEntries(new LDIFReader(file));
883 }
884
885
886
887 /**
888 * Reads and decodes LDIF entries from the provided input stream and
889 * returns them as a {@code List}. This is a convenience method that should
890 * only be used for data sets that are small enough so that running out of
891 * memory isn't a concern.
892 *
893 * @param inputStream The input stream from which the entries should be
894 * read. The input stream will be closed before
895 * returning.
896 *
897 * @return A list of the entries read from the given input stream.
898 *
899 * @throws IOException If a problem occurs while attempting to read data
900 * from the input stream.
901 *
902 * @throws LDIFException If a problem is encountered while attempting to
903 * decode data read as LDIF.
904 */
905 public static List<Entry> readEntries(final InputStream inputStream)
906 throws IOException, LDIFException
907 {
908 return readEntries(new LDIFReader(inputStream));
909 }
910
911
912
913 /**
914 * Reads entries from the provided LDIF reader and returns them as a list.
915 *
916 * @param reader The reader from which the entries should be read. It will
917 * be closed before returning.
918 *
919 * @return A list of the entries read from the provided reader.
920 *
921 * @throws IOException If a problem was encountered while attempting to read
922 * data from the LDIF data source.
923 *
924 * @throws LDIFException If a problem is encountered while attempting to
925 * decode data read as LDIF.
926 */
927 private static List<Entry> readEntries(final LDIFReader reader)
928 throws IOException, LDIFException
929 {
930 try
931 {
932 final ArrayList<Entry> entries = new ArrayList<Entry>(10);
933 while (true)
934 {
935 final Entry e = reader.readEntry();
936 if (e == null)
937 {
938 break;
939 }
940
941 entries.add(e);
942 }
943
944 return entries;
945 }
946 finally
947 {
948 reader.close();
949 }
950 }
951
952
953
954 /**
955 * Closes this LDIF reader and the underlying LDIF source.
956 *
957 * @throws IOException If a problem occurs while closing the underlying LDIF
958 * source.
959 */
960 public void close()
961 throws IOException
962 {
963 reader.close();
964
965 if (isAsync())
966 {
967 // Closing the reader will trigger the LineReaderThread to complete, but
968 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid
969 // this, we clear out the completed output queue, which is larger than
970 // the input queue, so the LineReaderThread will stop reading and
971 // shutdown the asyncParser.
972 asyncParsedRecords.clear();
973 }
974 }
975
976
977
978 /**
979 * Indicates whether to ignore any duplicate values encountered while reading
980 * LDIF records.
981 *
982 * @return {@code true} if duplicate values should be ignored, or
983 * {@code false} if any LDIF records containing duplicate values
984 * should be rejected.
985 *
986 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead.
987 */
988 @Deprecated()
989 public boolean ignoreDuplicateValues()
990 {
991 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP);
992 }
993
994
995
996 /**
997 * Specifies whether to ignore any duplicate values encountered while reading
998 * LDIF records.
999 *
1000 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1001 * attribute values encountered while reading
1002 * LDIF records.
1003 *
1004 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead.
1005 */
1006 @Deprecated()
1007 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues)
1008 {
1009 if (ignoreDuplicateValues)
1010 {
1011 duplicateValueBehavior = DuplicateValueBehavior.STRIP;
1012 }
1013 else
1014 {
1015 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
1016 }
1017 }
1018
1019
1020
1021 /**
1022 * Retrieves the behavior that should be exhibited if the LDIF reader
1023 * encounters an entry with duplicate values.
1024 *
1025 * @return The behavior that should be exhibited if the LDIF reader
1026 * encounters an entry with duplicate values.
1027 */
1028 public DuplicateValueBehavior getDuplicateValueBehavior()
1029 {
1030 return duplicateValueBehavior;
1031 }
1032
1033
1034
1035 /**
1036 * Specifies the behavior that should be exhibited if the LDIF reader
1037 * encounters an entry with duplicate values.
1038 *
1039 * @param duplicateValueBehavior The behavior that should be exhibited if
1040 * the LDIF reader encounters an entry with
1041 * duplicate values.
1042 */
1043 public void setDuplicateValueBehavior(
1044 final DuplicateValueBehavior duplicateValueBehavior)
1045 {
1046 this.duplicateValueBehavior = duplicateValueBehavior;
1047 }
1048
1049
1050
1051 /**
1052 * Indicates whether to strip off any illegal trailing spaces that may appear
1053 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
1054 * specification strongly recommends that any value which legitimately
1055 * contains trailing spaces be base64-encoded, and any spaces which appear
1056 * after the end of non-base64-encoded values may therefore be considered
1057 * invalid. If any such trailing spaces are encountered in an LDIF record and
1058 * they are not to be stripped, then an {@link LDIFException} will be thrown
1059 * for that record.
1060 * <BR><BR>
1061 * Note that this applies only to spaces after the end of a value, and not to
1062 * spaces which may appear at the end of a line for a value that is wrapped
1063 * and continued on the next line.
1064 *
1065 * @return {@code true} if illegal trailing spaces should be stripped off, or
1066 * {@code false} if LDIF records containing illegal trailing spaces
1067 * should be rejected.
1068 *
1069 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead.
1070 */
1071 @Deprecated()
1072 public boolean stripTrailingSpaces()
1073 {
1074 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP);
1075 }
1076
1077
1078
1079 /**
1080 * Specifies whether to strip off any illegal trailing spaces that may appear
1081 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF
1082 * specification strongly recommends that any value which legitimately
1083 * contains trailing spaces be base64-encoded, and any spaces which appear
1084 * after the end of non-base64-encoded values may therefore be considered
1085 * invalid. If any such trailing spaces are encountered in an LDIF record and
1086 * they are not to be stripped, then an {@link LDIFException} will be thrown
1087 * for that record.
1088 * <BR><BR>
1089 * Note that this applies only to spaces after the end of a value, and not to
1090 * spaces which may appear at the end of a line for a value that is wrapped
1091 * and continued on the next line.
1092 *
1093 * @param stripTrailingSpaces Indicates whether to strip off any illegal
1094 * trailing spaces, or {@code false} if LDIF
1095 * records containing them should be rejected.
1096 *
1097 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead.
1098 */
1099 @Deprecated()
1100 public void setStripTrailingSpaces(final boolean stripTrailingSpaces)
1101 {
1102 trailingSpaceBehavior = stripTrailingSpaces
1103 ? TrailingSpaceBehavior.STRIP
1104 : TrailingSpaceBehavior.REJECT;
1105 }
1106
1107
1108
1109 /**
1110 * Retrieves the behavior that should be exhibited when encountering attribute
1111 * values which are not base64-encoded but contain trailing spaces. The LDIF
1112 * specification strongly recommends that any value which legitimately
1113 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1114 * may be configured to automatically strip these spaces, to preserve them, or
1115 * to reject any entry or change record containing them.
1116 *
1117 * @return The behavior that should be exhibited when encountering attribute
1118 * values which are not base64-encoded but contain trailing spaces.
1119 */
1120 public TrailingSpaceBehavior getTrailingSpaceBehavior()
1121 {
1122 return trailingSpaceBehavior;
1123 }
1124
1125
1126
1127 /**
1128 * Specifies the behavior that should be exhibited when encountering attribute
1129 * values which are not base64-encoded but contain trailing spaces. The LDIF
1130 * specification strongly recommends that any value which legitimately
1131 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1132 * may be configured to automatically strip these spaces, to preserve them, or
1133 * to reject any entry or change record containing them.
1134 *
1135 * @param trailingSpaceBehavior The behavior that should be exhibited when
1136 * encountering attribute values which are not
1137 * base64-encoded but contain trailing spaces.
1138 */
1139 public void setTrailingSpaceBehavior(
1140 final TrailingSpaceBehavior trailingSpaceBehavior)
1141 {
1142 this.trailingSpaceBehavior = trailingSpaceBehavior;
1143 }
1144
1145
1146
1147 /**
1148 * Retrieves the base path that will be prepended to relative paths in order
1149 * to obtain an absolute path. This will only be used for "file:" URLs that
1150 * have paths which do not begin with a slash.
1151 *
1152 * @return The base path that will be prepended to relative paths in order to
1153 * obtain an absolute path.
1154 */
1155 public String getRelativeBasePath()
1156 {
1157 return relativeBasePath;
1158 }
1159
1160
1161
1162 /**
1163 * Specifies the base path that will be prepended to relative paths in order
1164 * to obtain an absolute path. This will only be used for "file:" URLs that
1165 * have paths which do not begin with a space.
1166 *
1167 * @param relativeBasePath The base path that will be prepended to relative
1168 * paths in order to obtain an absolute path.
1169 */
1170 public void setRelativeBasePath(final String relativeBasePath)
1171 {
1172 setRelativeBasePath(new File(relativeBasePath));
1173 }
1174
1175
1176
1177 /**
1178 * Specifies the base path that will be prepended to relative paths in order
1179 * to obtain an absolute path. This will only be used for "file:" URLs that
1180 * have paths which do not begin with a space.
1181 *
1182 * @param relativeBasePath The base path that will be prepended to relative
1183 * paths in order to obtain an absolute path.
1184 */
1185 public void setRelativeBasePath(final File relativeBasePath)
1186 {
1187 final String path = relativeBasePath.getAbsolutePath();
1188 if (path.endsWith(File.separator))
1189 {
1190 this.relativeBasePath = path;
1191 }
1192 else
1193 {
1194 this.relativeBasePath = path + File.separator;
1195 }
1196 }
1197
1198
1199
1200 /**
1201 * Retrieves the schema that will be used when reading LDIF records, if
1202 * defined.
1203 *
1204 * @return The schema that will be used when reading LDIF records, or
1205 * {@code null} if no schema should be used and all attributes should
1206 * be treated as case-insensitive strings.
1207 */
1208 public Schema getSchema()
1209 {
1210 return schema;
1211 }
1212
1213
1214
1215 /**
1216 * Specifies the schema that should be used when reading LDIF records.
1217 *
1218 * @param schema The schema that should be used when reading LDIF records,
1219 * or {@code null} if no schema should be used and all
1220 * attributes should be treated as case-insensitive strings.
1221 */
1222 public void setSchema(final Schema schema)
1223 {
1224 this.schema = schema;
1225 }
1226
1227
1228
1229 /**
1230 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1231 * change record.
1232 *
1233 * @return The record read from the LDIF source, or {@code null} if there are
1234 * no more entries to be read.
1235 *
1236 * @throws IOException If a problem occurs while trying to read from the
1237 * LDIF source.
1238 *
1239 * @throws LDIFException If the data read could not be parsed as an entry or
1240 * an LDIF change record.
1241 */
1242 public LDIFRecord readLDIFRecord()
1243 throws IOException, LDIFException
1244 {
1245 if (isAsync())
1246 {
1247 return readLDIFRecordAsync();
1248 }
1249 else
1250 {
1251 return readLDIFRecordInternal();
1252 }
1253 }
1254
1255
1256
1257 /**
1258 * Reads an entry from the LDIF source.
1259 *
1260 * @return The entry read from the LDIF source, or {@code null} if there are
1261 * no more entries to be read.
1262 *
1263 * @throws IOException If a problem occurs while attempting to read from the
1264 * LDIF source.
1265 *
1266 * @throws LDIFException If the data read could not be parsed as an entry.
1267 */
1268 public Entry readEntry()
1269 throws IOException, LDIFException
1270 {
1271 if (isAsync())
1272 {
1273 return readEntryAsync();
1274 }
1275 else
1276 {
1277 return readEntryInternal();
1278 }
1279 }
1280
1281
1282
1283 /**
1284 * Reads an LDIF change record from the LDIF source. The LDIF record must
1285 * have a changetype.
1286 *
1287 * @return The change record read from the LDIF source, or {@code null} if
1288 * there are no more records to be read.
1289 *
1290 * @throws IOException If a problem occurs while attempting to read from the
1291 * LDIF source.
1292 *
1293 * @throws LDIFException If the data read could not be parsed as an LDIF
1294 * change record.
1295 */
1296 public LDIFChangeRecord readChangeRecord()
1297 throws IOException, LDIFException
1298 {
1299 return readChangeRecord(false);
1300 }
1301
1302
1303
1304 /**
1305 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1306 * record does not have a changetype, then it may be assumed to be an add
1307 * change record.
1308 *
1309 * @param defaultAdd Indicates whether an LDIF record not containing a
1310 * changetype should be retrieved as an add change record.
1311 * If this is {@code false} and the record read does not
1312 * include a changetype, then an {@link LDIFException}
1313 * will be thrown.
1314 *
1315 * @return The change record read from the LDIF source, or {@code null} if
1316 * there are no more records to be read.
1317 *
1318 * @throws IOException If a problem occurs while attempting to read from the
1319 * LDIF source.
1320 *
1321 * @throws LDIFException If the data read could not be parsed as an LDIF
1322 * change record.
1323 */
1324 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd)
1325 throws IOException, LDIFException
1326 {
1327 if (isAsync())
1328 {
1329 return readChangeRecordAsync(defaultAdd);
1330 }
1331 else
1332 {
1333 return readChangeRecordInternal(defaultAdd);
1334 }
1335 }
1336
1337
1338
1339 /**
1340 * Reads the next {@code LDIFRecord}, which was read and parsed by a different
1341 * thread.
1342 *
1343 * @return The next parsed record or {@code null} if there are no more
1344 * records to read.
1345 *
1346 * @throws IOException If IOException was thrown when reading or parsing
1347 * the record.
1348 *
1349 * @throws LDIFException If LDIFException was thrown parsing the record.
1350 */
1351 private LDIFRecord readLDIFRecordAsync()
1352 throws IOException, LDIFException
1353 {
1354 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1355 LDIFRecord record = null;
1356 while (record == null)
1357 {
1358 result = readLDIFRecordResultAsync();
1359 if (result == null)
1360 {
1361 return null;
1362 }
1363
1364 record = result.getOutput();
1365
1366 // This is a special value that means we should skip this Entry. We have
1367 // to use something different than null because null means EOF.
1368 if (record == SKIP_ENTRY)
1369 {
1370 record = null;
1371 }
1372 }
1373 return record;
1374 }
1375
1376
1377
1378 /**
1379 * Reads an entry asynchronously from the LDIF source.
1380 *
1381 * @return The entry read from the LDIF source, or {@code null} if there are
1382 * no more entries to be read.
1383 *
1384 * @throws IOException If a problem occurs while attempting to read from the
1385 * LDIF source.
1386 * @throws LDIFException If the data read could not be parsed as an entry.
1387 */
1388 private Entry readEntryAsync()
1389 throws IOException, LDIFException
1390 {
1391 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1392 LDIFRecord record = null;
1393 while (record == null)
1394 {
1395 result = readLDIFRecordResultAsync();
1396 if (result == null)
1397 {
1398 return null;
1399 }
1400
1401 record = result.getOutput();
1402
1403 // This is a special value that means we should skip this Entry. We have
1404 // to use something different than null because null means EOF.
1405 if (record == SKIP_ENTRY)
1406 {
1407 record = null;
1408 }
1409 }
1410
1411 if (record instanceof Entry)
1412 {
1413 return (Entry) record;
1414 }
1415 else if (record instanceof LDIFChangeRecord)
1416 {
1417 try
1418 {
1419 // Some LDIFChangeRecord can be converted to an Entry. This is really
1420 // an edge case though.
1421 return ((LDIFChangeRecord)record).toEntry();
1422 }
1423 catch (LDIFException e)
1424 {
1425 debugException(e);
1426 final long firstLineNumber = result.getInput().getFirstLineNumber();
1427 throw new LDIFException(e.getExceptionMessage(),
1428 firstLineNumber, true, e);
1429 }
1430 }
1431
1432 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1433 "LDIFChangeRecord");
1434 }
1435
1436
1437
1438 /**
1439 * Reads an LDIF change record from the LDIF source asynchronously.
1440 * Optionally, if the LDIF record does not have a changetype, then it may be
1441 * assumed to be an add change record.
1442 *
1443 * @param defaultAdd Indicates whether an LDIF record not containing a
1444 * changetype should be retrieved as an add change record.
1445 * If this is {@code false} and the record read does not
1446 * include a changetype, then an {@link LDIFException} will
1447 * be thrown.
1448 *
1449 * @return The change record read from the LDIF source, or {@code null} if
1450 * there are no more records to be read.
1451 *
1452 * @throws IOException If a problem occurs while attempting to read from the
1453 * LDIF source.
1454 * @throws LDIFException If the data read could not be parsed as an LDIF
1455 * change record.
1456 */
1457 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd)
1458 throws IOException, LDIFException
1459 {
1460 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1461 LDIFRecord record = null;
1462 while (record == null)
1463 {
1464 result = readLDIFRecordResultAsync();
1465 if (result == null)
1466 {
1467 return null;
1468 }
1469
1470 record = result.getOutput();
1471
1472 // This is a special value that means we should skip this Entry. We have
1473 // to use something different than null because null means EOF.
1474 if (record == SKIP_ENTRY)
1475 {
1476 record = null;
1477 }
1478 }
1479
1480 if (record instanceof LDIFChangeRecord)
1481 {
1482 return (LDIFChangeRecord) record;
1483 }
1484 else if (record instanceof Entry)
1485 {
1486 if (defaultAdd)
1487 {
1488 return new LDIFAddChangeRecord((Entry) record);
1489 }
1490 else
1491 {
1492 final long firstLineNumber = result.getInput().getFirstLineNumber();
1493 throw new LDIFException(
1494 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber,
1495 true);
1496 }
1497 }
1498
1499 throw new AssertionError("LDIFRecords must either be an Entry or an " +
1500 "LDIFChangeRecord");
1501 }
1502
1503
1504
1505 /**
1506 * Reads the next LDIF record, which was read and parsed asynchronously by
1507 * separate threads.
1508 *
1509 * @return The next LDIF record or {@code null} if there are no more records.
1510 *
1511 * @throws IOException If a problem occurs while attempting to read from the
1512 * LDIF source.
1513 *
1514 * @throws LDIFException If the data read could not be parsed as an entry.
1515 */
1516 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
1517 throws IOException, LDIFException
1518 {
1519 Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1520
1521 // If the asynchronous reading and parsing is complete, then we don't have
1522 // to block waiting for the next record to show up on the queue. If there
1523 // isn't a record there, then return null (EOF) right away.
1524 if (asyncParsingComplete.get())
1525 {
1526 result = asyncParsedRecords.poll();
1527 }
1528 else
1529 {
1530 try
1531 {
1532 // We probably could just do a asyncParsedRecords.take() here, but
1533 // there are some edge case error scenarios where
1534 // asyncParsingComplete might be set without a special EOF sentinel
1535 // Result enqueued. So to guard against this, we have a very cautious
1536 // polling interval of 1 second. During normal processing, we never
1537 // have to wait for this to expire, when there is something to do
1538 // (like shutdown).
1539 while ((result == null) && (!asyncParsingComplete.get()))
1540 {
1541 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
1542 }
1543
1544 // There's a very small chance that we missed the value, so double-check
1545 if (result == null)
1546 {
1547 result = asyncParsedRecords.poll();
1548 }
1549 }
1550 catch (InterruptedException e)
1551 {
1552 debugException(e);
1553 Thread.currentThread().interrupt();
1554 throw createIOExceptionWithCause(null, e);
1555 }
1556 }
1557 if (result == null)
1558 {
1559 return null;
1560 }
1561
1562 rethrow(result.getFailureCause());
1563
1564 // Check if we reached the end of the input
1565 final UnparsedLDIFRecord unparsedRecord = result.getInput();
1566 if (unparsedRecord.isEOF())
1567 {
1568 // This might have been set already by the LineReaderThread, but
1569 // just in case it hasn't gotten to it yet, do so here.
1570 asyncParsingComplete.set(true);
1571
1572 // Enqueue this EOF result again for any other thread that might be
1573 // blocked in asyncParsedRecords.take() even though having multiple
1574 // threads call this method concurrently breaks the contract of this
1575 // class.
1576 try
1577 {
1578 asyncParsedRecords.put(result);
1579 }
1580 catch (InterruptedException e)
1581 {
1582 // We shouldn't ever get interrupted because the put won't ever block.
1583 // Once we are done reading, this is the only item left in the queue,
1584 // so we should always be able to re-enqueue it.
1585 debugException(e);
1586 Thread.currentThread().interrupt();
1587 }
1588 return null;
1589 }
1590
1591 return result;
1592 }
1593
1594
1595
1596 /**
1597 * Indicates whether this LDIF reader was constructed to perform asynchronous
1598 * processing.
1599 *
1600 * @return {@code true} if this LDIFReader was constructed to perform
1601 * asynchronous processing, or {@code false} if not.
1602 */
1603 private boolean isAsync()
1604 {
1605 return isAsync;
1606 }
1607
1608
1609
1610 /**
1611 * If not {@code null}, rethrows the specified Throwable as either an
1612 * IOException or LDIFException.
1613 *
1614 * @param t The exception to rethrow. If it's {@code null}, then nothing
1615 * is thrown.
1616 *
1617 * @throws IOException If t is an IOException or a checked Exception that
1618 * is not an LDIFException.
1619 * @throws LDIFException If t is an LDIFException.
1620 */
1621 static void rethrow(final Throwable t)
1622 throws IOException, LDIFException
1623 {
1624 if (t == null)
1625 {
1626 return;
1627 }
1628
1629 if (t instanceof IOException)
1630 {
1631 throw (IOException) t;
1632 }
1633 else if (t instanceof LDIFException)
1634 {
1635 throw (LDIFException) t;
1636 }
1637 else if (t instanceof RuntimeException)
1638 {
1639 throw (RuntimeException) t;
1640 }
1641 else if (t instanceof Error)
1642 {
1643 throw (Error) t;
1644 }
1645 else
1646 {
1647 throw createIOExceptionWithCause(null, t);
1648 }
1649 }
1650
1651
1652
1653 /**
1654 * Reads a record from the LDIF source. It may be either an entry or an LDIF
1655 * change record.
1656 *
1657 * @return The record read from the LDIF source, or {@code null} if there are
1658 * no more entries to be read.
1659 *
1660 * @throws IOException If a problem occurs while trying to read from the
1661 * LDIF source.
1662 * @throws LDIFException If the data read could not be parsed as an entry or
1663 * an LDIF change record.
1664 */
1665 private LDIFRecord readLDIFRecordInternal()
1666 throws IOException, LDIFException
1667 {
1668 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1669 return decodeRecord(unparsedRecord, relativeBasePath, schema);
1670 }
1671
1672
1673
1674 /**
1675 * Reads an entry from the LDIF source.
1676 *
1677 * @return The entry read from the LDIF source, or {@code null} if there are
1678 * no more entries to be read.
1679 *
1680 * @throws IOException If a problem occurs while attempting to read from the
1681 * LDIF source.
1682 * @throws LDIFException If the data read could not be parsed as an entry.
1683 */
1684 private Entry readEntryInternal()
1685 throws IOException, LDIFException
1686 {
1687 Entry e = null;
1688 while (e == null)
1689 {
1690 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1691 if (unparsedRecord.isEOF())
1692 {
1693 return null;
1694 }
1695
1696 e = decodeEntry(unparsedRecord, relativeBasePath);
1697 debugLDIFRead(e);
1698
1699 if (entryTranslator != null)
1700 {
1701 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber());
1702 }
1703 }
1704 return e;
1705 }
1706
1707
1708
1709 /**
1710 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF
1711 * record does not have a changetype, then it may be assumed to be an add
1712 * change record.
1713 *
1714 * @param defaultAdd Indicates whether an LDIF record not containing a
1715 * changetype should be retrieved as an add change record.
1716 * If this is {@code false} and the record read does not
1717 * include a changetype, then an {@link LDIFException} will
1718 * be thrown.
1719 *
1720 * @return The change record read from the LDIF source, or {@code null} if
1721 * there are no more records to be read.
1722 *
1723 * @throws IOException If a problem occurs while attempting to read from the
1724 * LDIF source.
1725 * @throws LDIFException If the data read could not be parsed as an LDIF
1726 * change record.
1727 */
1728 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
1729 throws IOException, LDIFException
1730 {
1731 LDIFChangeRecord r = null;
1732 while (r == null)
1733 {
1734 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1735 if (unparsedRecord.isEOF())
1736 {
1737 return null;
1738 }
1739
1740 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd,
1741 schema);
1742 debugLDIFRead(r);
1743
1744 if (changeRecordTranslator != null)
1745 {
1746 r = changeRecordTranslator.translate(r,
1747 unparsedRecord.getFirstLineNumber());
1748 }
1749 }
1750 return r;
1751 }
1752
1753
1754
1755 /**
1756 * Reads a record (either an entry or a change record) from the LDIF source
1757 * and places it in the line list.
1758 *
1759 * @return The line number for the first line of the entry that was read.
1760 *
1761 * @throws IOException If a problem occurs while attempting to read from the
1762 * LDIF source.
1763 *
1764 * @throws LDIFException If the data read could not be parsed as a valid
1765 * LDIF record.
1766 */
1767 private UnparsedLDIFRecord readUnparsedRecord()
1768 throws IOException, LDIFException
1769 {
1770 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
1771 boolean lastWasComment = false;
1772 long firstLineNumber = lineNumberCounter + 1;
1773 while (true)
1774 {
1775 final String line = reader.readLine();
1776 lineNumberCounter++;
1777
1778 if (line == null)
1779 {
1780 // We've hit the end of the LDIF source. If we haven't read any entry
1781 // data, then return null. Otherwise, the last entry wasn't followed by
1782 // a blank line, which is OK, and we should decode that entry.
1783 if (lineList.isEmpty())
1784 {
1785 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
1786 duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
1787 }
1788 else
1789 {
1790 break;
1791 }
1792 }
1793
1794 if (line.length() == 0)
1795 {
1796 // It's a blank line. If we have read entry data, then this signals the
1797 // end of the entry. Otherwise, it's an extra space between entries,
1798 // which is OK.
1799 lastWasComment = false;
1800 if (lineList.isEmpty())
1801 {
1802 firstLineNumber++;
1803 continue;
1804 }
1805 else
1806 {
1807 break;
1808 }
1809 }
1810
1811 if (line.charAt(0) == ' ')
1812 {
1813 // The line starts with a space, which means that it must be a
1814 // continuation of the previous line. This is true even if the last
1815 // line was a comment.
1816 if (lastWasComment)
1817 {
1818 // What we've read is part of a comment, so we don't care about its
1819 // content.
1820 }
1821 else if (lineList.isEmpty())
1822 {
1823 throw new LDIFException(
1824 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
1825 lineNumberCounter, false);
1826 }
1827 else
1828 {
1829 lineList.get(lineList.size() - 1).append(line.substring(1));
1830 lastWasComment = false;
1831 }
1832 }
1833 else if (line.charAt(0) == '#')
1834 {
1835 lastWasComment = true;
1836 }
1837 else
1838 {
1839 // We want to make sure that we skip over the "version:" line if it
1840 // exists, but that should only occur at the beginning of an entry where
1841 // it can't be confused with a possible "version" attribute.
1842 if (lineList.isEmpty() && line.startsWith("version:"))
1843 {
1844 lastWasComment = true;
1845 }
1846 else
1847 {
1848 lineList.add(new StringBuilder(line));
1849 lastWasComment = false;
1850 }
1851 }
1852 }
1853
1854 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1855 trailingSpaceBehavior, schema, firstLineNumber);
1856 }
1857
1858
1859
1860 /**
1861 * Decodes the provided set of LDIF lines as an entry. The provided set of
1862 * lines must contain exactly one entry. Long lines may be wrapped as per the
1863 * LDIF specification, and it is acceptable to have one or more blank lines
1864 * following the entry. A default trailing space behavior of
1865 * {@link TrailingSpaceBehavior#REJECT} will be used.
1866 *
1867 * @param ldifLines The set of lines that comprise the LDIF representation
1868 * of the entry. It must not be {@code null} or empty.
1869 *
1870 * @return The entry read from LDIF.
1871 *
1872 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1873 * entry.
1874 */
1875 public static Entry decodeEntry(final String... ldifLines)
1876 throws LDIFException
1877 {
1878 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP,
1879 TrailingSpaceBehavior.REJECT, null, ldifLines),
1880 DEFAULT_RELATIVE_BASE_PATH);
1881 debugLDIFRead(e);
1882 return e;
1883 }
1884
1885
1886
1887 /**
1888 * Decodes the provided set of LDIF lines as an entry. The provided set of
1889 * lines must contain exactly one entry. Long lines may be wrapped as per the
1890 * LDIF specification, and it is acceptable to have one or more blank lines
1891 * following the entry. A default trailing space behavior of
1892 * {@link TrailingSpaceBehavior#REJECT} will be used.
1893 *
1894 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1895 * attribute values encountered while parsing.
1896 * @param schema The schema to use when parsing the record,
1897 * if applicable.
1898 * @param ldifLines The set of lines that comprise the LDIF
1899 * representation of the entry. It must not be
1900 * {@code null} or empty.
1901 *
1902 * @return The entry read from LDIF.
1903 *
1904 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1905 * entry.
1906 */
1907 public static Entry decodeEntry(final boolean ignoreDuplicateValues,
1908 final Schema schema,
1909 final String... ldifLines)
1910 throws LDIFException
1911 {
1912 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT,
1913 schema, ldifLines);
1914 }
1915
1916
1917
1918 /**
1919 * Decodes the provided set of LDIF lines as an entry. The provided set of
1920 * lines must contain exactly one entry. Long lines may be wrapped as per the
1921 * LDIF specification, and it is acceptable to have one or more blank lines
1922 * following the entry.
1923 *
1924 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
1925 * attribute values encountered while parsing.
1926 * @param trailingSpaceBehavior The behavior that should be exhibited when
1927 * encountering attribute values which are not
1928 * base64-encoded but contain trailing spaces.
1929 * It must not be {@code null}.
1930 * @param schema The schema to use when parsing the record,
1931 * if applicable.
1932 * @param ldifLines The set of lines that comprise the LDIF
1933 * representation of the entry. It must not be
1934 * {@code null} or empty.
1935 *
1936 * @return The entry read from LDIF.
1937 *
1938 * @throws LDIFException If the provided LDIF data cannot be decoded as an
1939 * entry.
1940 */
1941 public static Entry decodeEntry(
1942 final boolean ignoreDuplicateValues,
1943 final TrailingSpaceBehavior trailingSpaceBehavior,
1944 final Schema schema,
1945 final String... ldifLines) throws LDIFException
1946 {
1947 final Entry e = decodeEntry(prepareRecord(
1948 (ignoreDuplicateValues
1949 ? DuplicateValueBehavior.STRIP
1950 : DuplicateValueBehavior.REJECT),
1951 trailingSpaceBehavior, schema, ldifLines),
1952 DEFAULT_RELATIVE_BASE_PATH);
1953 debugLDIFRead(e);
1954 return e;
1955 }
1956
1957
1958
1959 /**
1960 * Decodes the provided set of LDIF lines as an LDIF change record. The
1961 * provided set of lines must contain exactly one change record and it must
1962 * include a changetype. Long lines may be wrapped as per the LDIF
1963 * specification, and it is acceptable to have one or more blank lines
1964 * following the entry.
1965 *
1966 * @param ldifLines The set of lines that comprise the LDIF representation
1967 * of the change record. It must not be {@code null} or
1968 * empty.
1969 *
1970 * @return The change record read from LDIF.
1971 *
1972 * @throws LDIFException If the provided LDIF data cannot be decoded as a
1973 * change record.
1974 */
1975 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines)
1976 throws LDIFException
1977 {
1978 return decodeChangeRecord(false, ldifLines);
1979 }
1980
1981
1982
1983 /**
1984 * Decodes the provided set of LDIF lines as an LDIF change record. The
1985 * provided set of lines must contain exactly one change record. Long lines
1986 * may be wrapped as per the LDIF specification, and it is acceptable to have
1987 * one or more blank lines following the entry.
1988 *
1989 * @param defaultAdd Indicates whether an LDIF record not containing a
1990 * changetype should be retrieved as an add change record.
1991 * If this is {@code false} and the record read does not
1992 * include a changetype, then an {@link LDIFException}
1993 * will be thrown.
1994 * @param ldifLines The set of lines that comprise the LDIF representation
1995 * of the change record. It must not be {@code null} or
1996 * empty.
1997 *
1998 * @return The change record read from LDIF.
1999 *
2000 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2001 * change record.
2002 */
2003 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd,
2004 final String... ldifLines)
2005 throws LDIFException
2006 {
2007 final LDIFChangeRecord r =
2008 decodeChangeRecord(
2009 prepareRecord(DuplicateValueBehavior.STRIP,
2010 TrailingSpaceBehavior.REJECT, null, ldifLines),
2011 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
2012 debugLDIFRead(r);
2013 return r;
2014 }
2015
2016
2017
2018 /**
2019 * Decodes the provided set of LDIF lines as an LDIF change record. The
2020 * provided set of lines must contain exactly one change record. Long lines
2021 * may be wrapped as per the LDIF specification, and it is acceptable to have
2022 * one or more blank lines following the entry.
2023 *
2024 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
2025 * attribute values encountered while parsing.
2026 * @param schema The schema to use when processing the change
2027 * record, or {@code null} if no schema should
2028 * be used and all values should be treated as
2029 * case-insensitive strings.
2030 * @param defaultAdd Indicates whether an LDIF record not
2031 * containing a changetype should be retrieved
2032 * as an add change record. If this is
2033 * {@code false} and the record read does not
2034 * include a changetype, then an
2035 * {@link LDIFException} will be thrown.
2036 * @param ldifLines The set of lines that comprise the LDIF
2037 * representation of the change record. It
2038 * must not be {@code null} or empty.
2039 *
2040 * @return The change record read from LDIF.
2041 *
2042 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2043 * change record.
2044 */
2045 public static LDIFChangeRecord decodeChangeRecord(
2046 final boolean ignoreDuplicateValues,
2047 final Schema schema,
2048 final boolean defaultAdd,
2049 final String... ldifLines)
2050 throws LDIFException
2051 {
2052 return decodeChangeRecord(ignoreDuplicateValues,
2053 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines);
2054 }
2055
2056
2057
2058 /**
2059 * Decodes the provided set of LDIF lines as an LDIF change record. The
2060 * provided set of lines must contain exactly one change record. Long lines
2061 * may be wrapped as per the LDIF specification, and it is acceptable to have
2062 * one or more blank lines following the entry.
2063 *
2064 * @param ignoreDuplicateValues Indicates whether to ignore duplicate
2065 * attribute values encountered while parsing.
2066 * @param trailingSpaceBehavior The behavior that should be exhibited when
2067 * encountering attribute values which are not
2068 * base64-encoded but contain trailing spaces.
2069 * It must not be {@code null}.
2070 * @param schema The schema to use when processing the change
2071 * record, or {@code null} if no schema should
2072 * be used and all values should be treated as
2073 * case-insensitive strings.
2074 * @param defaultAdd Indicates whether an LDIF record not
2075 * containing a changetype should be retrieved
2076 * as an add change record. If this is
2077 * {@code false} and the record read does not
2078 * include a changetype, then an
2079 * {@link LDIFException} will be thrown.
2080 * @param ldifLines The set of lines that comprise the LDIF
2081 * representation of the change record. It
2082 * must not be {@code null} or empty.
2083 *
2084 * @return The change record read from LDIF.
2085 *
2086 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2087 * change record.
2088 */
2089 public static LDIFChangeRecord decodeChangeRecord(
2090 final boolean ignoreDuplicateValues,
2091 final TrailingSpaceBehavior trailingSpaceBehavior,
2092 final Schema schema,
2093 final boolean defaultAdd,
2094 final String... ldifLines)
2095 throws LDIFException
2096 {
2097 final LDIFChangeRecord r = decodeChangeRecord(
2098 prepareRecord(
2099 (ignoreDuplicateValues
2100 ? DuplicateValueBehavior.STRIP
2101 : DuplicateValueBehavior.REJECT),
2102 trailingSpaceBehavior, schema, ldifLines),
2103 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
2104 debugLDIFRead(r);
2105 return r;
2106 }
2107
2108
2109
2110 /**
2111 * Parses the provided set of lines into a list of {@code StringBuilder}
2112 * objects suitable for decoding into an entry or LDIF change record.
2113 * Comments will be ignored and wrapped lines will be unwrapped.
2114 *
2115 * @param duplicateValueBehavior The behavior that should be exhibited if
2116 * the LDIF reader encounters an entry with
2117 * duplicate values.
2118 * @param trailingSpaceBehavior The behavior that should be exhibited when
2119 * encountering attribute values which are not
2120 * base64-encoded but contain trailing spaces.
2121 * @param schema The schema to use when parsing the record,
2122 * if applicable.
2123 * @param ldifLines The set of lines that comprise the record
2124 * to decode. It must not be {@code null} or
2125 * empty.
2126 *
2127 * @return The prepared list of {@code StringBuilder} objects ready to be
2128 * decoded.
2129 *
2130 * @throws LDIFException If the provided lines do not contain valid LDIF
2131 * content.
2132 */
2133 private static UnparsedLDIFRecord prepareRecord(
2134 final DuplicateValueBehavior duplicateValueBehavior,
2135 final TrailingSpaceBehavior trailingSpaceBehavior,
2136 final Schema schema, final String... ldifLines)
2137 throws LDIFException
2138 {
2139 ensureNotNull(ldifLines);
2140 ensureFalse(ldifLines.length == 0,
2141 "LDIFReader.prepareRecord.ldifLines must not be empty.");
2142
2143 boolean lastWasComment = false;
2144 final ArrayList<StringBuilder> lineList =
2145 new ArrayList<StringBuilder>(ldifLines.length);
2146 for (int i=0; i < ldifLines.length; i++)
2147 {
2148 final String line = ldifLines[i];
2149 if (line.length() == 0)
2150 {
2151 // This is only acceptable if there are no more non-empty lines in the
2152 // array.
2153 for (int j=i+1; j < ldifLines.length; j++)
2154 {
2155 if (ldifLines[j].length() > 0)
2156 {
2157 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true,
2158 ldifLines, null);
2159 }
2160
2161 // If we've gotten here, then we know that we're at the end of the
2162 // entry. If we have read data, then we can decode it as an entry.
2163 // Otherwise, there was no real data in the provided LDIF lines.
2164 if (lineList.isEmpty())
2165 {
2166 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true,
2167 ldifLines, null);
2168 }
2169 else
2170 {
2171 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2172 trailingSpaceBehavior, schema, 0);
2173 }
2174 }
2175 }
2176
2177 if (line.charAt(0) == ' ')
2178 {
2179 if (i > 0)
2180 {
2181 if (! lastWasComment)
2182 {
2183 lineList.get(lineList.size() - 1).append(line.substring(1));
2184 }
2185 }
2186 else
2187 {
2188 throw new LDIFException(
2189 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0,
2190 true, ldifLines, null);
2191 }
2192 }
2193 else if (line.charAt(0) == '#')
2194 {
2195 lastWasComment = true;
2196 }
2197 else
2198 {
2199 lineList.add(new StringBuilder(line));
2200 lastWasComment = false;
2201 }
2202 }
2203
2204 if (lineList.isEmpty())
2205 {
2206 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null);
2207 }
2208 else
2209 {
2210 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2211 trailingSpaceBehavior, schema, 0);
2212 }
2213 }
2214
2215
2216
2217 /**
2218 * Decodes the unparsed record that was read from the LDIF source. It may be
2219 * either an entry or an LDIF change record.
2220 *
2221 * @param unparsedRecord The unparsed LDIF record that was read from the
2222 * input. It must not be {@code null} or empty.
2223 * @param relativeBasePath The base path that will be prepended to relative
2224 * paths in order to obtain an absolute path.
2225 * @param schema The schema to use when parsing.
2226 *
2227 * @return The parsed record, or {@code null} if there are no more entries to
2228 * be read.
2229 *
2230 * @throws LDIFException If the data read could not be parsed as an entry or
2231 * an LDIF change record.
2232 */
2233 private static LDIFRecord decodeRecord(
2234 final UnparsedLDIFRecord unparsedRecord,
2235 final String relativeBasePath,
2236 final Schema schema)
2237 throws LDIFException
2238 {
2239 // If there was an error reading from the input, then we rethrow it here.
2240 final Exception readError = unparsedRecord.getFailureCause();
2241 if (readError != null)
2242 {
2243 if (readError instanceof LDIFException)
2244 {
2245 // If the error was an LDIFException, which will normally be the case,
2246 // then rethrow it with all of the same state. We could just
2247 // throw (LDIFException) readError;
2248 // but that's considered bad form.
2249 final LDIFException ldifEx = (LDIFException) readError;
2250 throw new LDIFException(ldifEx.getMessage(),
2251 ldifEx.getLineNumber(),
2252 ldifEx.mayContinueReading(),
2253 ldifEx.getDataLines(),
2254 ldifEx.getCause());
2255 }
2256 else
2257 {
2258 throw new LDIFException(getExceptionMessage(readError),
2259 -1, true, readError);
2260 }
2261 }
2262
2263 if (unparsedRecord.isEOF())
2264 {
2265 return null;
2266 }
2267
2268 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList();
2269 if (unparsedRecord.getLineList() == null)
2270 {
2271 return null; // We can get here if there was an error reading the lines.
2272 }
2273
2274 final LDIFRecord r;
2275 if (lineList.size() == 1)
2276 {
2277 r = decodeEntry(unparsedRecord, relativeBasePath);
2278 }
2279 else
2280 {
2281 final String lowerSecondLine = toLowerCase(lineList.get(1).toString());
2282 if (lowerSecondLine.startsWith("control:") ||
2283 lowerSecondLine.startsWith("changetype:"))
2284 {
2285 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema);
2286 }
2287 else
2288 {
2289 r = decodeEntry(unparsedRecord, relativeBasePath);
2290 }
2291 }
2292
2293 debugLDIFRead(r);
2294 return r;
2295 }
2296
2297
2298
2299 /**
2300 * Decodes the provided set of LDIF lines as an entry. The provided list must
2301 * not contain any blank lines or comments, and lines are not allowed to be
2302 * wrapped.
2303 *
2304 * @param unparsedRecord The unparsed LDIF record that was read from the
2305 * input. It must not be {@code null} or empty.
2306 * @param relativeBasePath The base path that will be prepended to relative
2307 * paths in order to obtain an absolute path.
2308 *
2309 * @return The entry read from LDIF.
2310 *
2311 * @throws LDIFException If the provided LDIF data cannot be read as an
2312 * entry.
2313 */
2314 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord,
2315 final String relativeBasePath)
2316 throws LDIFException
2317 {
2318 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2319 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2320
2321 final Iterator<StringBuilder> iterator = ldifLines.iterator();
2322
2323 // The first line must start with either "version:" or "dn:". If the first
2324 // line starts with "version:" then the second must start with "dn:".
2325 StringBuilder line = iterator.next();
2326 handleTrailingSpaces(line, null, firstLineNumber,
2327 unparsedRecord.getTrailingSpaceBehavior());
2328 int colonPos = line.indexOf(":");
2329 if ((colonPos > 0) &&
2330 line.substring(0, colonPos).equalsIgnoreCase("version"))
2331 {
2332 // The first line is "version:". Under most conditions, this will be
2333 // handled by the LDIF reader, but this can happen if you call
2334 // decodeEntry with a set of data that includes a version. At any rate,
2335 // read the next line, which must specify the DN.
2336 line = iterator.next();
2337 handleTrailingSpaces(line, null, firstLineNumber,
2338 unparsedRecord.getTrailingSpaceBehavior());
2339 }
2340
2341 colonPos = line.indexOf(":");
2342 if ((colonPos < 0) ||
2343 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2344 {
2345 throw new LDIFException(
2346 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2347 firstLineNumber, true, ldifLines, null);
2348 }
2349
2350 final String dn;
2351 final int length = line.length();
2352 if (length == (colonPos+1))
2353 {
2354 // The colon was the last character on the line. This is acceptable and
2355 // indicates that the entry has the null DN.
2356 dn = "";
2357 }
2358 else if (line.charAt(colonPos+1) == ':')
2359 {
2360 // Skip over any spaces leading up to the value, and then the rest of the
2361 // string is the base64-encoded DN.
2362 int pos = colonPos+2;
2363 while ((pos < length) && (line.charAt(pos) == ' '))
2364 {
2365 pos++;
2366 }
2367
2368 try
2369 {
2370 final byte[] dnBytes = Base64.decode(line.substring(pos));
2371 dn = new String(dnBytes, "UTF-8");
2372 }
2373 catch (final ParseException pe)
2374 {
2375 debugException(pe);
2376 throw new LDIFException(
2377 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2378 pe.getMessage()),
2379 firstLineNumber, true, ldifLines, pe);
2380 }
2381 catch (final Exception e)
2382 {
2383 debugException(e);
2384 throw new LDIFException(
2385 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e),
2386 firstLineNumber, true, ldifLines, e);
2387 }
2388 }
2389 else
2390 {
2391 // Skip over any spaces leading up to the value, and then the rest of the
2392 // string is the DN.
2393 int pos = colonPos+1;
2394 while ((pos < length) && (line.charAt(pos) == ' '))
2395 {
2396 pos++;
2397 }
2398
2399 dn = line.substring(pos);
2400 }
2401
2402
2403 // The remaining lines must be the attributes for the entry. However, we
2404 // will allow the case in which an entry does not have any attributes, to be
2405 // able to support reading search result entries in which no attributes were
2406 // returned.
2407 if (! iterator.hasNext())
2408 {
2409 return new Entry(dn, unparsedRecord.getSchema());
2410 }
2411
2412 return new Entry(dn, unparsedRecord.getSchema(),
2413 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2414 unparsedRecord.getTrailingSpaceBehavior(),
2415 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath,
2416 firstLineNumber));
2417 }
2418
2419
2420
2421 /**
2422 * Decodes the provided set of LDIF lines as a change record. The provided
2423 * list must not contain any blank lines or comments, and lines are not
2424 * allowed to be wrapped.
2425 *
2426 * @param unparsedRecord The unparsed LDIF record that was read from the
2427 * input. It must not be {@code null} or empty.
2428 * @param relativeBasePath The base path that will be prepended to relative
2429 * paths in order to obtain an absolute path.
2430 * @param defaultAdd Indicates whether an LDIF record not containing a
2431 * changetype should be retrieved as an add change
2432 * record. If this is {@code false} and the record
2433 * read does not include a changetype, then an
2434 * {@link LDIFException} will be thrown.
2435 * @param schema The schema to use in parsing.
2436 *
2437 * @return The change record read from LDIF.
2438 *
2439 * @throws LDIFException If the provided LDIF data cannot be decoded as a
2440 * change record.
2441 */
2442 private static LDIFChangeRecord decodeChangeRecord(
2443 final UnparsedLDIFRecord unparsedRecord,
2444 final String relativeBasePath,
2445 final boolean defaultAdd,
2446 final Schema schema)
2447 throws LDIFException
2448 {
2449 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2450 final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2451
2452 Iterator<StringBuilder> iterator = ldifLines.iterator();
2453
2454 // The first line must start with either "version:" or "dn:". If the first
2455 // line starts with "version:" then the second must start with "dn:".
2456 StringBuilder line = iterator.next();
2457 handleTrailingSpaces(line, null, firstLineNumber,
2458 unparsedRecord.getTrailingSpaceBehavior());
2459 int colonPos = line.indexOf(":");
2460 int linesRead = 1;
2461 if ((colonPos > 0) &&
2462 line.substring(0, colonPos).equalsIgnoreCase("version"))
2463 {
2464 // The first line is "version:". Under most conditions, this will be
2465 // handled by the LDIF reader, but this can happen if you call
2466 // decodeEntry with a set of data that includes a version. At any rate,
2467 // read the next line, which must specify the DN.
2468 line = iterator.next();
2469 linesRead++;
2470 handleTrailingSpaces(line, null, firstLineNumber,
2471 unparsedRecord.getTrailingSpaceBehavior());
2472 }
2473
2474 colonPos = line.indexOf(":");
2475 if ((colonPos < 0) ||
2476 (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2477 {
2478 throw new LDIFException(
2479 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2480 firstLineNumber, true, ldifLines, null);
2481 }
2482
2483 final String dn;
2484 int length = line.length();
2485 if (length == (colonPos+1))
2486 {
2487 // The colon was the last character on the line. This is acceptable and
2488 // indicates that the entry has the null DN.
2489 dn = "";
2490 }
2491 else if (line.charAt(colonPos+1) == ':')
2492 {
2493 // Skip over any spaces leading up to the value, and then the rest of the
2494 // string is the base64-encoded DN.
2495 int pos = colonPos+2;
2496 while ((pos < length) && (line.charAt(pos) == ' '))
2497 {
2498 pos++;
2499 }
2500
2501 try
2502 {
2503 final byte[] dnBytes = Base64.decode(line.substring(pos));
2504 dn = new String(dnBytes, "UTF-8");
2505 }
2506 catch (final ParseException pe)
2507 {
2508 debugException(pe);
2509 throw new LDIFException(
2510 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2511 pe.getMessage()),
2512 firstLineNumber, true, ldifLines, pe);
2513 }
2514 catch (final Exception e)
2515 {
2516 debugException(e);
2517 throw new LDIFException(
2518 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2519 e),
2520 firstLineNumber, true, ldifLines, e);
2521 }
2522 }
2523 else
2524 {
2525 // Skip over any spaces leading up to the value, and then the rest of the
2526 // string is the DN.
2527 int pos = colonPos+1;
2528 while ((pos < length) && (line.charAt(pos) == ' '))
2529 {
2530 pos++;
2531 }
2532
2533 dn = line.substring(pos);
2534 }
2535
2536
2537 // An LDIF change record may contain zero or more controls, with the end of
2538 // the controls signified by the changetype. The changetype element must be
2539 // present, unless defaultAdd is true in which case the first thing that is
2540 // neither control or changetype will trigger the start of add attribute
2541 // parsing.
2542 if (! iterator.hasNext())
2543 {
2544 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber),
2545 firstLineNumber, true, ldifLines, null);
2546 }
2547
2548 String changeType = null;
2549 ArrayList<Control> controls = null;
2550 while (true)
2551 {
2552 line = iterator.next();
2553 handleTrailingSpaces(line, dn, firstLineNumber,
2554 unparsedRecord.getTrailingSpaceBehavior());
2555 colonPos = line.indexOf(":");
2556 if (colonPos < 0)
2557 {
2558 throw new LDIFException(
2559 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber),
2560 firstLineNumber, true, ldifLines, null);
2561 }
2562
2563 final String token = toLowerCase(line.substring(0, colonPos));
2564 if (token.equals("control"))
2565 {
2566 if (controls == null)
2567 {
2568 controls = new ArrayList<Control>(5);
2569 }
2570
2571 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines,
2572 relativeBasePath));
2573 }
2574 else if (token.equals("changetype"))
2575 {
2576 changeType =
2577 decodeChangeType(line, colonPos, firstLineNumber, ldifLines);
2578 break;
2579 }
2580 else if (defaultAdd)
2581 {
2582 // The line we read wasn't a control or changetype declaration, so we'll
2583 // assume it's an attribute in an add record. However, we're not ready
2584 // for that yet, and since we can't rewind an iterator we'll create a
2585 // new one that hasn't yet gotten to this line.
2586 changeType = "add";
2587 iterator = ldifLines.iterator();
2588 for (int i=0; i < linesRead; i++)
2589 {
2590 iterator.next();
2591 }
2592 break;
2593 }
2594 else
2595 {
2596 throw new LDIFException(
2597 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get(
2598 firstLineNumber),
2599 firstLineNumber, true, ldifLines, null);
2600 }
2601
2602 linesRead++;
2603 }
2604
2605
2606 // Make sure that the change type is acceptable and then decode the rest of
2607 // the change record accordingly.
2608 final String lowerChangeType = toLowerCase(changeType);
2609 if (lowerChangeType.equals("add"))
2610 {
2611 // There must be at least one more line. If not, then that's an error.
2612 // Otherwise, parse the rest of the data as attribute-value pairs.
2613 if (iterator.hasNext())
2614 {
2615 final Collection<Attribute> attrs =
2616 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2617 unparsedRecord.getTrailingSpaceBehavior(),
2618 unparsedRecord.getSchema(), ldifLines, iterator,
2619 relativeBasePath, firstLineNumber);
2620 final Attribute[] attributes = new Attribute[attrs.size()];
2621 final Iterator<Attribute> attrIterator = attrs.iterator();
2622 for (int i=0; i < attributes.length; i++)
2623 {
2624 attributes[i] = attrIterator.next();
2625 }
2626
2627 return new LDIFAddChangeRecord(dn, attributes, controls);
2628 }
2629 else
2630 {
2631 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber),
2632 firstLineNumber, true, ldifLines, null);
2633 }
2634 }
2635 else if (lowerChangeType.equals("delete"))
2636 {
2637 // There shouldn't be any more data. If there is, then that's an error.
2638 // Otherwise, we can just return the delete change record with what we
2639 // already know.
2640 if (iterator.hasNext())
2641 {
2642 throw new LDIFException(
2643 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber),
2644 firstLineNumber, true, ldifLines, null);
2645 }
2646 else
2647 {
2648 return new LDIFDeleteChangeRecord(dn, controls);
2649 }
2650 }
2651 else if (lowerChangeType.equals("modify"))
2652 {
2653 // There must be at least one more line. If not, then that's an error.
2654 // Otherwise, parse the rest of the data as a set of modifications.
2655 if (iterator.hasNext())
2656 {
2657 final Modification[] mods = parseModifications(dn,
2658 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator,
2659 firstLineNumber, schema);
2660 return new LDIFModifyChangeRecord(dn, mods, controls);
2661 }
2662 else
2663 {
2664 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber),
2665 firstLineNumber, true, ldifLines, null);
2666 }
2667 }
2668 else if (lowerChangeType.equals("moddn") ||
2669 lowerChangeType.equals("modrdn"))
2670 {
2671 // There must be at least one more line. If not, then that's an error.
2672 // Otherwise, parse the rest of the data as a set of modifications.
2673 if (iterator.hasNext())
2674 {
2675 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls,
2676 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber);
2677 }
2678 else
2679 {
2680 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber),
2681 firstLineNumber, true, ldifLines, null);
2682 }
2683 }
2684 else
2685 {
2686 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType,
2687 firstLineNumber),
2688 firstLineNumber, true, ldifLines, null);
2689 }
2690 }
2691
2692
2693
2694 /**
2695 * Decodes information about a control from the provided line.
2696 *
2697 * @param line The line to process.
2698 * @param colonPos The position of the colon that separates the
2699 * control token string from tbe encoded control.
2700 * @param firstLineNumber The line number for the start of the record.
2701 * @param ldifLines The lines that comprise the LDIF representation
2702 * of the full record being parsed.
2703 * @param relativeBasePath The base path that will be prepended to relative
2704 * paths in order to obtain an absolute path.
2705 *
2706 * @return The decoded control.
2707 *
2708 * @throws LDIFException If a problem is encountered while trying to decode
2709 * the changetype.
2710 */
2711 private static Control decodeControl(final StringBuilder line,
2712 final int colonPos,
2713 final long firstLineNumber,
2714 final ArrayList<StringBuilder> ldifLines,
2715 final String relativeBasePath)
2716 throws LDIFException
2717 {
2718 final String controlString;
2719 int length = line.length();
2720 if (length == (colonPos+1))
2721 {
2722 // The colon was the last character on the line. This is not
2723 // acceptable.
2724 throw new LDIFException(
2725 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2726 firstLineNumber, true, ldifLines, null);
2727 }
2728 else if (line.charAt(colonPos+1) == ':')
2729 {
2730 // Skip over any spaces leading up to the value, and then the rest of
2731 // the string is the base64-encoded control representation. This is
2732 // unusual and unnecessary, but is nevertheless acceptable.
2733 int pos = colonPos+2;
2734 while ((pos < length) && (line.charAt(pos) == ' '))
2735 {
2736 pos++;
2737 }
2738
2739 try
2740 {
2741 final byte[] controlBytes = Base64.decode(line.substring(pos));
2742 controlString = new String(controlBytes, "UTF-8");
2743 }
2744 catch (final ParseException pe)
2745 {
2746 debugException(pe);
2747 throw new LDIFException(
2748 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(
2749 firstLineNumber, pe.getMessage()),
2750 firstLineNumber, true, ldifLines, pe);
2751 }
2752 catch (final Exception e)
2753 {
2754 debugException(e);
2755 throw new LDIFException(
2756 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e),
2757 firstLineNumber, true, ldifLines, e);
2758 }
2759 }
2760 else
2761 {
2762 // Skip over any spaces leading up to the value, and then the rest of
2763 // the string is the encoded control.
2764 int pos = colonPos+1;
2765 while ((pos < length) && (line.charAt(pos) == ' '))
2766 {
2767 pos++;
2768 }
2769
2770 controlString = line.substring(pos);
2771 }
2772
2773 // If the resulting control definition is empty, then that's invalid.
2774 if (controlString.length() == 0)
2775 {
2776 throw new LDIFException(
2777 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2778 firstLineNumber, true, ldifLines, null);
2779 }
2780
2781
2782 // The first element of the control must be the OID, and it must be followed
2783 // by a space (to separate it from the criticality), a colon (to separate it
2784 // from the value and indicate a default criticality of false), or the end
2785 // of the line (to indicate a default criticality of false and no value).
2786 String oid = null;
2787 boolean hasCriticality = false;
2788 boolean hasValue = false;
2789 int pos = 0;
2790 length = controlString.length();
2791 while (pos < length)
2792 {
2793 final char c = controlString.charAt(pos);
2794 if (c == ':')
2795 {
2796 // This indicates that there is no criticality and that the value
2797 // immediately follows the OID.
2798 oid = controlString.substring(0, pos++);
2799 hasValue = true;
2800 break;
2801 }
2802 else if (c == ' ')
2803 {
2804 // This indicates that there is a criticality. We don't know anything
2805 // about the presence of a value yet.
2806 oid = controlString.substring(0, pos++);
2807 hasCriticality = true;
2808 break;
2809 }
2810 else
2811 {
2812 pos++;
2813 }
2814 }
2815
2816 if (oid == null)
2817 {
2818 // This indicates that the string representation of the control is only
2819 // the OID.
2820 return new Control(controlString, false);
2821 }
2822
2823
2824 // See if we need to read the criticality. If so, then do so now.
2825 // Otherwise, assume a default criticality of false.
2826 final boolean isCritical;
2827 if (hasCriticality)
2828 {
2829 // Skip over any spaces before the criticality.
2830 while (controlString.charAt(pos) == ' ')
2831 {
2832 pos++;
2833 }
2834
2835 // Read until we find a colon or the end of the string.
2836 final int criticalityStartPos = pos;
2837 while (pos < length)
2838 {
2839 final char c = controlString.charAt(pos);
2840 if (c == ':')
2841 {
2842 hasValue = true;
2843 break;
2844 }
2845 else
2846 {
2847 pos++;
2848 }
2849 }
2850
2851 final String criticalityString =
2852 toLowerCase(controlString.substring(criticalityStartPos, pos));
2853 if (criticalityString.equals("true"))
2854 {
2855 isCritical = true;
2856 }
2857 else if (criticalityString.equals("false"))
2858 {
2859 isCritical = false;
2860 }
2861 else
2862 {
2863 throw new LDIFException(
2864 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString,
2865 firstLineNumber),
2866 firstLineNumber, true, ldifLines, null);
2867 }
2868
2869 if (hasValue)
2870 {
2871 pos++;
2872 }
2873 }
2874 else
2875 {
2876 isCritical = false;
2877 }
2878
2879 // See if we need to read the value. If so, then do so now. It may be
2880 // a string, or it may be base64-encoded. It could conceivably even be read
2881 // from a URL.
2882 final ASN1OctetString value;
2883 if (hasValue)
2884 {
2885 // The character immediately after the colon that precedes the value may
2886 // be one of the following:
2887 // - A second colon (optionally followed by a single space) to indicate
2888 // that the value is base64-encoded.
2889 // - A less-than symbol to indicate that the value should be read from a
2890 // location specified by a URL.
2891 // - A single space that precedes the non-base64-encoded value.
2892 // - The first character of the non-base64-encoded value.
2893 switch (controlString.charAt(pos))
2894 {
2895 case ':':
2896 try
2897 {
2898 if (controlString.length() == (pos+1))
2899 {
2900 value = new ASN1OctetString();
2901 }
2902 else if (controlString.charAt(pos+1) == ' ')
2903 {
2904 value = new ASN1OctetString(
2905 Base64.decode(controlString.substring(pos+2)));
2906 }
2907 else
2908 {
2909 value = new ASN1OctetString(
2910 Base64.decode(controlString.substring(pos+1)));
2911 }
2912 }
2913 catch (final Exception e)
2914 {
2915 debugException(e);
2916 throw new LDIFException(
2917 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get(
2918 firstLineNumber, getExceptionMessage(e)),
2919 firstLineNumber, true, ldifLines, e);
2920 }
2921 break;
2922 case '<':
2923 try
2924 {
2925 final String urlString;
2926 if (controlString.charAt(pos+1) == ' ')
2927 {
2928 urlString = controlString.substring(pos+2);
2929 }
2930 else
2931 {
2932 urlString = controlString.substring(pos+1);
2933 }
2934 value = new ASN1OctetString(retrieveURLBytes(urlString,
2935 relativeBasePath, firstLineNumber));
2936 }
2937 catch (final Exception e)
2938 {
2939 debugException(e);
2940 throw new LDIFException(
2941 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get(
2942 firstLineNumber, getExceptionMessage(e)),
2943 firstLineNumber, true, ldifLines, e);
2944 }
2945 break;
2946 case ' ':
2947 value = new ASN1OctetString(controlString.substring(pos+1));
2948 break;
2949 default:
2950 value = new ASN1OctetString(controlString.substring(pos));
2951 break;
2952 }
2953 }
2954 else
2955 {
2956 value = null;
2957 }
2958
2959 return new Control(oid, isCritical, value);
2960 }
2961
2962
2963
2964 /**
2965 * Decodes the changetype element from the provided line.
2966 *
2967 * @param line The line to process.
2968 * @param colonPos The position of the colon that separates the
2969 * changetype string from its value.
2970 * @param firstLineNumber The line number for the start of the record.
2971 * @param ldifLines The lines that comprise the LDIF representation of
2972 * the full record being parsed.
2973 *
2974 * @return The decoded changetype string.
2975 *
2976 * @throws LDIFException If a problem is encountered while trying to decode
2977 * the changetype.
2978 */
2979 private static String decodeChangeType(final StringBuilder line,
2980 final int colonPos, final long firstLineNumber,
2981 final ArrayList<StringBuilder> ldifLines)
2982 throws LDIFException
2983 {
2984 final int length = line.length();
2985 if (length == (colonPos+1))
2986 {
2987 // The colon was the last character on the line. This is not
2988 // acceptable.
2989 throw new LDIFException(
2990 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber,
2991 true, ldifLines, null);
2992 }
2993 else if (line.charAt(colonPos+1) == ':')
2994 {
2995 // Skip over any spaces leading up to the value, and then the rest of
2996 // the string is the base64-encoded changetype. This is unusual and
2997 // unnecessary, but is nevertheless acceptable.
2998 int pos = colonPos+2;
2999 while ((pos < length) && (line.charAt(pos) == ' '))
3000 {
3001 pos++;
3002 }
3003
3004 try
3005 {
3006 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3007 return new String(changeTypeBytes, "UTF-8");
3008 }
3009 catch (final ParseException pe)
3010 {
3011 debugException(pe);
3012 throw new LDIFException(
3013 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber,
3014 pe.getMessage()),
3015 firstLineNumber, true, ldifLines, pe);
3016 }
3017 catch (final Exception e)
3018 {
3019 debugException(e);
3020 throw new LDIFException(
3021 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e),
3022 firstLineNumber, true, ldifLines, e);
3023 }
3024 }
3025 else
3026 {
3027 // Skip over any spaces leading up to the value, and then the rest of
3028 // the string is the changetype.
3029 int pos = colonPos+1;
3030 while ((pos < length) && (line.charAt(pos) == ' '))
3031 {
3032 pos++;
3033 }
3034
3035 return line.substring(pos);
3036 }
3037 }
3038
3039
3040
3041 /**
3042 * Parses the data available through the provided iterator as a collection of
3043 * attributes suitable for use in an entry or an add change record.
3044 *
3045 * @param dn The DN of the record being read.
3046 * @param duplicateValueBehavior The behavior that should be exhibited if
3047 * the LDIF reader encounters an entry with
3048 * duplicate values.
3049 * @param trailingSpaceBehavior The behavior that should be exhibited when
3050 * encountering attribute values which are not
3051 * base64-encoded but contain trailing spaces.
3052 * @param schema The schema to use when parsing the
3053 * attributes, or {@code null} if none is
3054 * needed.
3055 * @param ldifLines The lines that comprise the LDIF
3056 * representation of the full record being
3057 * parsed.
3058 * @param iterator The iterator to use to access the attribute
3059 * lines.
3060 * @param relativeBasePath The base path that will be prepended to
3061 * relative paths in order to obtain an
3062 * absolute path.
3063 * @param firstLineNumber The line number for the start of the
3064 * record.
3065 *
3066 * @return The collection of attributes that were read.
3067 *
3068 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3069 * set of attributes.
3070 */
3071 private static ArrayList<Attribute> parseAttributes(final String dn,
3072 final DuplicateValueBehavior duplicateValueBehavior,
3073 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema,
3074 final ArrayList<StringBuilder> ldifLines,
3075 final Iterator<StringBuilder> iterator, final String relativeBasePath,
3076 final long firstLineNumber)
3077 throws LDIFException
3078 {
3079 final LinkedHashMap<String,Object> attributes =
3080 new LinkedHashMap<String,Object>(ldifLines.size());
3081 while (iterator.hasNext())
3082 {
3083 final StringBuilder line = iterator.next();
3084 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3085 final int colonPos = line.indexOf(":");
3086 if (colonPos <= 0)
3087 {
3088 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
3089 firstLineNumber, true, ldifLines, null);
3090 }
3091
3092 final String attributeName = line.substring(0, colonPos);
3093 final String lowerName = toLowerCase(attributeName);
3094
3095 final MatchingRule matchingRule;
3096 if (schema == null)
3097 {
3098 matchingRule = CaseIgnoreStringMatchingRule.getInstance();
3099 }
3100 else
3101 {
3102 matchingRule =
3103 MatchingRule.selectEqualityMatchingRule(attributeName, schema);
3104 }
3105
3106 Attribute attr;
3107 final LDIFAttribute ldifAttr;
3108 final Object attrObject = attributes.get(lowerName);
3109 if (attrObject == null)
3110 {
3111 attr = null;
3112 ldifAttr = null;
3113 }
3114 else
3115 {
3116 if (attrObject instanceof Attribute)
3117 {
3118 attr = (Attribute) attrObject;
3119 ldifAttr = new LDIFAttribute(attr.getName(), matchingRule,
3120 attr.getRawValues()[0]);
3121 attributes.put(lowerName, ldifAttr);
3122 }
3123 else
3124 {
3125 attr = null;
3126 ldifAttr = (LDIFAttribute) attrObject;
3127 }
3128 }
3129
3130 final int length = line.length();
3131 if (length == (colonPos+1))
3132 {
3133 // This means that the attribute has a zero-length value, which is
3134 // acceptable.
3135 if (attrObject == null)
3136 {
3137 attr = new Attribute(attributeName, matchingRule, "");
3138 attributes.put(lowerName, attr);
3139 }
3140 else
3141 {
3142 try
3143 {
3144 if (! ldifAttr.addValue(new ASN1OctetString(),
3145 duplicateValueBehavior))
3146 {
3147 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3148 {
3149 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3150 firstLineNumber, attributeName), firstLineNumber, true,
3151 ldifLines, null);
3152 }
3153 }
3154 }
3155 catch (LDAPException le)
3156 {
3157 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3158 firstLineNumber, attributeName, getExceptionMessage(le)),
3159 firstLineNumber, true, ldifLines, le);
3160 }
3161 }
3162 }
3163 else if (line.charAt(colonPos+1) == ':')
3164 {
3165 // Skip over any spaces leading up to the value, and then the rest of
3166 // the string is the base64-encoded attribute value.
3167 int pos = colonPos+2;
3168 while ((pos < length) && (line.charAt(pos) == ' '))
3169 {
3170 pos++;
3171 }
3172
3173 try
3174 {
3175 final byte[] valueBytes = Base64.decode(line.substring(pos));
3176 if (attrObject == null)
3177 {
3178 attr = new Attribute(attributeName, matchingRule, valueBytes);
3179 attributes.put(lowerName, attr);
3180 }
3181 else
3182 {
3183 try
3184 {
3185 if (! ldifAttr.addValue(new ASN1OctetString(valueBytes),
3186 duplicateValueBehavior))
3187 {
3188 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3189 {
3190 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3191 firstLineNumber, attributeName), firstLineNumber, true,
3192 ldifLines, null);
3193 }
3194 }
3195 }
3196 catch (LDAPException le)
3197 {
3198 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3199 firstLineNumber, attributeName, getExceptionMessage(le)),
3200 firstLineNumber, true, ldifLines, le);
3201 }
3202 }
3203 }
3204 catch (final ParseException pe)
3205 {
3206 debugException(pe);
3207 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3208 attributeName, firstLineNumber,
3209 pe.getMessage()),
3210 firstLineNumber, true, ldifLines, pe);
3211 }
3212 }
3213 else if (line.charAt(colonPos+1) == '<')
3214 {
3215 // Skip over any spaces leading up to the value, and then the rest of
3216 // the string is a URL that indicates where to get the real content.
3217 // At the present time, we'll only support the file URLs.
3218 int pos = colonPos+2;
3219 while ((pos < length) && (line.charAt(pos) == ' '))
3220 {
3221 pos++;
3222 }
3223
3224 final byte[] urlBytes;
3225 final String urlString = line.substring(pos);
3226 try
3227 {
3228 urlBytes =
3229 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber);
3230 }
3231 catch (final Exception e)
3232 {
3233 debugException(e);
3234 throw new LDIFException(
3235 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3236 firstLineNumber, e),
3237 firstLineNumber, true, ldifLines, e);
3238 }
3239
3240 if (attrObject == null)
3241 {
3242 attr = new Attribute(attributeName, matchingRule, urlBytes);
3243 attributes.put(lowerName, attr);
3244 }
3245 else
3246 {
3247 try
3248 {
3249 if (! ldifAttr.addValue(new ASN1OctetString(urlBytes),
3250 duplicateValueBehavior))
3251 {
3252 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3253 {
3254 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3255 firstLineNumber, attributeName), firstLineNumber, true,
3256 ldifLines, null);
3257 }
3258 }
3259 }
3260 catch (final LDIFException le)
3261 {
3262 debugException(le);
3263 throw le;
3264 }
3265 catch (final Exception e)
3266 {
3267 debugException(e);
3268 throw new LDIFException(
3269 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3270 firstLineNumber, e),
3271 firstLineNumber, true, ldifLines, e);
3272 }
3273 }
3274 }
3275 else
3276 {
3277 // Skip over any spaces leading up to the value, and then the rest of
3278 // the string is the value.
3279 int pos = colonPos+1;
3280 while ((pos < length) && (line.charAt(pos) == ' '))
3281 {
3282 pos++;
3283 }
3284
3285 final String valueString = line.substring(pos);
3286 if (attrObject == null)
3287 {
3288 attr = new Attribute(attributeName, matchingRule, valueString);
3289 attributes.put(lowerName, attr);
3290 }
3291 else
3292 {
3293 try
3294 {
3295 if (! ldifAttr.addValue(new ASN1OctetString(valueString),
3296 duplicateValueBehavior))
3297 {
3298 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3299 {
3300 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3301 firstLineNumber, attributeName), firstLineNumber, true,
3302 ldifLines, null);
3303 }
3304 }
3305 }
3306 catch (LDAPException le)
3307 {
3308 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3309 firstLineNumber, attributeName, getExceptionMessage(le)),
3310 firstLineNumber, true, ldifLines, le);
3311 }
3312 }
3313 }
3314 }
3315
3316 final ArrayList<Attribute> attrList =
3317 new ArrayList<Attribute>(attributes.size());
3318 for (final Object o : attributes.values())
3319 {
3320 if (o instanceof Attribute)
3321 {
3322 attrList.add((Attribute) o);
3323 }
3324 else
3325 {
3326 attrList.add(((LDIFAttribute) o).toAttribute());
3327 }
3328 }
3329
3330 return attrList;
3331 }
3332
3333
3334
3335 /**
3336 * Retrieves the bytes that make up the file referenced by the given URL.
3337 *
3338 * @param urlString The string representation of the URL to retrieve.
3339 * @param relativeBasePath The base path that will be prepended to relative
3340 * paths in order to obtain an absolute path.
3341 * @param firstLineNumber The line number for the start of the record.
3342 *
3343 * @return The bytes contained in the specified file, or an empty array if
3344 * the specified file is empty.
3345 *
3346 * @throws LDIFException If the provided URL is malformed or references a
3347 * nonexistent file.
3348 *
3349 * @throws IOException If a problem is encountered while attempting to read
3350 * from the target file.
3351 */
3352 private static byte[] retrieveURLBytes(final String urlString,
3353 final String relativeBasePath,
3354 final long firstLineNumber)
3355 throws LDIFException, IOException
3356 {
3357 int pos;
3358 String path;
3359 final String lowerURLString = toLowerCase(urlString);
3360 if (lowerURLString.startsWith("file:/"))
3361 {
3362 pos = 6;
3363 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/'))
3364 {
3365 pos++;
3366 }
3367
3368 path = urlString.substring(pos-1);
3369 }
3370 else if (lowerURLString.startsWith("file:"))
3371 {
3372 // A file: URL that doesn't include a slash will be interpreted as a
3373 // relative path.
3374 path = relativeBasePath + urlString.substring(5);
3375 }
3376 else
3377 {
3378 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString),
3379 firstLineNumber, true);
3380 }
3381
3382 final File f = new File(path);
3383 if (! f.exists())
3384 {
3385 throw new LDIFException(
3386 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()),
3387 firstLineNumber, true);
3388 }
3389
3390 // In order to conserve memory, we'll only allow values to be read from
3391 // files no larger than 10 megabytes.
3392 final long fileSize = f.length();
3393 if (fileSize > (10 * 1024 * 1024))
3394 {
3395 throw new LDIFException(
3396 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(),
3397 (10*1024*1024)),
3398 firstLineNumber, true);
3399 }
3400
3401 int fileBytesRemaining = (int) fileSize;
3402 final byte[] fileData = new byte[(int) fileSize];
3403 final FileInputStream fis = new FileInputStream(f);
3404 try
3405 {
3406 int fileBytesRead = 0;
3407 while (fileBytesRead < fileSize)
3408 {
3409 final int bytesRead =
3410 fis.read(fileData, fileBytesRead, fileBytesRemaining);
3411 if (bytesRead < 0)
3412 {
3413 // We hit the end of the file before we expected to. This shouldn't
3414 // happen unless the file size changed since we first looked at it,
3415 // which we won't allow.
3416 throw new LDIFException(
3417 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString,
3418 f.getAbsolutePath()),
3419 firstLineNumber, true);
3420 }
3421
3422 fileBytesRead += bytesRead;
3423 fileBytesRemaining -= bytesRead;
3424 }
3425
3426 if (fis.read() != -1)
3427 {
3428 // There is still more data to read. This shouldn't happen unless the
3429 // file size changed since we first looked at it, which we won't allow.
3430 throw new LDIFException(
3431 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()),
3432 firstLineNumber, true);
3433 }
3434 }
3435 finally
3436 {
3437 fis.close();
3438 }
3439
3440 return fileData;
3441 }
3442
3443
3444
3445 /**
3446 * Parses the data available through the provided iterator into an array of
3447 * modifications suitable for use in a modify change record.
3448 *
3449 * @param dn The DN of the entry being parsed.
3450 * @param trailingSpaceBehavior The behavior that should be exhibited when
3451 * encountering attribute values which are not
3452 * base64-encoded but contain trailing spaces.
3453 * @param ldifLines The lines that comprise the LDIF
3454 * representation of the full record being
3455 * parsed.
3456 * @param iterator The iterator to use to access the
3457 * modification data.
3458 * @param firstLineNumber The line number for the start of the record.
3459 * @param schema The schema to use in processing.
3460 *
3461 * @return An array containing the modifications that were read.
3462 *
3463 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3464 * set of modifications.
3465 */
3466 private static Modification[] parseModifications(final String dn,
3467 final TrailingSpaceBehavior trailingSpaceBehavior,
3468 final ArrayList<StringBuilder> ldifLines,
3469 final Iterator<StringBuilder> iterator,
3470 final long firstLineNumber, final Schema schema)
3471 throws LDIFException
3472 {
3473 final ArrayList<Modification> modList =
3474 new ArrayList<Modification>(ldifLines.size());
3475
3476 while (iterator.hasNext())
3477 {
3478 // The first line must start with "add:", "delete:", "replace:", or
3479 // "increment:" followed by an attribute name.
3480 StringBuilder line = iterator.next();
3481 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3482 int colonPos = line.indexOf(":");
3483 if (colonPos < 0)
3484 {
3485 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber),
3486 firstLineNumber, true, ldifLines, null);
3487 }
3488
3489 final ModificationType modType;
3490 final String modTypeStr = toLowerCase(line.substring(0, colonPos));
3491 if (modTypeStr.equals("add"))
3492 {
3493 modType = ModificationType.ADD;
3494 }
3495 else if (modTypeStr.equals("delete"))
3496 {
3497 modType = ModificationType.DELETE;
3498 }
3499 else if (modTypeStr.equals("replace"))
3500 {
3501 modType = ModificationType.REPLACE;
3502 }
3503 else if (modTypeStr.equals("increment"))
3504 {
3505 modType = ModificationType.INCREMENT;
3506 }
3507 else
3508 {
3509 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr,
3510 firstLineNumber),
3511 firstLineNumber, true, ldifLines, null);
3512 }
3513
3514 String attributeName;
3515 int length = line.length();
3516 if (length == (colonPos+1))
3517 {
3518 // The colon was the last character on the line. This is not
3519 // acceptable.
3520 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3521 firstLineNumber),
3522 firstLineNumber, true, ldifLines, null);
3523 }
3524 else if (line.charAt(colonPos+1) == ':')
3525 {
3526 // Skip over any spaces leading up to the value, and then the rest of
3527 // the string is the base64-encoded attribute name.
3528 int pos = colonPos+2;
3529 while ((pos < length) && (line.charAt(pos) == ' '))
3530 {
3531 pos++;
3532 }
3533
3534 try
3535 {
3536 final byte[] dnBytes = Base64.decode(line.substring(pos));
3537 attributeName = new String(dnBytes, "UTF-8");
3538 }
3539 catch (final ParseException pe)
3540 {
3541 debugException(pe);
3542 throw new LDIFException(
3543 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3544 firstLineNumber, pe.getMessage()),
3545 firstLineNumber, true, ldifLines, pe);
3546 }
3547 catch (final Exception e)
3548 {
3549 debugException(e);
3550 throw new LDIFException(
3551 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3552 firstLineNumber, e),
3553 firstLineNumber, true, ldifLines, e);
3554 }
3555 }
3556 else
3557 {
3558 // Skip over any spaces leading up to the value, and then the rest of
3559 // the string is the attribute name.
3560 int pos = colonPos+1;
3561 while ((pos < length) && (line.charAt(pos) == ' '))
3562 {
3563 pos++;
3564 }
3565
3566 attributeName = line.substring(pos);
3567 }
3568
3569 if (attributeName.length() == 0)
3570 {
3571 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3572 firstLineNumber),
3573 firstLineNumber, true, ldifLines, null);
3574 }
3575
3576
3577 // The next zero or more lines may be the set of attribute values. Keep
3578 // reading until we reach the end of the iterator or until we find a line
3579 // with just a "-".
3580 final ArrayList<ASN1OctetString> valueList =
3581 new ArrayList<ASN1OctetString>(ldifLines.size());
3582 while (iterator.hasNext())
3583 {
3584 line = iterator.next();
3585 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3586 if (line.toString().equals("-"))
3587 {
3588 break;
3589 }
3590
3591 colonPos = line.indexOf(":");
3592 if (colonPos < 0)
3593 {
3594 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
3595 firstLineNumber, true, ldifLines, null);
3596 }
3597 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName))
3598 {
3599 // There are a couple of cases in which this might be acceptable:
3600 // - If the two names are logically equivalent, but have an alternate
3601 // name (or OID) for the target attribute type, or if there are
3602 // attribute options and the options are just in a different order.
3603 // - If this is the first value for the target attribute and the
3604 // alternate name includes a "binary" option that the original
3605 // attribute name did not have. In this case, all subsequent values
3606 // will also be required to have the binary option.
3607 final String alternateName = line.substring(0, colonPos);
3608
3609
3610 // Check to see if the base names are equivalent.
3611 boolean baseNameEquivalent = false;
3612 final String expectedBaseName = Attribute.getBaseName(attributeName);
3613 final String alternateBaseName = Attribute.getBaseName(alternateName);
3614 if (alternateBaseName.equalsIgnoreCase(expectedBaseName))
3615 {
3616 baseNameEquivalent = true;
3617 }
3618 else
3619 {
3620 if (schema != null)
3621 {
3622 final AttributeTypeDefinition expectedAT =
3623 schema.getAttributeType(expectedBaseName);
3624 final AttributeTypeDefinition alternateAT =
3625 schema.getAttributeType(alternateBaseName);
3626 if ((expectedAT != null) && (alternateAT != null) &&
3627 expectedAT.equals(alternateAT))
3628 {
3629 baseNameEquivalent = true;
3630 }
3631 }
3632 }
3633
3634
3635 // Check to see if the attribute options are equivalent.
3636 final Set<String> expectedOptions =
3637 Attribute.getOptions(attributeName);
3638 final Set<String> lowerExpectedOptions =
3639 new HashSet<String>(expectedOptions.size());
3640 for (final String s : expectedOptions)
3641 {
3642 lowerExpectedOptions.add(toLowerCase(s));
3643 }
3644
3645 final Set<String> alternateOptions =
3646 Attribute.getOptions(alternateName);
3647 final Set<String> lowerAlternateOptions =
3648 new HashSet<String>(alternateOptions.size());
3649 for (final String s : alternateOptions)
3650 {
3651 lowerAlternateOptions.add(toLowerCase(s));
3652 }
3653
3654 final boolean optionsEquivalent =
3655 lowerAlternateOptions.equals(lowerExpectedOptions);
3656
3657
3658 if (baseNameEquivalent && optionsEquivalent)
3659 {
3660 // This is fine. The two attribute descriptions are logically
3661 // equivalent. We'll continue using the attribute description that
3662 // was provided first.
3663 }
3664 else if (valueList.isEmpty() && baseNameEquivalent &&
3665 lowerAlternateOptions.remove("binary") &&
3666 lowerAlternateOptions.equals(lowerExpectedOptions))
3667 {
3668 // This means that the provided value is the first value for the
3669 // attribute, and that the only significant difference is that the
3670 // provided attribute description included an unexpected "binary"
3671 // option. We'll accept this, but will require any additional
3672 // values for this modification to also include the binary option,
3673 // and we'll use the binary option in the attribute that is
3674 // eventually created.
3675 attributeName = alternateName;
3676 }
3677 else
3678 {
3679 // This means that either the base names are different or the sets
3680 // of options are incompatible. This is not acceptable.
3681 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get(
3682 firstLineNumber,
3683 line.substring(0, colonPos),
3684 attributeName),
3685 firstLineNumber, true, ldifLines, null);
3686 }
3687 }
3688
3689 length = line.length();
3690 final ASN1OctetString value;
3691 if (length == (colonPos+1))
3692 {
3693 // The colon was the last character on the line. This is fine.
3694 value = new ASN1OctetString();
3695 }
3696 else if (line.charAt(colonPos+1) == ':')
3697 {
3698 // Skip over any spaces leading up to the value, and then the rest of
3699 // the string is the base64-encoded value. This is unusual and
3700 // unnecessary, but is nevertheless acceptable.
3701 int pos = colonPos+2;
3702 while ((pos < length) && (line.charAt(pos) == ' '))
3703 {
3704 pos++;
3705 }
3706
3707 try
3708 {
3709 value = new ASN1OctetString(Base64.decode(line.substring(pos)));
3710 }
3711 catch (final ParseException pe)
3712 {
3713 debugException(pe);
3714 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3715 attributeName, firstLineNumber, pe.getMessage()),
3716 firstLineNumber, true, ldifLines, pe);
3717 }
3718 catch (final Exception e)
3719 {
3720 debugException(e);
3721 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3722 firstLineNumber, e),
3723 firstLineNumber, true, ldifLines, e);
3724 }
3725 }
3726 else
3727 {
3728 // Skip over any spaces leading up to the value, and then the rest of
3729 // the string is the value.
3730 int pos = colonPos+1;
3731 while ((pos < length) && (line.charAt(pos) == ' '))
3732 {
3733 pos++;
3734 }
3735
3736 value = new ASN1OctetString(line.substring(pos));
3737 }
3738
3739 valueList.add(value);
3740 }
3741
3742 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()];
3743 valueList.toArray(values);
3744
3745 // If it's an add modification type, then there must be at least one
3746 // value.
3747 if ((modType.intValue() == ModificationType.ADD.intValue()) &&
3748 (values.length == 0))
3749 {
3750 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName,
3751 firstLineNumber),
3752 firstLineNumber, true, ldifLines, null);
3753 }
3754
3755 // If it's an increment modification type, then there must be exactly one
3756 // value.
3757 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
3758 (values.length != 1))
3759 {
3760 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
3761 firstLineNumber, attributeName),
3762 firstLineNumber, true, ldifLines, null);
3763 }
3764
3765 modList.add(new Modification(modType, attributeName, values));
3766 }
3767
3768 final Modification[] mods = new Modification[modList.size()];
3769 modList.toArray(mods);
3770 return mods;
3771 }
3772
3773
3774
3775 /**
3776 * Parses the data available through the provided iterator as the body of a
3777 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
3778 * newsuperior lines).
3779 *
3780 * @param ldifLines The lines that comprise the LDIF
3781 * representation of the full record being
3782 * parsed.
3783 * @param iterator The iterator to use to access the modify DN
3784 * data.
3785 * @param dn The current DN of the entry.
3786 * @param controls The set of controls to include in the change
3787 * record.
3788 * @param trailingSpaceBehavior The behavior that should be exhibited when
3789 * encountering attribute values which are not
3790 * base64-encoded but contain trailing spaces.
3791 * @param firstLineNumber The line number for the start of the record.
3792 *
3793 * @return The decoded modify DN change record.
3794 *
3795 * @throws LDIFException If the provided LDIF data cannot be decoded as a
3796 * modify DN change record.
3797 */
3798 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
3799 final ArrayList<StringBuilder> ldifLines,
3800 final Iterator<StringBuilder> iterator, final String dn,
3801 final List<Control> controls,
3802 final TrailingSpaceBehavior trailingSpaceBehavior,
3803 final long firstLineNumber)
3804 throws LDIFException
3805 {
3806 // The next line must be the new RDN, and it must start with "newrdn:".
3807 StringBuilder line = iterator.next();
3808 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3809 int colonPos = line.indexOf(":");
3810 if ((colonPos < 0) ||
3811 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
3812 {
3813 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
3814 firstLineNumber),
3815 firstLineNumber, true, ldifLines, null);
3816 }
3817
3818 final String newRDN;
3819 int length = line.length();
3820 if (length == (colonPos+1))
3821 {
3822 // The colon was the last character on the line. This is not acceptable.
3823 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3824 firstLineNumber),
3825 firstLineNumber, true, ldifLines, null);
3826 }
3827 else if (line.charAt(colonPos+1) == ':')
3828 {
3829 // Skip over any spaces leading up to the value, and then the rest of the
3830 // string is the base64-encoded new RDN.
3831 int pos = colonPos+2;
3832 while ((pos < length) && (line.charAt(pos) == ' '))
3833 {
3834 pos++;
3835 }
3836
3837 try
3838 {
3839 final byte[] dnBytes = Base64.decode(line.substring(pos));
3840 newRDN = new String(dnBytes, "UTF-8");
3841 }
3842 catch (final ParseException pe)
3843 {
3844 debugException(pe);
3845 throw new LDIFException(
3846 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3847 pe.getMessage()),
3848 firstLineNumber, true, ldifLines, pe);
3849 }
3850 catch (final Exception e)
3851 {
3852 debugException(e);
3853 throw new LDIFException(
3854 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3855 e),
3856 firstLineNumber, true, ldifLines, e);
3857 }
3858 }
3859 else
3860 {
3861 // Skip over any spaces leading up to the value, and then the rest of the
3862 // string is the new RDN.
3863 int pos = colonPos+1;
3864 while ((pos < length) && (line.charAt(pos) == ' '))
3865 {
3866 pos++;
3867 }
3868
3869 newRDN = line.substring(pos);
3870 }
3871
3872 if (newRDN.length() == 0)
3873 {
3874 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3875 firstLineNumber),
3876 firstLineNumber, true, ldifLines, null);
3877 }
3878
3879
3880 // The next line must be the deleteOldRDN flag, and it must start with
3881 // 'deleteoldrdn:'.
3882 if (! iterator.hasNext())
3883 {
3884 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3885 firstLineNumber),
3886 firstLineNumber, true, ldifLines, null);
3887 }
3888
3889 line = iterator.next();
3890 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3891 colonPos = line.indexOf(":");
3892 if ((colonPos < 0) ||
3893 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
3894 {
3895 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3896 firstLineNumber),
3897 firstLineNumber, true, ldifLines, null);
3898 }
3899
3900 final String deleteOldRDNStr;
3901 length = line.length();
3902 if (length == (colonPos+1))
3903 {
3904 // The colon was the last character on the line. This is not acceptable.
3905 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
3906 firstLineNumber),
3907 firstLineNumber, true, ldifLines, null);
3908 }
3909 else if (line.charAt(colonPos+1) == ':')
3910 {
3911 // Skip over any spaces leading up to the value, and then the rest of the
3912 // string is the base64-encoded value. This is unusual and
3913 // unnecessary, but is nevertheless acceptable.
3914 int pos = colonPos+2;
3915 while ((pos < length) && (line.charAt(pos) == ' '))
3916 {
3917 pos++;
3918 }
3919
3920 try
3921 {
3922 final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3923 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8");
3924 }
3925 catch (final ParseException pe)
3926 {
3927 debugException(pe);
3928 throw new LDIFException(
3929 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3930 firstLineNumber, pe.getMessage()),
3931 firstLineNumber, true, ldifLines, pe);
3932 }
3933 catch (final Exception e)
3934 {
3935 debugException(e);
3936 throw new LDIFException(
3937 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3938 firstLineNumber, e),
3939 firstLineNumber, true, ldifLines, e);
3940 }
3941 }
3942 else
3943 {
3944 // Skip over any spaces leading up to the value, and then the rest of the
3945 // string is the value.
3946 int pos = colonPos+1;
3947 while ((pos < length) && (line.charAt(pos) == ' '))
3948 {
3949 pos++;
3950 }
3951
3952 deleteOldRDNStr = line.substring(pos);
3953 }
3954
3955 final boolean deleteOldRDN;
3956 if (deleteOldRDNStr.equals("0"))
3957 {
3958 deleteOldRDN = false;
3959 }
3960 else if (deleteOldRDNStr.equals("1"))
3961 {
3962 deleteOldRDN = true;
3963 }
3964 else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
3965 deleteOldRDNStr.equalsIgnoreCase("no"))
3966 {
3967 // This is technically illegal, but we'll allow it.
3968 deleteOldRDN = false;
3969 }
3970 else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
3971 deleteOldRDNStr.equalsIgnoreCase("yes"))
3972 {
3973 // This is also technically illegal, but we'll allow it.
3974 deleteOldRDN = false;
3975 }
3976 else
3977 {
3978 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
3979 deleteOldRDNStr, firstLineNumber),
3980 firstLineNumber, true, ldifLines, null);
3981 }
3982
3983
3984 // If there is another line, then it must be the new superior DN and it must
3985 // start with "newsuperior:". If this is absent, then it's fine.
3986 final String newSuperiorDN;
3987 if (iterator.hasNext())
3988 {
3989 line = iterator.next();
3990 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3991 colonPos = line.indexOf(":");
3992 if ((colonPos < 0) ||
3993 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
3994 {
3995 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
3996 firstLineNumber),
3997 firstLineNumber, true, ldifLines, null);
3998 }
3999
4000 length = line.length();
4001 if (length == (colonPos+1))
4002 {
4003 // The colon was the last character on the line. This is fine.
4004 newSuperiorDN = "";
4005 }
4006 else if (line.charAt(colonPos+1) == ':')
4007 {
4008 // Skip over any spaces leading up to the value, and then the rest of
4009 // the string is the base64-encoded new superior DN.
4010 int pos = colonPos+2;
4011 while ((pos < length) && (line.charAt(pos) == ' '))
4012 {
4013 pos++;
4014 }
4015
4016 try
4017 {
4018 final byte[] dnBytes = Base64.decode(line.substring(pos));
4019 newSuperiorDN = new String(dnBytes, "UTF-8");
4020 }
4021 catch (final ParseException pe)
4022 {
4023 debugException(pe);
4024 throw new LDIFException(
4025 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
4026 firstLineNumber, pe.getMessage()),
4027 firstLineNumber, true, ldifLines, pe);
4028 }
4029 catch (final Exception e)
4030 {
4031 debugException(e);
4032 throw new LDIFException(
4033 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
4034 firstLineNumber, e),
4035 firstLineNumber, true, ldifLines, e);
4036 }
4037 }
4038 else
4039 {
4040 // Skip over any spaces leading up to the value, and then the rest of
4041 // the string is the new superior DN.
4042 int pos = colonPos+1;
4043 while ((pos < length) && (line.charAt(pos) == ' '))
4044 {
4045 pos++;
4046 }
4047
4048 newSuperiorDN = line.substring(pos);
4049 }
4050 }
4051 else
4052 {
4053 newSuperiorDN = null;
4054 }
4055
4056
4057 // There must not be any more lines.
4058 if (iterator.hasNext())
4059 {
4060 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
4061 firstLineNumber, true, ldifLines, null);
4062 }
4063
4064 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
4065 newSuperiorDN, controls);
4066 }
4067
4068
4069
4070 /**
4071 * Examines the line contained in the provided buffer to determine whether it
4072 * may contain one or more illegal trailing spaces. If it does, then those
4073 * spaces will either be stripped out or an exception will be thrown to
4074 * indicate that they are illegal.
4075 *
4076 * @param buffer The buffer to be examined.
4077 * @param dn The DN of the LDIF record being parsed. It
4078 * may be {@code null} if the DN is not yet
4079 * known (e.g., because the provided line is
4080 * expected to contain that DN).
4081 * @param firstLineNumber The approximate line number in the LDIF
4082 * source on which the LDIF record begins.
4083 * @param trailingSpaceBehavior The behavior that should be exhibited when
4084 * encountering attribute values which are not
4085 * base64-encoded but contain trailing spaces.
4086 *
4087 * @throws LDIFException If the line contained in the provided buffer ends
4088 * with one or more illegal trailing spaces and
4089 * {@code stripTrailingSpaces} was provided with a
4090 * value of {@code false}.
4091 */
4092 private static void handleTrailingSpaces(final StringBuilder buffer,
4093 final String dn, final long firstLineNumber,
4094 final TrailingSpaceBehavior trailingSpaceBehavior)
4095 throws LDIFException
4096 {
4097 int pos = buffer.length() - 1;
4098 boolean trailingFound = false;
4099 while ((pos >= 0) && (buffer.charAt(pos) == ' '))
4100 {
4101 trailingFound = true;
4102 pos--;
4103 }
4104
4105 if (trailingFound && (buffer.charAt(pos) != ':'))
4106 {
4107 switch (trailingSpaceBehavior)
4108 {
4109 case STRIP:
4110 buffer.setLength(pos+1);
4111 break;
4112
4113 case REJECT:
4114 if (dn == null)
4115 {
4116 throw new LDIFException(
4117 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber,
4118 buffer.toString()),
4119 firstLineNumber, true);
4120 }
4121 else
4122 {
4123 throw new LDIFException(
4124 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn,
4125 firstLineNumber, buffer.toString()),
4126 firstLineNumber, true);
4127 }
4128
4129 case RETAIN:
4130 default:
4131 // No action will be taken.
4132 break;
4133 }
4134 }
4135 }
4136
4137
4138
4139 /**
4140 * This represents an unparsed LDIFRecord. It stores the line number of the
4141 * first line of the record and each line of the record.
4142 */
4143 private static final class UnparsedLDIFRecord
4144 {
4145 private final ArrayList<StringBuilder> lineList;
4146 private final long firstLineNumber;
4147 private final Exception failureCause;
4148 private final boolean isEOF;
4149 private final DuplicateValueBehavior duplicateValueBehavior;
4150 private final Schema schema;
4151 private final TrailingSpaceBehavior trailingSpaceBehavior;
4152
4153
4154
4155 /**
4156 * Constructor.
4157 *
4158 * @param lineList The lines that comprise the LDIF record.
4159 * @param duplicateValueBehavior The behavior to exhibit if the entry
4160 * contains duplicate attribute values.
4161 * @param trailingSpaceBehavior Specifies the behavior to exhibit when
4162 * encountering trailing spaces in
4163 * non-base64-encoded attribute values.
4164 * @param schema The schema to use when parsing, if
4165 * applicable.
4166 * @param firstLineNumber The first line number of the LDIF record.
4167 */
4168 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList,
4169 final DuplicateValueBehavior duplicateValueBehavior,
4170 final TrailingSpaceBehavior trailingSpaceBehavior,
4171 final Schema schema, final long firstLineNumber)
4172 {
4173 this.lineList = lineList;
4174 this.firstLineNumber = firstLineNumber;
4175 this.duplicateValueBehavior = duplicateValueBehavior;
4176 this.trailingSpaceBehavior = trailingSpaceBehavior;
4177 this.schema = schema;
4178
4179 failureCause = null;
4180 isEOF =
4181 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty());
4182 }
4183
4184
4185
4186 /**
4187 * Constructor.
4188 *
4189 * @param failureCause The Exception thrown when reading from the input.
4190 */
4191 private UnparsedLDIFRecord(final Exception failureCause)
4192 {
4193 this.failureCause = failureCause;
4194
4195 lineList = null;
4196 firstLineNumber = 0;
4197 duplicateValueBehavior = DuplicateValueBehavior.REJECT;
4198 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT;
4199 schema = null;
4200 isEOF = false;
4201 }
4202
4203
4204
4205 /**
4206 * Return the lines that comprise the LDIF record.
4207 *
4208 * @return The lines that comprise the LDIF record.
4209 */
4210 private ArrayList<StringBuilder> getLineList()
4211 {
4212 return lineList;
4213 }
4214
4215
4216
4217 /**
4218 * Retrieves the behavior to exhibit when encountering duplicate attribute
4219 * values.
4220 *
4221 * @return The behavior to exhibit when encountering duplicate attribute
4222 * values.
4223 */
4224 private DuplicateValueBehavior getDuplicateValueBehavior()
4225 {
4226 return duplicateValueBehavior;
4227 }
4228
4229
4230
4231 /**
4232 * Retrieves the behavior that should be exhibited when encountering
4233 * attribute values which are not base64-encoded but contain trailing
4234 * spaces. The LDIF specification strongly recommends that any value which
4235 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK
4236 * LDIF parser may be configured to automatically strip these spaces, to
4237 * preserve them, or to reject any entry or change record containing them.
4238 *
4239 * @return The behavior that should be exhibited when encountering
4240 * attribute values which are not base64-encoded but contain
4241 * trailing spaces.
4242 */
4243 private TrailingSpaceBehavior getTrailingSpaceBehavior()
4244 {
4245 return trailingSpaceBehavior;
4246 }
4247
4248
4249
4250 /**
4251 * Retrieves the schema that should be used when parsing the record, if
4252 * applicable.
4253 *
4254 * @return The schema that should be used when parsing the record, or
4255 * {@code null} if none should be used.
4256 */
4257 private Schema getSchema()
4258 {
4259 return schema;
4260 }
4261
4262
4263
4264 /**
4265 * Return the first line number of the LDIF record.
4266 *
4267 * @return The first line number of the LDIF record.
4268 */
4269 private long getFirstLineNumber()
4270 {
4271 return firstLineNumber;
4272 }
4273
4274
4275
4276 /**
4277 * Return {@code true} iff the end of the input was reached.
4278 *
4279 * @return {@code true} iff the end of the input was reached.
4280 */
4281 private boolean isEOF()
4282 {
4283 return isEOF;
4284 }
4285
4286
4287
4288 /**
4289 * Returns the reason that reading the record lines failed. This normally
4290 * is only non-null if something bad happened to the input stream (like
4291 * a disk read error).
4292 *
4293 * @return The reason that reading the record lines failed.
4294 */
4295 private Exception getFailureCause()
4296 {
4297 return failureCause;
4298 }
4299 }
4300
4301
4302 /**
4303 * When processing in asynchronous mode, this thread is responsible for
4304 * reading the raw unparsed records from the input and submitting them for
4305 * processing.
4306 */
4307 private final class LineReaderThread
4308 extends Thread
4309 {
4310 /**
4311 * Constructor.
4312 */
4313 private LineReaderThread()
4314 {
4315 super("Asynchronous LDIF line reader");
4316 setDaemon(true);
4317 }
4318
4319
4320
4321 /**
4322 * Reads raw, unparsed records from the input and submits them for
4323 * processing until the input is finished or closed.
4324 */
4325 @Override()
4326 public void run()
4327 {
4328 try
4329 {
4330 boolean stopProcessing = false;
4331 while (!stopProcessing)
4332 {
4333 UnparsedLDIFRecord unparsedRecord = null;
4334 try
4335 {
4336 unparsedRecord = readUnparsedRecord();
4337 }
4338 catch (IOException e)
4339 {
4340 debugException(e);
4341 unparsedRecord = new UnparsedLDIFRecord(e);
4342 stopProcessing = true;
4343 }
4344 catch (Exception e)
4345 {
4346 debugException(e);
4347 unparsedRecord = new UnparsedLDIFRecord(e);
4348 }
4349
4350 try
4351 {
4352 asyncParser.submit(unparsedRecord);
4353 }
4354 catch (InterruptedException e)
4355 {
4356 debugException(e);
4357 // If this thread is interrupted, then someone wants us to stop
4358 // processing, so that's what we'll do.
4359 Thread.currentThread().interrupt();
4360 stopProcessing = true;
4361 }
4362
4363 if ((unparsedRecord == null) || (unparsedRecord.isEOF()))
4364 {
4365 stopProcessing = true;
4366 }
4367 }
4368 }
4369 finally
4370 {
4371 try
4372 {
4373 asyncParser.shutdown();
4374 }
4375 catch (InterruptedException e)
4376 {
4377 debugException(e);
4378 Thread.currentThread().interrupt();
4379 }
4380 finally
4381 {
4382 asyncParsingComplete.set(true);
4383 }
4384 }
4385 }
4386 }
4387
4388
4389
4390 /**
4391 * Used to parse Records asynchronously.
4392 */
4393 private final class RecordParser implements Processor<UnparsedLDIFRecord,
4394 LDIFRecord>
4395 {
4396 /**
4397 * {@inheritDoc}
4398 */
4399 public LDIFRecord process(final UnparsedLDIFRecord input)
4400 throws LDIFException
4401 {
4402 LDIFRecord record = decodeRecord(input, relativeBasePath, schema);
4403
4404 if ((record instanceof Entry) && (entryTranslator != null))
4405 {
4406 record = entryTranslator.translate((Entry) record,
4407 input.getFirstLineNumber());
4408
4409 if (record == null)
4410 {
4411 record = SKIP_ENTRY;
4412 }
4413 }
4414 if ((record instanceof LDIFChangeRecord) &&
4415 (changeRecordTranslator != null))
4416 {
4417 record = changeRecordTranslator.translate((LDIFChangeRecord) record,
4418 input.getFirstLineNumber());
4419
4420 if (record == null)
4421 {
4422 record = SKIP_ENTRY;
4423 }
4424 }
4425 return record;
4426 }
4427 }
4428 }