001/*
002 *   Copyright (C) Christian Schulte <cs@schulte.it>, 2012-253
003 *   All rights reserved.
004 *
005 *   Redistribution and use in source and binary forms, with or without
006 *   modification, are permitted provided that the following conditions
007 *   are met:
008 *
009 *     o Redistributions of source code must retain the above copyright
010 *       notice, this list of conditions and the following disclaimer.
011 *
012 *     o Redistributions in binary form must reproduce the above copyright
013 *       notice, this list of conditions and the following disclaimer in
014 *       the documentation and/or other materials provided with the
015 *       distribution.
016 *
017 *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
018 *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
019 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
020 *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
021 *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022 *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026 *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027 *
028 *   $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
029 *
030 */
031package org.jomc.model;
032
033import java.io.Serializable;
034import java.lang.ref.Reference;
035import java.lang.ref.SoftReference;
036import java.text.MessageFormat;
037import java.text.ParseException;
038import java.util.ArrayList;
039import java.util.HashMap;
040import java.util.List;
041import java.util.Locale;
042import java.util.Map;
043import java.util.ResourceBundle;
044
045/**
046 * Data type of a Java identifier.
047 * <p>
048 * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
049 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
050 * </p>
051 * <p>
052 * <i>Please note that this class will move to package {@code org.jomc.util} in JOMC 2.0.</i>
053 * </p>
054 *
055 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
056 * @version $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
057 * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
058 * @see #parse(java.lang.String)
059 * @see #valueOf(java.lang.String)
060 * @since 1.4
061 */
062public final class JavaIdentifier implements CharSequence, Serializable
063{
064
065    /**
066     * Normalization modes.
067     *
068     * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
069     * @version $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
070     * @since 1.4
071     * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
072     */
073    public static enum NormalizationMode
074    {
075
076        /**
077         * Mode to normalize by compacting words using camel-case.
078         */
079        CAMEL_CASE,
080        /**
081         * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
082         */
083        LOWER_CASE,
084        /**
085         * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
086         */
087        UPPER_CASE,
088        /**
089         * Mode to normalize according to the
090         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
091         * <blockquote>
092         * The names of variables declared class constants and of ANSI constants should be all uppercase with words
093         * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
094         * </blockquote>
095         */
096        CONSTANT_NAME_CONVENTION,
097        /**
098         * Mode to normalize according to the
099         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
100         * <blockquote>
101         * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
102         * internal word capitalized.
103         * </blockquote>
104         */
105        METHOD_NAME_CONVENTION,
106        /**
107         * Mode to normalize according to the
108         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
109         * <blockquote>
110         * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
111         * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
112         * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
113         * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
114         * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
115         * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
116         * </blockquote>
117         */
118        VARIABLE_NAME_CONVENTION
119
120    }
121
122    /**
123     * The value of the instance.
124     *
125     * @serial
126     */
127    private String identifier;
128
129    /**
130     * Cached instances.
131     */
132    private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
133
134    /**
135     * Serial version UID for backwards compatibility with 1.4.x object streams.
136     */
137    private static final long serialVersionUID = 7600377999055800720L;
138
139    /**
140     * Underscore character.
141     */
142    private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
143
144    /**
145     * Creates a new {@code JavaIdentifier} instance.
146     */
147    private JavaIdentifier()
148    {
149        super();
150    }
151
152    /**
153     * Returns the length of this character sequence.
154     *
155     * @return The number of {@code char}s in this sequence.
156     */
157    public int length()
158    {
159        return this.identifier.length();
160    }
161
162    /**
163     * Returns the {@code char} value at a given index.
164     *
165     * @param index The index of the {@code char} value to return.
166     *
167     * @return The {@code char} value at {@code index}.
168     *
169     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
170     */
171    public char charAt( final int index )
172    {
173        return this.identifier.charAt( index );
174    }
175
176    /**
177     * Returns a new {@code CharSequence} that is a subsequence of this sequence.
178     *
179     * @param start The start index, inclusive.
180     * @param end The end index, exclusive.
181     *
182     * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
183     *
184     * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
185     * the length of the sequence, or if {@code start} is greater than {@code end}.
186     */
187    public CharSequence subSequence( final int start, final int end )
188    {
189        return this.identifier.subSequence( start, end );
190    }
191
192    /**
193     * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
194     * string will be the length of this sequence.
195     *
196     * @return A string consisting of exactly this sequence of characters.
197     */
198    @Override
199    public String toString()
200    {
201        return this.identifier;
202    }
203
204    /**
205     * Returns the hash-code value of the object.
206     *
207     * @return The hash-code value of the object.
208     */
209    @Override
210    public int hashCode()
211    {
212        return this.identifier.hashCode();
213    }
214
215    /**
216     * Tests whether some other object is equal to the object.
217     *
218     * @param o The object to test.
219     *
220     * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
221     * string value of the object.
222     */
223    @Override
224    public boolean equals( final Object o )
225    {
226        boolean equal = o == this;
227
228        if ( !equal && o instanceof JavaIdentifier )
229        {
230            equal = this.toString().equals( o.toString() );
231        }
232
233        return equal;
234    }
235
236    /**
237     * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
238     *
239     * @param text The text to normalize.
240     * @param mode The normalization to apply.
241     *
242     * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
243     *
244     * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
245     * @throws ParseException if normalization fails.
246     */
247    public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
248    {
249        if ( text == null )
250        {
251            throw new NullPointerException( "text" );
252        }
253        if ( mode == null )
254        {
255            throw new NullPointerException( "mode" );
256        }
257
258        return parse( text, mode, false );
259    }
260
261    /**
262     * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
263     *
264     * @param text The text to parse.
265     *
266     * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
267     *
268     * @throws NullPointerException if {@code text} is {@code null}.
269     * @throws ParseException if parsing fails.
270     *
271     * @see #valueOf(java.lang.String)
272     */
273    public static JavaIdentifier parse( final String text ) throws ParseException
274    {
275        if ( text == null )
276        {
277            throw new NullPointerException( "text" );
278        }
279
280        return parse( text, null, false );
281    }
282
283    /**
284     * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
285     * <p>
286     * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
287     * fails.
288     * </p>
289     *
290     * @param text The text to parse.
291     *
292     * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
293     *
294     * @throws NullPointerException if {@code text} is {@code null}.
295     * @throws IllegalArgumentException if parsing fails.
296     *
297     * @see #parse(java.lang.String)
298     */
299    public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
300    {
301        if ( text == null )
302        {
303            throw new NullPointerException( "text" );
304        }
305
306        try
307        {
308            return parse( text, null, true );
309        }
310        catch ( final ParseException e )
311        {
312            throw new AssertionError( e );
313        }
314    }
315
316    private static JavaIdentifier parse( final String text, final NormalizationMode mode,
317                                         final boolean runtimeException )
318        throws ParseException
319    {
320        Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
321
322        if ( map == null )
323        {
324            map = new HashMap<CacheKey, JavaIdentifier>( 128 );
325            cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
326        }
327
328        synchronized ( map )
329        {
330            final CacheKey key = new CacheKey( text, mode );
331            JavaIdentifier javaIdentifier = map.get( key );
332
333            if ( javaIdentifier == null )
334            {
335                javaIdentifier = new JavaIdentifier();
336                parseIdentifier( javaIdentifier, text, mode, runtimeException );
337
338                if ( mode != null )
339                {
340                    final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
341                    final JavaIdentifier normalizedInstance = map.get( normalizedKey );
342
343                    if ( normalizedInstance != null )
344                    {
345                        map.put( key, normalizedInstance );
346                        javaIdentifier = normalizedInstance;
347                    }
348                    else
349                    {
350                        map.put( key, javaIdentifier );
351                        map.put( normalizedKey, javaIdentifier );
352                    }
353                }
354                else
355                {
356                    map.put( key, javaIdentifier );
357                }
358            }
359
360            return javaIdentifier;
361        }
362    }
363
364    private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
365                                         final boolean runtimeException )
366        throws ParseException
367    {
368        if ( text.length() <= 0 )
369        {
370            if ( runtimeException )
371            {
372                throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
373            }
374            else
375            {
376                throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
377            }
378        }
379
380        final StringBuilder identifierBuilder = new StringBuilder( text.length() );
381        final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
382        boolean start_of_word = true;
383        int words = 0;
384
385        for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
386        {
387            if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
388            {
389                if ( mode != null )
390                {
391                    switch ( mode )
392                    {
393                        case CAMEL_CASE:
394                            if ( start_of_word )
395                            {
396                                identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
397                            }
398                            else if ( last_codepoint > -1 && j < s0
399                                          && isCamelCase( last_codepoint, text.codePointAt( i ),
400                                                          text.codePointAt( j ) ) )
401                            { // Retain camel-case in words.
402                                identifierBuilder.append( text.charAt( i ) );
403                                retainedIndices.add( identifierBuilder.length() - 1 );
404                            }
405                            else
406                            {
407                                identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
408                            }
409                            break;
410
411                        case LOWER_CASE:
412                            if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
413                            {
414                                identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
415                            }
416
417                            identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
418                            break;
419
420                        case UPPER_CASE:
421                        case CONSTANT_NAME_CONVENTION:
422                            if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
423                            {
424                                identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
425                            }
426
427                            identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
428                            break;
429
430                        case VARIABLE_NAME_CONVENTION:
431                        case METHOD_NAME_CONVENTION:
432                            if ( start_of_word )
433                            {
434                                identifierBuilder.append( words == 0
435                                                              ? Character.toLowerCase( text.charAt( i ) )
436                                                              : Character.toUpperCase( text.charAt( i ) ) );
437
438                            }
439                            else if ( last_codepoint > -1 && j < s0
440                                          && isCamelCase( last_codepoint, text.codePointAt( i ),
441                                                          text.codePointAt( j ) ) )
442                            { // Retain camel-case in words.
443                                identifierBuilder.append( text.charAt( i ) );
444                                retainedIndices.add( identifierBuilder.length() - 1 );
445                            }
446                            else
447                            {
448                                identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
449                            }
450                            break;
451
452                        default:
453                            throw new AssertionError( mode );
454
455                    }
456                }
457                else
458                {
459                    identifierBuilder.append( text.charAt( i ) );
460                }
461
462                last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
463                start_of_word = false;
464            }
465            else
466            {
467                if ( mode != null )
468                {
469                    if ( !start_of_word )
470                    {
471                        start_of_word = true;
472                        words++;
473                    }
474                }
475                else if ( runtimeException )
476                {
477                    throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
478                }
479                else
480                {
481                    throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
482                }
483            }
484        }
485
486        if ( words > 0 )
487        {
488            // Multiple words - no camel-case retained in any word.
489            toLowerCase( identifierBuilder, retainedIndices );
490        }
491
492        t.identifier = identifierBuilder.toString();
493
494        if ( t.identifier.length() <= 0 )
495        {
496            if ( runtimeException )
497            {
498                throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
499            }
500            else
501            {
502                throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
503            }
504        }
505
506        if ( JavaLanguage.KEYWORDS.contains( t.identifier )
507                 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
508                 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
509        {
510            if ( mode != null )
511            {
512                t.identifier = "_" + t.identifier;
513            }
514            else if ( runtimeException )
515            {
516                throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
517                                                                text.indexOf( t.identifier ) ) );
518
519            }
520            else
521            {
522                throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
523                                          text.indexOf( t.identifier ) );
524
525            }
526        }
527    }
528
529    private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
530    {
531        return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
532                  && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
533
534    }
535
536    private static boolean isCamelCase( final int left, final int middle, final int right )
537    {
538        return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
539    }
540
541    private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
542    {
543        for ( int i = 0, s0 = indices.size(); i < s0; i++ )
544        {
545            final int index = indices.get( i );
546            final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
547            stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
548        }
549    }
550
551    private static String getMessage( final String key, final Object... args )
552    {
553        return MessageFormat.format( ResourceBundle.getBundle(
554            JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
555            getString( key ), args );
556
557    }
558
559    private static final class CacheKey
560    {
561
562        private final String text;
563
564        private final NormalizationMode mode;
565
566        private CacheKey( final String text, final NormalizationMode mode )
567        {
568            super();
569            this.text = text;
570            this.mode = mode;
571        }
572
573        @Override
574        public int hashCode()
575        {
576            int hc = 23;
577            hc = 37 * hc + this.text.hashCode();
578            hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
579            return hc;
580        }
581
582        @Override
583        public boolean equals( final Object o )
584        {
585            boolean equal = o == this;
586
587            if ( !equal && o instanceof CacheKey )
588            {
589                final CacheKey that = (CacheKey) o;
590                equal = this.mode == that.mode && this.text.equals( that.text );
591            }
592
593            return equal;
594        }
595
596    }
597
598}