View Javadoc
1   /*
2    *   Copyright (C) 2012 Christian Schulte <cs@schulte.it>
3    *   All rights reserved.
4    *
5    *   Redistribution and use in source and binary forms, with or without
6    *   modification, are permitted provided that the following conditions
7    *   are met:
8    *
9    *     o Redistributions of source code must retain the above copyright
10   *       notice, this list of conditions and the following disclaimer.
11   *
12   *     o Redistributions in binary form must reproduce the above copyright
13   *       notice, this list of conditions and the following disclaimer in
14   *       the documentation and/or other materials provided with the
15   *       distribution.
16   *
17   *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18   *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19   *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20   *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21   *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22   *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23   *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24   *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25   *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26   *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27   *
28   *   $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
29   *
30   */
31  package org.jomc.jls;
32  
33  import java.io.Serializable;
34  import java.lang.ref.Reference;
35  import java.lang.ref.SoftReference;
36  import java.text.MessageFormat;
37  import java.text.ParseException;
38  import java.util.ArrayList;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Locale;
42  import java.util.Map;
43  import java.util.ResourceBundle;
44  
45  /**
46   * Data type of a Java identifier.
47   * <p>
48   * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
49   * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50   * </p>
51   *
52   * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
53   * @version $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
54   * @see #normalize(java.lang.String, org.jomc.jls.JavaIdentifier.NormalizationMode)
55   * @see #parse(java.lang.String)
56   * @see #valueOf(java.lang.String)
57   */
58  public final class JavaIdentifier implements CharSequence, Serializable
59  {
60  
61      /**
62       * Normalization modes.
63       *
64       * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
65       * @version $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
66       * @see JavaIdentifier#normalize(java.lang.String, org.jomc.jls.JavaIdentifier.NormalizationMode)
67       */
68      public static enum NormalizationMode
69      {
70  
71          /**
72           * Mode to normalize by compacting words using camel-case.
73           */
74          CAMEL_CASE,
75          /**
76           * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
77           */
78          LOWER_CASE,
79          /**
80           * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
81           */
82          UPPER_CASE,
83          /**
84           * Mode to normalize according to the
85           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
86           * <blockquote>
87           * The names of variables declared class constants and of ANSI constants should be all uppercase with words
88           * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
89           * </blockquote>
90           */
91          CONSTANT_NAME_CONVENTION,
92          /**
93           * Mode to normalize according to the
94           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
95           * <blockquote>
96           * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
97           * internal word capitalized.
98           * </blockquote>
99           */
100         METHOD_NAME_CONVENTION,
101         /**
102          * Mode to normalize according to the
103          * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
104          * <blockquote>
105          * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
106          * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
107          * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
108          * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
109          * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
110          * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
111          * </blockquote>
112          */
113         VARIABLE_NAME_CONVENTION
114 
115     }
116 
117     /**
118      * The value of the instance.
119      *
120      * @serial
121      */
122     private String identifier;
123 
124     /**
125      * Cached instances.
126      */
127     private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
128 
129     /**
130      * Serial version UID for backwards compatibility with 7.x object streams.
131      */
132     private static final long serialVersionUID = 7639783770152985285L;
133 
134     /**
135      * Underscore character.
136      */
137     private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
138 
139     /**
140      * Creates a new {@code JavaIdentifier} instance.
141      */
142     private JavaIdentifier()
143     {
144         super();
145     }
146 
147     /**
148      * Returns the length of this character sequence.
149      *
150      * @return The number of {@code char}s in this sequence.
151      */
152     public int length()
153     {
154         return this.identifier.length();
155     }
156 
157     /**
158      * Returns the {@code char} value at a given index.
159      *
160      * @param index The index of the {@code char} value to return.
161      *
162      * @return The {@code char} value at {@code index}.
163      *
164      * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
165      */
166     public char charAt( final int index )
167     {
168         return this.identifier.charAt( index );
169     }
170 
171     /**
172      * Returns a new {@code CharSequence} that is a subsequence of this sequence.
173      *
174      * @param start The start index, inclusive.
175      * @param end The end index, exclusive.
176      *
177      * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
178      *
179      * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
180      * the length of the sequence, or if {@code start} is greater than {@code end}.
181      */
182     public CharSequence subSequence( final int start, final int end )
183     {
184         return this.identifier.subSequence( start, end );
185     }
186 
187     /**
188      * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
189      * string will be the length of this sequence.
190      *
191      * @return A string consisting of exactly this sequence of characters.
192      */
193     @Override
194     public String toString()
195     {
196         return this.identifier;
197     }
198 
199     /**
200      * Returns the hash-code value of the object.
201      *
202      * @return The hash-code value of the object.
203      */
204     @Override
205     public int hashCode()
206     {
207         return this.identifier.hashCode();
208     }
209 
210     /**
211      * Tests whether some other object is equal to the object.
212      *
213      * @param o The object to test.
214      *
215      * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
216      * string value of the object.
217      */
218     @Override
219     public boolean equals( final Object o )
220     {
221         boolean equal = o == this;
222 
223         if ( !equal && o instanceof JavaIdentifier )
224         {
225             equal = this.toString().equals( o.toString() );
226         }
227 
228         return equal;
229     }
230 
231     /**
232      * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
233      *
234      * @param text The text to normalize.
235      * @param mode The normalization to apply.
236      *
237      * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
238      *
239      * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
240      * @throws ParseException if normalization fails.
241      */
242     public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
243     {
244         if ( text == null )
245         {
246             throw new NullPointerException( "text" );
247         }
248         if ( mode == null )
249         {
250             throw new NullPointerException( "mode" );
251         }
252 
253         return parse( text, mode, false );
254     }
255 
256     /**
257      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
258      *
259      * @param text The text to parse.
260      *
261      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
262      *
263      * @throws NullPointerException if {@code text} is {@code null}.
264      * @throws ParseException if parsing fails.
265      *
266      * @see #valueOf(java.lang.String)
267      */
268     public static JavaIdentifier parse( final String text ) throws ParseException
269     {
270         if ( text == null )
271         {
272             throw new NullPointerException( "text" );
273         }
274 
275         return parse( text, null, false );
276     }
277 
278     /**
279      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
280      * <p>
281      * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
282      * fails.
283      * </p>
284      *
285      * @param text The text to parse.
286      *
287      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
288      *
289      * @throws NullPointerException if {@code text} is {@code null}.
290      * @throws IllegalArgumentException if parsing fails.
291      *
292      * @see #parse(java.lang.String)
293      */
294     public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
295     {
296         if ( text == null )
297         {
298             throw new NullPointerException( "text" );
299         }
300 
301         try
302         {
303             return parse( text, null, true );
304         }
305         catch ( final ParseException e )
306         {
307             throw new AssertionError( e );
308         }
309     }
310 
311     private static JavaIdentifier parse( final String text, final NormalizationMode mode,
312                                          final boolean runtimeException )
313         throws ParseException
314     {
315         Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
316 
317         if ( map == null )
318         {
319             map = new HashMap<CacheKey, JavaIdentifier>( 128 );
320             cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
321         }
322 
323         synchronized ( map )
324         {
325             final CacheKey key = new CacheKey( text, mode );
326             JavaIdentifier javaIdentifier = map.get( key );
327 
328             if ( javaIdentifier == null )
329             {
330                 javaIdentifier = new JavaIdentifier();
331                 parseIdentifier( javaIdentifier, text, mode, runtimeException );
332 
333                 if ( mode != null )
334                 {
335                     final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
336                     final JavaIdentifier normalizedInstance = map.get( normalizedKey );
337 
338                     if ( normalizedInstance != null )
339                     {
340                         map.put( key, normalizedInstance );
341                         javaIdentifier = normalizedInstance;
342                     }
343                     else
344                     {
345                         map.put( key, javaIdentifier );
346                         map.put( normalizedKey, javaIdentifier );
347                     }
348                 }
349                 else
350                 {
351                     map.put( key, javaIdentifier );
352                 }
353             }
354 
355             return javaIdentifier;
356         }
357     }
358 
359     private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
360                                          final boolean runtimeException )
361         throws ParseException
362     {
363         if ( text.length() <= 0 )
364         {
365             if ( runtimeException )
366             {
367                 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
368             }
369             else
370             {
371                 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
372             }
373         }
374 
375         final StringBuilder identifierBuilder = new StringBuilder( text.length() );
376         final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
377         boolean start_of_word = true;
378         int words = 0;
379 
380         for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
381         {
382             if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
383             {
384                 if ( mode != null )
385                 {
386                     switch ( mode )
387                     {
388                         case CAMEL_CASE:
389                             if ( start_of_word )
390                             {
391                                 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
392                             }
393                             else if ( last_codepoint > -1 && j < s0
394                                           && isCamelCase( last_codepoint, text.codePointAt( i ),
395                                                           text.codePointAt( j ) ) )
396                             { // Retain camel-case in words.
397                                 identifierBuilder.append( text.charAt( i ) );
398                                 retainedIndices.add( identifierBuilder.length() - 1 );
399                             }
400                             else
401                             {
402                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
403                             }
404                             break;
405 
406                         case LOWER_CASE:
407                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
408                             {
409                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
410                             }
411 
412                             identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
413                             break;
414 
415                         case UPPER_CASE:
416                         case CONSTANT_NAME_CONVENTION:
417                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
418                             {
419                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
420                             }
421 
422                             identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
423                             break;
424 
425                         case VARIABLE_NAME_CONVENTION:
426                         case METHOD_NAME_CONVENTION:
427                             if ( start_of_word )
428                             {
429                                 identifierBuilder.append( words == 0
430                                                               ? Character.toLowerCase( text.charAt( i ) )
431                                                               : Character.toUpperCase( text.charAt( i ) ) );
432 
433                             }
434                             else if ( last_codepoint > -1 && j < s0
435                                           && isCamelCase( last_codepoint, text.codePointAt( i ),
436                                                           text.codePointAt( j ) ) )
437                             { // Retain camel-case in words.
438                                 identifierBuilder.append( text.charAt( i ) );
439                                 retainedIndices.add( identifierBuilder.length() - 1 );
440                             }
441                             else
442                             {
443                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
444                             }
445                             break;
446 
447                         default:
448                             throw new AssertionError( mode );
449 
450                     }
451                 }
452                 else
453                 {
454                     identifierBuilder.append( text.charAt( i ) );
455                 }
456 
457                 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
458                 start_of_word = false;
459             }
460             else
461             {
462                 if ( mode != null )
463                 {
464                     if ( !start_of_word )
465                     {
466                         start_of_word = true;
467                         words++;
468                     }
469                 }
470                 else if ( runtimeException )
471                 {
472                     throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
473                 }
474                 else
475                 {
476                     throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
477                 }
478             }
479         }
480 
481         if ( words > 0 )
482         {
483             // Multiple words - no camel-case retained in any word.
484             toLowerCase( identifierBuilder, retainedIndices );
485         }
486 
487         t.identifier = identifierBuilder.toString();
488 
489         if ( t.identifier.length() <= 0 )
490         {
491             if ( runtimeException )
492             {
493                 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
494             }
495             else
496             {
497                 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
498             }
499         }
500 
501         if ( JavaLanguage.KEYWORDS.contains( t.identifier )
502                  || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
503                  || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
504         {
505             if ( mode != null )
506             {
507                 t.identifier = "_" + t.identifier;
508             }
509             else if ( runtimeException )
510             {
511                 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
512                                                                 text.indexOf( t.identifier ) ) );
513 
514             }
515             else
516             {
517                 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
518                                           text.indexOf( t.identifier ) );
519 
520             }
521         }
522     }
523 
524     private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
525     {
526         return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
527                   && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
528 
529     }
530 
531     private static boolean isCamelCase( final int left, final int middle, final int right )
532     {
533         return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
534     }
535 
536     private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
537     {
538         for ( int i = 0, s0 = indices.size(); i < s0; i++ )
539         {
540             final int index = indices.get( i );
541             final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
542             stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
543         }
544     }
545 
546     private static String getMessage( final String key, final Object... args )
547     {
548         return MessageFormat.format( ResourceBundle.getBundle(
549             JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
550             getString( key ), args );
551 
552     }
553 
554     private static final class CacheKey
555     {
556 
557         private final String text;
558 
559         private final NormalizationMode mode;
560 
561         private CacheKey( final String text, final NormalizationMode mode )
562         {
563             super();
564             this.text = text;
565             this.mode = mode;
566         }
567 
568         @Override
569         public int hashCode()
570         {
571             int hc = 23;
572             hc = 37 * hc + this.text.hashCode();
573             hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
574             return hc;
575         }
576 
577         @Override
578         public boolean equals( final Object o )
579         {
580             boolean equal = o == this;
581 
582             if ( !equal && o instanceof CacheKey )
583             {
584                 final CacheKey that = (CacheKey) o;
585                 equal = this.mode == that.mode && this.text.equals( that.text );
586             }
587 
588             return equal;
589         }
590 
591     }
592 
593 }