View Javadoc
1   /*
2    *   Copyright (C) Christian Schulte <cs@schulte.it>, 2012-253
3    *   All rights reserved.
4    *
5    *   Redistribution and use in source and binary forms, with or without
6    *   modification, are permitted provided that the following conditions
7    *   are met:
8    *
9    *     o Redistributions of source code must retain the above copyright
10   *       notice, this list of conditions and the following disclaimer.
11   *
12   *     o Redistributions in binary form must reproduce the above copyright
13   *       notice, this list of conditions and the following disclaimer in
14   *       the documentation and/or other materials provided with the
15   *       distribution.
16   *
17   *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18   *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19   *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20   *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21   *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22   *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23   *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24   *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25   *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26   *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27   *
28   *   $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
29   *
30   */
31  package org.jomc.model;
32  
33  import java.io.Serializable;
34  import java.lang.ref.Reference;
35  import java.lang.ref.SoftReference;
36  import java.text.MessageFormat;
37  import java.text.ParseException;
38  import java.util.ArrayList;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Locale;
42  import java.util.Map;
43  import java.util.ResourceBundle;
44  
45  /**
46   * Data type of a Java identifier.
47   * <p>
48   * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
49   * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50   * </p>
51   * <p>
52   * <i>Please note that this class will move to package {@code org.jomc.util} in JOMC 2.0.</i>
53   * </p>
54   *
55   * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
56   * @version $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
57   * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
58   * @see #parse(java.lang.String)
59   * @see #valueOf(java.lang.String)
60   * @since 1.4
61   */
62  public final class JavaIdentifier implements CharSequence, Serializable
63  {
64  
65      /**
66       * Normalization modes.
67       *
68       * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
69       * @version $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
70       * @since 1.4
71       * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
72       */
73      public static enum NormalizationMode
74      {
75  
76          /**
77           * Mode to normalize by compacting words using camel-case.
78           */
79          CAMEL_CASE,
80          /**
81           * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
82           */
83          LOWER_CASE,
84          /**
85           * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
86           */
87          UPPER_CASE,
88          /**
89           * Mode to normalize according to the
90           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
91           * <blockquote>
92           * The names of variables declared class constants and of ANSI constants should be all uppercase with words
93           * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
94           * </blockquote>
95           */
96          CONSTANT_NAME_CONVENTION,
97          /**
98           * Mode to normalize according to the
99           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
100          * <blockquote>
101          * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
102          * internal word capitalized.
103          * </blockquote>
104          */
105         METHOD_NAME_CONVENTION,
106         /**
107          * Mode to normalize according to the
108          * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
109          * <blockquote>
110          * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
111          * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
112          * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
113          * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
114          * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
115          * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
116          * </blockquote>
117          */
118         VARIABLE_NAME_CONVENTION
119 
120     }
121 
122     /**
123      * The value of the instance.
124      *
125      * @serial
126      */
127     private String identifier;
128 
129     /**
130      * Cached instances.
131      */
132     private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
133 
134     /**
135      * Serial version UID for backwards compatibility with 1.4.x object streams.
136      */
137     private static final long serialVersionUID = 7600377999055800720L;
138 
139     /**
140      * Underscore character.
141      */
142     private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
143 
144     /**
145      * Creates a new {@code JavaIdentifier} instance.
146      */
147     private JavaIdentifier()
148     {
149         super();
150     }
151 
152     /**
153      * Returns the length of this character sequence.
154      *
155      * @return The number of {@code char}s in this sequence.
156      */
157     public int length()
158     {
159         return this.identifier.length();
160     }
161 
162     /**
163      * Returns the {@code char} value at a given index.
164      *
165      * @param index The index of the {@code char} value to return.
166      *
167      * @return The {@code char} value at {@code index}.
168      *
169      * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
170      */
171     public char charAt( final int index )
172     {
173         return this.identifier.charAt( index );
174     }
175 
176     /**
177      * Returns a new {@code CharSequence} that is a subsequence of this sequence.
178      *
179      * @param start The start index, inclusive.
180      * @param end The end index, exclusive.
181      *
182      * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
183      *
184      * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
185      * the length of the sequence, or if {@code start} is greater than {@code end}.
186      */
187     public CharSequence subSequence( final int start, final int end )
188     {
189         return this.identifier.subSequence( start, end );
190     }
191 
192     /**
193      * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
194      * string will be the length of this sequence.
195      *
196      * @return A string consisting of exactly this sequence of characters.
197      */
198     @Override
199     public String toString()
200     {
201         return this.identifier;
202     }
203 
204     /**
205      * Returns the hash-code value of the object.
206      *
207      * @return The hash-code value of the object.
208      */
209     @Override
210     public int hashCode()
211     {
212         return this.identifier.hashCode();
213     }
214 
215     /**
216      * Tests whether some other object is equal to the object.
217      *
218      * @param o The object to test.
219      *
220      * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
221      * string value of the object.
222      */
223     @Override
224     public boolean equals( final Object o )
225     {
226         boolean equal = o == this;
227 
228         if ( !equal && o instanceof JavaIdentifier )
229         {
230             equal = this.toString().equals( o.toString() );
231         }
232 
233         return equal;
234     }
235 
236     /**
237      * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
238      *
239      * @param text The text to normalize.
240      * @param mode The normalization to apply.
241      *
242      * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
243      *
244      * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
245      * @throws ParseException if normalization fails.
246      */
247     public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
248     {
249         if ( text == null )
250         {
251             throw new NullPointerException( "text" );
252         }
253         if ( mode == null )
254         {
255             throw new NullPointerException( "mode" );
256         }
257 
258         return parse( text, mode, false );
259     }
260 
261     /**
262      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
263      *
264      * @param text The text to parse.
265      *
266      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
267      *
268      * @throws NullPointerException if {@code text} is {@code null}.
269      * @throws ParseException if parsing fails.
270      *
271      * @see #valueOf(java.lang.String)
272      */
273     public static JavaIdentifier parse( final String text ) throws ParseException
274     {
275         if ( text == null )
276         {
277             throw new NullPointerException( "text" );
278         }
279 
280         return parse( text, null, false );
281     }
282 
283     /**
284      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
285      * <p>
286      * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
287      * fails.
288      * </p>
289      *
290      * @param text The text to parse.
291      *
292      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
293      *
294      * @throws NullPointerException if {@code text} is {@code null}.
295      * @throws IllegalArgumentException if parsing fails.
296      *
297      * @see #parse(java.lang.String)
298      */
299     public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
300     {
301         if ( text == null )
302         {
303             throw new NullPointerException( "text" );
304         }
305 
306         try
307         {
308             return parse( text, null, true );
309         }
310         catch ( final ParseException e )
311         {
312             throw new AssertionError( e );
313         }
314     }
315 
316     private static JavaIdentifier parse( final String text, final NormalizationMode mode,
317                                          final boolean runtimeException )
318         throws ParseException
319     {
320         Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
321 
322         if ( map == null )
323         {
324             map = new HashMap<CacheKey, JavaIdentifier>( 128 );
325             cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
326         }
327 
328         synchronized ( map )
329         {
330             final CacheKey key = new CacheKey( text, mode );
331             JavaIdentifier javaIdentifier = map.get( key );
332 
333             if ( javaIdentifier == null )
334             {
335                 javaIdentifier = new JavaIdentifier();
336                 parseIdentifier( javaIdentifier, text, mode, runtimeException );
337 
338                 if ( mode != null )
339                 {
340                     final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
341                     final JavaIdentifier normalizedInstance = map.get( normalizedKey );
342 
343                     if ( normalizedInstance != null )
344                     {
345                         map.put( key, normalizedInstance );
346                         javaIdentifier = normalizedInstance;
347                     }
348                     else
349                     {
350                         map.put( key, javaIdentifier );
351                         map.put( normalizedKey, javaIdentifier );
352                     }
353                 }
354                 else
355                 {
356                     map.put( key, javaIdentifier );
357                 }
358             }
359 
360             return javaIdentifier;
361         }
362     }
363 
364     private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
365                                          final boolean runtimeException )
366         throws ParseException
367     {
368         if ( text.length() <= 0 )
369         {
370             if ( runtimeException )
371             {
372                 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
373             }
374             else
375             {
376                 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
377             }
378         }
379 
380         final StringBuilder identifierBuilder = new StringBuilder( text.length() );
381         final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
382         boolean start_of_word = true;
383         int words = 0;
384 
385         for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
386         {
387             if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
388             {
389                 if ( mode != null )
390                 {
391                     switch ( mode )
392                     {
393                         case CAMEL_CASE:
394                             if ( start_of_word )
395                             {
396                                 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
397                             }
398                             else if ( last_codepoint > -1 && j < s0
399                                           && isCamelCase( last_codepoint, text.codePointAt( i ),
400                                                           text.codePointAt( j ) ) )
401                             { // Retain camel-case in words.
402                                 identifierBuilder.append( text.charAt( i ) );
403                                 retainedIndices.add( identifierBuilder.length() - 1 );
404                             }
405                             else
406                             {
407                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
408                             }
409                             break;
410 
411                         case LOWER_CASE:
412                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
413                             {
414                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
415                             }
416 
417                             identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
418                             break;
419 
420                         case UPPER_CASE:
421                         case CONSTANT_NAME_CONVENTION:
422                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
423                             {
424                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
425                             }
426 
427                             identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
428                             break;
429 
430                         case VARIABLE_NAME_CONVENTION:
431                         case METHOD_NAME_CONVENTION:
432                             if ( start_of_word )
433                             {
434                                 identifierBuilder.append( words == 0
435                                                               ? Character.toLowerCase( text.charAt( i ) )
436                                                               : Character.toUpperCase( text.charAt( i ) ) );
437 
438                             }
439                             else if ( last_codepoint > -1 && j < s0
440                                           && isCamelCase( last_codepoint, text.codePointAt( i ),
441                                                           text.codePointAt( j ) ) )
442                             { // Retain camel-case in words.
443                                 identifierBuilder.append( text.charAt( i ) );
444                                 retainedIndices.add( identifierBuilder.length() - 1 );
445                             }
446                             else
447                             {
448                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
449                             }
450                             break;
451 
452                         default:
453                             throw new AssertionError( mode );
454 
455                     }
456                 }
457                 else
458                 {
459                     identifierBuilder.append( text.charAt( i ) );
460                 }
461 
462                 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
463                 start_of_word = false;
464             }
465             else
466             {
467                 if ( mode != null )
468                 {
469                     if ( !start_of_word )
470                     {
471                         start_of_word = true;
472                         words++;
473                     }
474                 }
475                 else if ( runtimeException )
476                 {
477                     throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
478                 }
479                 else
480                 {
481                     throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
482                 }
483             }
484         }
485 
486         if ( words > 0 )
487         {
488             // Multiple words - no camel-case retained in any word.
489             toLowerCase( identifierBuilder, retainedIndices );
490         }
491 
492         t.identifier = identifierBuilder.toString();
493 
494         if ( t.identifier.length() <= 0 )
495         {
496             if ( runtimeException )
497             {
498                 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
499             }
500             else
501             {
502                 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
503             }
504         }
505 
506         if ( JavaLanguage.KEYWORDS.contains( t.identifier )
507                  || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
508                  || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
509         {
510             if ( mode != null )
511             {
512                 t.identifier = "_" + t.identifier;
513             }
514             else if ( runtimeException )
515             {
516                 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
517                                                                 text.indexOf( t.identifier ) ) );
518 
519             }
520             else
521             {
522                 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
523                                           text.indexOf( t.identifier ) );
524 
525             }
526         }
527     }
528 
529     private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
530     {
531         return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
532                   && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
533 
534     }
535 
536     private static boolean isCamelCase( final int left, final int middle, final int right )
537     {
538         return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
539     }
540 
541     private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
542     {
543         for ( int i = 0, s0 = indices.size(); i < s0; i++ )
544         {
545             final int index = indices.get( i );
546             final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
547             stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
548         }
549     }
550 
551     private static String getMessage( final String key, final Object... args )
552     {
553         return MessageFormat.format( ResourceBundle.getBundle(
554             JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
555             getString( key ), args );
556 
557     }
558 
559     private static final class CacheKey
560     {
561 
562         private final String text;
563 
564         private final NormalizationMode mode;
565 
566         private CacheKey( final String text, final NormalizationMode mode )
567         {
568             super();
569             this.text = text;
570             this.mode = mode;
571         }
572 
573         @Override
574         public int hashCode()
575         {
576             int hc = 23;
577             hc = 37 * hc + this.text.hashCode();
578             hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
579             return hc;
580         }
581 
582         @Override
583         public boolean equals( final Object o )
584         {
585             boolean equal = o == this;
586 
587             if ( !equal && o instanceof CacheKey )
588             {
589                 final CacheKey that = (CacheKey) o;
590                 equal = this.mode == that.mode && this.text.equals( that.text );
591             }
592 
593             return equal;
594         }
595 
596     }
597 
598 }