View Javadoc

1   /*
2    *   Copyright (C) Christian Schulte, 2012-253
3    *   All rights reserved.
4    *
5    *   Redistribution and use in source and binary forms, with or without
6    *   modification, are permitted provided that the following conditions
7    *   are met:
8    *
9    *     o Redistributions of source code must retain the above copyright
10   *       notice, this list of conditions and the following disclaimer.
11   *
12   *     o Redistributions in binary form must reproduce the above copyright
13   *       notice, this list of conditions and the following disclaimer in
14   *       the documentation and/or other materials provided with the
15   *       distribution.
16   *
17   *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18   *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19   *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20   *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21   *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22   *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23   *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24   *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25   *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26   *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27   *
28   *   $JOMC: JavaIdentifier.java 4804 2013-04-22 05:07:33Z schulte $
29   *
30   */
31  package org.jomc.model;
32  
33  import java.io.Serializable;
34  import java.lang.ref.Reference;
35  import java.lang.ref.SoftReference;
36  import java.text.MessageFormat;
37  import java.text.ParseException;
38  import java.util.ArrayList;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Locale;
42  import java.util.Map;
43  import java.util.ResourceBundle;
44  
45  /**
46   * Data type of a Java identifier.
47   * <p>This class provides support for parsing and normalizing text to java identifiers as specified in the Java
48   * Language Specification - Java SE 7 Edition - Chapter 3.8ff.</p>
49   *
50   * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
51   * @version $JOMC: JavaIdentifier.java 4804 2013-04-22 05:07:33Z schulte $
52   * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
53   * @see #parse(java.lang.String)
54   * @see #valueOf(java.lang.String)
55   * @since 1.4
56   */
57  public final class JavaIdentifier implements CharSequence, Serializable
58  {
59  
60      /**
61       * Normalization modes.
62       *
63       * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
64       * @version $JOMC: JavaIdentifier.java 4804 2013-04-22 05:07:33Z schulte $
65       * @since 1.4
66       * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
67       */
68      public static enum NormalizationMode
69      {
70  
71          /** Mode to normalize by compacting words using camel-case. */
72          CAMEL_CASE,
73          /** Mode to normalize by separating words using '_' and by converting all characters to lower-case. */
74          LOWER_CASE,
75          /** Mode to normalize by separating words using '_' and by converting all characters to upper-case. */
76          UPPER_CASE,
77          /**
78           * Mode to normalize according to the
79           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
80           * <blockquote>
81           * The names of variables declared class constants and of ANSI constants should be all uppercase with words
82           * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
83           * </blockquote>
84           */
85          CONSTANT_NAME_CONVENTION,
86          /**
87           * Mode to normalize according to the
88           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
89           * <blockquote>
90           * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
91           * internal word capitalized.
92           * </blockquote>
93           */
94          METHOD_NAME_CONVENTION,
95          /**
96           * Mode to normalize according to the
97           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
98           * <blockquote>
99           * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
100          * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
101          * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
102          * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
103          * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
104          * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
105          * </blockquote>
106          */
107         VARIABLE_NAME_CONVENTION
108 
109     }
110 
111     /**
112      * The value of the instance.
113      * @serial
114      */
115     private String identifier;
116 
117     /** Cached instances. */
118     private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
119 
120     /** Serial version UID for backwards compatibility with 1.4.x object streams. */
121     private static final long serialVersionUID = 7600377999055800720L;
122 
123     /** Underscore character. */
124     private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
125 
126     /** Creates a new {@code JavaIdentifier} instance. */
127     private JavaIdentifier()
128     {
129         super();
130     }
131 
132     /**
133      * Returns the length of this character sequence.
134      *
135      * @return The number of {@code char}s in this sequence.
136      */
137     public int length()
138     {
139         return this.identifier.length();
140     }
141 
142     /**
143      * Returns the {@code char} value at a given index.
144      *
145      * @param index The index of the {@code char} value to return.
146      *
147      * @return The {@code char} value at {@code index}.
148      *
149      * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
150      */
151     public char charAt( final int index )
152     {
153         return this.identifier.charAt( index );
154     }
155 
156     /**
157      * Returns a new {@code CharSequence} that is a subsequence of this sequence.
158      *
159      * @param start The start index, inclusive.
160      * @param end The end index, exclusive.
161      *
162      * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
163      *
164      * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
165      * the length of the sequence, or if {@code start} is greater than {@code end}.
166      */
167     public CharSequence subSequence( final int start, final int end )
168     {
169         return this.identifier.subSequence( start, end );
170     }
171 
172     /**
173      * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
174      * string will be the length of this sequence.
175      *
176      * @return A string consisting of exactly this sequence of characters.
177      */
178     @Override
179     public String toString()
180     {
181         return this.identifier;
182     }
183 
184     /**
185      * Returns the hash-code value of the object.
186      *
187      * @return The hash-code value of the object.
188      */
189     @Override
190     public int hashCode()
191     {
192         return this.identifier.hashCode();
193     }
194 
195     /**
196      * Tests whether some other object is equal to the object.
197      *
198      * @param o The object to test.
199      *
200      * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
201      * string value of the object.
202      */
203     @Override
204     public boolean equals( final Object o )
205     {
206         boolean equal = o == this;
207 
208         if ( !equal && o instanceof JavaIdentifier )
209         {
210             equal = this.toString().equals( o.toString() );
211         }
212 
213         return equal;
214     }
215 
216     /**
217      * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
218      *
219      * @param text The text to normalize.
220      * @param mode The normalization to apply.
221      *
222      * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
223      *
224      * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
225      * @throws ParseException if normalization fails.
226      */
227     public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
228     {
229         if ( text == null )
230         {
231             throw new NullPointerException( "text" );
232         }
233         if ( mode == null )
234         {
235             throw new NullPointerException( "mode" );
236         }
237 
238         return parse( text, mode, false );
239     }
240 
241     /**
242      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
243      *
244      * @param text The text to parse.
245      *
246      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
247      *
248      * @throws NullPointerException if {@code text} is {@code null}.
249      * @throws ParseException if parsing fails.
250      *
251      * @see #valueOf(java.lang.String)
252      */
253     public static JavaIdentifier parse( final String text ) throws ParseException
254     {
255         if ( text == null )
256         {
257             throw new NullPointerException( "text" );
258         }
259 
260         return parse( text, null, false );
261     }
262 
263     /**
264      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
265      * <p>Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
266      * fails.</p>
267      *
268      * @param text The text to parse.
269      *
270      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
271      *
272      * @throws NullPointerException if {@code text} is {@code null}.
273      * @throws IllegalArgumentException if parsing fails.
274      *
275      * @see #parse(java.lang.String)
276      */
277     public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
278     {
279         if ( text == null )
280         {
281             throw new NullPointerException( "text" );
282         }
283 
284         try
285         {
286             return parse( text, null, true );
287         }
288         catch ( final ParseException e )
289         {
290             throw new AssertionError( e );
291         }
292     }
293 
294     private static JavaIdentifier parse( final String text, final NormalizationMode mode,
295                                          final boolean runtimeException )
296         throws ParseException
297     {
298         Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
299 
300         if ( map == null )
301         {
302             map = new HashMap<CacheKey, JavaIdentifier>( 128 );
303             cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
304         }
305 
306         synchronized ( map )
307         {
308             final CacheKey key = new CacheKey( text, mode );
309             JavaIdentifier javaIdentifier = map.get( key );
310 
311             if ( javaIdentifier == null )
312             {
313                 javaIdentifier = new JavaIdentifier();
314                 parseIdentifier( javaIdentifier, text, mode, runtimeException );
315 
316                 if ( mode != null )
317                 {
318                     final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
319                     final JavaIdentifier normalizedInstance = map.get( normalizedKey );
320 
321                     if ( normalizedInstance != null )
322                     {
323                         map.put( key, normalizedInstance );
324                         javaIdentifier = normalizedInstance;
325                     }
326                     else
327                     {
328                         map.put( key, javaIdentifier );
329                         map.put( normalizedKey, javaIdentifier );
330                     }
331                 }
332                 else
333                 {
334                     map.put( key, javaIdentifier );
335                 }
336             }
337 
338             return javaIdentifier;
339         }
340     }
341 
342     private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
343                                          final boolean runtimeException )
344         throws ParseException
345     {
346         if ( text.length() <= 0 )
347         {
348             if ( runtimeException )
349             {
350                 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
351             }
352             else
353             {
354                 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
355             }
356         }
357 
358         final StringBuilder identifierBuilder = new StringBuilder( text.length() );
359         final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
360         boolean start_of_word = true;
361         int words = 0;
362 
363         for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
364         {
365             if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
366             {
367                 if ( mode != null )
368                 {
369                     switch ( mode )
370                     {
371                         case CAMEL_CASE:
372                             if ( start_of_word )
373                             {
374                                 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
375                             }
376                             else if ( last_codepoint > -1 && j < s0
377                                       && isCamelCase( last_codepoint, text.codePointAt( i ), text.codePointAt( j ) ) )
378                             { // Retain camel-case in words.
379                                 identifierBuilder.append( text.charAt( i ) );
380                                 retainedIndices.add( identifierBuilder.length() - 1 );
381                             }
382                             else
383                             {
384                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
385                             }
386                             break;
387 
388                         case LOWER_CASE:
389                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
390                             {
391                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
392                             }
393 
394                             identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
395                             break;
396 
397                         case UPPER_CASE:
398                         case CONSTANT_NAME_CONVENTION:
399                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
400                             {
401                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
402                             }
403 
404                             identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
405                             break;
406 
407                         case VARIABLE_NAME_CONVENTION:
408                         case METHOD_NAME_CONVENTION:
409                             if ( start_of_word )
410                             {
411                                 identifierBuilder.append( words == 0 ? Character.toLowerCase( text.charAt( i ) )
412                                                           : Character.toUpperCase( text.charAt( i ) ) );
413 
414                             }
415                             else if ( last_codepoint > -1 && j < s0
416                                       && isCamelCase( last_codepoint, text.codePointAt( i ), text.codePointAt( j ) ) )
417                             { // Retain camel-case in words.
418                                 identifierBuilder.append( text.charAt( i ) );
419                                 retainedIndices.add( identifierBuilder.length() - 1 );
420                             }
421                             else
422                             {
423                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
424                             }
425                             break;
426 
427                         default:
428                             throw new AssertionError( mode );
429 
430                     }
431                 }
432                 else
433                 {
434                     identifierBuilder.append( text.charAt( i ) );
435                 }
436 
437                 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
438                 start_of_word = false;
439             }
440             else
441             {
442                 if ( mode != null )
443                 {
444                     if ( !start_of_word )
445                     {
446                         start_of_word = true;
447                         words++;
448                     }
449                 }
450                 else if ( runtimeException )
451                 {
452                     throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
453                 }
454                 else
455                 {
456                     throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
457                 }
458             }
459         }
460 
461         if ( words > 0 )
462         {
463             // Multiple words - no camel-case retained in any word.
464             toLowerCase( identifierBuilder, retainedIndices );
465         }
466 
467         t.identifier = identifierBuilder.toString();
468 
469         if ( t.identifier.length() <= 0 )
470         {
471             if ( runtimeException )
472             {
473                 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
474             }
475             else
476             {
477                 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
478             }
479         }
480 
481         if ( JavaLanguage.KEYWORDS.contains( t.identifier )
482              || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
483              || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
484         {
485             if ( mode != null )
486             {
487                 t.identifier = "_" + t.identifier;
488             }
489             else if ( runtimeException )
490             {
491                 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
492                                                                 text.indexOf( t.identifier ) ) );
493 
494             }
495             else
496             {
497                 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
498                                           text.indexOf( t.identifier ) );
499 
500             }
501         }
502     }
503 
504     private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
505     {
506         return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
507                   && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
508 
509     }
510 
511     private static boolean isCamelCase( final int left, final int middle, final int right )
512     {
513         return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
514     }
515 
516     private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
517     {
518         for ( int i = 0, s0 = indices.size(); i < s0; i++ )
519         {
520             final int index = indices.get( i );
521             final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
522             stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
523         }
524     }
525 
526     private static String getMessage( final String key, final Object... args )
527     {
528         return MessageFormat.format( ResourceBundle.getBundle(
529             JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
530             getString( key ), args );
531 
532     }
533 
534     private static final class CacheKey
535     {
536 
537         private final String text;
538 
539         private final NormalizationMode mode;
540 
541         private CacheKey( final String text, final NormalizationMode mode )
542         {
543             super();
544             this.text = text;
545             this.mode = mode;
546         }
547 
548         @Override
549         public int hashCode()
550         {
551             int hc = 23;
552             hc = 37 * hc + this.text.hashCode();
553             hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
554             return hc;
555         }
556 
557         @Override
558         public boolean equals( final Object o )
559         {
560             boolean equal = o == this;
561 
562             if ( !equal && o instanceof CacheKey )
563             {
564                 final CacheKey that = (CacheKey) o;
565                 equal = this.mode == that.mode && this.text.equals( that.text );
566             }
567 
568             return equal;
569         }
570 
571     }
572 
573 }