1 /*
2 * Copyright (C) 2012 Christian Schulte <cs@schulte.it>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * o Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * o Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
29 *
30 */
31 package org.jomc.jls;
32
33 import java.io.Serializable;
34 import java.lang.ref.Reference;
35 import java.lang.ref.SoftReference;
36 import java.text.MessageFormat;
37 import java.text.ParseException;
38 import java.util.ArrayList;
39 import java.util.HashMap;
40 import java.util.List;
41 import java.util.Locale;
42 import java.util.Map;
43 import java.util.ResourceBundle;
44
45 /**
46 * Data type of a Java identifier.
47 * <p>
48 * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
49 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50 * </p>
51 *
52 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
53 * @version $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
54 * @see #normalize(java.lang.String, org.jomc.jls.JavaIdentifier.NormalizationMode)
55 * @see #parse(java.lang.String)
56 * @see #valueOf(java.lang.String)
57 */
58 public final class JavaIdentifier implements CharSequence, Serializable
59 {
60
61 /**
62 * Normalization modes.
63 *
64 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
65 * @version $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
66 * @see JavaIdentifier#normalize(java.lang.String, org.jomc.jls.JavaIdentifier.NormalizationMode)
67 */
68 public static enum NormalizationMode
69 {
70
71 /**
72 * Mode to normalize by compacting words using camel-case.
73 */
74 CAMEL_CASE,
75 /**
76 * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
77 */
78 LOWER_CASE,
79 /**
80 * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
81 */
82 UPPER_CASE,
83 /**
84 * Mode to normalize according to the
85 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
86 * <blockquote>
87 * The names of variables declared class constants and of ANSI constants should be all uppercase with words
88 * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
89 * </blockquote>
90 */
91 CONSTANT_NAME_CONVENTION,
92 /**
93 * Mode to normalize according to the
94 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
95 * <blockquote>
96 * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
97 * internal word capitalized.
98 * </blockquote>
99 */
100 METHOD_NAME_CONVENTION,
101 /**
102 * Mode to normalize according to the
103 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
104 * <blockquote>
105 * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
106 * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
107 * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
108 * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
109 * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
110 * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
111 * </blockquote>
112 */
113 VARIABLE_NAME_CONVENTION
114
115 }
116
117 /**
118 * The value of the instance.
119 *
120 * @serial
121 */
122 private String identifier;
123
124 /**
125 * Cached instances.
126 */
127 private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
128
129 /**
130 * Serial version UID for backwards compatibility with 7.x object streams.
131 */
132 private static final long serialVersionUID = 7639783770152985285L;
133
134 /**
135 * Underscore character.
136 */
137 private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
138
139 /**
140 * Creates a new {@code JavaIdentifier} instance.
141 */
142 private JavaIdentifier()
143 {
144 super();
145 }
146
147 /**
148 * Returns the length of this character sequence.
149 *
150 * @return The number of {@code char}s in this sequence.
151 */
152 public int length()
153 {
154 return this.identifier.length();
155 }
156
157 /**
158 * Returns the {@code char} value at a given index.
159 *
160 * @param index The index of the {@code char} value to return.
161 *
162 * @return The {@code char} value at {@code index}.
163 *
164 * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
165 */
166 public char charAt( final int index )
167 {
168 return this.identifier.charAt( index );
169 }
170
171 /**
172 * Returns a new {@code CharSequence} that is a subsequence of this sequence.
173 *
174 * @param start The start index, inclusive.
175 * @param end The end index, exclusive.
176 *
177 * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
178 *
179 * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
180 * the length of the sequence, or if {@code start} is greater than {@code end}.
181 */
182 public CharSequence subSequence( final int start, final int end )
183 {
184 return this.identifier.subSequence( start, end );
185 }
186
187 /**
188 * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
189 * string will be the length of this sequence.
190 *
191 * @return A string consisting of exactly this sequence of characters.
192 */
193 @Override
194 public String toString()
195 {
196 return this.identifier;
197 }
198
199 /**
200 * Returns the hash-code value of the object.
201 *
202 * @return The hash-code value of the object.
203 */
204 @Override
205 public int hashCode()
206 {
207 return this.identifier.hashCode();
208 }
209
210 /**
211 * Tests whether some other object is equal to the object.
212 *
213 * @param o The object to test.
214 *
215 * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
216 * string value of the object.
217 */
218 @Override
219 public boolean equals( final Object o )
220 {
221 boolean equal = o == this;
222
223 if ( !equal && o instanceof JavaIdentifier )
224 {
225 equal = this.toString().equals( o.toString() );
226 }
227
228 return equal;
229 }
230
231 /**
232 * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
233 *
234 * @param text The text to normalize.
235 * @param mode The normalization to apply.
236 *
237 * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
238 *
239 * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
240 * @throws ParseException if normalization fails.
241 */
242 public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
243 {
244 if ( text == null )
245 {
246 throw new NullPointerException( "text" );
247 }
248 if ( mode == null )
249 {
250 throw new NullPointerException( "mode" );
251 }
252
253 return parse( text, mode, false );
254 }
255
256 /**
257 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
258 *
259 * @param text The text to parse.
260 *
261 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
262 *
263 * @throws NullPointerException if {@code text} is {@code null}.
264 * @throws ParseException if parsing fails.
265 *
266 * @see #valueOf(java.lang.String)
267 */
268 public static JavaIdentifier parse( final String text ) throws ParseException
269 {
270 if ( text == null )
271 {
272 throw new NullPointerException( "text" );
273 }
274
275 return parse( text, null, false );
276 }
277
278 /**
279 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
280 * <p>
281 * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
282 * fails.
283 * </p>
284 *
285 * @param text The text to parse.
286 *
287 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
288 *
289 * @throws NullPointerException if {@code text} is {@code null}.
290 * @throws IllegalArgumentException if parsing fails.
291 *
292 * @see #parse(java.lang.String)
293 */
294 public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
295 {
296 if ( text == null )
297 {
298 throw new NullPointerException( "text" );
299 }
300
301 try
302 {
303 return parse( text, null, true );
304 }
305 catch ( final ParseException e )
306 {
307 throw new AssertionError( e );
308 }
309 }
310
311 private static JavaIdentifier parse( final String text, final NormalizationMode mode,
312 final boolean runtimeException )
313 throws ParseException
314 {
315 Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
316
317 if ( map == null )
318 {
319 map = new HashMap<CacheKey, JavaIdentifier>( 128 );
320 cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
321 }
322
323 synchronized ( map )
324 {
325 final CacheKey key = new CacheKey( text, mode );
326 JavaIdentifier javaIdentifier = map.get( key );
327
328 if ( javaIdentifier == null )
329 {
330 javaIdentifier = new JavaIdentifier();
331 parseIdentifier( javaIdentifier, text, mode, runtimeException );
332
333 if ( mode != null )
334 {
335 final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
336 final JavaIdentifier normalizedInstance = map.get( normalizedKey );
337
338 if ( normalizedInstance != null )
339 {
340 map.put( key, normalizedInstance );
341 javaIdentifier = normalizedInstance;
342 }
343 else
344 {
345 map.put( key, javaIdentifier );
346 map.put( normalizedKey, javaIdentifier );
347 }
348 }
349 else
350 {
351 map.put( key, javaIdentifier );
352 }
353 }
354
355 return javaIdentifier;
356 }
357 }
358
359 private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
360 final boolean runtimeException )
361 throws ParseException
362 {
363 if ( text.length() <= 0 )
364 {
365 if ( runtimeException )
366 {
367 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
368 }
369 else
370 {
371 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
372 }
373 }
374
375 final StringBuilder identifierBuilder = new StringBuilder( text.length() );
376 final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
377 boolean start_of_word = true;
378 int words = 0;
379
380 for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
381 {
382 if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
383 {
384 if ( mode != null )
385 {
386 switch ( mode )
387 {
388 case CAMEL_CASE:
389 if ( start_of_word )
390 {
391 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
392 }
393 else if ( last_codepoint > -1 && j < s0
394 && isCamelCase( last_codepoint, text.codePointAt( i ),
395 text.codePointAt( j ) ) )
396 { // Retain camel-case in words.
397 identifierBuilder.append( text.charAt( i ) );
398 retainedIndices.add( identifierBuilder.length() - 1 );
399 }
400 else
401 {
402 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
403 }
404 break;
405
406 case LOWER_CASE:
407 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
408 {
409 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
410 }
411
412 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
413 break;
414
415 case UPPER_CASE:
416 case CONSTANT_NAME_CONVENTION:
417 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
418 {
419 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
420 }
421
422 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
423 break;
424
425 case VARIABLE_NAME_CONVENTION:
426 case METHOD_NAME_CONVENTION:
427 if ( start_of_word )
428 {
429 identifierBuilder.append( words == 0
430 ? Character.toLowerCase( text.charAt( i ) )
431 : Character.toUpperCase( text.charAt( i ) ) );
432
433 }
434 else if ( last_codepoint > -1 && j < s0
435 && isCamelCase( last_codepoint, text.codePointAt( i ),
436 text.codePointAt( j ) ) )
437 { // Retain camel-case in words.
438 identifierBuilder.append( text.charAt( i ) );
439 retainedIndices.add( identifierBuilder.length() - 1 );
440 }
441 else
442 {
443 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
444 }
445 break;
446
447 default:
448 throw new AssertionError( mode );
449
450 }
451 }
452 else
453 {
454 identifierBuilder.append( text.charAt( i ) );
455 }
456
457 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
458 start_of_word = false;
459 }
460 else
461 {
462 if ( mode != null )
463 {
464 if ( !start_of_word )
465 {
466 start_of_word = true;
467 words++;
468 }
469 }
470 else if ( runtimeException )
471 {
472 throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
473 }
474 else
475 {
476 throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
477 }
478 }
479 }
480
481 if ( words > 0 )
482 {
483 // Multiple words - no camel-case retained in any word.
484 toLowerCase( identifierBuilder, retainedIndices );
485 }
486
487 t.identifier = identifierBuilder.toString();
488
489 if ( t.identifier.length() <= 0 )
490 {
491 if ( runtimeException )
492 {
493 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
494 }
495 else
496 {
497 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
498 }
499 }
500
501 if ( JavaLanguage.KEYWORDS.contains( t.identifier )
502 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
503 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
504 {
505 if ( mode != null )
506 {
507 t.identifier = "_" + t.identifier;
508 }
509 else if ( runtimeException )
510 {
511 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
512 text.indexOf( t.identifier ) ) );
513
514 }
515 else
516 {
517 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
518 text.indexOf( t.identifier ) );
519
520 }
521 }
522 }
523
524 private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
525 {
526 return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
527 && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
528
529 }
530
531 private static boolean isCamelCase( final int left, final int middle, final int right )
532 {
533 return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
534 }
535
536 private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
537 {
538 for ( int i = 0, s0 = indices.size(); i < s0; i++ )
539 {
540 final int index = indices.get( i );
541 final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
542 stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
543 }
544 }
545
546 private static String getMessage( final String key, final Object... args )
547 {
548 return MessageFormat.format( ResourceBundle.getBundle(
549 JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
550 getString( key ), args );
551
552 }
553
554 private static final class CacheKey
555 {
556
557 private final String text;
558
559 private final NormalizationMode mode;
560
561 private CacheKey( final String text, final NormalizationMode mode )
562 {
563 super();
564 this.text = text;
565 this.mode = mode;
566 }
567
568 @Override
569 public int hashCode()
570 {
571 int hc = 23;
572 hc = 37 * hc + this.text.hashCode();
573 hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
574 return hc;
575 }
576
577 @Override
578 public boolean equals( final Object o )
579 {
580 boolean equal = o == this;
581
582 if ( !equal && o instanceof CacheKey )
583 {
584 final CacheKey that = (CacheKey) o;
585 equal = this.mode == that.mode && this.text.equals( that.text );
586 }
587
588 return equal;
589 }
590
591 }
592
593 }