1 /*
2 * Copyright (C) Christian Schulte <cs@schulte.it>, 2012-253
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * o Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * o Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
29 *
30 */
31 package org.jomc.model;
32
33 import java.io.Serializable;
34 import java.lang.ref.Reference;
35 import java.lang.ref.SoftReference;
36 import java.text.MessageFormat;
37 import java.text.ParseException;
38 import java.util.ArrayList;
39 import java.util.HashMap;
40 import java.util.List;
41 import java.util.Locale;
42 import java.util.Map;
43 import java.util.ResourceBundle;
44
45 /**
46 * Data type of a Java identifier.
47 * <p>
48 * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
49 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50 * </p>
51 * <p>
52 * <i>Please note that this class will move to package {@code org.jomc.util} in JOMC 2.0.</i>
53 * </p>
54 *
55 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
56 * @version $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
57 * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
58 * @see #parse(java.lang.String)
59 * @see #valueOf(java.lang.String)
60 * @since 1.4
61 */
62 public final class JavaIdentifier implements CharSequence, Serializable
63 {
64
65 /**
66 * Normalization modes.
67 *
68 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
69 * @version $JOMC: JavaIdentifier.java 5043 2015-05-27 07:03:39Z schulte $
70 * @since 1.4
71 * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
72 */
73 public static enum NormalizationMode
74 {
75
76 /**
77 * Mode to normalize by compacting words using camel-case.
78 */
79 CAMEL_CASE,
80 /**
81 * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
82 */
83 LOWER_CASE,
84 /**
85 * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
86 */
87 UPPER_CASE,
88 /**
89 * Mode to normalize according to the
90 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
91 * <blockquote>
92 * The names of variables declared class constants and of ANSI constants should be all uppercase with words
93 * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
94 * </blockquote>
95 */
96 CONSTANT_NAME_CONVENTION,
97 /**
98 * Mode to normalize according to the
99 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
100 * <blockquote>
101 * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
102 * internal word capitalized.
103 * </blockquote>
104 */
105 METHOD_NAME_CONVENTION,
106 /**
107 * Mode to normalize according to the
108 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
109 * <blockquote>
110 * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
111 * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
112 * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
113 * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
114 * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
115 * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
116 * </blockquote>
117 */
118 VARIABLE_NAME_CONVENTION
119
120 }
121
122 /**
123 * The value of the instance.
124 *
125 * @serial
126 */
127 private String identifier;
128
129 /**
130 * Cached instances.
131 */
132 private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
133
134 /**
135 * Serial version UID for backwards compatibility with 1.4.x object streams.
136 */
137 private static final long serialVersionUID = 7600377999055800720L;
138
139 /**
140 * Underscore character.
141 */
142 private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
143
144 /**
145 * Creates a new {@code JavaIdentifier} instance.
146 */
147 private JavaIdentifier()
148 {
149 super();
150 }
151
152 /**
153 * Returns the length of this character sequence.
154 *
155 * @return The number of {@code char}s in this sequence.
156 */
157 public int length()
158 {
159 return this.identifier.length();
160 }
161
162 /**
163 * Returns the {@code char} value at a given index.
164 *
165 * @param index The index of the {@code char} value to return.
166 *
167 * @return The {@code char} value at {@code index}.
168 *
169 * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
170 */
171 public char charAt( final int index )
172 {
173 return this.identifier.charAt( index );
174 }
175
176 /**
177 * Returns a new {@code CharSequence} that is a subsequence of this sequence.
178 *
179 * @param start The start index, inclusive.
180 * @param end The end index, exclusive.
181 *
182 * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
183 *
184 * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
185 * the length of the sequence, or if {@code start} is greater than {@code end}.
186 */
187 public CharSequence subSequence( final int start, final int end )
188 {
189 return this.identifier.subSequence( start, end );
190 }
191
192 /**
193 * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
194 * string will be the length of this sequence.
195 *
196 * @return A string consisting of exactly this sequence of characters.
197 */
198 @Override
199 public String toString()
200 {
201 return this.identifier;
202 }
203
204 /**
205 * Returns the hash-code value of the object.
206 *
207 * @return The hash-code value of the object.
208 */
209 @Override
210 public int hashCode()
211 {
212 return this.identifier.hashCode();
213 }
214
215 /**
216 * Tests whether some other object is equal to the object.
217 *
218 * @param o The object to test.
219 *
220 * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
221 * string value of the object.
222 */
223 @Override
224 public boolean equals( final Object o )
225 {
226 boolean equal = o == this;
227
228 if ( !equal && o instanceof JavaIdentifier )
229 {
230 equal = this.toString().equals( o.toString() );
231 }
232
233 return equal;
234 }
235
236 /**
237 * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
238 *
239 * @param text The text to normalize.
240 * @param mode The normalization to apply.
241 *
242 * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
243 *
244 * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
245 * @throws ParseException if normalization fails.
246 */
247 public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
248 {
249 if ( text == null )
250 {
251 throw new NullPointerException( "text" );
252 }
253 if ( mode == null )
254 {
255 throw new NullPointerException( "mode" );
256 }
257
258 return parse( text, mode, false );
259 }
260
261 /**
262 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
263 *
264 * @param text The text to parse.
265 *
266 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
267 *
268 * @throws NullPointerException if {@code text} is {@code null}.
269 * @throws ParseException if parsing fails.
270 *
271 * @see #valueOf(java.lang.String)
272 */
273 public static JavaIdentifier parse( final String text ) throws ParseException
274 {
275 if ( text == null )
276 {
277 throw new NullPointerException( "text" );
278 }
279
280 return parse( text, null, false );
281 }
282
283 /**
284 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
285 * <p>
286 * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
287 * fails.
288 * </p>
289 *
290 * @param text The text to parse.
291 *
292 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
293 *
294 * @throws NullPointerException if {@code text} is {@code null}.
295 * @throws IllegalArgumentException if parsing fails.
296 *
297 * @see #parse(java.lang.String)
298 */
299 public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
300 {
301 if ( text == null )
302 {
303 throw new NullPointerException( "text" );
304 }
305
306 try
307 {
308 return parse( text, null, true );
309 }
310 catch ( final ParseException e )
311 {
312 throw new AssertionError( e );
313 }
314 }
315
316 private static JavaIdentifier parse( final String text, final NormalizationMode mode,
317 final boolean runtimeException )
318 throws ParseException
319 {
320 Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
321
322 if ( map == null )
323 {
324 map = new HashMap<CacheKey, JavaIdentifier>( 128 );
325 cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
326 }
327
328 synchronized ( map )
329 {
330 final CacheKey key = new CacheKey( text, mode );
331 JavaIdentifier javaIdentifier = map.get( key );
332
333 if ( javaIdentifier == null )
334 {
335 javaIdentifier = new JavaIdentifier();
336 parseIdentifier( javaIdentifier, text, mode, runtimeException );
337
338 if ( mode != null )
339 {
340 final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
341 final JavaIdentifier normalizedInstance = map.get( normalizedKey );
342
343 if ( normalizedInstance != null )
344 {
345 map.put( key, normalizedInstance );
346 javaIdentifier = normalizedInstance;
347 }
348 else
349 {
350 map.put( key, javaIdentifier );
351 map.put( normalizedKey, javaIdentifier );
352 }
353 }
354 else
355 {
356 map.put( key, javaIdentifier );
357 }
358 }
359
360 return javaIdentifier;
361 }
362 }
363
364 private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
365 final boolean runtimeException )
366 throws ParseException
367 {
368 if ( text.length() <= 0 )
369 {
370 if ( runtimeException )
371 {
372 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
373 }
374 else
375 {
376 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
377 }
378 }
379
380 final StringBuilder identifierBuilder = new StringBuilder( text.length() );
381 final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
382 boolean start_of_word = true;
383 int words = 0;
384
385 for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
386 {
387 if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
388 {
389 if ( mode != null )
390 {
391 switch ( mode )
392 {
393 case CAMEL_CASE:
394 if ( start_of_word )
395 {
396 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
397 }
398 else if ( last_codepoint > -1 && j < s0
399 && isCamelCase( last_codepoint, text.codePointAt( i ),
400 text.codePointAt( j ) ) )
401 { // Retain camel-case in words.
402 identifierBuilder.append( text.charAt( i ) );
403 retainedIndices.add( identifierBuilder.length() - 1 );
404 }
405 else
406 {
407 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
408 }
409 break;
410
411 case LOWER_CASE:
412 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
413 {
414 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
415 }
416
417 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
418 break;
419
420 case UPPER_CASE:
421 case CONSTANT_NAME_CONVENTION:
422 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
423 {
424 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
425 }
426
427 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
428 break;
429
430 case VARIABLE_NAME_CONVENTION:
431 case METHOD_NAME_CONVENTION:
432 if ( start_of_word )
433 {
434 identifierBuilder.append( words == 0
435 ? Character.toLowerCase( text.charAt( i ) )
436 : Character.toUpperCase( text.charAt( i ) ) );
437
438 }
439 else if ( last_codepoint > -1 && j < s0
440 && isCamelCase( last_codepoint, text.codePointAt( i ),
441 text.codePointAt( j ) ) )
442 { // Retain camel-case in words.
443 identifierBuilder.append( text.charAt( i ) );
444 retainedIndices.add( identifierBuilder.length() - 1 );
445 }
446 else
447 {
448 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
449 }
450 break;
451
452 default:
453 throw new AssertionError( mode );
454
455 }
456 }
457 else
458 {
459 identifierBuilder.append( text.charAt( i ) );
460 }
461
462 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
463 start_of_word = false;
464 }
465 else
466 {
467 if ( mode != null )
468 {
469 if ( !start_of_word )
470 {
471 start_of_word = true;
472 words++;
473 }
474 }
475 else if ( runtimeException )
476 {
477 throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
478 }
479 else
480 {
481 throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
482 }
483 }
484 }
485
486 if ( words > 0 )
487 {
488 // Multiple words - no camel-case retained in any word.
489 toLowerCase( identifierBuilder, retainedIndices );
490 }
491
492 t.identifier = identifierBuilder.toString();
493
494 if ( t.identifier.length() <= 0 )
495 {
496 if ( runtimeException )
497 {
498 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
499 }
500 else
501 {
502 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
503 }
504 }
505
506 if ( JavaLanguage.KEYWORDS.contains( t.identifier )
507 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
508 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
509 {
510 if ( mode != null )
511 {
512 t.identifier = "_" + t.identifier;
513 }
514 else if ( runtimeException )
515 {
516 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
517 text.indexOf( t.identifier ) ) );
518
519 }
520 else
521 {
522 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
523 text.indexOf( t.identifier ) );
524
525 }
526 }
527 }
528
529 private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
530 {
531 return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
532 && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
533
534 }
535
536 private static boolean isCamelCase( final int left, final int middle, final int right )
537 {
538 return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
539 }
540
541 private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
542 {
543 for ( int i = 0, s0 = indices.size(); i < s0; i++ )
544 {
545 final int index = indices.get( i );
546 final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
547 stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
548 }
549 }
550
551 private static String getMessage( final String key, final Object... args )
552 {
553 return MessageFormat.format( ResourceBundle.getBundle(
554 JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
555 getString( key ), args );
556
557 }
558
559 private static final class CacheKey
560 {
561
562 private final String text;
563
564 private final NormalizationMode mode;
565
566 private CacheKey( final String text, final NormalizationMode mode )
567 {
568 super();
569 this.text = text;
570 this.mode = mode;
571 }
572
573 @Override
574 public int hashCode()
575 {
576 int hc = 23;
577 hc = 37 * hc + this.text.hashCode();
578 hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
579 return hc;
580 }
581
582 @Override
583 public boolean equals( final Object o )
584 {
585 boolean equal = o == this;
586
587 if ( !equal && o instanceof CacheKey )
588 {
589 final CacheKey that = (CacheKey) o;
590 equal = this.mode == that.mode && this.text.equals( that.text );
591 }
592
593 return equal;
594 }
595
596 }
597
598 }