View Javadoc

1   /*
2    wsmo4j - a WSMO API and Reference Implementation
3    
4    Copyright (c) 2004-2005, OntoText Lab. / SIRMA
5                             University of Innsbruck, Austria
6    
7    This library is free software; you can redistribute it and/or modify it under
8    the terms of the GNU Lesser General Public License as published by the Free
9    Software Foundation; either version 2.1 of the License, or (at your option)
10   any later version.
11   This library is distributed in the hope that it will be useful, but WITHOUT
12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13   FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14   details.
15   You should have received a copy of the GNU Lesser General Public License along
16   with this library; if not, write to the Free Software Foundation, Inc.,
17   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18   */
19  package com.ontotext.wsmo4j.common;
20  
21  /**
22   * Utility class to check if a character is a letter, a digit, an extender, ...
23   * 
24   * <pre>
25   * Created on Nov 17, 2006
26   * Committed by $Author: nathaliest $
27   * $Source$,
28   * </pre>
29   * 
30   * @author Nathalie Steinmetz, DERI Innsbruck
31   * @version $Revision: 1890 $ $Date: 2006-11-17 18:40:14 +0200 (Fri, 17 Nov 2006) $
32   */
public class CharUtil {

    /**
     * Tells whether a character is a letter, meaning it is either a basechar
     * (as decided by {@link #isAlpha(char)}) or an ideographic character.
     *
     * @param chr
     *            the character to classify
     * @return <code>true</code> when chr is a basechar or an ideograph,
     *         <code>false</code> in every other case
     */
    public boolean isLetter(char chr) {
        return isAlpha(chr) || isIdeographic(chr);
    }

    /**
     * Tells whether a character is a basechar. Only the ranges A-Z and a-z
     * are accepted.
     *
     * @param chr
     *            the character to classify
     * @return <code>true</code> when chr lies in A-Z or a-z,
     *         <code>false</code> in every other case
     */
    public boolean isAlpha(char chr) {
        boolean upperCase = within(chr, 'A', 'Z');
        boolean lowerCase = within(chr, 'a', 'z');
        return upperCase || lowerCase;
    }

    /**
     * Tells whether a character is a digit. Only the range 0-9 is accepted.
     *
     * @param chr
     *            the character to classify
     * @return <code>true</code> when chr lies in 0-9,
     *         <code>false</code> in every other case
     */
    public boolean isDigit(char chr) {
        return within(chr, '0', '9');
    }

    /**
     * Tells whether a character is a combining char. The accepted ranges are
     * U+0300 to U+0345, U+0360 to U+0361 and U+0483 to U+0486.
     *
     * @param chr
     *            the character to classify
     * @return <code>true</code> when chr lies in one of the accepted ranges,
     *         <code>false</code> in every other case
     */
    public boolean isCombiningChar(char chr) {
        return within(chr, '\u0300', '\u0345')
                || within(chr, '\u0360', '\u0361')
                || within(chr, '\u0483', '\u0486');
    }

    /**
     * Tells whether a character is an extender. A fixed list of single code
     * points is accepted, plus the ranges U+3031 to U+3035, U+309D to U+309E
     * and U+30FC to U+30FE.
     *
     * @param chr
     *            the character to classify
     * @return <code>true</code> when chr is one of the accepted extenders,
     *         <code>false</code> in every other case
     */
    public boolean isExtender(char chr) {
        switch (chr) {
            // extenders that are isolated code points
            case '\u00B7':
            case '\u02D0':
            case '\u02D1':
            case '\u0387':
            case '\u0640':
            case '\u0E46':
            case '\u0EC6':
            case '\u3005':
                return true;
            default:
                // everything else has to fall into one of the three ranges
                return within(chr, '\u3031', '\u3035')
                        || within(chr, '\u309D', '\u309E')
                        || within(chr, '\u30FC', '\u30FE');
        }
    }

    /**
     * Ideographic test used by {@link #isLetter(char)}: U+4E00 to U+9FA5,
     * the single code point U+3007, and U+3021 to U+3029.
     */
    private static boolean isIdeographic(char candidate) {
        return within(candidate, '\u4E00', '\u9FA5')
                || candidate == '\u3007'
                || within(candidate, '\u3021', '\u3029');
    }

    /**
     * Inclusive range check shared by all classification methods.
     */
    private static boolean within(char candidate, char low, char high) {
        return low <= candidate && candidate <= high;
    }
}
116 /*
117  * $Log$
118  * Revision 1.1  2006/11/17 16:40:14  nathaliest
119  * fixed wsml serializer to not serialize sqnames with unallowed characters and added util class to check characters
120  *
121  * 
122  */