1 /* 2 wsmo4j - a WSMO API and Reference Implementation 3 4 Copyright (c) 2004-2005, OntoText Lab. / SIRMA 5 University of Innsbruck, Austria 6 7 This library is free software; you can redistribute it and/or modify it under 8 the terms of the GNU Lesser General Public License as published by the Free 9 Software Foundation; either version 2.1 of the License, or (at your option) 10 any later version. 11 This library is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 14 details. 15 You should have received a copy of the GNU Lesser General Public License along 16 with this library; if not, write to the Free Software Foundation, Inc., 17 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 package com.ontotext.wsmo4j.common; 20 21 /** 22 * Utility class to check if a character is a letter, a digit, an extender, ... 23 * 24 * <pre> 25 * Created on Nov 17, 2006 26 * Committed by $Author: nathaliest $ 27 * $Source$, 28 * </pre> 29 * 30 * @author Nathalie Steinmetz, DERI Innsbruck 31 * @version $Revision: 1890 $ $Date: 2006-11-17 18:40:14 +0200 (Fri, 17 Nov 2006) $ 32 */ 33 public class CharUtil { 34 35 /** 36 * This method checks if a given character is a valid basechar or a valid 37 * ideograph 38 * 39 * @param chr 40 * character 41 * @return <code>true</code> if chr is a valid letter or 42 * <code>false</code> otherwise 43 */ 44 public boolean isLetter(char chr) { 45 if (isAlpha(chr)) 46 return true; 47 // ideographic 48 if ((chr >= '\u4E00' && chr <= '\u9FA5') || chr == '\u3007' 49 || (chr >= '\u3021' && chr <= '\u3029')) 50 return true; 51 return false; 52 } 53 54 /** 55 * This method checks if a given character is a valid basechar 56 * 57 * @param chr 58 * character 59 * @return <code>true</code> if chr is a valid basechar or 60 * <code>false</code> otherwise 61 */ 62 public boolean isAlpha(char chr){ 63 // basechar A-Z || a-z 64 return ((chr >= '\u0041' && chr <= '\u005A') 65 || (chr >= '\u0061' && chr <= '\u007A')); 66 } 67 68 /** 69 * This method checks if a given character is a valid digit 70 * 71 * @param chr 72 * character 73 * @return <code>true</code> if chr is a valid digit or 74 * <code>false</code> otherwise 75 */ 76 public boolean isDigit(char chr){ 77 // digit 0-9 78 return (chr >= '\u0030' && chr <= '\u0039'); 79 } 80 81 /** 82 * This method checks if a given character is a combining char 83 * 84 * @param chr 85 * character 86 * @return <code>true</code> if chr is a combining char or 87 * <code>false</code> otherwise 88 */ 89 public boolean isCombiningChar(char chr) { 90 if ((chr >= '\u0300' && chr <= '\u0345') || (chr >= '\u0360' && 91 chr <= '\u0361') || (chr >= '\u0483' && chr <= '\u0486')) { 92 return true; 93 } 94 return false; 95 } 96 97 /** 98 * This method checks if a given character is an extender 99 * 100 * @param chr 101 * character 102 * @return <code>true</code> if chr is an extender or 103 * <code>false</code> otherwise 104 */ 105 public boolean isExtender(char chr) { 106 if (chr == '\u00B7' || chr == '\u02D0' || chr == '\u02D1' 107 || chr == '\u0387' || chr == '\u0640' || chr == '\u0E46' 108 || chr == '\u0EC6' || chr == '\u3005' || (chr >= '\u3031' && 109 chr <= '\u3035') || (chr >= '\u309D' && chr <= '\u309E') 110 || (chr >= '\u30FC' && chr <= '\u30FE')) { 111 return true; 112 } 113 return false; 114 } 115 } 116 /* 117 * $Log$ 118 * Revision 1.1 2006/11/17 16:40:14 nathaliest 119 * fixed wsml serializer to not serialize sqnames with unallowed characters and added util class to check characters 120 * 121 * 122 */