001 package org.maltparser.core.helper;
002
003
004 import java.io.BufferedInputStream;
005 import java.io.BufferedOutputStream;
006 import java.io.File;
007 import java.io.FileInputStream;
008 import java.io.FileNotFoundException;
009 import java.io.FileOutputStream;
010 import java.io.IOException;
011 import java.io.InputStream;
012 import java.net.MalformedURLException;
013 import java.net.URL;
014
015 import org.apache.log4j.Logger;
016 import org.maltparser.core.config.ConfigurationException;
017 import org.maltparser.core.exception.MaltChainedException;
018 import org.maltparser.core.plugin.Plugin;
019 import org.maltparser.core.plugin.PluginLoader;
020
021 /**
022 *
023 *
024 * @author Johan Hall
025 */
026 public class Util {
027 private static final int BUFFER = 4096;
028 private static final char AMP_CHAR = '&';
029 private static final char LT_CHAR = '<';
030 private static final char GT_CHAR = '>';
031 private static final char QUOT_CHAR = '"';
032 private static final char APOS_CHAR = '\'';
033
034 public static String xmlEscape(String str) {
035 boolean needEscape = false;
036 char c;
037 for (int i = 0; i < str.length(); i++) {
038 c = str.charAt(i);
039 if (c == AMP_CHAR || c == LT_CHAR || c == GT_CHAR || c == QUOT_CHAR || c == APOS_CHAR) {
040 needEscape = true;
041 break;
042 }
043 }
044 if (!needEscape) {
045 return str;
046 }
047 final StringBuilder sb = new StringBuilder();
048 for (int i = 0; i < str.length(); i++) {
049 c = str.charAt(i);
050 if (str.charAt(i) == AMP_CHAR) {
051 sb.append("&");
052 } else if ( str.charAt(i) == LT_CHAR) {
053 sb.append("<");
054 } else if (str.charAt(i) == GT_CHAR) {
055 sb.append(">");
056 } else if (str.charAt(i) == QUOT_CHAR) {
057 sb.append(""");
058 } else if (str.charAt(i) == APOS_CHAR) {
059 sb.append("'");
060 } else {
061 sb.append(c);
062 }
063 }
064 return sb.toString();
065 }
066
067 /**
068 * Search for a file according the following priority:
069 * <ol>
070 * <li>The local file system
071 * <li>Specified as an URL (starting with http:, file:, ftp: or jar:
072 * <li>MaltParser distribution file (malt.jar)
073 * <li>MaltParser plugins
074 * </ol>
075 *
076 * If the file string is found, an URL object is returned, otherwise <b>null</b>
077 *
078 * @param fileString the file string to convert into an URL.
079 * @return an URL object, if the file string is found, otherwise <b>null</b>
080 * @throws MaltChainedException
081 */
082 public static URL findURL(String fileString) throws MaltChainedException {
083 File specFile = new File(fileString);
084
085 try {
086 if (specFile.exists()) {
087 // found the file in the file system
088 return new URL("file:///"+specFile.getAbsolutePath());
089 } else if (fileString.startsWith("http:") || fileString.startsWith("file:") || fileString.startsWith("ftp:") || fileString.startsWith("jar:")) {
090 // the input string is an URL string starting with http, file, ftp or jar
091 return new URL(fileString);
092 } else {
093 return findURLinJars(fileString);
094 }
095 } catch (MalformedURLException e) {
096 throw new MaltChainedException("Malformed URL: "+fileString, e);
097 }
098 }
099
100 public static URL findURLinJars(String fileString) throws MaltChainedException {
101 try {
102 // search in malt.jar and its plugins
103 if (Thread.currentThread().getClass().getResource(fileString) != null) {
104 // found the input string in the malt.jar file
105 return Thread.currentThread().getClass().getResource(fileString);
106 } else {
107 for (Plugin plugin : PluginLoader.instance()) {
108 URL url = null;
109 if (!fileString.startsWith("/")) {
110 url = new URL("jar:"+plugin.getUrl() + "!/" + fileString);
111 } else {
112 url = new URL("jar:"+plugin.getUrl() + "!" + fileString);
113 }
114
115 try {
116 InputStream is = url.openStream();
117 is.close();
118 } catch (IOException e) {
119 continue;
120 }
121 // found the input string in one of the plugins
122 return url;
123 }
124 // could not convert the input string into an URL
125 return null;
126 }
127 } catch (MalformedURLException e) {
128 throw new MaltChainedException("Malformed URL: "+fileString, e);
129 }
130 }
131
132 public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
133 logger.info(".");
134 int tic = inTic + 1;
135 if (tic >= nTicxRow) {
136 ticInfo(logger, startTime, subject);
137 tic = 0;
138 }
139 return tic;
140 }
141
142 public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) {
143 logger.info(".");
144 for (int i = 1; i <= nTicxRow; i++) {
145 logger.info(" ");
146 }
147 ticInfo(logger, startTime, subject);
148 }
149
150 public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
151 for (int i = inTic; i <= nTicxRow; i++) {
152 logger.info(" ");
153 }
154 ticInfo(logger, startTime, subject);
155 }
156
157 private static void ticInfo(Logger logger, long startTime, int subject) {
158 logger.info("\t");
159 int a = 1000000;
160 if (subject != 0) {
161 while (subject/a == 0) {
162 logger.info(" ");
163 a /= 10;
164 }
165 } else {
166 logger.info(" ");
167 }
168 logger.info(subject);
169 logger.info("\t");
170 long time = (System.currentTimeMillis()-startTime)/1000;
171 a = 1000000;
172 if (time != 0) {
173 while (time/a == 0 ) {
174 logger.info(" ");
175 a /= 10;
176 }
177 logger.info(time);
178 logger.info("s");
179 } else {
180 logger.info(" 0s");
181 }
182 logger.info("\t");
183 long memory = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1000000;
184 a = 1000000;
185 if (memory != 0) {
186 while (memory/a == 0 ) {
187 logger.info(" ");
188 a /= 10;
189 }
190 logger.info(memory);
191 logger.info("MB\n");
192 } else {
193 logger.info(" 0MB\n");
194 }
195 }
196
197 public static void copyfile(String source, String destination) throws MaltChainedException {
198 try {
199 byte[] readBuffer = new byte[BUFFER];
200 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source));
201 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
202 int n = 0;
203 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
204 bos.write(readBuffer, 0, n);
205 }
206 bos.flush();
207 bos.close();
208 bis.close();
209 } catch (FileNotFoundException e) {
210 throw new MaltChainedException("The destination file '"+destination+"' cannot be created when coping the file. ", e);
211 } catch (IOException e) {
212 throw new MaltChainedException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e);
213 }
214 }
215
216 }