diff -Nru java-webdict-lib-0.3ubuntu3/build.xml java-webdict-lib-0.4/build.xml
--- java-webdict-lib-0.3ubuntu3/build.xml 2009-02-07 10:48:19.000000000 +0000
+++ java-webdict-lib-0.4/build.xml 2011-04-20 23:18:36.000000000 +0000
@@ -4,7 +4,7 @@
-
+
diff -Nru java-webdict-lib-0.3ubuntu3/debian/changelog java-webdict-lib-0.4/debian/changelog
--- java-webdict-lib-0.3ubuntu3/debian/changelog 2009-04-24 12:41:10.000000000 +0000
+++ java-webdict-lib-0.4/debian/changelog 2011-04-20 23:19:40.000000000 +0000
@@ -1,3 +1,10 @@
+java-webdict-lib (0.4) maverick; urgency=low
+
+ * Added support for lexicons.
+ * Updated packaging for maverick.
+
+ -- Louis-Dominique Dubeau Wed, 20 Apr 2011 19:18:56 -0400
+
java-webdict-lib (0.3ubuntu3) jaunty; urgency=low
* Added support for variant readings.
diff -Nru java-webdict-lib-0.3ubuntu3/debian/control java-webdict-lib-0.4/debian/control
--- java-webdict-lib-0.3ubuntu3/debian/control 2009-04-24 12:42:03.000000000 +0000
+++ java-webdict-lib-0.4/debian/control 2011-04-20 23:30:59.000000000 +0000
@@ -2,13 +2,13 @@
Section: universe/libs
Priority: extra
Maintainer: Louis-Dominique Dubeau
-Build-Depends: ant, debhelper (>= 7)
-Build-Depends-Indep: ldd-ant (>= 0.8), default-jdk
-Standards-Version: 3.8.0
+Build-Depends: ant, debhelper (>= 8)
+Build-Depends-Indep: ldd-ant (>= 0.10), default-jdk
+Standards-Version: 3.9.1
Package: java-webdict-lib
Architecture: all
-Depends: default-jre (>= 1.6) | default-jre-headless (>= 1.6) | java6-runtime | java6-runtime-headless
+Depends: ${misc:Depends}, default-jre (>= 1.6) | default-jre-headless (>= 1.6) | java6-runtime | java6-runtime-headless
Description: Library to examine and manipulate indices of online dictionaries
This library provides Index objects to determine which words are present
in web libraries.
diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/BasicLexicon.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/BasicLexicon.java
--- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/BasicLexicon.java 1970-01-01 00:00:00.000000000 +0000
+++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/BasicLexicon.java 2010-07-17 12:28:12.000000000 +0000
@@ -0,0 +1,374 @@
+package com.lddubeau.ddb;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public final class BasicLexicon
+{
+
+ private static final int CACHE_FILE_VERSION = 1;
+
+ private final URL url;
+
+ private final ProgressMonitor monitor;
+
+ /**
+ * This constructor creates a new BasicLexicon object which allows checking
+ * whether a term exists in a lexicon and retrieving the data stored for it.
+ *
+ * @param url
+ * The URL where the lexicon is located on the web.
+ * @throws MalformedURLException
+ * When the URL is incorrect.
+ */
+ public BasicLexicon(String url) throws MalformedURLException
+ {
+ this(url, null);
+ }
+
+ /**
+ * This constructor creates a new BasicLexicon object which allows checking
+ * whether a term exists in a lexicon and retrieving the data stored for it.
+ *
+ * @param url
+ * The URL where the lexicon is located on the web.
+ * @param monitor
+ * The monitor to use to check progress.
+ * @throws MalformedURLException
+ * When the URL is incorrect.
+ */
+ public BasicLexicon(String url, ProgressMonitor monitor)
+ throws MalformedURLException
+ {
+ if (url == null)
+ {
+ throw new NullPointerException("url is null");
+ }
+ this.url = new URL(url);
+ this.monitor = monitor;
+ }
+
+ /**
+ * This method verifies whether a term exists in the lexicon.
+ *
+ * @param term
+ * The term to look up.
+ * @return True if the term is present in the lexicon, false if not.
+ */
+ public boolean exists(String term) throws CancelledByUser
+ {
+ if (term == null)
+ {
+ throw new NullPointerException("term is null");
+ }
+ return exists(this.url, this.monitor, term);
+ }
+
+ /**
+ * This method returns the length of the longest term in the lexicon.
+ *
+ * @return The length.
+ */
+ public int getLongestTermLength() throws CancelledByUser
+ {
+ return getLongestTermLength(this.url, this.monitor);
+ }
+
+ /**
+ * This method returns the data associated with a term.
+ *
+ * @return The data, or null if the term is not present in the lexicon.
+ */
+ public String getTermData(String term) throws CancelledByUser
+ {
+ return this.exists(term)
+ ? getTermData(this.url, this.monitor, term)
+ : null;
+ }
+
+ private static final class DatedMap implements DatedData
+ {
+ public final long date;
+
+ public final Map map;
+
+ public final int longest;
+
+ public DatedMap(long date, Map map, int longest)
+ {
+ this.date = date;
+ this.map = map;
+ this.longest = longest;
+ }
+
+ /**
+ * @see com.lddubeau.ddb.DatedData#getDate()
+ */
+ @Override
+ public long getDate()
+ {
+ return this.date;
+ }
+
+ }
+
+ private static final class DatedMapBuilder implements DataBuilder
+ {
+ private int version = 0;
+
+ private long date = 0;
+
+ private int longest = -1;
+
+ private Map ret;
+
+ @Override
+ public DatedMap getData()
+ {
+ return (this.ret == null) ? null : new DatedMap(this.date,
+ this.ret, this.longest);
+ }
+
+ @Override
+ public long getDate()
+ {
+ return this.date;
+ }
+
+ @Override
+ public void readFromCache(DataInputStream in)
+ {
+ try
+ {
+ this.longest = in.readInt();
+ int num = in.readInt();
+
+ this.ret = new TreeMap();
+ for (int i = 0; i < num; i++)
+ {
+ this.ret.put(in.readUTF(), in.readUTF());
+ }
+
+ if (in.read() > -1)
+ {
+ // Unexpected trailing bytes follow the last entry, so the cache
+ // cannot be trusted: discard what was read.
+ this.ret = null;
+ }
+ }
+ catch (Exception e)
+ {
+ // Assume a corrupted cache
+ this.ret = null;
+ }
+ }
+
+ /**
+ * @see com.lddubeau.ddb.DataBuilder#setDate(long)
+ */
+ @Override
+ public void setDate(long date)
+ {
+ this.date = date;
+
+ }
+
+ @Override
+ public void readFromWeb(BufferedReader in, ProgressMonitor monitor)
+ throws CancelledByUser, IOException
+ {
+ final Pattern xml_re = Pattern.compile("^<\\?xml",
+ Pattern.CASE_INSENSITIVE);
+ final Pattern head_re = Pattern.compile("(.*?)",
+ Pattern.CASE_INSENSITIVE);
+ final Pattern meaning_re = Pattern.compile(
+ "^
.*?Meaning.*?:(.*)", Pattern.CASE_INSENSITIVE);
+ final Pattern clean_re = Pattern.compile("<.*?>");
+ final Pattern close_p_re = Pattern.compile("
",
+ Pattern.CASE_INSENSITIVE);
+
+ String line = in.readLine();
+ this.ret = new TreeMap();
+ try
+ {
+ String head = null;
+ String meaning = null;
+ boolean capturing_meaning = false;
+ while (line != null)
+ {
+ if (monitor != null && monitor.isCancelled())
+ {
+ throw new CancelledByUser();
+ }
+ line = line.trim();
+
+ if ((line.length() > 0) && !xml_re.matcher(line).find())
+ {
+ // We have a line which contains more than white space
+ // and is not the
+ // longest)
+ {
+ longest = head.length();
+ }
+ head = null;
+ meaning = null;
+ capturing_meaning = false;
+ }
+ }
+ }
+ line = in.readLine();
+ }
+ }
+ // Any exception means we were unsuccessful so we flush ret
+ catch (IOException e)
+ {
+ this.ret = null;
+ throw e;
+ }
+ catch (CancelledByUser e)
+ {
+ this.ret = null;
+ throw e;
+ }
+ catch (Exception e)
+ {
+ this.ret = null;
+ }
+ }
+
+ @Override
+ public void writeToCache(DataOutputStream out) throws IOException
+ {
+ out.writeInt(CACHE_FILE_VERSION);
+ out.writeLong(date);
+ out.writeInt(longest);
+ out.writeInt(ret.size());
+ for (Map.Entry entry : this.ret.entrySet())
+ {
+ out.writeUTF(entry.getKey());
+ out.writeUTF(entry.getValue());
+ }
+ }
+
+ @Override
+ public boolean checkVersion()
+ {
+ return (this.version == CACHE_FILE_VERSION);
+ }
+
+ @Override
+ public void loadPreamble(DataInputStream in)
+ {
+ try
+ {
+ this.version = in.readInt();
+ if (this.checkVersion())
+ {
+ this.date = in.readLong();
+ }
+ }
+ catch (IOException e)
+ {
+ // Ignore the exception: act as if the cache is corrupted.
+ }
+ }
+
+ }
+
+ private static final Util.Store store = new Util.Store();
+
+ private static boolean exists(URL url, ProgressMonitor monitor, String term)
+ throws CancelledByUser
+ {
+ return store.getDatedData(url, monitor, new DatedMapBuilder()).map
+ .containsKey(Util.normalize(term));
+ }
+
+ private static int getLongestTermLength(URL url, ProgressMonitor monitor)
+ throws CancelledByUser
+ {
+ return store.getDatedData(url, monitor, new DatedMapBuilder()).longest;
+ }
+
+ private static String getTermData(URL url, ProgressMonitor monitor,
+ String term) throws CancelledByUser
+ {
+ return store.getDatedData(url, monitor, new DatedMapBuilder()).map
+ .get(Util.normalize(term));
+ }
+
+ public static void main(String [] argv)
+ {
+ // This ought to be used for testing only
+ try
+ {
+ Util.CACHE_PATH = new File(argv[0]);
+ BasicLexicon lex = new BasicLexicon(argv[1]);
+ System.out.println(lex.getTermData(argv[2]));
+ }
+ catch (Exception e)
+ {
+ throw new Error(e);
+ }
+ }
+
+}
diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DataBuilder.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DataBuilder.java
--- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DataBuilder.java 1970-01-01 00:00:00.000000000 +0000
+++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DataBuilder.java 2010-07-14 14:14:12.000000000 +0000
@@ -0,0 +1,25 @@
+package com.lddubeau.ddb;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+interface DataBuilder
+{
+ void loadPreamble(DataInputStream in);
+
+ boolean checkVersion();
+
+ long getDate();
+
+ void readFromCache(DataInputStream in);
+
+ void writeToCache(DataOutputStream out) throws IOException;
+
+ void setDate(long date);
+
+ void readFromWeb(BufferedReader in, ProgressMonitor monitor) throws CancelledByUser, IOException;
+
+ T getData();
+}
diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DatedData.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DatedData.java
--- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DatedData.java 1970-01-01 00:00:00.000000000 +0000
+++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DatedData.java 2010-07-14 15:51:27.000000000 +0000
@@ -0,0 +1,6 @@
+package com.lddubeau.ddb;
+
+interface DatedData
+{
+ long getDate();
+}
diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Index.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Index.java
--- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Index.java 2009-02-07 08:49:49.000000000 +0000
+++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Index.java 2010-07-14 19:29:10.000000000 +0000
@@ -4,488 +4,27 @@
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.HttpURLConnection;
import java.net.MalformedURLException;
-import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Locale;
-import java.util.Map;
import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
import java.util.regex.Pattern;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
/**
* This class is designed to model simple indices that some web dictionaries
* export. These indices are expected to contain only a list of the words
* present in the dictionary. Internally, the class caches the indices so if two
* Index objects are created with the same URL, only one data structure exists
- * internally. The index object itself merely knows what its URL is.?
+ * internally. The index object itself merely knows what its URL is.
*
* @author ldd
*/
public final class Index
{
- private static final Logger logger = Logger
- .getLogger(Index.class.getName());
- private static final Hashtable normalize_table = new Hashtable();
private static final int CACHE_FILE_VERSION = 1;
- private static File CACHE_PATH = null;
- static
- {
- String os_name = System.getProperty("os.name").toLowerCase(Locale.US);
- if ((os_name.indexOf("linux") != -1) || (os_name.indexOf("unix") != -1)
- || (os_name.indexOf("solaris") != -1))
- {
- CACHE_PATH = new File(System.getProperty("user.home"),
- ".webdict-lib");
- }
- else if (os_name.indexOf("windows") != -1)
- {
- CACHE_PATH = new File(System.getenv("USERPROFILE"), "WebDict-Lib");
- }
- else
- {
- throw new Error(
- "cannot parse operating system name and adjust parameters: "
- + os_name);
- }
- normalize_table.put("万", "萬");
- normalize_table.put("与", "與");
- normalize_table.put("両", "兩");
- normalize_table.put("並", "竝");
- normalize_table.put("乗", "乘");
- normalize_table.put("乱", "亂");
- normalize_table.put("亀", "龜");
- normalize_table.put("予", "豫");
- normalize_table.put("争", "爭");
- normalize_table.put("亘", "亙");
- normalize_table.put("亜", "亞");
- normalize_table.put("仏", "佛");
- normalize_table.put("仮", "假");
- normalize_table.put("会", "會");
- normalize_table.put("伜", "倅");
- normalize_table.put("伝", "傳");
- normalize_table.put("体", "體");
- normalize_table.put("余", "餘");
- normalize_table.put("佞", "侫");
- normalize_table.put("你", "儞");
- normalize_table.put("併", "倂");
- normalize_table.put("価", "價");
- normalize_table.put("侭", "儘");
- normalize_table.put("俱", "倶");
- normalize_table.put("値", "值");
- normalize_table.put("倹", "儉");
- normalize_table.put("偷", "偸");
- normalize_table.put("偽", "僞");
- normalize_table.put("僣", "僭");
- normalize_table.put("児", "兒");
- normalize_table.put("党", "黨");
- normalize_table.put("内", "內");
- normalize_table.put("円", "圓");
- normalize_table.put("冊", "册");
- normalize_table.put("写", "寫");
- normalize_table.put("処", "處");
- normalize_table.put("函", "凾");
- normalize_table.put("剎", "刹");
- normalize_table.put("剣", "劍");
- normalize_table.put("剤", "劑");
- normalize_table.put("剰", "剩");
- normalize_table.put("励", "勵");
- normalize_table.put("労", "勞");
- normalize_table.put("効", "效");
- normalize_table.put("勅", "敕");
- normalize_table.put("勧", "勸");
- normalize_table.put("勲", "勳");
- normalize_table.put("区", "區");
- normalize_table.put("医", "醫");
- normalize_table.put("卆", "卒");
- normalize_table.put("単", "單");
- normalize_table.put("即", "卽");
- normalize_table.put("历", "歷");
- normalize_table.put("厨", "廚");
- normalize_table.put("厩", "廐");
- normalize_table.put("厳", "嚴");
- normalize_table.put("参", "參");
- normalize_table.put("双", "雙");
- normalize_table.put("収", "收");
- normalize_table.put("叙", "敍");
- normalize_table.put("号", "號");
- normalize_table.put("告", "吿");
- normalize_table.put("呑", "吞");
- normalize_table.put("唖", "啞");
- normalize_table.put("営", "營");
- normalize_table.put("啟", "啓");
- normalize_table.put("喻", "喩");
- normalize_table.put("嘱", "囑");
- normalize_table.put("嚢", "囊");
- normalize_table.put("回", "囘");
- normalize_table.put("団", "團");
- normalize_table.put("囲", "圍");
- normalize_table.put("図", "圖");
- normalize_table.put("国", "國");
- normalize_table.put("圏", "圈");
- normalize_table.put("圧", "壓");
- normalize_table.put("堕", "墮");
- normalize_table.put("塁", "壘");
- normalize_table.put("塩", "鹽");
- normalize_table.put("増", "增");
- normalize_table.put("壊", "壞");
- normalize_table.put("壌", "壤");
- normalize_table.put("壮", "壯");
- normalize_table.put("声", "聲");
- normalize_table.put("壱", "壹");
- normalize_table.put("売", "賣");
- normalize_table.put("壷", "壺");
- normalize_table.put("変", "變");
- normalize_table.put("奨", "奬");
- normalize_table.put("娛", "娯");
- normalize_table.put("嬢", "孃");
- normalize_table.put("学", "學");
- normalize_table.put("宝", "寶");
- normalize_table.put("実", "實");
- normalize_table.put("寝", "寢");
- normalize_table.put("対", "對");
- normalize_table.put("寿", "壽");
- normalize_table.put("専", "專");
- normalize_table.put("将", "將");
- normalize_table.put("尓", "爾");
- normalize_table.put("戾", "戻");
- normalize_table.put("尭", "堯");
- normalize_table.put("尽", "盡");
- normalize_table.put("届", "屆");
- normalize_table.put("屏", "屛");
- normalize_table.put("属", "屬");
- normalize_table.put("岳", "嶽");
- normalize_table.put("峡", "峽");
- normalize_table.put("島", "嶋");
- normalize_table.put("巌", "巖");
- normalize_table.put("巣", "巢");
- normalize_table.put("巻", "卷");
- normalize_table.put("帯", "帶");
- normalize_table.put("帰", "歸");
- normalize_table.put("并", "幷");
- normalize_table.put("庁", "廳");
- normalize_table.put("広", "廣");
- normalize_table.put("廃", "廢");
- normalize_table.put("廄", "廏");
- normalize_table.put("弁", "辯");
- normalize_table.put("弐", "貳");
- normalize_table.put("弑", "弒");
- normalize_table.put("弥", "彌");
- normalize_table.put("弯", "彎");
- normalize_table.put("弾", "彈");
- normalize_table.put("当", "當");
- normalize_table.put("径", "徑");
- normalize_table.put("従", "從");
- normalize_table.put("徳", "德");
- normalize_table.put("徴", "徵");
- normalize_table.put("応", "應");
- normalize_table.put("忰", "悴");
- normalize_table.put("怛", "憺");
- normalize_table.put("恒", "恆");
- normalize_table.put("恋", "戀");
- normalize_table.put("恵", "惠");
- normalize_table.put("悦", "悅");
- normalize_table.put("悩", "惱");
- normalize_table.put("悪", "惡");
- normalize_table.put("惨", "慘");
- normalize_table.put("愠", "慍");
- normalize_table.put("慎", "愼");
- normalize_table.put("懐", "懷");
- normalize_table.put("懴", "懺");
- normalize_table.put("戦", "戰");
- normalize_table.put("戯", "戲");
- normalize_table.put("戱", "戲");
- normalize_table.put("戸", "戶");
- normalize_table.put("払", "拂");
- normalize_table.put("抜", "拔");
- normalize_table.put("択", "擇");
- normalize_table.put("抬", "擡");
- normalize_table.put("担", "擔");
- normalize_table.put("拝", "拜");
- normalize_table.put("拠", "據");
- normalize_table.put("拡", "擴");
- normalize_table.put("挙", "擧");
- normalize_table.put("挟", "挾");
- normalize_table.put("捜", "搜");
- normalize_table.put("掲", "揭");
- normalize_table.put("摂", "攝");
- normalize_table.put("撹", "攪");
- normalize_table.put("敎", "教");
- normalize_table.put("数", "數");
- normalize_table.put("斉", "齊");
- normalize_table.put("斎", "齋");
- normalize_table.put("断", "斷");
- normalize_table.put("既", "旣");
- normalize_table.put("旧", "舊");
- normalize_table.put("昼", "晝");
- normalize_table.put("昿", "曠");
- normalize_table.put("晋", "晉");
- normalize_table.put("暁", "曉");
- normalize_table.put("暦", "曆");
- normalize_table.put("曽", "曾");
- normalize_table.put("条", "條");
- normalize_table.put("来", "來");
- normalize_table.put("枢", "樞");
- normalize_table.put("枦", "櫨");
- normalize_table.put("栄", "榮");
- normalize_table.put("桜", "櫻");
- normalize_table.put("桟", "棧");
- normalize_table.put("桧", "檜");
- normalize_table.put("梹", "檳");
- normalize_table.put("梼", "檮");
- normalize_table.put("検", "檢");
- normalize_table.put("楼", "樓");
- normalize_table.put("楽", "樂");
- normalize_table.put("様", "樣");
- normalize_table.put("槙", "槇");
- normalize_table.put("権", "權");
- normalize_table.put("横", "橫");
- normalize_table.put("檪", "櫟");
- normalize_table.put("櫺", "欞");
- normalize_table.put("欧", "歐");
- normalize_table.put("歓", "歡");
- normalize_table.put("歩", "步");
- normalize_table.put("歯", "齒");
- normalize_table.put("歳", "歲");
- normalize_table.put("歴", "歷");
- normalize_table.put("残", "殘");
- normalize_table.put("殱", "殲");
- normalize_table.put("殴", "毆");
- normalize_table.put("殻", "殼");
- normalize_table.put("毁", "毀");
- normalize_table.put("毎", "每");
- normalize_table.put("毗", "毘");
- normalize_table.put("気", "氣");
- normalize_table.put("氷", "冰");
- normalize_table.put("污", "汚");
- normalize_table.put("沢", "澤");
- normalize_table.put("浄", "淨");
- normalize_table.put("浅", "淺");
- normalize_table.put("浜", "濱");
- normalize_table.put("涙", "淚");
- normalize_table.put("涛", "濤");
- normalize_table.put("涜", "瀆");
- normalize_table.put("清", "淸");
- normalize_table.put("渇", "渴");
- normalize_table.put("済", "濟");
- normalize_table.put("渋", "澁");
- normalize_table.put("渓", "溪");
- normalize_table.put("温", "溫");
- normalize_table.put("湾", "灣");
- normalize_table.put("湿", "濕");
- normalize_table.put("満", "滿");
- normalize_table.put("滞", "滯");
- normalize_table.put("潅", "灌");
- normalize_table.put("潜", "潛");
- normalize_table.put("灯", "燈");
- normalize_table.put("炉", "爐");
- normalize_table.put("点", "點");
- normalize_table.put("為", "爲");
- normalize_table.put("焔", "焰");
- normalize_table.put("焼", "燒");
- normalize_table.put("状", "狀");
- normalize_table.put("独", "獨");
- normalize_table.put("狭", "狹");
- normalize_table.put("猟", "獵");
- normalize_table.put("猪", "猪");
- normalize_table.put("献", "獻");
- normalize_table.put("獣", "獸");
- normalize_table.put("珱", "瓔");
- normalize_table.put("瑶", "瑤");
- normalize_table.put("瓶", "甁");
- normalize_table.put("産", "產");
- normalize_table.put("画", "畫");
- normalize_table.put("畳", "疊");
- normalize_table.put("畴", "疇");
- normalize_table.put("痴", "癡");
- normalize_table.put("発", "發");
- normalize_table.put("皋", "皐");
- normalize_table.put("盗", "盜");
- normalize_table.put("県", "縣");
- normalize_table.put("真", "眞");
- normalize_table.put("眾", "衆");
- normalize_table.put("砕", "碎");
- normalize_table.put("砺", "礪");
- normalize_table.put("砿", "礦");
- normalize_table.put("碍", "礙");
- normalize_table.put("礼", "禮");
- normalize_table.put("祢", "禰");
- normalize_table.put("祷", "禱");
- normalize_table.put("禄", "祿");
- normalize_table.put("禅", "禪");
- normalize_table.put("秘", "祕");
- normalize_table.put("称", "稱");
- normalize_table.put("稲", "稻");
- normalize_table.put("穂", "穗");
- normalize_table.put("穏", "穩");
- normalize_table.put("穐", "龝");
- normalize_table.put("穣", "穰");
- normalize_table.put("窃", "竊");
- normalize_table.put("窓", "窗");
- normalize_table.put("竜", "龍");
- normalize_table.put("競", "竸");
- normalize_table.put("筝", "箏");
- normalize_table.put("篭", "籠");
- normalize_table.put("簑", "簔");
- normalize_table.put("籖", "籤");
- normalize_table.put("粋", "粹");
- normalize_table.put("粛", "肅");
- normalize_table.put("粤", "粵");
- normalize_table.put("糸", "絲");
- normalize_table.put("経", "經");
- normalize_table.put("絵", "繪");
- normalize_table.put("絶", "絕");
- normalize_table.put("継", "繼");
- normalize_table.put("続", "續");
- normalize_table.put("総", "總");
- normalize_table.put("緑", "綠");
- normalize_table.put("緒", "緖");
- normalize_table.put("緕", "纃");
- normalize_table.put("縁", "緣");
- normalize_table.put("纒", "纏");
- normalize_table.put("縄", "繩");
- normalize_table.put("縦", "縱");
- normalize_table.put("繊", "纖");
- normalize_table.put("繋", "繫");
- normalize_table.put("翆", "翠");
- normalize_table.put("聡", "聰");
- normalize_table.put("聴", "聽");
- normalize_table.put("胆", "膽");
- normalize_table.put("脱", "脫");
- normalize_table.put("脳", "腦");
- normalize_table.put("臓", "臟");
- normalize_table.put("舉", "擧");
- normalize_table.put("舎", "舍");
- normalize_table.put("舮", "艫");
- normalize_table.put("艶", "艷");
- normalize_table.put("芦", "蘆");
- normalize_table.put("芸", "藝");
- normalize_table.put("苔", "薹");
- normalize_table.put("茎", "莖");
- normalize_table.put("荘", "莊");
- normalize_table.put("蔵", "藏");
- normalize_table.put("薫", "薰");
- normalize_table.put("薬", "藥");
- normalize_table.put("薮", "藪");
- normalize_table.put("虚", "虛");
- normalize_table.put("虫", "蟲");
- normalize_table.put("虱", "蝨");
- normalize_table.put("蚕", "蠶");
- normalize_table.put("蛍", "螢");
- normalize_table.put("蛎", "蠣");
- normalize_table.put("蛮", "蠻");
- normalize_table.put("蝿", "蠅");
- normalize_table.put("衛", "衞");
- normalize_table.put("装", "裝");
- normalize_table.put("褒", "襃");
- normalize_table.put("褝", "襌");
- normalize_table.put("覚", "覺");
- normalize_table.put("覧", "覽");
- normalize_table.put("観", "觀");
- normalize_table.put("触", "觸");
- normalize_table.put("訳", "譯");
- normalize_table.put("証", "證");
- normalize_table.put("誉", "譽");
- normalize_table.put("説", "說");
- normalize_table.put("読", "讀");
- normalize_table.put("諌", "諫");
- normalize_table.put("謡", "謠");
- normalize_table.put("譛", "譖");
- normalize_table.put("譲", "讓");
- normalize_table.put("讃", "讚");
- normalize_table.put("豊", "豐");
- normalize_table.put("賎", "賤");
- normalize_table.put("賛", "贊");
- normalize_table.put("践", "踐");
- normalize_table.put("転", "轉");
- normalize_table.put("軽", "輕");
- normalize_table.put("輌", "輛");
- normalize_table.put("辞", "辭");
- normalize_table.put("辺", "邊");
- normalize_table.put("迩", "邇");
- normalize_table.put("逓", "遞");
- normalize_table.put("遅", "遲");
- normalize_table.put("遥", "遙");
- normalize_table.put("郎", "郞");
- normalize_table.put("郷", "鄕");
- normalize_table.put("鄉", "鄕");
- normalize_table.put("酔", "醉");
- normalize_table.put("醸", "釀");
- normalize_table.put("釈", "釋");
- normalize_table.put("鈎", "鉤");
- normalize_table.put("鈩", "鑪");
- normalize_table.put("鈬", "鐸");
- normalize_table.put("鉄", "鐵");
- normalize_table.put("鉱", "鑛");
- normalize_table.put("銭", "錢");
- normalize_table.put("鋭", "銳");
- normalize_table.put("鋳", "鑄");
- normalize_table.put("録", "錄");
- normalize_table.put("鎮", "鎭");
- normalize_table.put("鑚", "鑽");
- normalize_table.put("関", "關");
- normalize_table.put("閲", "閱");
- normalize_table.put("闘", "鬪");
- normalize_table.put("陥", "陷");
- normalize_table.put("険", "險");
- normalize_table.put("随", "隨");
- normalize_table.put("隠", "隱");
- normalize_table.put("隣", "鄰");
- normalize_table.put("隷", "隸");
- normalize_table.put("雑", "雜");
- normalize_table.put("霊", "靈");
- normalize_table.put("青", "靑");
- normalize_table.put("静", "靜");
- normalize_table.put("頚", "頸");
- normalize_table.put("頬", "頰");
- normalize_table.put("頼", "賴");
- normalize_table.put("顔", "顏");
- normalize_table.put("顕", "顯");
- normalize_table.put("顛", "顚");
- normalize_table.put("飲", "飮");
- normalize_table.put("餅", "餠");
- normalize_table.put("駄", "馱");
- normalize_table.put("駅", "驛");
- normalize_table.put("駆", "驅");
- normalize_table.put("騒", "騷");
- normalize_table.put("験", "驗");
- normalize_table.put("髄", "髓");
- normalize_table.put("髪", "髮");
- normalize_table.put("鯵", "鰺");
- normalize_table.put("鰛", "鰮");
- normalize_table.put("鴎", "鷗");
- normalize_table.put("鴬", "鶯");
- normalize_table.put("鶏", "鷄");
- normalize_table.put("鷏", "鷆");
- normalize_table.put("麁", "麤");
- normalize_table.put("麦", "麥");
- normalize_table.put("麸", "麩");
- normalize_table.put("麺", "麪");
- normalize_table.put("麽", "麼");
- normalize_table.put("黄", "黃");
- normalize_table.put("黒", "黑");
- normalize_table.put("黙", "默");
- normalize_table.put("鼡", "鼠");
- normalize_table.put("齢", "齡");
- normalize_table.put("朗", "朗");
- normalize_table.put("猪", "猪");
-
- }
-
private final URL url;
private final ProgressMonitor monitor;
@@ -552,7 +91,21 @@
return getLongestTermLength(this.url, this.monitor);
}
- private static final class DatedSet
+
+ private static boolean exists(URL url, ProgressMonitor monitor, String term)
+ throws CancelledByUser
+ {
+ return store.getDatedData(url, monitor, new DatedSetBuilder()).set
+ .contains(Util.normalize(term));
+ }
+
+ private static int getLongestTermLength(URL url, ProgressMonitor monitor)
+ throws CancelledByUser
+ {
+ return store.getDatedData(url, monitor, new DatedSetBuilder()).longest;
+ }
+
+ private static final class DatedSet implements DatedData
{
public final long date;
@@ -566,286 +119,160 @@
this.set = set;
this.longest = longest;
}
- }
-
- private static Hashtable indices = new Hashtable();
-
- private static boolean exists(URL url, ProgressMonitor monitor, String term) throws CancelledByUser
- {
- for (Map.Entry entry : normalize_table.entrySet())
- {
- term = term.replace(entry.getKey().charAt(0), entry.getValue().charAt(0));
- }
-
- return getDatedSet(url, monitor).set.contains(term);
- }
-
- private static int getLongestTermLength(URL url, ProgressMonitor monitor) throws CancelledByUser
- {
- return getDatedSet(url, monitor).longest;
- }
- private static DatedSet getDatedSet(URL url, ProgressMonitor monitor) throws CancelledByUser
- {
- String url_str = url.toString().intern();
- /*
- * Although there is no support for concurrent access of Index objects,
- * we need to synchronize at this point. This is required because
- * multiple libraries which do not talk to each other could be using
- * this code simultaneously. If library A access an Index object with
- * URL U at the same time library B access its own Index object with URL
- * U then, because the two URLs are the same, there is a risk of
- * concurrent access here.
+ /**
+ * @see com.lddubeau.ddb.DatedData#getDate()
*/
- synchronized (indices)
+ @Override
+ public long getDate()
{
- DatedSet ds = indices.get(url_str);
-
- if (ds == null)
- {
- ds = loadIndex(url, monitor);
- indices.put(url_str, ds);
- }
-
- return ds;
+ return this.date;
}
- }
- private static final Pattern head_clean_re = Pattern.compile("<.*?>");
+ }
- private static DatedSet loadIndex(URL url, ProgressMonitor monitor) throws CancelledByUser
+ private static final class DatedSetBuilder implements DataBuilder
{
- // The goal is as follow:
- // - If there is no cached data, load the index from the web site.
- // - If there is cached data but the index on the web site is newer,
- // load the index from the web site.
- // - If the web site is not responding, used the cached data.
- // - If the web site is not responding and there is no cached data,
- // return the empty set.
+ private int version = 0;
- CACHE_PATH.mkdirs();
+ private long date = 0;
- int longest = 0;
- long date = 0;
- Set ret = null;
+ private int longest = -1;
- HttpURLConnection conn = null;
+ private Set ret;
- // First try to see whether it is cached.
- File cache_path = null;
- try
+ @Override
+ public DatedSet getData()
{
- cache_path = new File(CACHE_PATH, url.toURI().toASCIIString()
- .replaceAll("[:/]", "_"));
-
+ return (this.ret == null) ? null : new DatedSet(this.date,
+ this.ret, this.longest);
}
- catch (URISyntaxException e)
+
+ @Override
+ public long getDate()
{
- throw new Error("cannot convert URL to URI: " + url.toString(), e);
+ return this.date;
}
- DataInputStream in_file = null;
- try
+ @Override
+ public void readFromCache(DataInputStream in)
{
- in_file = new DataInputStream(new GZIPInputStream(
- new FileInputStream(cache_path)));
- if (in_file.readInt() == CACHE_FILE_VERSION)
+ try
{
- date = in_file.readLong();
+ longest = in.readInt();
+ int num = in.readInt();
- // Check whether the data on the web site is newer
- boolean read_the_cache = true;
- try
+ this.ret = new HashSet(num);
+ for (int i = 0; i < num; i++)
{
- conn = (HttpURLConnection) url.openConnection();
- conn.setIfModifiedSince(date);
-
- conn.connect();
-
- // If we get the response HTTP_NOT_MODIFIED we want to read
- // the cache.
- // If we get any response other than HTTP_OK we want to read
- // the cache
- // because those are likely errors of communication.
- // If we get HTTP_OK, it means that the data on the web site
- // is newer.
- read_the_cache = (conn.getResponseCode() != HttpURLConnection.HTTP_OK);
- logger.log(Level.FINE, "HTTP response: "
- + conn.getResponseCode());
- }
- catch (IOException e)
- {
- // The web site is not accessible... read the cache.
+ this.ret.add(in.readUTF());
}
- if (read_the_cache)
+ if (in.read() > -1)
{
- logger.log(Level.FINE, "reading cache: "
- + cache_path.toString());
- // HTTP_OK would mean that the data on the server is newer
- longest = in_file.readInt();
- int num = in_file.readInt();
-
- ret = new HashSet(num);
- for (int i = 0; i < num; i++)
- {
- ret.add(in_file.readUTF());
- }
-
- if (in_file.read() > -1)
- {
- // Eek, we did not read all the data.
- // Flush...
- ret = null;
- }
+ // Unexpected trailing bytes follow the last entry, so the cache
+ // cannot be trusted: discard what was read.
+ this.ret = null;
}
- in_file.close();
}
- else
+ catch (Exception e)
{
- // Else, act as if there were no cache.
- logger.log(Level.FINE, "older cache: " + cache_path);
+ // Assume a corrupted cache
+ this.ret = null;
}
}
- catch (FileNotFoundException e)
- {
- // Ok, the cache does not exist. Just ignore.
- }
- catch (IOException e)
+
+ /**
+ * @see com.lddubeau.ddb.DataBuilder#setDate(long)
+ */
+ @Override
+ public void setDate(long date)
{
- // Ok, there was a problem reading the cache.
- // Flush and ignore.
- ret = null;
+ this.date = date;
+
}
- if (ret == null)
+ @Override
+ public void readFromWeb(BufferedReader in, ProgressMonitor monitor)
+ throws CancelledByUser, IOException
{
- logger.log(Level.FINE, "fetching data from web site" + url);
+ final Pattern head_clean_re = Pattern.compile("<.*?>");
+ String line = in.readLine();
+ this.ret = new HashSet();
try
{
- // It may happen that conn is null.
- if (conn == null)
+ while (line != null)
{
- conn = (HttpURLConnection) url.openConnection();
- conn.connect();
- }
-
- // Fail silently if we get anything else than HTTP_OK.
- if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
- {
- if (monitor != null)
- {
- monitor.setMinimum(0);
- monitor.setMaximum(conn.getContentLength());
- }
- date = conn.getDate();
- InputStream raw_stream = (monitor == null) ? conn
- .getInputStream() : new MonitoredInputStream(conn
- .getInputStream(), monitor);
- BufferedReader reader = new BufferedReader(
- new InputStreamReader(new GZIPInputStream(
- raw_stream), "UTF-8"));
- try
+ if (monitor != null && monitor.isCancelled())
{
- String line = reader.readLine();
- ret = new HashSet();
- while (line != null)
- {
- if (monitor != null && monitor.isCancelled())
- {
- throw new CancelledByUser();
- }
- line = head_clean_re.matcher(line).replaceAll("")
- .intern();
- ret.add(line);
- if (line.length() > longest)
- {
- longest = line.length();
- }
- line = reader.readLine();
- }
+ throw new CancelledByUser();
}
- finally
+ line = head_clean_re.matcher(line).replaceAll("").intern();
+ this.ret.add(line);
+ if (line.length() > longest)
{
- reader.close();
- if (monitor != null)
- {
- monitor.close();
- }
+ longest = line.length();
}
- }
- else
- {
- logger.log(Level.FINE,
- "fetching data from web site failed with code: "
- + conn.getResponseCode());
+ line = in.readLine();
}
}
+ // Any exception means we were unsuccessful so we flush ret
catch (IOException e)
{
- throw new Error(e);
+ this.ret = null;
+ throw e;
+ }
+ catch (CancelledByUser e)
+ {
+ this.ret = null;
+ throw e;
}
+ catch (Exception e)
+ {
+ this.ret = null;
+ }
+ }
+
+ @Override
+ public void writeToCache(DataOutputStream out) throws IOException
+ {
+ out.writeInt(CACHE_FILE_VERSION);
+ out.writeLong(date);
+ out.writeInt(longest);
+ out.writeInt(ret.size());
+ for (String entry : ret)
+ {
+ out.writeUTF(entry);
+ }
+ }
+
+ @Override
+ public boolean checkVersion()
+ {
+ return (this.version == CACHE_FILE_VERSION);
}
- if (ret != null)
+ @Override
+ public void loadPreamble(DataInputStream in)
{
- // Save the Set in the cache
- DataOutputStream out_file = null;
try
{
- out_file = new DataOutputStream(new GZIPOutputStream(
- new FileOutputStream(cache_path)));
- out_file.writeInt(CACHE_FILE_VERSION);
- out_file.writeLong(date);
- out_file.writeInt(longest);
- out_file.writeInt(ret.size());
- for (String entry : ret)
+ this.version = in.readInt();
+ if (this.checkVersion())
{
- out_file.writeUTF(entry);
+ this.date = in.readLong();
}
- out_file.flush();
- out_file.close();
- }
- catch (FileNotFoundException e)
- {
- // Ignore. It means there is some architectural issue which
- // prevents
- // us to cache.
- // This is not a fatal problem.
}
catch (IOException e)
{
- // Cleanup and otherwise ignore as above.
- if (out_file != null)
- {
- try
- {
- out_file.close();
- }
- catch (IOException e1)
- {
- // This is strange and would most likely be the result
- // of a
- // serious system problem. This is not the place to
- // report
- // serious system problems.
- }
- cache_path.delete();
- }
+ // Ignore the exception: act as if the cache is corrupted.
}
}
- else
- {
- logger.log(Level.FINE, "unable to fetch from cache or web site: "
- + url);
- ret = new HashSet();
- date = 0;
- longest = 0;
- }
- return (ret == null)?null:new DatedSet(date, ret, longest);
}
+ private static final Util.Store store = new Util.Store();
+
public static String getVersion()
{
return LastBuild.getVersion();
@@ -876,8 +303,8 @@
// This ought to be used for testing only
try
{
- CACHE_PATH = new File(argv[0]);
- loadIndex(new URL(argv[1]), null);
+ Util.CACHE_PATH = new File(argv[0]);
+ new Util.Store().getDatedData(new URL(argv[1]), null, new DatedSetBuilder());
}
catch (Exception e)
{
@@ -885,65 +312,4 @@
}
}
- private static final class MonitoredInputStream extends InputStream
- {
- private final InputStream stream;
-
- private final ProgressMonitor monitor;
-
- private int count = 0;
-
- public MonitoredInputStream(InputStream in, ProgressMonitor monitor)
- {
- this.stream = in;
- this.monitor = monitor;
- }
-
- @Override
- public int read() throws IOException
- {
- int ret = this.stream.read();
- this.count += ret;
- this.monitor.setProgress(this.count);
- return ret;
- }
-
- @Override
- public int read(byte [] b, int off, int len) throws IOException
- {
- int ret = this.stream.read(b, off, len);
- this.count += ret;
- this.monitor.setProgress(this.count);
- return ret;
- }
-
- @Override
- public int available() throws IOException
- {
- return this.stream.available();
- }
-
- @Override
- public void close() throws IOException
- {
- this.stream.close();
- this.monitor.close();
- }
-
- @Override
- public boolean markSupported()
- {
- return false;
- }
-
- @Override
- public long skip(long n) throws IOException
- {
- long ret = this.stream.skip(n);
- this.count += ret;
- this.monitor.setProgress(this.count);
- return ret;
- }
- }
-
}
diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Util.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Util.java
--- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Util.java 1970-01-01 00:00:00.000000000 +0000
+++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Util.java 2010-07-14 19:28:01.000000000 +0000
@@ -0,0 +1,782 @@
+package com.lddubeau.ddb;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.Hashtable;
+import java.util.Locale;
+import java.util.Map;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+final class Util
+{
+ private static final Logger logger = Logger.getLogger(Util.class.getName());
+
+ // InputStream wrapper that reports the running byte count to a ProgressMonitor.
+ static final class MonitoredInputStream extends InputStream
+ {
+ private final InputStream stream;
+ private final ProgressMonitor monitor;
+ // Bytes read or skipped so far; passed to monitor.setProgress().
+ private int count = 0;
+
+ public MonitoredInputStream(InputStream in, ProgressMonitor monitor)
+ {
+ this.stream = in;
+ this.monitor = monitor;
+ }
+
+ @Override
+ public int read() throws IOException
+ {
+ int ret = this.stream.read();
+ this.count += (ret < 0) ? 0 : 1; // ret is a byte value or -1, never a length
+ this.monitor.setProgress(this.count);
+ return ret;
+ }
+
+ @Override
+ public int read(byte [] b, int off, int len) throws IOException
+ {
+ int ret = this.stream.read(b, off, len);
+ this.count += Math.max(ret, 0); // -1 marks end of stream, not a length
+ this.monitor.setProgress(this.count);
+ return ret;
+ }
+
+ @Override
+ public int available() throws IOException
+ {
+ return this.stream.available();
+ }
+
+ @Override
+ public void close() throws IOException
+ {
+ this.stream.close();
+ this.monitor.close();
+ }
+
+ @Override
+ public boolean markSupported()
+ {
+ return false;
+ }
+
+ @Override
+ public long skip(long n) throws IOException
+ {
+ long ret = this.stream.skip(n);
+ this.count += ret;
+ this.monitor.setProgress(this.count);
+ return ret;
+ }
+ }
+
+ private static final Hashtable<String, String> normalize_table = new Hashtable<String, String>(); // one-character key -> one-character replacement
+ // Directory that holds the cache files; assigned in the static initializer.
+ static File CACHE_PATH = null;
+
+ static
+ {
+ // Pick the per-user cache directory. Windows gets a directory under
+ // %USERPROFILE%. Every other system, recognized by name or not (Mac OS
+ // X, the BSDs, ...), gets a dot directory under user.home: throwing
+ // from a static initializer would make the whole class unusable there.
+ String os_name = System.getProperty("os.name").toLowerCase(Locale.US);
+ if (os_name.indexOf("windows") != -1)
+ {
+ Util.CACHE_PATH = new File(System.getenv("USERPROFILE"),
+ "WebDict-Lib");
+ }
+ else
+ {
+ Util.CACHE_PATH = new File(System.getProperty("user.home"),
+ ".webdict-lib");
+ }
+
+ // Replacement pairs used by normalize(): each key character is
+ // rewritten to the character it maps to.
+ normalize_table.put("万", "萬");
+ normalize_table.put("与", "與");
+ normalize_table.put("両", "兩");
+ normalize_table.put("並", "竝");
+ normalize_table.put("乗", "乘");
+ normalize_table.put("乱", "亂");
+ normalize_table.put("亀", "龜");
+ normalize_table.put("予", "豫");
+ normalize_table.put("争", "爭");
+ normalize_table.put("亘", "亙");
+ normalize_table.put("亜", "亞");
+ normalize_table.put("仏", "佛");
+ normalize_table.put("仮", "假");
+ normalize_table.put("会", "會");
+ normalize_table.put("伜", "倅");
+ normalize_table.put("伝", "傳");
+ normalize_table.put("体", "體");
+ normalize_table.put("余", "餘");
+ normalize_table.put("佞", "侫");
+ normalize_table.put("你", "儞");
+ normalize_table.put("併", "倂");
+ normalize_table.put("価", "價");
+ normalize_table.put("侭", "儘");
+ normalize_table.put("俱", "倶");
+ normalize_table.put("値", "值");
+ normalize_table.put("倹", "儉");
+ normalize_table.put("偷", "偸");
+ normalize_table.put("偽", "僞");
+ normalize_table.put("僣", "僭");
+ normalize_table.put("児", "兒");
+ normalize_table.put("党", "黨");
+ normalize_table.put("内", "內");
+ normalize_table.put("円", "圓");
+ normalize_table.put("冊", "册");
+ normalize_table.put("写", "寫");
+ normalize_table.put("処", "處");
+ normalize_table.put("函", "凾");
+ normalize_table.put("剎", "刹");
+ normalize_table.put("剣", "劍");
+ normalize_table.put("剤", "劑");
+ normalize_table.put("剰", "剩");
+ normalize_table.put("励", "勵");
+ normalize_table.put("労", "勞");
+ normalize_table.put("効", "效");
+ normalize_table.put("勅", "敕");
+ normalize_table.put("勧", "勸");
+ normalize_table.put("勲", "勳");
+ normalize_table.put("区", "區");
+ normalize_table.put("医", "醫");
+ normalize_table.put("卆", "卒");
+ normalize_table.put("単", "單");
+ normalize_table.put("即", "卽");
+ normalize_table.put("历", "歷");
+ normalize_table.put("厨", "廚");
+ normalize_table.put("厩", "廐");
+ normalize_table.put("厳", "嚴");
+ normalize_table.put("参", "參");
+ normalize_table.put("双", "雙");
+ normalize_table.put("収", "收");
+ normalize_table.put("叙", "敍");
+ normalize_table.put("号", "號");
+ normalize_table.put("告", "吿");
+ normalize_table.put("呑", "吞");
+ normalize_table.put("唖", "啞");
+ normalize_table.put("営", "營");
+ normalize_table.put("啟", "啓");
+ normalize_table.put("喻", "喩");
+ normalize_table.put("嘱", "囑");
+ normalize_table.put("嚢", "囊");
+ normalize_table.put("回", "囘");
+ normalize_table.put("団", "團");
+ normalize_table.put("囲", "圍");
+ normalize_table.put("図", "圖");
+ normalize_table.put("国", "國");
+ normalize_table.put("圏", "圈");
+ normalize_table.put("圧", "壓");
+ normalize_table.put("堕", "墮");
+ normalize_table.put("塁", "壘");
+ normalize_table.put("塩", "鹽");
+ normalize_table.put("増", "增");
+ normalize_table.put("壊", "壞");
+ normalize_table.put("壌", "壤");
+ normalize_table.put("壮", "壯");
+ normalize_table.put("声", "聲");
+ normalize_table.put("壱", "壹");
+ normalize_table.put("売", "賣");
+ normalize_table.put("壷", "壺");
+ normalize_table.put("変", "變");
+ normalize_table.put("奨", "奬");
+ normalize_table.put("娛", "娯");
+ normalize_table.put("嬢", "孃");
+ normalize_table.put("学", "學");
+ normalize_table.put("宝", "寶");
+ normalize_table.put("実", "實");
+ normalize_table.put("寝", "寢");
+ normalize_table.put("対", "對");
+ normalize_table.put("寿", "壽");
+ normalize_table.put("専", "專");
+ normalize_table.put("将", "將");
+ normalize_table.put("尓", "爾");
+ normalize_table.put("戾", "戻");
+ normalize_table.put("尭", "堯");
+ normalize_table.put("尽", "盡");
+ normalize_table.put("届", "屆");
+ normalize_table.put("屏", "屛");
+ normalize_table.put("属", "屬");
+ normalize_table.put("岳", "嶽");
+ normalize_table.put("峡", "峽");
+ normalize_table.put("島", "嶋");
+ normalize_table.put("巌", "巖");
+ normalize_table.put("巣", "巢");
+ normalize_table.put("巻", "卷");
+ normalize_table.put("帯", "帶");
+ normalize_table.put("帰", "歸");
+ normalize_table.put("并", "幷");
+ normalize_table.put("庁", "廳");
+ normalize_table.put("広", "廣");
+ normalize_table.put("廃", "廢");
+ normalize_table.put("廄", "廏");
+ normalize_table.put("弁", "辯");
+ normalize_table.put("弐", "貳");
+ normalize_table.put("弑", "弒");
+ normalize_table.put("弥", "彌");
+ normalize_table.put("弯", "彎");
+ normalize_table.put("弾", "彈");
+ normalize_table.put("当", "當");
+ normalize_table.put("径", "徑");
+ normalize_table.put("従", "從");
+ normalize_table.put("徳", "德");
+ normalize_table.put("徴", "徵");
+ normalize_table.put("応", "應");
+ normalize_table.put("忰", "悴");
+ normalize_table.put("怛", "憺");
+ normalize_table.put("恒", "恆");
+ normalize_table.put("恋", "戀");
+ normalize_table.put("恵", "惠");
+ normalize_table.put("悦", "悅");
+ normalize_table.put("悩", "惱");
+ normalize_table.put("悪", "惡");
+ normalize_table.put("惨", "慘");
+ normalize_table.put("愠", "慍");
+ normalize_table.put("慎", "愼");
+ normalize_table.put("懐", "懷");
+ normalize_table.put("懴", "懺");
+ normalize_table.put("戦", "戰");
+ normalize_table.put("戯", "戲");
+ normalize_table.put("戱", "戲");
+ normalize_table.put("戸", "戶");
+ normalize_table.put("払", "拂");
+ normalize_table.put("抜", "拔");
+ normalize_table.put("択", "擇");
+ normalize_table.put("抬", "擡");
+ normalize_table.put("担", "擔");
+ normalize_table.put("拝", "拜");
+ normalize_table.put("拠", "據");
+ normalize_table.put("拡", "擴");
+ normalize_table.put("挙", "擧");
+ normalize_table.put("挟", "挾");
+ normalize_table.put("捜", "搜");
+ normalize_table.put("掲", "揭");
+ normalize_table.put("摂", "攝");
+ normalize_table.put("撹", "攪");
+ normalize_table.put("敎", "教");
+ normalize_table.put("数", "數");
+ normalize_table.put("斉", "齊");
+ normalize_table.put("斎", "齋");
+ normalize_table.put("断", "斷");
+ normalize_table.put("既", "旣");
+ normalize_table.put("旧", "舊");
+ normalize_table.put("昼", "晝");
+ normalize_table.put("昿", "曠");
+ normalize_table.put("晋", "晉");
+ normalize_table.put("暁", "曉");
+ normalize_table.put("暦", "曆");
+ normalize_table.put("曽", "曾");
+ normalize_table.put("条", "條");
+ normalize_table.put("来", "來");
+ normalize_table.put("枢", "樞");
+ normalize_table.put("枦", "櫨");
+ normalize_table.put("栄", "榮");
+ normalize_table.put("桜", "櫻");
+ normalize_table.put("桟", "棧");
+ normalize_table.put("桧", "檜");
+ normalize_table.put("梹", "檳");
+ normalize_table.put("梼", "檮");
+ normalize_table.put("検", "檢");
+ normalize_table.put("楼", "樓");
+ normalize_table.put("楽", "樂");
+ normalize_table.put("様", "樣");
+ normalize_table.put("槙", "槇");
+ normalize_table.put("権", "權");
+ normalize_table.put("横", "橫");
+ normalize_table.put("檪", "櫟");
+ normalize_table.put("櫺", "欞");
+ normalize_table.put("欧", "歐");
+ normalize_table.put("歓", "歡");
+ normalize_table.put("歩", "步");
+ normalize_table.put("歯", "齒");
+ normalize_table.put("歳", "歲");
+ normalize_table.put("歴", "歷");
+ normalize_table.put("残", "殘");
+ normalize_table.put("殱", "殲");
+ normalize_table.put("殴", "毆");
+ normalize_table.put("殻", "殼");
+ normalize_table.put("毁", "毀");
+ normalize_table.put("毎", "每");
+ normalize_table.put("毗", "毘");
+ normalize_table.put("気", "氣");
+ normalize_table.put("氷", "冰");
+ normalize_table.put("污", "汚");
+ normalize_table.put("沢", "澤");
+ normalize_table.put("浄", "淨");
+ normalize_table.put("浅", "淺");
+ normalize_table.put("浜", "濱");
+ normalize_table.put("涙", "淚");
+ normalize_table.put("涛", "濤");
+ normalize_table.put("涜", "瀆");
+ normalize_table.put("清", "淸");
+ normalize_table.put("渇", "渴");
+ normalize_table.put("済", "濟");
+ normalize_table.put("渋", "澁");
+ normalize_table.put("渓", "溪");
+ normalize_table.put("温", "溫");
+ normalize_table.put("湾", "灣");
+ normalize_table.put("湿", "濕");
+ normalize_table.put("満", "滿");
+ normalize_table.put("滞", "滯");
+ normalize_table.put("潅", "灌");
+ normalize_table.put("潜", "潛");
+ normalize_table.put("灯", "燈");
+ normalize_table.put("炉", "爐");
+ normalize_table.put("点", "點");
+ normalize_table.put("為", "爲");
+ normalize_table.put("焔", "焰");
+ normalize_table.put("焼", "燒");
+ normalize_table.put("状", "狀");
+ normalize_table.put("独", "獨");
+ normalize_table.put("狭", "狹");
+ normalize_table.put("猟", "獵");
+ normalize_table.put("猪", "猪");
+ normalize_table.put("献", "獻");
+ normalize_table.put("獣", "獸");
+ normalize_table.put("珱", "瓔");
+ normalize_table.put("瑶", "瑤");
+ normalize_table.put("瓶", "甁");
+ normalize_table.put("産", "產");
+ normalize_table.put("画", "畫");
+ normalize_table.put("畳", "疊");
+ normalize_table.put("畴", "疇");
+ normalize_table.put("痴", "癡");
+ normalize_table.put("発", "發");
+ normalize_table.put("皋", "皐");
+ normalize_table.put("盗", "盜");
+ normalize_table.put("県", "縣");
+ normalize_table.put("真", "眞");
+ normalize_table.put("眾", "衆");
+ normalize_table.put("砕", "碎");
+ normalize_table.put("砺", "礪");
+ normalize_table.put("砿", "礦");
+ normalize_table.put("碍", "礙");
+ normalize_table.put("礼", "禮");
+ normalize_table.put("祢", "禰");
+ normalize_table.put("祷", "禱");
+ normalize_table.put("禄", "祿");
+ normalize_table.put("禅", "禪");
+ normalize_table.put("秘", "祕");
+ normalize_table.put("称", "稱");
+ normalize_table.put("稲", "稻");
+ normalize_table.put("穂", "穗");
+ normalize_table.put("穏", "穩");
+ normalize_table.put("穐", "龝");
+ normalize_table.put("穣", "穰");
+ normalize_table.put("窃", "竊");
+ normalize_table.put("窓", "窗");
+ normalize_table.put("竜", "龍");
+ normalize_table.put("競", "竸");
+ normalize_table.put("筝", "箏");
+ normalize_table.put("篭", "籠");
+ normalize_table.put("簑", "簔");
+ normalize_table.put("籖", "籤");
+ normalize_table.put("粋", "粹");
+ normalize_table.put("粛", "肅");
+ normalize_table.put("粤", "粵");
+ normalize_table.put("糸", "絲");
+ normalize_table.put("経", "經");
+ normalize_table.put("絵", "繪");
+ normalize_table.put("絶", "絕");
+ normalize_table.put("継", "繼");
+ normalize_table.put("続", "續");
+ normalize_table.put("総", "總");
+ normalize_table.put("緑", "綠");
+ normalize_table.put("緒", "緖");
+ normalize_table.put("緕", "纃");
+ normalize_table.put("縁", "緣");
+ normalize_table.put("纒", "纏");
+ normalize_table.put("縄", "繩");
+ normalize_table.put("縦", "縱");
+ normalize_table.put("繊", "纖");
+ normalize_table.put("繋", "繫");
+ normalize_table.put("翆", "翠");
+ normalize_table.put("聡", "聰");
+ normalize_table.put("聴", "聽");
+ normalize_table.put("胆", "膽");
+ normalize_table.put("脱", "脫");
+ normalize_table.put("脳", "腦");
+ normalize_table.put("臓", "臟");
+ normalize_table.put("舉", "擧");
+ normalize_table.put("舎", "舍");
+ normalize_table.put("舮", "艫");
+ normalize_table.put("艶", "艷");
+ normalize_table.put("芦", "蘆");
+ normalize_table.put("芸", "藝");
+ normalize_table.put("苔", "薹");
+ normalize_table.put("茎", "莖");
+ normalize_table.put("荘", "莊");
+ normalize_table.put("蔵", "藏");
+ normalize_table.put("薫", "薰");
+ normalize_table.put("薬", "藥");
+ normalize_table.put("薮", "藪");
+ normalize_table.put("虚", "虛");
+ normalize_table.put("虫", "蟲");
+ normalize_table.put("虱", "蝨");
+ normalize_table.put("蚕", "蠶");
+ normalize_table.put("蛍", "螢");
+ normalize_table.put("蛎", "蠣");
+ normalize_table.put("蛮", "蠻");
+ normalize_table.put("蝿", "蠅");
+ normalize_table.put("衛", "衞");
+ normalize_table.put("装", "裝");
+ normalize_table.put("褒", "襃");
+ normalize_table.put("褝", "襌");
+ normalize_table.put("覚", "覺");
+ normalize_table.put("覧", "覽");
+ normalize_table.put("観", "觀");
+ normalize_table.put("触", "觸");
+ normalize_table.put("訳", "譯");
+ normalize_table.put("証", "證");
+ normalize_table.put("誉", "譽");
+ normalize_table.put("説", "說");
+ normalize_table.put("読", "讀");
+ normalize_table.put("諌", "諫");
+ normalize_table.put("謡", "謠");
+ normalize_table.put("譛", "譖");
+ normalize_table.put("譲", "讓");
+ normalize_table.put("讃", "讚");
+ normalize_table.put("豊", "豐");
+ normalize_table.put("賎", "賤");
+ normalize_table.put("賛", "贊");
+ normalize_table.put("践", "踐");
+ normalize_table.put("転", "轉");
+ normalize_table.put("軽", "輕");
+ normalize_table.put("輌", "輛");
+ normalize_table.put("辞", "辭");
+ normalize_table.put("辺", "邊");
+ normalize_table.put("迩", "邇");
+ normalize_table.put("逓", "遞");
+ normalize_table.put("遅", "遲");
+ normalize_table.put("遥", "遙");
+ normalize_table.put("郎", "郞");
+ normalize_table.put("郷", "鄕");
+ normalize_table.put("鄉", "鄕");
+ normalize_table.put("酔", "醉");
+ normalize_table.put("醸", "釀");
+ normalize_table.put("釈", "釋");
+ normalize_table.put("鈎", "鉤");
+ normalize_table.put("鈩", "鑪");
+ normalize_table.put("鈬", "鐸");
+ normalize_table.put("鉄", "鐵");
+ normalize_table.put("鉱", "鑛");
+ normalize_table.put("銭", "錢");
+ normalize_table.put("鋭", "銳");
+ normalize_table.put("鋳", "鑄");
+ normalize_table.put("録", "錄");
+ normalize_table.put("鎮", "鎭");
+ normalize_table.put("鑚", "鑽");
+ normalize_table.put("関", "關");
+ normalize_table.put("閲", "閱");
+ normalize_table.put("闘", "鬪");
+ normalize_table.put("陥", "陷");
+ normalize_table.put("険", "險");
+ normalize_table.put("随", "隨");
+ normalize_table.put("隠", "隱");
+ normalize_table.put("隣", "鄰");
+ normalize_table.put("隷", "隸");
+ normalize_table.put("雑", "雜");
+ normalize_table.put("霊", "靈");
+ normalize_table.put("青", "靑");
+ normalize_table.put("静", "靜");
+ normalize_table.put("頚", "頸");
+ normalize_table.put("頬", "頰");
+ normalize_table.put("頼", "賴");
+ normalize_table.put("顔", "顏");
+ normalize_table.put("顕", "顯");
+ normalize_table.put("顛", "顚");
+ normalize_table.put("飲", "飮");
+ normalize_table.put("餅", "餠");
+ normalize_table.put("駄", "馱");
+ normalize_table.put("駅", "驛");
+ normalize_table.put("駆", "驅");
+ normalize_table.put("騒", "騷");
+ normalize_table.put("験", "驗");
+ normalize_table.put("髄", "髓");
+ normalize_table.put("髪", "髮");
+ normalize_table.put("鯵", "鰺");
+ normalize_table.put("鰛", "鰮");
+ normalize_table.put("鴎", "鷗");
+ normalize_table.put("鴬", "鶯");
+ normalize_table.put("鶏", "鷄");
+ normalize_table.put("鷏", "鷆");
+ normalize_table.put("麁", "麤");
+ normalize_table.put("麦", "麥");
+ normalize_table.put("麸", "麩");
+ normalize_table.put("麺", "麪");
+ normalize_table.put("麽", "麼");
+ normalize_table.put("黄", "黃");
+ normalize_table.put("黒", "黑");
+ normalize_table.put("黙", "默");
+ normalize_table.put("鼡", "鼠");
+ normalize_table.put("齢", "齡");
+ normalize_table.put("朗", "朗");
+ normalize_table.put("猪", "猪");
+
+ }
+
+ static String normalize(String term)
+ {
+ // Rewrite every occurrence of each key character to its table value.
+ for (Object key : normalize_table.keySet())
+ {
+ term = term.replace(((String) key).charAt(0), ((String) normalize_table.get(key)).charAt(0));
+ }
+ return term;
+ }
+
+ static final class Store<T>
+ {
+ // Data already loaded through this store, keyed by URL string.
+ private final Hashtable<String, T> indices = new Hashtable<String, T>();
+
+ // Returns the data for url, calling Util.loadData the first time a URL
+ // is seen. Returns null, without remembering it, when loadData does.
+ public T getDatedData(URL url, ProgressMonitor monitor,
+ DataBuilder<T> builder) throws CancelledByUser
+ {
+ String url_str = url.toString().intern();
+ /*
+ * Libraries that do not talk to each other may request the same URL
+ * at the same time, so the lookup and the load share one lock.
+ */
+ synchronized (indices)
+ {
+ T ds = indices.get(url_str);
+ if (ds == null)
+ {
+ ds = Util.loadData(url, monitor, builder);
+ // Hashtable.put throws NullPointerException on a null value.
+ if (ds != null)
+ {
+ indices.put(url_str, ds);
+ }
+ }
+ return ds;
+ }
+ }
+ }
+
+
+ static <T> T loadData(URL url, ProgressMonitor monitor,
+ DataBuilder<T> builder) throws CancelledByUser
+ {
+ // Returns the data for url, taken from the cache or from the web site:
+ // - no usable cache: load from the web site, then refresh the cache;
+ // - cache present but the web site has newer data: load from the web site;
+ // - web site unreachable, or nothing newer there: use the cache;
+ // - neither source yields data: return null.
+ // An IOException while fetching from the web site is rethrown as an Error.
+
+ CACHE_PATH.mkdirs();
+
+ long date = 0;
+ HttpURLConnection conn = null;
+ // Set once the web site answers the conditional request with HTTP_OK.
+ boolean web_is_newer = false;
+
+ // First try to see whether it is cached.
+ File cache_path = null;
+ try
+ {
+ cache_path = new File(CACHE_PATH, url.toURI().toASCIIString()
+ .replaceAll("[:/]", "_"));
+ }
+ catch (URISyntaxException e)
+ {
+ throw new Error("cannot convert URL to URI: " + url.toString(), e);
+ }
+
+ // Always the outermost stream opened so far; closed in the finally clause.
+ InputStream cache_stream = null;
+ try
+ {
+ cache_stream = new FileInputStream(cache_path);
+ DataInputStream in_file = new DataInputStream(new GZIPInputStream(cache_stream));
+ cache_stream = in_file;
+ builder.loadPreamble(in_file);
+ if (builder.checkVersion())
+ {
+ date = builder.getDate();
+ // Check whether the data on the web site is newer
+ try
+ {
+ conn = (HttpURLConnection) url.openConnection();
+ conn.setIfModifiedSince(date);
+ conn.connect();
+ // HTTP_OK means that the data on the web site is newer. Any other
+ // response (HTTP_NOT_MODIFIED, or a likely error) means read the cache.
+ web_is_newer = (conn.getResponseCode() == HttpURLConnection.HTTP_OK);
+ logger.log(Level.FINE, "HTTP response: "
+ + conn.getResponseCode());
+ }
+ catch (IOException e)
+ {
+ // The web site is not accessible... read the cache.
+ }
+
+ if (!web_is_newer)
+ {
+ logger.log(Level.FINE, "reading cache: "
+ + cache_path.toString());
+ builder.readFromCache(in_file);
+ }
+ }
+ else
+ {
+ // Else, act as if there were no cache.
+ logger.log(Level.FINE, "older cache: " + cache_path);
+ }
+ }
+ catch (FileNotFoundException e)
+ {
+ // Ok, the cache does not exist. Just ignore.
+ }
+ catch (IOException e)
+ {
+ // The cache could not be read: ignore it and go on to the web site.
+ }
+ finally
+ {
+ if (cache_stream != null)
+ {
+ try
+ {
+ cache_stream.close();
+ }
+ catch (IOException e)
+ {
+ // Nothing useful can be done about a failed close.
+ }
+ }
+ }
+
+ T ret = builder.getData();
+ if (ret != null)
+ {
+ // The cache supplied the data: nothing to fetch and nothing to save.
+ return ret;
+ }
+
+ if ((conn != null) && !web_is_newer)
+ {
+ // The conditional request brought no data (not modified, an error, or
+ // no connection) and neither did the cache: ask again unconditionally.
+ conn.disconnect();
+ conn = null;
+ }
+
+ // Otherwise, fetch from web site.
+ logger.log(Level.FINE, "fetching data from web site: " + url);
+ try
+ {
+ // conn is still set only when the conditional request returned HTTP_OK.
+ if (conn == null)
+ {
+ conn = (HttpURLConnection) url.openConnection();
+ conn.connect();
+ }
+
+ // Fail silently if we get anything else than HTTP_OK.
+ if (conn.getResponseCode() == HttpURLConnection.HTTP_OK)
+ {
+ if (monitor != null)
+ {
+ monitor.setMinimum(0);
+ monitor.setMaximum(conn.getContentLength());
+ }
+ date = conn.getDate();
+ InputStream raw_stream = (monitor == null) ? conn
+ .getInputStream() : new MonitoredInputStream(conn
+ .getInputStream(), monitor);
+ BufferedReader reader = new BufferedReader(
+ new InputStreamReader(new GZIPInputStream(raw_stream),
+ "UTF-8"));
+ try
+ {
+ builder.setDate(date);
+ builder.readFromWeb(reader, monitor);
+ }
+ finally
+ {
+ reader.close();
+ if (monitor != null)
+ {
+ monitor.close();
+ }
+ }
+ }
+ else
+ {
+ logger.log(Level.FINE,
+ "fetching data from web site failed with code: "
+ + conn.getResponseCode());
+ }
+ }
+ catch (IOException e)
+ {
+ throw new Error(e);
+ }
+
+ ret = builder.getData();
+ if (ret != null)
+ {
+ // Save the data to the cache
+ DataOutputStream out_file = null;
+ try
+ {
+ out_file = new DataOutputStream(new GZIPOutputStream(
+ new FileOutputStream(cache_path)));
+ builder.writeToCache(out_file);
+ out_file.flush();
+ out_file.close();
+ }
+ catch (FileNotFoundException e)
+ {
+ // Ignore: something prevents us from caching, which is not fatal.
+ }
+ catch (IOException e)
+ {
+ // Cleanup and otherwise ignore as above.
+ if (out_file != null)
+ {
+ try
+ {
+ out_file.close();
+ }
+ catch (IOException e1)
+ {
+ // Probably a serious system problem; not ours to report here.
+ }
+ cache_path.delete();
+ }
+ }
+ }
+ else
+ {
+ logger.log(Level.FINE, "unable to fetch from cache or web site: " + url);
+ }
+
+ return ret;
+ }
+
+}
diff -Nru java-webdict-lib-0.3ubuntu3/logging.properties java-webdict-lib-0.4/logging.properties
--- java-webdict-lib-0.3ubuntu3/logging.properties 2008-11-07 07:59:58.000000000 +0000
+++ java-webdict-lib-0.4/logging.properties 2010-07-14 15:01:00.000000000 +0000
@@ -1,3 +1,3 @@
handlers= java.util.logging.ConsoleHandler
java.util.logging.ConsoleHandler.level=ALL
-com.lddubeau.ddb.Index.level=FINEST
+com.lddubeau.ddb.Util.level=FINEST
diff -Nru java-webdict-lib-0.3ubuntu3/tests/fetch.lexicon.txt java-webdict-lib-0.4/tests/fetch.lexicon.txt
--- java-webdict-lib-0.3ubuntu3/tests/fetch.lexicon.txt 1970-01-01 00:00:00.000000000 +0000
+++ java-webdict-lib-0.4/tests/fetch.lexicon.txt 2010-07-17 12:30:04.000000000 +0000
@@ -0,0 +1,3 @@
+@@@ com.lddubeau.ddb.Util loadData
+FINE: fetching data from web site: http://www.acmuller.net/download/buddhdic.txt.gz
+one
diff -Nru java-webdict-lib-0.3ubuntu3/tests/fetch.txt java-webdict-lib-0.4/tests/fetch.txt
--- java-webdict-lib-0.3ubuntu3/tests/fetch.txt 2008-11-07 08:04:44.000000000 +0000
+++ java-webdict-lib-0.4/tests/fetch.txt 2010-07-14 15:01:18.000000000 +0000
@@ -1,2 +1,2 @@
-@@@ com.lddubeau.ddb.Index loadIndex
-FINE: fetching data from web sitehttp://www.acmuller.net/download/ddb-hdwd.txt.gz
+@@@ com.lddubeau.ddb.Util loadData
+FINE: fetching data from web site: http://www.acmuller.net/download/ddb-hdwd.txt.gz
diff -Nru java-webdict-lib-0.3ubuntu3/tests/usecache.lexicon.txt java-webdict-lib-0.4/tests/usecache.lexicon.txt
--- java-webdict-lib-0.3ubuntu3/tests/usecache.lexicon.txt 1970-01-01 00:00:00.000000000 +0000
+++ java-webdict-lib-0.4/tests/usecache.lexicon.txt 2010-07-17 12:29:55.000000000 +0000
@@ -0,0 +1,5 @@
+@@@ com.lddubeau.ddb.Util loadData
+FINE: HTTP response: 304
+@@@ com.lddubeau.ddb.Util loadData
+FINE: reading cache: tests/testcache/http___www.acmuller.net_download_buddhdic.txt.gz
+Buddha
diff -Nru java-webdict-lib-0.3ubuntu3/tests/usecache.txt java-webdict-lib-0.4/tests/usecache.txt
--- java-webdict-lib-0.3ubuntu3/tests/usecache.txt 2008-11-07 08:08:27.000000000 +0000
+++ java-webdict-lib-0.4/tests/usecache.txt 2010-07-14 15:01:30.000000000 +0000
@@ -1,4 +1,4 @@
-@@@ com.lddubeau.ddb.Index loadIndex
+@@@ com.lddubeau.ddb.Util loadData
FINE: HTTP response: 304
-@@@ com.lddubeau.ddb.Index loadIndex
+@@@ com.lddubeau.ddb.Util loadData
FINE: reading cache: tests/testcache/http___www.acmuller.net_download_ddb-hdwd.txt.gz
diff -Nru java-webdict-lib-0.3ubuntu3/test_webdict.py java-webdict-lib-0.4/test_webdict.py
--- java-webdict-lib-0.3ubuntu3/test_webdict.py 2008-11-07 08:26:36.000000000 +0000
+++ java-webdict-lib-0.4/test_webdict.py 2010-07-15 21:16:05.000000000 +0000
@@ -1,16 +1,25 @@
+# coding=utf8
import pydunit
import os
import re
-date_re = re.compile(r"^.*?( com.lddubeau.ddb.Index.*)", re.MULTILINE)
+date_re = re.compile(r"^.*?( com.lddubeau.ddb.Util.*)", re.MULTILINE)
def date_filter(str):
return date_re.sub(r"@@@\1", str)
-def test_load_from_website():
+def test_load_index_from_website():
os.system("rm -rf tests/testcache")
pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.Index", "tests/testcache", "http://www.acmuller.net/download/ddb-hdwd.txt.gz"], file_expected="tests/fetch.txt", status_expected=0, filter=date_filter)
-def test_load_from_cache():
+def test_load_index_from_cache():
pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.Index", "tests/testcache", "http://www.acmuller.net/download/ddb-hdwd.txt.gz"], file_expected="tests/usecache.txt", status_expected=0, filter=date_filter)
os.system("rm -rf tests/testcache")
+
+def test_load_lexicon_from_website():
+ os.system("rm -rf tests/testcache")
+ pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.BasicLexicon", "tests/testcache", "http://www.acmuller.net/download/buddhdic.txt.gz", u"一"], file_expected="tests/fetch.lexicon.txt", status_expected=0, filter=date_filter)
+
+def test_load_lexicon_from_cache():
+ pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.BasicLexicon", "tests/testcache", "http://www.acmuller.net/download/buddhdic.txt.gz", u"佛"], file_expected="tests/usecache.lexicon.txt", status_expected=0, filter=date_filter)
+ os.system("rm -rf tests/testcache")
Binary files /tmp/n5Z48W8xYy/java-webdict-lib-0.3ubuntu3/test_webdict.pyc and /tmp/w3PTQYlB4I/java-webdict-lib-0.4/test_webdict.pyc differ