diff -Nru java-webdict-lib-0.3ubuntu3/build.xml java-webdict-lib-0.4/build.xml --- java-webdict-lib-0.3ubuntu3/build.xml 2009-02-07 10:48:19.000000000 +0000 +++ java-webdict-lib-0.4/build.xml 2011-04-20 23:18:36.000000000 +0000 @@ -4,7 +4,7 @@ - + diff -Nru java-webdict-lib-0.3ubuntu3/debian/changelog java-webdict-lib-0.4/debian/changelog --- java-webdict-lib-0.3ubuntu3/debian/changelog 2009-04-24 12:41:10.000000000 +0000 +++ java-webdict-lib-0.4/debian/changelog 2011-04-20 23:19:40.000000000 +0000 @@ -1,3 +1,10 @@ +java-webdict-lib (0.4) maverick; urgency=low + + * Added support for lexicons. + * Updated packaging for maverick. + + -- Louis-Dominique Dubeau Wed, 20 Apr 2011 19:18:56 -0400 + java-webdict-lib (0.3ubuntu3) jaunty; urgency=low * Added support for variant readings. diff -Nru java-webdict-lib-0.3ubuntu3/debian/control java-webdict-lib-0.4/debian/control --- java-webdict-lib-0.3ubuntu3/debian/control 2009-04-24 12:42:03.000000000 +0000 +++ java-webdict-lib-0.4/debian/control 2011-04-20 23:30:59.000000000 +0000 @@ -2,13 +2,13 @@ Section: universe/libs Priority: extra Maintainer: Louis-Dominique Dubeau -Build-Depends: ant, debhelper (>= 7) -Build-Depends-Indep: ldd-ant (>= 0.8), default-jdk -Standards-Version: 3.8.0 +Build-Depends: ant, debhelper (>= 8) +Build-Depends-Indep: ldd-ant (>= 0.10), default-jdk +Standards-Version: 3.9.1 Package: java-webdict-lib Architecture: all -Depends: default-jre (>= 1.6) | default-jre-headless (>= 1.6) | java6-runtime | java6-runtime-headless +Depends: ${misc:Depends}, default-jre (>= 1.6) | default-jre-headless (>= 1.6) | java6-runtime | java6-runtime-headless Description: Library to examine and manipulate indices of online dictionaries This library provides Index objects to determine which words are present in web libraries. diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/BasicLexicon.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/BasicLexicon.java --- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/BasicLexicon.java 1970-01-01 00:00:00.000000000 +0000 +++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/BasicLexicon.java 2010-07-17 12:28:12.000000000 +0000 @@ -0,0 +1,374 @@ +package com.lddubeau.ddb; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public final class BasicLexicon +{ + + private static final int CACHE_FILE_VERSION = 1; + + private final URL url; + + private final ProgressMonitor monitor; + + /** + * This constructor creates a new Index object which allows to check whether + * a word exists in an index. + * + * @param url + * The URL where the index is located on the web. + * @throws MalformedURLException + * When the URL is incorrect. + */ + public BasicLexicon(String url) throws MalformedURLException + { + this(url, null); + } + + /** + * This constructor creates a new Index object which allows to check whether + * a word exists in an index. + * + * @param url + * The URL where the index is located on the web. + * @param monitor + * The monitor to use to check progress. + * @throws MalformedURLException + * When the URL is incorrect. + */ + public BasicLexicon(String url, ProgressMonitor monitor) + throws MalformedURLException + { + if (url == null) + { + throw new NullPointerException("url is null"); + } + this.url = new URL(url); + this.monitor = monitor; + } + + /** + * This method verifies whether a term exists in the index. + * + * @param term + * The term to lookup. + * @return True if the term is present in the index, false if not. + */ + public boolean exists(String term) throws CancelledByUser + { + if (term == null) + { + throw new NullPointerException("term is null"); + } + return exists(this.url, this.monitor, term); + } + + /** + * This method returns the length of the longest term in the index. + * + * @return The length. + */ + public int getLongestTermLength() throws CancelledByUser + { + return getLongestTermLength(this.url, this.monitor); + } + + /** + * This method returns the data associated with a term. + * + * @return The data. + */ + public String getTermData(String term) throws CancelledByUser + { + return this.exists(term) + ? getTermData(this.url, this.monitor, term) + : null; + } + + private static final class DatedMap implements DatedData + { + public final long date; + + public final Map map; + + public final int longest; + + public DatedMap(long date, Map map, int longest) + { + this.date = date; + this.map = map; + this.longest = longest; + } + + /** + * @see com.lddubeau.ddb.DatedData#getDate() + */ + @Override + public long getDate() + { + return this.date; + } + + } + + private static final class DatedMapBuilder implements DataBuilder + { + private int version = 0; + + private long date = 0; + + private int longest = -1; + + private Map ret; + + @Override + public DatedMap getData() + { + return (this.ret == null) ? null : new DatedMap(this.date, + this.ret, this.longest); + } + + @Override + public long getDate() + { + return this.date; + } + + @Override + public void readFromCache(DataInputStream in) + { + try + { + this.longest = in.readInt(); + int num = in.readInt(); + + this.ret = new TreeMap(); + for (int i = 0; i < num; i++) + { + this.ret.put(in.readUTF(), in.readUTF()); + } + + if (in.read() > -1) + { + // Eek, we did not read all the data. + // Flush... + this.ret = null; + } + } + catch (Exception e) + { + // Assume a corrupted cache + this.ret = null; + } + } + + /** + * @see com.lddubeau.ddb.DataBuilder#setDate(long) + */ + @Override + public void setDate(long date) + { + this.date = date; + + } + + @Override + public void readFromWeb(BufferedReader in, ProgressMonitor monitor) + throws CancelledByUser, IOException + { + final Pattern xml_re = Pattern.compile("^<\\?xml", + Pattern.CASE_INSENSITIVE); + final Pattern head_re = Pattern.compile("(.*?)", + Pattern.CASE_INSENSITIVE); + final Pattern meaning_re = Pattern.compile( + "^
.*?Meaning.*?:(.*)", Pattern.CASE_INSENSITIVE); + final Pattern clean_re = Pattern.compile("<.*?>"); + final Pattern close_p_re = Pattern.compile("

", + Pattern.CASE_INSENSITIVE); + + String line = in.readLine(); + this.ret = new TreeMap(); + try + { + String head = null; + String meaning = null; + boolean capturing_meaning = false; + while (line != null) + { + if (monitor != null && monitor.isCancelled()) + { + throw new CancelledByUser(); + } + line = line.trim(); + + if ((line.length() > 0) && !xml_re.matcher(line).find()) + { + // We have a line which contains more than white space + // and is not the + // longest) + { + longest = head.length(); + } + head = null; + meaning = null; + capturing_meaning = false; + } + } + } + line = in.readLine(); + } + } + // Any exception means we were unsuccessful so we flush ret + catch (IOException e) + { + this.ret = null; + throw e; + } + catch (CancelledByUser e) + { + this.ret = null; + throw e; + } + catch (Exception e) + { + this.ret = null; + } + } + + @Override + public void writeToCache(DataOutputStream out) throws IOException + { + out.writeInt(CACHE_FILE_VERSION); + out.writeLong(date); + out.writeInt(longest); + out.writeInt(ret.size()); + for (Map.Entry entry : this.ret.entrySet()) + { + out.writeUTF(entry.getKey()); + out.writeUTF(entry.getValue()); + } + } + + @Override + public boolean checkVersion() + { + return (this.version == CACHE_FILE_VERSION); + } + + @Override + public void loadPreamble(DataInputStream in) + { + try + { + this.version = in.readInt(); + if (this.checkVersion()) + { + this.date = in.readLong(); + } + } + catch (IOException e) + { + // Ignore the exception: act as if the cache is corrupted. + } + } + + } + + private static final Util.Store store = new Util.Store(); + + private static boolean exists(URL url, ProgressMonitor monitor, String term) + throws CancelledByUser + { + return store.getDatedData(url, monitor, new DatedMapBuilder()).map + .containsKey(Util.normalize(term)); + } + + private static int getLongestTermLength(URL url, ProgressMonitor monitor) + throws CancelledByUser + { + return store.getDatedData(url, monitor, new DatedMapBuilder()).longest; + } + + private static String getTermData(URL url, ProgressMonitor monitor, + String term) throws CancelledByUser + { + return store.getDatedData(url, monitor, new DatedMapBuilder()).map + .get(Util.normalize(term)); + } + + public static void main(String [] argv) + { + // This ought to be used for testing only + try + { + Util.CACHE_PATH = new File(argv[0]); + BasicLexicon lex = new BasicLexicon(argv[1]); + System.out.println(lex.getTermData(argv[2])); + } + catch (Exception e) + { + throw new Error(e); + } + } + +} diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DataBuilder.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DataBuilder.java --- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DataBuilder.java 1970-01-01 00:00:00.000000000 +0000 +++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DataBuilder.java 2010-07-14 14:14:12.000000000 +0000 @@ -0,0 +1,25 @@ +package com.lddubeau.ddb; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + +interface DataBuilder +{ + void loadPreamble(DataInputStream in); + + boolean checkVersion(); + + long getDate(); + + void readFromCache(DataInputStream in); + + void writeToCache(DataOutputStream out) throws IOException; + + void setDate(long date); + + void readFromWeb(BufferedReader in, ProgressMonitor monitor) throws CancelledByUser, IOException; + + T getData(); +} diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DatedData.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DatedData.java --- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/DatedData.java 1970-01-01 00:00:00.000000000 +0000 +++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/DatedData.java 2010-07-14 15:51:27.000000000 +0000 @@ -0,0 +1,6 @@ +package com.lddubeau.ddb; + +interface DatedData +{ + long getDate(); +} diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Index.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Index.java --- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Index.java 2009-02-07 08:49:49.000000000 +0000 +++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Index.java 2010-07-14 19:29:10.000000000 +0000 @@ -4,488 +4,27 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; import java.net.MalformedURLException; -import java.net.URISyntaxException; import java.net.URL; import java.util.HashSet; -import java.util.Hashtable; -import java.util.Locale; -import java.util.Map; import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; import java.util.regex.Pattern; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; /** * This class is designed to model simple indices that some web dictionaries * export. These indices are expected to contain only a list of the words * present in the dictionary. Internally, the class caches the indices so if two * Index objects are created with the same URL, only one data structure exists - * internally. The index object itself merely knows what its URL is.? + * internally. The index object itself merely knows what its URL is. * * @author ldd */ public final class Index { - private static final Logger logger = Logger - .getLogger(Index.class.getName()); - private static final Hashtable normalize_table = new Hashtable(); private static final int CACHE_FILE_VERSION = 1; - private static File CACHE_PATH = null; - static - { - String os_name = System.getProperty("os.name").toLowerCase(Locale.US); - if ((os_name.indexOf("linux") != -1) || (os_name.indexOf("unix") != -1) - || (os_name.indexOf("solaris") != -1)) - { - CACHE_PATH = new File(System.getProperty("user.home"), - ".webdict-lib"); - } - else if (os_name.indexOf("windows") != -1) - { - CACHE_PATH = new File(System.getenv("USERPROFILE"), "WebDict-Lib"); - } - else - { - throw new Error( - "cannot parse operating system name and adjust parameters: " - + os_name); - } - normalize_table.put("万", "萬"); - normalize_table.put("与", "與"); - normalize_table.put("両", "兩"); - normalize_table.put("並", "竝"); - normalize_table.put("乗", "乘"); - normalize_table.put("乱", "亂"); - normalize_table.put("亀", "龜"); - normalize_table.put("予", "豫"); - normalize_table.put("争", "爭"); - normalize_table.put("亘", "亙"); - normalize_table.put("亜", "亞"); - normalize_table.put("仏", "佛"); - normalize_table.put("仮", "假"); - normalize_table.put("会", "會"); - normalize_table.put("伜", "倅"); - normalize_table.put("伝", "傳"); - normalize_table.put("体", "體"); - normalize_table.put("余", "餘"); - normalize_table.put("佞", "侫"); - normalize_table.put("你", "儞"); - normalize_table.put("併", "倂"); - normalize_table.put("価", "價"); - normalize_table.put("侭", "儘"); - normalize_table.put("俱", "倶"); - normalize_table.put("値", "值"); - normalize_table.put("倹", "儉"); - normalize_table.put("偷", "偸"); - normalize_table.put("偽", "僞"); - normalize_table.put("僣", "僭"); - normalize_table.put("児", "兒"); - normalize_table.put("党", "黨"); - normalize_table.put("内", "內"); - normalize_table.put("円", "圓"); - normalize_table.put("冊", "册"); - normalize_table.put("写", "寫"); - normalize_table.put("処", "處"); - normalize_table.put("函", "凾"); - normalize_table.put("剎", "刹"); - normalize_table.put("剣", "劍"); - normalize_table.put("剤", "劑"); - normalize_table.put("剰", "剩"); - normalize_table.put("励", "勵"); - normalize_table.put("労", "勞"); - normalize_table.put("効", "效"); - normalize_table.put("勅", "敕"); - normalize_table.put("勧", "勸"); - normalize_table.put("勲", "勳"); - normalize_table.put("区", "區"); - normalize_table.put("医", "醫"); - normalize_table.put("卆", "卒"); - normalize_table.put("単", "單"); - normalize_table.put("即", "卽"); - normalize_table.put("历", "歷"); - normalize_table.put("厨", "廚"); - normalize_table.put("厩", "廐"); - normalize_table.put("厳", "嚴"); - normalize_table.put("参", "參"); - normalize_table.put("双", "雙"); - normalize_table.put("収", "收"); - normalize_table.put("叙", "敍"); - normalize_table.put("号", "號"); - normalize_table.put("告", "吿"); - normalize_table.put("呑", "吞"); - normalize_table.put("唖", "啞"); - normalize_table.put("営", "營"); - normalize_table.put("啟", "啓"); - normalize_table.put("喻", "喩"); - normalize_table.put("嘱", "囑"); - normalize_table.put("嚢", "囊"); - normalize_table.put("回", "囘"); - normalize_table.put("団", "團"); - normalize_table.put("囲", "圍"); - normalize_table.put("図", "圖"); - normalize_table.put("国", "國"); - normalize_table.put("圏", "圈"); - normalize_table.put("圧", "壓"); - normalize_table.put("堕", "墮"); - normalize_table.put("塁", "壘"); - normalize_table.put("塩", "鹽"); - normalize_table.put("増", "增"); - normalize_table.put("壊", "壞"); - normalize_table.put("壌", "壤"); - normalize_table.put("壮", "壯"); - normalize_table.put("声", "聲"); - normalize_table.put("壱", "壹"); - normalize_table.put("売", "賣"); - normalize_table.put("壷", "壺"); - normalize_table.put("変", "變"); - normalize_table.put("奨", "奬"); - normalize_table.put("娛", "娯"); - normalize_table.put("嬢", "孃"); - normalize_table.put("学", "學"); - normalize_table.put("宝", "寶"); - normalize_table.put("実", "實"); - normalize_table.put("寝", "寢"); - normalize_table.put("対", "對"); - normalize_table.put("寿", "壽"); - normalize_table.put("専", "專"); - normalize_table.put("将", "將"); - normalize_table.put("尓", "爾"); - normalize_table.put("戾", "戻"); - normalize_table.put("尭", "堯"); - normalize_table.put("尽", "盡"); - normalize_table.put("届", "屆"); - normalize_table.put("屏", "屛"); - normalize_table.put("属", "屬"); - normalize_table.put("岳", "嶽"); - normalize_table.put("峡", "峽"); - normalize_table.put("島", "嶋"); - normalize_table.put("巌", "巖"); - normalize_table.put("巣", "巢"); - normalize_table.put("巻", "卷"); - normalize_table.put("帯", "帶"); - normalize_table.put("帰", "歸"); - normalize_table.put("并", "幷"); - normalize_table.put("庁", "廳"); - normalize_table.put("広", "廣"); - normalize_table.put("廃", "廢"); - normalize_table.put("廄", "廏"); - normalize_table.put("弁", "辯"); - normalize_table.put("弐", "貳"); - normalize_table.put("弑", "弒"); - normalize_table.put("弥", "彌"); - normalize_table.put("弯", "彎"); - normalize_table.put("弾", "彈"); - normalize_table.put("当", "當"); - normalize_table.put("径", "徑"); - normalize_table.put("従", "從"); - normalize_table.put("徳", "德"); - normalize_table.put("徴", "徵"); - normalize_table.put("応", "應"); - normalize_table.put("忰", "悴"); - normalize_table.put("怛", "憺"); - normalize_table.put("恒", "恆"); - normalize_table.put("恋", "戀"); - normalize_table.put("恵", "惠"); - normalize_table.put("悦", "悅"); - normalize_table.put("悩", "惱"); - normalize_table.put("悪", "惡"); - normalize_table.put("惨", "慘"); - normalize_table.put("愠", "慍"); - normalize_table.put("慎", "愼"); - normalize_table.put("懐", "懷"); - normalize_table.put("懴", "懺"); - normalize_table.put("戦", "戰"); - normalize_table.put("戯", "戲"); - normalize_table.put("戱", "戲"); - normalize_table.put("戸", "戶"); - normalize_table.put("払", "拂"); - normalize_table.put("抜", "拔"); - normalize_table.put("択", "擇"); - normalize_table.put("抬", "擡"); - normalize_table.put("担", "擔"); - normalize_table.put("拝", "拜"); - normalize_table.put("拠", "據"); - normalize_table.put("拡", "擴"); - normalize_table.put("挙", "擧"); - normalize_table.put("挟", "挾"); - normalize_table.put("捜", "搜"); - normalize_table.put("掲", "揭"); - normalize_table.put("摂", "攝"); - normalize_table.put("撹", "攪"); - normalize_table.put("敎", "教"); - normalize_table.put("数", "數"); - normalize_table.put("斉", "齊"); - normalize_table.put("斎", "齋"); - normalize_table.put("断", "斷"); - normalize_table.put("既", "旣"); - normalize_table.put("旧", "舊"); - normalize_table.put("昼", "晝"); - normalize_table.put("昿", "曠"); - normalize_table.put("晋", "晉"); - normalize_table.put("暁", "曉"); - normalize_table.put("暦", "曆"); - normalize_table.put("曽", "曾"); - normalize_table.put("条", "條"); - normalize_table.put("来", "來"); - normalize_table.put("枢", "樞"); - normalize_table.put("枦", "櫨"); - normalize_table.put("栄", "榮"); - normalize_table.put("桜", "櫻"); - normalize_table.put("桟", "棧"); - normalize_table.put("桧", "檜"); - normalize_table.put("梹", "檳"); - normalize_table.put("梼", "檮"); - normalize_table.put("検", "檢"); - normalize_table.put("楼", "樓"); - normalize_table.put("楽", "樂"); - normalize_table.put("様", "樣"); - normalize_table.put("槙", "槇"); - normalize_table.put("権", "權"); - normalize_table.put("横", "橫"); - normalize_table.put("檪", "櫟"); - normalize_table.put("櫺", "欞"); - normalize_table.put("欧", "歐"); - normalize_table.put("歓", "歡"); - normalize_table.put("歩", "步"); - normalize_table.put("歯", "齒"); - normalize_table.put("歳", "歲"); - normalize_table.put("歴", "歷"); - normalize_table.put("残", "殘"); - normalize_table.put("殱", "殲"); - normalize_table.put("殴", "毆"); - normalize_table.put("殻", "殼"); - normalize_table.put("毁", "毀"); - normalize_table.put("毎", "每"); - normalize_table.put("毗", "毘"); - normalize_table.put("気", "氣"); - normalize_table.put("氷", "冰"); - normalize_table.put("污", "汚"); - normalize_table.put("沢", "澤"); - normalize_table.put("浄", "淨"); - normalize_table.put("浅", "淺"); - normalize_table.put("浜", "濱"); - normalize_table.put("涙", "淚"); - normalize_table.put("涛", "濤"); - normalize_table.put("涜", "瀆"); - normalize_table.put("清", "淸"); - normalize_table.put("渇", "渴"); - normalize_table.put("済", "濟"); - normalize_table.put("渋", "澁"); - normalize_table.put("渓", "溪"); - normalize_table.put("温", "溫"); - normalize_table.put("湾", "灣"); - normalize_table.put("湿", "濕"); - normalize_table.put("満", "滿"); - normalize_table.put("滞", "滯"); - normalize_table.put("潅", "灌"); - normalize_table.put("潜", "潛"); - normalize_table.put("灯", "燈"); - normalize_table.put("炉", "爐"); - normalize_table.put("点", "點"); - normalize_table.put("為", "爲"); - normalize_table.put("焔", "焰"); - normalize_table.put("焼", "燒"); - normalize_table.put("状", "狀"); - normalize_table.put("独", "獨"); - normalize_table.put("狭", "狹"); - normalize_table.put("猟", "獵"); - normalize_table.put("猪", "猪"); - normalize_table.put("献", "獻"); - normalize_table.put("獣", "獸"); - normalize_table.put("珱", "瓔"); - normalize_table.put("瑶", "瑤"); - normalize_table.put("瓶", "甁"); - normalize_table.put("産", "產"); - normalize_table.put("画", "畫"); - normalize_table.put("畳", "疊"); - normalize_table.put("畴", "疇"); - normalize_table.put("痴", "癡"); - normalize_table.put("発", "發"); - normalize_table.put("皋", "皐"); - normalize_table.put("盗", "盜"); - normalize_table.put("県", "縣"); - normalize_table.put("真", "眞"); - normalize_table.put("眾", "衆"); - normalize_table.put("砕", "碎"); - normalize_table.put("砺", "礪"); - normalize_table.put("砿", "礦"); - normalize_table.put("碍", "礙"); - normalize_table.put("礼", "禮"); - normalize_table.put("祢", "禰"); - normalize_table.put("祷", "禱"); - normalize_table.put("禄", "祿"); - normalize_table.put("禅", "禪"); - normalize_table.put("秘", "祕"); - normalize_table.put("称", "稱"); - normalize_table.put("稲", "稻"); - normalize_table.put("穂", "穗"); - normalize_table.put("穏", "穩"); - normalize_table.put("穐", "龝"); - normalize_table.put("穣", "穰"); - normalize_table.put("窃", "竊"); - normalize_table.put("窓", "窗"); - normalize_table.put("竜", "龍"); - normalize_table.put("競", "竸"); - normalize_table.put("筝", "箏"); - normalize_table.put("篭", "籠"); - normalize_table.put("簑", "簔"); - normalize_table.put("籖", "籤"); - normalize_table.put("粋", "粹"); - normalize_table.put("粛", "肅"); - normalize_table.put("粤", "粵"); - normalize_table.put("糸", "絲"); - normalize_table.put("経", "經"); - normalize_table.put("絵", "繪"); - normalize_table.put("絶", "絕"); - normalize_table.put("継", "繼"); - normalize_table.put("続", "續"); - normalize_table.put("総", "總"); - normalize_table.put("緑", "綠"); - normalize_table.put("緒", "緖"); - normalize_table.put("緕", "纃"); - normalize_table.put("縁", "緣"); - normalize_table.put("纒", "纏"); - normalize_table.put("縄", "繩"); - normalize_table.put("縦", "縱"); - normalize_table.put("繊", "纖"); - normalize_table.put("繋", "繫"); - normalize_table.put("翆", "翠"); - normalize_table.put("聡", "聰"); - normalize_table.put("聴", "聽"); - normalize_table.put("胆", "膽"); - normalize_table.put("脱", "脫"); - normalize_table.put("脳", "腦"); - normalize_table.put("臓", "臟"); - normalize_table.put("舉", "擧"); - normalize_table.put("舎", "舍"); - normalize_table.put("舮", "艫"); - normalize_table.put("艶", "艷"); - normalize_table.put("芦", "蘆"); - normalize_table.put("芸", "藝"); - normalize_table.put("苔", "薹"); - normalize_table.put("茎", "莖"); - normalize_table.put("荘", "莊"); - normalize_table.put("蔵", "藏"); - normalize_table.put("薫", "薰"); - normalize_table.put("薬", "藥"); - normalize_table.put("薮", "藪"); - normalize_table.put("虚", "虛"); - normalize_table.put("虫", "蟲"); - normalize_table.put("虱", "蝨"); - normalize_table.put("蚕", "蠶"); - normalize_table.put("蛍", "螢"); - normalize_table.put("蛎", "蠣"); - normalize_table.put("蛮", "蠻"); - normalize_table.put("蝿", "蠅"); - normalize_table.put("衛", "衞"); - normalize_table.put("装", "裝"); - normalize_table.put("褒", "襃"); - normalize_table.put("褝", "襌"); - normalize_table.put("覚", "覺"); - normalize_table.put("覧", "覽"); - normalize_table.put("観", "觀"); - normalize_table.put("触", "觸"); - normalize_table.put("訳", "譯"); - normalize_table.put("証", "證"); - normalize_table.put("誉", "譽"); - normalize_table.put("説", "說"); - normalize_table.put("読", "讀"); - normalize_table.put("諌", "諫"); - normalize_table.put("謡", "謠"); - normalize_table.put("譛", "譖"); - normalize_table.put("譲", "讓"); - normalize_table.put("讃", "讚"); - normalize_table.put("豊", "豐"); - normalize_table.put("賎", "賤"); - normalize_table.put("賛", "贊"); - normalize_table.put("践", "踐"); - normalize_table.put("転", "轉"); - normalize_table.put("軽", "輕"); - normalize_table.put("輌", "輛"); - normalize_table.put("辞", "辭"); - normalize_table.put("辺", "邊"); - normalize_table.put("迩", "邇"); - normalize_table.put("逓", "遞"); - normalize_table.put("遅", "遲"); - normalize_table.put("遥", "遙"); - normalize_table.put("郎", "郞"); - normalize_table.put("郷", "鄕"); - normalize_table.put("鄉", "鄕"); - normalize_table.put("酔", "醉"); - normalize_table.put("醸", "釀"); - normalize_table.put("釈", "釋"); - normalize_table.put("鈎", "鉤"); - normalize_table.put("鈩", "鑪"); - normalize_table.put("鈬", "鐸"); - normalize_table.put("鉄", "鐵"); - normalize_table.put("鉱", "鑛"); - normalize_table.put("銭", "錢"); - normalize_table.put("鋭", "銳"); - normalize_table.put("鋳", "鑄"); - normalize_table.put("録", "錄"); - normalize_table.put("鎮", "鎭"); - normalize_table.put("鑚", "鑽"); - normalize_table.put("関", "關"); - normalize_table.put("閲", "閱"); - normalize_table.put("闘", "鬪"); - normalize_table.put("陥", "陷"); - normalize_table.put("険", "險"); - normalize_table.put("随", "隨"); - normalize_table.put("隠", "隱"); - normalize_table.put("隣", "鄰"); - normalize_table.put("隷", "隸"); - normalize_table.put("雑", "雜"); - normalize_table.put("霊", "靈"); - normalize_table.put("青", "靑"); - normalize_table.put("静", "靜"); - normalize_table.put("頚", "頸"); - normalize_table.put("頬", "頰"); - normalize_table.put("頼", "賴"); - normalize_table.put("顔", "顏"); - normalize_table.put("顕", "顯"); - normalize_table.put("顛", "顚"); - normalize_table.put("飲", "飮"); - normalize_table.put("餅", "餠"); - normalize_table.put("駄", "馱"); - normalize_table.put("駅", "驛"); - normalize_table.put("駆", "驅"); - normalize_table.put("騒", "騷"); - normalize_table.put("験", "驗"); - normalize_table.put("髄", "髓"); - normalize_table.put("髪", "髮"); - normalize_table.put("鯵", "鰺"); - normalize_table.put("鰛", "鰮"); - normalize_table.put("鴎", "鷗"); - normalize_table.put("鴬", "鶯"); - normalize_table.put("鶏", "鷄"); - normalize_table.put("鷏", "鷆"); - normalize_table.put("麁", "麤"); - normalize_table.put("麦", "麥"); - normalize_table.put("麸", "麩"); - normalize_table.put("麺", "麪"); - normalize_table.put("麽", "麼"); - normalize_table.put("黄", "黃"); - normalize_table.put("黒", "黑"); - normalize_table.put("黙", "默"); - normalize_table.put("鼡", "鼠"); - normalize_table.put("齢", "齡"); - normalize_table.put("朗", "朗"); - normalize_table.put("猪", "猪"); - - } - private final URL url; private final ProgressMonitor monitor; @@ -552,7 +91,21 @@ return getLongestTermLength(this.url, this.monitor); } - private static final class DatedSet + + private static boolean exists(URL url, ProgressMonitor monitor, String term) + throws CancelledByUser + { + return store.getDatedData(url, monitor, new DatedSetBuilder()).set + .contains(Util.normalize(term)); + } + + private static int getLongestTermLength(URL url, ProgressMonitor monitor) + throws CancelledByUser + { + return store.getDatedData(url, monitor, new DatedSetBuilder()).longest; + } + + private static final class DatedSet implements DatedData { public final long date; @@ -566,286 +119,160 @@ this.set = set; this.longest = longest; } - } - - private static Hashtable indices = new Hashtable(); - - private static boolean exists(URL url, ProgressMonitor monitor, String term) throws CancelledByUser - { - for (Map.Entry entry : normalize_table.entrySet()) - { - term = term.replace(entry.getKey().charAt(0), entry.getValue().charAt(0)); - } - - return getDatedSet(url, monitor).set.contains(term); - } - - private static int getLongestTermLength(URL url, ProgressMonitor monitor) throws CancelledByUser - { - return getDatedSet(url, monitor).longest; - } - private static DatedSet getDatedSet(URL url, ProgressMonitor monitor) throws CancelledByUser - { - String url_str = url.toString().intern(); - /* - * Although there is no support for concurrent access of Index objects, - * we need to synchronize at this point. This is required because - * multiple libraries which do not talk to each other could be using - * this code simultaneously. If library A access an Index object with - * URL U at the same time library B access its own Index object with URL - * U then, because the two URLs are the same, there is a risk of - * concurrent access here. + /** + * @see com.lddubeau.ddb.DatedData#getDate() */ - synchronized (indices) + @Override + public long getDate() { - DatedSet ds = indices.get(url_str); - - if (ds == null) - { - ds = loadIndex(url, monitor); - indices.put(url_str, ds); - } - - return ds; + return this.date; } - } - private static final Pattern head_clean_re = Pattern.compile("<.*?>"); + } - private static DatedSet loadIndex(URL url, ProgressMonitor monitor) throws CancelledByUser + private static final class DatedSetBuilder implements DataBuilder { - // The goal is as follow: - // - If there is no cached data, load the index from the web site. - // - If there is cached data but the index on the web site is newer, - // load the index from the web site. - // - If the web site is not responding, used the cached data. - // - If the web site is not responding and there is no cached data, - // return the empty set. + private int version = 0; - CACHE_PATH.mkdirs(); + private long date = 0; - int longest = 0; - long date = 0; - Set ret = null; + private int longest = -1; - HttpURLConnection conn = null; + private Set ret; - // First try to see whether it is cached. - File cache_path = null; - try + @Override + public DatedSet getData() { - cache_path = new File(CACHE_PATH, url.toURI().toASCIIString() - .replaceAll("[:/]", "_")); - + return (this.ret == null) ? null : new DatedSet(this.date, + this.ret, this.longest); } - catch (URISyntaxException e) + + @Override + public long getDate() { - throw new Error("cannot convert URL to URI: " + url.toString(), e); + return this.date; } - DataInputStream in_file = null; - try + @Override + public void readFromCache(DataInputStream in) { - in_file = new DataInputStream(new GZIPInputStream( - new FileInputStream(cache_path))); - if (in_file.readInt() == CACHE_FILE_VERSION) + try { - date = in_file.readLong(); + longest = in.readInt(); + int num = in.readInt(); - // Check whether the data on the web site is newer - boolean read_the_cache = true; - try + this.ret = new HashSet(num); + for (int i = 0; i < num; i++) { - conn = (HttpURLConnection) url.openConnection(); - conn.setIfModifiedSince(date); - - conn.connect(); - - // If we get the response HTTP_NOT_MODIFIED we want to read - // the cache. - // If we get any response other than HTTP_OK we want to read - // the cache - // because those are likely errors of communication. - // If we get HTTP_OK, it means that the data on the web site - // is newer. - read_the_cache = (conn.getResponseCode() != HttpURLConnection.HTTP_OK); - logger.log(Level.FINE, "HTTP response: " - + conn.getResponseCode()); - } - catch (IOException e) - { - // The web site is not accessible... read the cache. + this.ret.add(in.readUTF()); } - if (read_the_cache) + if (in.read() > -1) { - logger.log(Level.FINE, "reading cache: " - + cache_path.toString()); - // HTTP_OK would mean that the data on the server is newer - longest = in_file.readInt(); - int num = in_file.readInt(); - - ret = new HashSet(num); - for (int i = 0; i < num; i++) - { - ret.add(in_file.readUTF()); - } - - if (in_file.read() > -1) - { - // Eek, we did not read all the data. - // Flush... - ret = null; - } + // Eek, we did not read all the data. + // Flush... + this.ret = null; } - in_file.close(); } - else + catch (Exception e) { - // Else, act as if there were no cache. - logger.log(Level.FINE, "older cache: " + cache_path); + // Assume a corrupted cache + this.ret = null; } } - catch (FileNotFoundException e) - { - // Ok, the cache does not exist. Just ignore. - } - catch (IOException e) + + /** + * @see com.lddubeau.ddb.DataBuilder#setDate(long) + */ + @Override + public void setDate(long date) { - // Ok, there was a problem reading the cache. - // Flush and ignore. - ret = null; + this.date = date; + } - if (ret == null) + @Override + public void readFromWeb(BufferedReader in, ProgressMonitor monitor) + throws CancelledByUser, IOException { - logger.log(Level.FINE, "fetching data from web site" + url); + final Pattern head_clean_re = Pattern.compile("<.*?>"); + String line = in.readLine(); + this.ret = new HashSet(); try { - // It may happen that conn is null. - if (conn == null) + while (line != null) { - conn = (HttpURLConnection) url.openConnection(); - conn.connect(); - } - - // Fail silently if we get anything else than HTTP_OK. - if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) - { - if (monitor != null) - { - monitor.setMinimum(0); - monitor.setMaximum(conn.getContentLength()); - } - date = conn.getDate(); - InputStream raw_stream = (monitor == null) ? conn - .getInputStream() : new MonitoredInputStream(conn - .getInputStream(), monitor); - BufferedReader reader = new BufferedReader( - new InputStreamReader(new GZIPInputStream( - raw_stream), "UTF-8")); - try + if (monitor != null && monitor.isCancelled()) { - String line = reader.readLine(); - ret = new HashSet(); - while (line != null) - { - if (monitor != null && monitor.isCancelled()) - { - throw new CancelledByUser(); - } - line = head_clean_re.matcher(line).replaceAll("") - .intern(); - ret.add(line); - if (line.length() > longest) - { - longest = line.length(); - } - line = reader.readLine(); - } + throw new CancelledByUser(); } - finally + line = head_clean_re.matcher(line).replaceAll("").intern(); + this.ret.add(line); + if (line.length() > longest) { - reader.close(); - if (monitor != null) - { - monitor.close(); - } + longest = line.length(); } - } - else - { - logger.log(Level.FINE, - "fetching data from web site failed with code: " - + conn.getResponseCode()); + line = in.readLine(); } } + // Any exception means we were unsuccessful so we flush ret catch (IOException e) { - throw new Error(e); + this.ret = null; + throw e; + } + catch (CancelledByUser e) + { + this.ret = null; + throw e; } + catch (Exception e) + { + this.ret = null; + } + } + + @Override + public void writeToCache(DataOutputStream out) throws IOException + { + out.writeInt(CACHE_FILE_VERSION); + out.writeLong(date); + out.writeInt(longest); + out.writeInt(ret.size()); + for (String entry : ret) + { + out.writeUTF(entry); + } + } + + @Override + public boolean checkVersion() + { + return (this.version == CACHE_FILE_VERSION); } - if (ret != null) + @Override + public void loadPreamble(DataInputStream in) { - // Save the Set in the cache - DataOutputStream out_file = null; try { - out_file = new DataOutputStream(new GZIPOutputStream( - new FileOutputStream(cache_path))); - out_file.writeInt(CACHE_FILE_VERSION); - out_file.writeLong(date); - out_file.writeInt(longest); - out_file.writeInt(ret.size()); - for (String entry : ret) + this.version = in.readInt(); + if (this.checkVersion()) { - out_file.writeUTF(entry); + this.date = in.readLong(); } - out_file.flush(); - out_file.close(); - } - catch (FileNotFoundException e) - { - // Ignore. It means there is some architectural issue which - // prevents - // us to cache. - // This is not a fatal problem. } catch (IOException e) { - // Cleanup and otherwise ignore as above. - if (out_file != null) - { - try - { - out_file.close(); - } - catch (IOException e1) - { - // This is strange and would most likely be the result - // of a - // serious system problem. This is not the place to - // report - // serious system problems. - } - cache_path.delete(); - } + // Ignore the exception: act as if the cache is corrupted. } } - else - { - logger.log(Level.FINE, "unable to fetch from cache or web site: " - + url); - ret = new HashSet(); - date = 0; - longest = 0; - } - return (ret == null)?null:new DatedSet(date, ret, longest); } + private static final Util.Store store = new Util.Store(); + public static String getVersion() { return LastBuild.getVersion(); @@ -876,8 +303,8 @@ // This ought to be used for testing only try { - CACHE_PATH = new File(argv[0]); - loadIndex(new URL(argv[1]), null); + Util.CACHE_PATH = new File(argv[0]); + new Util.Store().getDatedData(new URL(argv[1]), null, new DatedSetBuilder()); } catch (Exception e) { @@ -885,65 +312,4 @@ } } - private static final class MonitoredInputStream extends InputStream - { - private final InputStream stream; - - private final ProgressMonitor monitor; - - private int count = 0; - - public MonitoredInputStream(InputStream in, ProgressMonitor monitor) - { - this.stream = in; - this.monitor = monitor; - } - - @Override - public int read() throws IOException - { - int ret = this.stream.read(); - this.count += ret; - this.monitor.setProgress(this.count); - return ret; - } - - @Override - public int read(byte [] b, int off, int len) throws IOException - { - int ret = this.stream.read(b, off, len); - this.count += ret; - this.monitor.setProgress(this.count); - return ret; - } - - @Override - public int available() throws IOException - { - return this.stream.available(); - } - - @Override - public void close() throws IOException - { - this.stream.close(); - this.monitor.close(); - } - - @Override - public boolean markSupported() - { - return false; - } - - @Override - public long skip(long n) throws IOException - { - long ret = this.stream.skip(n); - this.count += ret; - this.monitor.setProgress(this.count); - return ret; - } - } - } diff -Nru java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Util.java java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Util.java --- java-webdict-lib-0.3ubuntu3/jsrc/com/lddubeau/ddb/Util.java 1970-01-01 00:00:00.000000000 +0000 +++ java-webdict-lib-0.4/jsrc/com/lddubeau/ddb/Util.java 2010-07-14 19:28:01.000000000 +0000 @@ -0,0 +1,782 @@ +package com.lddubeau.ddb; + +import java.io.BufferedReader; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Hashtable; +import java.util.Locale; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +final class Util +{ + private static final Logger logger = Logger.getLogger(Util.class.getName()); + + static final class MonitoredInputStream extends InputStream + { + private final InputStream stream; + + private final ProgressMonitor monitor; + + private int count = 0; + + public MonitoredInputStream(InputStream in, ProgressMonitor monitor) + { + this.stream = in; + this.monitor = monitor; + } + + @Override + public int read() throws IOException + { + int ret = this.stream.read(); + this.count += ret; + this.monitor.setProgress(this.count); + return ret; + } + + @Override + public int read(byte [] b, int off, int len) throws IOException + { + int ret = this.stream.read(b, off, len); + this.count += ret; + this.monitor.setProgress(this.count); + return ret; + } + + @Override + public int available() throws IOException + { + return this.stream.available(); + } + + @Override + public void close() throws IOException + { + this.stream.close(); + this.monitor.close(); + } + + @Override + public boolean markSupported() + { + return false; + } + + @Override + public long skip(long n) throws IOException + { + long ret = this.stream.skip(n); + this.count += ret; + this.monitor.setProgress(this.count); + return ret; + } + } + + private static final Hashtable normalize_table = new Hashtable(); + + static File CACHE_PATH = null; + + static + { + String os_name = System.getProperty("os.name").toLowerCase(Locale.US); + if ((os_name.indexOf("linux") != -1) || (os_name.indexOf("unix") != -1) + || (os_name.indexOf("solaris") != -1)) + { + Util.CACHE_PATH = new File(System.getProperty("user.home"), + ".webdict-lib"); + } + else if (os_name.indexOf("windows") != -1) + { + Util.CACHE_PATH = new File(System.getenv("USERPROFILE"), + "WebDict-Lib"); + } + else + { + throw new Error( + "cannot parse operating system name and adjust parameters: " + + os_name); + } + normalize_table.put("万", "萬"); + normalize_table.put("与", "與"); + normalize_table.put("両", "兩"); + normalize_table.put("並", "竝"); + normalize_table.put("乗", "乘"); + normalize_table.put("乱", "亂"); + normalize_table.put("亀", "龜"); + normalize_table.put("予", "豫"); + normalize_table.put("争", "爭"); + normalize_table.put("亘", "亙"); + normalize_table.put("亜", "亞"); + normalize_table.put("仏", "佛"); + normalize_table.put("仮", "假"); + normalize_table.put("会", "會"); + normalize_table.put("伜", "倅"); + normalize_table.put("伝", "傳"); + normalize_table.put("体", "體"); + normalize_table.put("余", "餘"); + normalize_table.put("佞", "侫"); + normalize_table.put("你", "儞"); + normalize_table.put("併", "倂"); + normalize_table.put("価", "價"); + normalize_table.put("侭", "儘"); + normalize_table.put("俱", "倶"); + normalize_table.put("値", "值"); + normalize_table.put("倹", "儉"); + normalize_table.put("偷", "偸"); + normalize_table.put("偽", "僞"); + normalize_table.put("僣", "僭"); + normalize_table.put("児", "兒"); + normalize_table.put("党", "黨"); + normalize_table.put("内", "內"); + normalize_table.put("円", "圓"); + normalize_table.put("冊", "册"); + normalize_table.put("写", "寫"); + normalize_table.put("処", "處"); + normalize_table.put("函", "凾"); + normalize_table.put("剎", "刹"); + normalize_table.put("剣", "劍"); + normalize_table.put("剤", "劑"); + normalize_table.put("剰", "剩"); + normalize_table.put("励", "勵"); + normalize_table.put("労", "勞"); + normalize_table.put("効", "效"); + normalize_table.put("勅", "敕"); + normalize_table.put("勧", "勸"); + normalize_table.put("勲", "勳"); + normalize_table.put("区", "區"); + normalize_table.put("医", "醫"); + normalize_table.put("卆", "卒"); + normalize_table.put("単", "單"); + normalize_table.put("即", "卽"); + normalize_table.put("历", "歷"); + normalize_table.put("厨", "廚"); + normalize_table.put("厩", "廐"); + normalize_table.put("厳", "嚴"); + normalize_table.put("参", "參"); + normalize_table.put("双", "雙"); + normalize_table.put("収", "收"); + normalize_table.put("叙", "敍"); + normalize_table.put("号", "號"); + normalize_table.put("告", "吿"); + normalize_table.put("呑", "吞"); + normalize_table.put("唖", "啞"); + normalize_table.put("営", "營"); + normalize_table.put("啟", "啓"); + normalize_table.put("喻", "喩"); + normalize_table.put("嘱", "囑"); + normalize_table.put("嚢", "囊"); + normalize_table.put("回", "囘"); + normalize_table.put("団", "團"); + normalize_table.put("囲", "圍"); + normalize_table.put("図", "圖"); + normalize_table.put("国", "國"); + normalize_table.put("圏", "圈"); + normalize_table.put("圧", "壓"); + normalize_table.put("堕", "墮"); + normalize_table.put("塁", "壘"); + normalize_table.put("塩", "鹽"); + normalize_table.put("増", "增"); + normalize_table.put("壊", "壞"); + normalize_table.put("壌", "壤"); + normalize_table.put("壮", "壯"); + normalize_table.put("声", "聲"); + normalize_table.put("壱", "壹"); + normalize_table.put("売", "賣"); + normalize_table.put("壷", "壺"); + normalize_table.put("変", "變"); + normalize_table.put("奨", "奬"); + normalize_table.put("娛", "娯"); + normalize_table.put("嬢", "孃"); + normalize_table.put("学", "學"); + normalize_table.put("宝", "寶"); + normalize_table.put("実", "實"); + normalize_table.put("寝", "寢"); + normalize_table.put("対", "對"); + normalize_table.put("寿", "壽"); + normalize_table.put("専", "專"); + normalize_table.put("将", "將"); + normalize_table.put("尓", "爾"); + normalize_table.put("戾", "戻"); + normalize_table.put("尭", "堯"); + normalize_table.put("尽", "盡"); + normalize_table.put("届", "屆"); + normalize_table.put("屏", "屛"); + normalize_table.put("属", "屬"); + normalize_table.put("岳", "嶽"); + normalize_table.put("峡", "峽"); + normalize_table.put("島", "嶋"); + normalize_table.put("巌", "巖"); + normalize_table.put("巣", "巢"); + normalize_table.put("巻", "卷"); + normalize_table.put("帯", "帶"); + normalize_table.put("帰", "歸"); + normalize_table.put("并", "幷"); + normalize_table.put("庁", "廳"); + normalize_table.put("広", "廣"); + normalize_table.put("廃", "廢"); + normalize_table.put("廄", "廏"); + normalize_table.put("弁", "辯"); + normalize_table.put("弐", "貳"); + normalize_table.put("弑", "弒"); + normalize_table.put("弥", "彌"); + normalize_table.put("弯", "彎"); + normalize_table.put("弾", "彈"); + normalize_table.put("当", "當"); + normalize_table.put("径", "徑"); + normalize_table.put("従", "從"); + normalize_table.put("徳", "德"); + normalize_table.put("徴", "徵"); + normalize_table.put("応", "應"); + normalize_table.put("忰", "悴"); + normalize_table.put("怛", "憺"); + normalize_table.put("恒", "恆"); + normalize_table.put("恋", "戀"); + normalize_table.put("恵", "惠"); + normalize_table.put("悦", "悅"); + normalize_table.put("悩", "惱"); + normalize_table.put("悪", "惡"); + normalize_table.put("惨", "慘"); + normalize_table.put("愠", "慍"); + normalize_table.put("慎", "愼"); + normalize_table.put("懐", "懷"); + normalize_table.put("懴", "懺"); + normalize_table.put("戦", "戰"); + normalize_table.put("戯", "戲"); + normalize_table.put("戱", "戲"); + normalize_table.put("戸", "戶"); + normalize_table.put("払", "拂"); + normalize_table.put("抜", "拔"); + normalize_table.put("択", "擇"); + normalize_table.put("抬", "擡"); + normalize_table.put("担", "擔"); + normalize_table.put("拝", "拜"); + normalize_table.put("拠", "據"); + normalize_table.put("拡", "擴"); + normalize_table.put("挙", "擧"); + normalize_table.put("挟", "挾"); + normalize_table.put("捜", "搜"); + normalize_table.put("掲", "揭"); + normalize_table.put("摂", "攝"); + normalize_table.put("撹", "攪"); + normalize_table.put("敎", "教"); + normalize_table.put("数", "數"); + normalize_table.put("斉", "齊"); + normalize_table.put("斎", "齋"); + normalize_table.put("断", "斷"); + normalize_table.put("既", "旣"); + normalize_table.put("旧", "舊"); + normalize_table.put("昼", "晝"); + normalize_table.put("昿", "曠"); + normalize_table.put("晋", "晉"); + normalize_table.put("暁", "曉"); + normalize_table.put("暦", "曆"); + normalize_table.put("曽", "曾"); + normalize_table.put("条", "條"); + normalize_table.put("来", "來"); + normalize_table.put("枢", "樞"); + normalize_table.put("枦", "櫨"); + normalize_table.put("栄", "榮"); + normalize_table.put("桜", "櫻"); + normalize_table.put("桟", "棧"); + normalize_table.put("桧", "檜"); + normalize_table.put("梹", "檳"); + normalize_table.put("梼", "檮"); + normalize_table.put("検", "檢"); + normalize_table.put("楼", "樓"); + normalize_table.put("楽", "樂"); + normalize_table.put("様", "樣"); + normalize_table.put("槙", "槇"); + normalize_table.put("権", "權"); + normalize_table.put("横", "橫"); + normalize_table.put("檪", "櫟"); + normalize_table.put("櫺", "欞"); + normalize_table.put("欧", "歐"); + normalize_table.put("歓", "歡"); + normalize_table.put("歩", "步"); + normalize_table.put("歯", "齒"); + normalize_table.put("歳", "歲"); + normalize_table.put("歴", "歷"); + normalize_table.put("残", "殘"); + normalize_table.put("殱", "殲"); + normalize_table.put("殴", "毆"); + normalize_table.put("殻", "殼"); + normalize_table.put("毁", "毀"); + normalize_table.put("毎", "每"); + normalize_table.put("毗", "毘"); + normalize_table.put("気", "氣"); + normalize_table.put("氷", "冰"); + normalize_table.put("污", "汚"); + normalize_table.put("沢", "澤"); + normalize_table.put("浄", "淨"); + normalize_table.put("浅", "淺"); + normalize_table.put("浜", "濱"); + normalize_table.put("涙", "淚"); + normalize_table.put("涛", "濤"); + normalize_table.put("涜", "瀆"); + normalize_table.put("清", "淸"); + normalize_table.put("渇", "渴"); + normalize_table.put("済", "濟"); + normalize_table.put("渋", "澁"); + normalize_table.put("渓", "溪"); + normalize_table.put("温", "溫"); + normalize_table.put("湾", "灣"); + normalize_table.put("湿", "濕"); + normalize_table.put("満", "滿"); + normalize_table.put("滞", "滯"); + normalize_table.put("潅", "灌"); + normalize_table.put("潜", "潛"); + normalize_table.put("灯", "燈"); + normalize_table.put("炉", "爐"); + normalize_table.put("点", "點"); + normalize_table.put("為", "爲"); + normalize_table.put("焔", "焰"); + normalize_table.put("焼", "燒"); + normalize_table.put("状", "狀"); + normalize_table.put("独", "獨"); + normalize_table.put("狭", "狹"); + normalize_table.put("猟", "獵"); + normalize_table.put("猪", "猪"); + normalize_table.put("献", "獻"); + normalize_table.put("獣", "獸"); + normalize_table.put("珱", "瓔"); + normalize_table.put("瑶", "瑤"); + normalize_table.put("瓶", "甁"); + normalize_table.put("産", "產"); + normalize_table.put("画", "畫"); + normalize_table.put("畳", "疊"); + normalize_table.put("畴", "疇"); + normalize_table.put("痴", "癡"); + normalize_table.put("発", "發"); + normalize_table.put("皋", "皐"); + normalize_table.put("盗", "盜"); + normalize_table.put("県", "縣"); + normalize_table.put("真", "眞"); + normalize_table.put("眾", "衆"); + normalize_table.put("砕", "碎"); + normalize_table.put("砺", "礪"); + normalize_table.put("砿", "礦"); + normalize_table.put("碍", "礙"); + normalize_table.put("礼", "禮"); + normalize_table.put("祢", "禰"); + normalize_table.put("祷", "禱"); + normalize_table.put("禄", "祿"); + normalize_table.put("禅", "禪"); + normalize_table.put("秘", "祕"); + normalize_table.put("称", "稱"); + normalize_table.put("稲", "稻"); + normalize_table.put("穂", "穗"); + normalize_table.put("穏", "穩"); + normalize_table.put("穐", "龝"); + normalize_table.put("穣", "穰"); + normalize_table.put("窃", "竊"); + normalize_table.put("窓", "窗"); + normalize_table.put("竜", "龍"); + normalize_table.put("競", "竸"); + normalize_table.put("筝", "箏"); + normalize_table.put("篭", "籠"); + normalize_table.put("簑", "簔"); + normalize_table.put("籖", "籤"); + normalize_table.put("粋", "粹"); + normalize_table.put("粛", "肅"); + normalize_table.put("粤", "粵"); + normalize_table.put("糸", "絲"); + normalize_table.put("経", "經"); + normalize_table.put("絵", "繪"); + normalize_table.put("絶", "絕"); + normalize_table.put("継", "繼"); + normalize_table.put("続", "續"); + normalize_table.put("総", "總"); + normalize_table.put("緑", "綠"); + normalize_table.put("緒", "緖"); + normalize_table.put("緕", "纃"); + normalize_table.put("縁", "緣"); + normalize_table.put("纒", "纏"); + normalize_table.put("縄", "繩"); + normalize_table.put("縦", "縱"); + normalize_table.put("繊", "纖"); + normalize_table.put("繋", "繫"); + normalize_table.put("翆", "翠"); + normalize_table.put("聡", "聰"); + normalize_table.put("聴", "聽"); + normalize_table.put("胆", "膽"); + normalize_table.put("脱", "脫"); + normalize_table.put("脳", "腦"); + normalize_table.put("臓", "臟"); + normalize_table.put("舉", "擧"); + normalize_table.put("舎", "舍"); + normalize_table.put("舮", "艫"); + normalize_table.put("艶", "艷"); + normalize_table.put("芦", "蘆"); + normalize_table.put("芸", "藝"); + normalize_table.put("苔", "薹"); + normalize_table.put("茎", "莖"); + normalize_table.put("荘", "莊"); + normalize_table.put("蔵", "藏"); + normalize_table.put("薫", "薰"); + normalize_table.put("薬", "藥"); + normalize_table.put("薮", "藪"); + normalize_table.put("虚", "虛"); + normalize_table.put("虫", "蟲"); + normalize_table.put("虱", "蝨"); + normalize_table.put("蚕", "蠶"); + normalize_table.put("蛍", "螢"); + normalize_table.put("蛎", "蠣"); + normalize_table.put("蛮", "蠻"); + normalize_table.put("蝿", "蠅"); + normalize_table.put("衛", "衞"); + normalize_table.put("装", "裝"); + normalize_table.put("褒", "襃"); + normalize_table.put("褝", "襌"); + normalize_table.put("覚", "覺"); + normalize_table.put("覧", "覽"); + normalize_table.put("観", "觀"); + normalize_table.put("触", "觸"); + normalize_table.put("訳", "譯"); + normalize_table.put("証", "證"); + normalize_table.put("誉", "譽"); + normalize_table.put("説", "說"); + normalize_table.put("読", "讀"); + normalize_table.put("諌", "諫"); + normalize_table.put("謡", "謠"); + normalize_table.put("譛", "譖"); + normalize_table.put("譲", "讓"); + normalize_table.put("讃", "讚"); + normalize_table.put("豊", "豐"); + normalize_table.put("賎", "賤"); + normalize_table.put("賛", "贊"); + normalize_table.put("践", "踐"); + normalize_table.put("転", "轉"); + normalize_table.put("軽", "輕"); + normalize_table.put("輌", "輛"); + normalize_table.put("辞", "辭"); + normalize_table.put("辺", "邊"); + normalize_table.put("迩", "邇"); + normalize_table.put("逓", "遞"); + normalize_table.put("遅", "遲"); + normalize_table.put("遥", "遙"); + normalize_table.put("郎", "郞"); + normalize_table.put("郷", "鄕"); + normalize_table.put("鄉", "鄕"); + normalize_table.put("酔", "醉"); + normalize_table.put("醸", "釀"); + normalize_table.put("釈", "釋"); + normalize_table.put("鈎", "鉤"); + normalize_table.put("鈩", "鑪"); + normalize_table.put("鈬", "鐸"); + normalize_table.put("鉄", "鐵"); + normalize_table.put("鉱", "鑛"); + normalize_table.put("銭", "錢"); + normalize_table.put("鋭", "銳"); + normalize_table.put("鋳", "鑄"); + normalize_table.put("録", "錄"); + normalize_table.put("鎮", "鎭"); + normalize_table.put("鑚", "鑽"); + normalize_table.put("関", "關"); + normalize_table.put("閲", "閱"); + normalize_table.put("闘", "鬪"); + normalize_table.put("陥", "陷"); + normalize_table.put("険", "險"); + normalize_table.put("随", "隨"); + normalize_table.put("隠", "隱"); + normalize_table.put("隣", "鄰"); + normalize_table.put("隷", "隸"); + normalize_table.put("雑", "雜"); + normalize_table.put("霊", "靈"); + normalize_table.put("青", "靑"); + normalize_table.put("静", "靜"); + normalize_table.put("頚", "頸"); + normalize_table.put("頬", "頰"); + normalize_table.put("頼", "賴"); + normalize_table.put("顔", "顏"); + normalize_table.put("顕", "顯"); + normalize_table.put("顛", "顚"); + normalize_table.put("飲", "飮"); + normalize_table.put("餅", "餠"); + normalize_table.put("駄", "馱"); + normalize_table.put("駅", "驛"); + normalize_table.put("駆", "驅"); + normalize_table.put("騒", "騷"); + normalize_table.put("験", "驗"); + normalize_table.put("髄", "髓"); + normalize_table.put("髪", "髮"); + normalize_table.put("鯵", "鰺"); + normalize_table.put("鰛", "鰮"); + normalize_table.put("鴎", "鷗"); + normalize_table.put("鴬", "鶯"); + normalize_table.put("鶏", "鷄"); + normalize_table.put("鷏", "鷆"); + normalize_table.put("麁", "麤"); + normalize_table.put("麦", "麥"); + normalize_table.put("麸", "麩"); + normalize_table.put("麺", "麪"); + normalize_table.put("麽", "麼"); + normalize_table.put("黄", "黃"); + normalize_table.put("黒", "黑"); + normalize_table.put("黙", "默"); + normalize_table.put("鼡", "鼠"); + normalize_table.put("齢", "齡"); + normalize_table.put("朗", "朗"); + normalize_table.put("猪", "猪"); + + } + + static String normalize(String term) + { + for (Map.Entry entry : normalize_table.entrySet()) + { + term = term.replace(entry.getKey().charAt(0), entry.getValue() + .charAt(0)); + } + return term; + } + + static final class Store + { + private final Hashtable indices = new Hashtable(); + + public T getDatedData(URL url, ProgressMonitor monitor, + DataBuilder builder) throws CancelledByUser + { + String url_str = url.toString().intern(); + /* + * Although there is no support for concurrent access of Index + * objects, we need to synchronize at this point. This is required + * because multiple libraries which do not talk to each other could + * be using this code simultaneously. If library A access an Index + * object with URL U at the same time library B access its own Index + * object with URL U then, because the two URLs are the same, there + * is a risk of concurrent access here. + */ + synchronized (indices) + { + T ds = indices.get(url_str); + + if (ds == null) + { + ds = Util.loadData(url, monitor, builder); + indices.put(url_str, ds); + } + + return ds; + } + } + } + + + static T loadData(URL url, ProgressMonitor monitor, + DataBuilder builder) throws CancelledByUser + { + // The goal is as follow: + // - If there is no cached data, load the index from the web site. + // - If there is cached data but the index on the web site is newer, + // load the index from the web site. + // - If the web site is not responding, used the cached data. + // - If the web site is not responding and there is no cached data, + // return the empty set. + + CACHE_PATH.mkdirs(); + + long date = 0; + + HttpURLConnection conn = null; + + // First try to see whether it is cached. + File cache_path = null; + try + { + cache_path = new File(CACHE_PATH, url.toURI().toASCIIString() + .replaceAll("[:/]", "_")); + + } + catch (URISyntaxException e) + { + throw new Error("cannot convert URL to URI: " + url.toString(), e); + } + + DataInputStream in_file = null; + try + { + in_file = new DataInputStream(new GZIPInputStream( + new FileInputStream(cache_path))); + builder.loadPreamble(in_file); + if (builder.checkVersion()) + { + date = builder.getDate(); + + // Check whether the data on the web site is newer + boolean read_the_cache = true; + try + { + conn = (HttpURLConnection) url.openConnection(); + conn.setIfModifiedSince(date); + + conn.connect(); + + // If we get the response HTTP_NOT_MODIFIED we want to read + // the cache. + // If we get any response other than HTTP_OK we want to read + // the cache + // because those are likely errors of communication. + // If we get HTTP_OK, it means that the data on the web site + // is newer. + read_the_cache = (conn.getResponseCode() != HttpURLConnection.HTTP_OK); + logger.log(Level.FINE, "HTTP response: " + + conn.getResponseCode()); + } + catch (IOException e) + { + // The web site is not accessible... read the cache. + } + + if (read_the_cache) + { + logger.log(Level.FINE, "reading cache: " + + cache_path.toString()); + builder.readFromCache(in_file); + } + in_file.close(); + } + else + { + // Else, act as if there were no cache. + logger.log(Level.FINE, "older cache: " + cache_path); + } + } + catch (FileNotFoundException e) + { + // Ok, the cache does not exist. Just ignore. + } + catch (IOException e) + { + // Ok, there was a problem reading the cache. + // Flush and ignore. + } + + T ret = builder.getData(); + + if (ret != null) + { + // We got our data from the cache: no need to fetch from web or to + // save to cache: return now! + return ret; + } + + // Otherwise, fetch from web site. + + logger.log(Level.FINE, "fetching data from web site: " + url); + try + { + // It may happen that conn is null. + if (conn == null) + { + conn = (HttpURLConnection) url.openConnection(); + conn.connect(); + } + + // Fail silently if we get anything else than HTTP_OK. + if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) + { + if (monitor != null) + { + monitor.setMinimum(0); + monitor.setMaximum(conn.getContentLength()); + } + date = conn.getDate(); + InputStream raw_stream = (monitor == null) ? conn + .getInputStream() : new MonitoredInputStream(conn + .getInputStream(), monitor); + BufferedReader reader = new BufferedReader( + new InputStreamReader(new GZIPInputStream(raw_stream), + "UTF-8")); + try + { + builder.setDate(date); + builder.readFromWeb(reader, monitor); + } + finally + { + reader.close(); + if (monitor != null) + { + monitor.close(); + } + } + } + else + { + logger.log(Level.FINE, + "fetching data from web site failed with code: " + + conn.getResponseCode()); + } + } + catch (IOException e) + { + throw new Error(e); + } + + ret = builder.getData(); + + if (ret != null) + { + // Save the Set to the cache + DataOutputStream out_file = null; + try + { + out_file = new DataOutputStream(new GZIPOutputStream( + new FileOutputStream(cache_path))); + builder.writeToCache(out_file); + out_file.flush(); + out_file.close(); + } + catch (FileNotFoundException e) + { + // Ignore. It means there is some architectural issue which + // prevents + // us to cache. + // This is not a fatal problem. + } + catch (IOException e) + { + // Cleanup and otherwise ignore as above. + if (out_file != null) + { + try + { + out_file.close(); + } + catch (IOException e1) + { + // This is strange and would most likely be the result + // of a + // serious system problem. This is not the place to + // report + // serious system problems. + } + cache_path.delete(); + } + } + } + else + { + logger.log(Level.FINE, "unable to fetch from cache or web site: " + + url); + } + + return ret; + } + +} diff -Nru java-webdict-lib-0.3ubuntu3/logging.properties java-webdict-lib-0.4/logging.properties --- java-webdict-lib-0.3ubuntu3/logging.properties 2008-11-07 07:59:58.000000000 +0000 +++ java-webdict-lib-0.4/logging.properties 2010-07-14 15:01:00.000000000 +0000 @@ -1,3 +1,3 @@ handlers= java.util.logging.ConsoleHandler java.util.logging.ConsoleHandler.level=ALL -com.lddubeau.ddb.Index.level=FINEST +com.lddubeau.ddb.Util.level=FINEST diff -Nru java-webdict-lib-0.3ubuntu3/tests/fetch.lexicon.txt java-webdict-lib-0.4/tests/fetch.lexicon.txt --- java-webdict-lib-0.3ubuntu3/tests/fetch.lexicon.txt 1970-01-01 00:00:00.000000000 +0000 +++ java-webdict-lib-0.4/tests/fetch.lexicon.txt 2010-07-17 12:30:04.000000000 +0000 @@ -0,0 +1,3 @@ +@@@ com.lddubeau.ddb.Util loadData +FINE: fetching data from web site: http://www.acmuller.net/download/buddhdic.txt.gz +one diff -Nru java-webdict-lib-0.3ubuntu3/tests/fetch.txt java-webdict-lib-0.4/tests/fetch.txt --- java-webdict-lib-0.3ubuntu3/tests/fetch.txt 2008-11-07 08:04:44.000000000 +0000 +++ java-webdict-lib-0.4/tests/fetch.txt 2010-07-14 15:01:18.000000000 +0000 @@ -1,2 +1,2 @@ -@@@ com.lddubeau.ddb.Index loadIndex -FINE: fetching data from web sitehttp://www.acmuller.net/download/ddb-hdwd.txt.gz +@@@ com.lddubeau.ddb.Util loadData +FINE: fetching data from web site: http://www.acmuller.net/download/ddb-hdwd.txt.gz diff -Nru java-webdict-lib-0.3ubuntu3/tests/usecache.lexicon.txt java-webdict-lib-0.4/tests/usecache.lexicon.txt --- java-webdict-lib-0.3ubuntu3/tests/usecache.lexicon.txt 1970-01-01 00:00:00.000000000 +0000 +++ java-webdict-lib-0.4/tests/usecache.lexicon.txt 2010-07-17 12:29:55.000000000 +0000 @@ -0,0 +1,5 @@ +@@@ com.lddubeau.ddb.Util loadData +FINE: HTTP response: 304 +@@@ com.lddubeau.ddb.Util loadData +FINE: reading cache: tests/testcache/http___www.acmuller.net_download_buddhdic.txt.gz +Buddha diff -Nru java-webdict-lib-0.3ubuntu3/tests/usecache.txt java-webdict-lib-0.4/tests/usecache.txt --- java-webdict-lib-0.3ubuntu3/tests/usecache.txt 2008-11-07 08:08:27.000000000 +0000 +++ java-webdict-lib-0.4/tests/usecache.txt 2010-07-14 15:01:30.000000000 +0000 @@ -1,4 +1,4 @@ -@@@ com.lddubeau.ddb.Index loadIndex +@@@ com.lddubeau.ddb.Util loadData FINE: HTTP response: 304 -@@@ com.lddubeau.ddb.Index loadIndex +@@@ com.lddubeau.ddb.Util loadData FINE: reading cache: tests/testcache/http___www.acmuller.net_download_ddb-hdwd.txt.gz diff -Nru java-webdict-lib-0.3ubuntu3/test_webdict.py java-webdict-lib-0.4/test_webdict.py --- java-webdict-lib-0.3ubuntu3/test_webdict.py 2008-11-07 08:26:36.000000000 +0000 +++ java-webdict-lib-0.4/test_webdict.py 2010-07-15 21:16:05.000000000 +0000 @@ -1,16 +1,25 @@ +# coding=utf8 import pydunit import os import re -date_re = re.compile(r"^.*?( com.lddubeau.ddb.Index.*)", re.MULTILINE) +date_re = re.compile(r"^.*?( com.lddubeau.ddb.Util.*)", re.MULTILINE) def date_filter(str): return date_re.sub(r"@@@\1", str) -def test_load_from_website(): +def test_load_index_from_website(): os.system("rm -rf tests/testcache") pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.Index", "tests/testcache", "http://www.acmuller.net/download/ddb-hdwd.txt.gz"], file_expected="tests/fetch.txt", status_expected=0, filter=date_filter) -def test_load_from_cache(): +def test_load_index_from_cache(): pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.Index", "tests/testcache", "http://www.acmuller.net/download/ddb-hdwd.txt.gz"], file_expected="tests/usecache.txt", status_expected=0, filter=date_filter) os.system("rm -rf tests/testcache") + +def test_load_lexicon_from_website(): + os.system("rm -rf tests/testcache") + pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.BasicLexicon", "tests/testcache", "http://www.acmuller.net/download/buddhdic.txt.gz", u"一"], file_expected="tests/fetch.lexicon.txt", status_expected=0, filter=date_filter) + +def test_load_lexicon_from_cache(): + pydunit.execTest(["java", "-Djava.util.logging.config.file=./logging.properties", "-classpath", "jobj", "com.lddubeau.ddb.BasicLexicon", "tests/testcache", "http://www.acmuller.net/download/buddhdic.txt.gz", u"佛"], file_expected="tests/usecache.lexicon.txt", status_expected=0, filter=date_filter) + os.system("rm -rf tests/testcache") Binary files /tmp/n5Z48W8xYy/java-webdict-lib-0.3ubuntu3/test_webdict.pyc and /tmp/w3PTQYlB4I/java-webdict-lib-0.4/test_webdict.pyc differ